Fix blas_connector.cpp warnings and math_kernel_op.cu warnings #5444

Merged · 3 commits · Nov 8, 2024
Changes from all commits
source/module_base/blas_connector.cpp (14 changes: 7 additions & 7 deletions)
@@ -69,17 +69,17 @@ float BlasConnector::dot( const int n, const float *X, const int incX, const flo
{
if (device_type == base_device::AbacusDevice_t::CpuDevice) {
return sdot_(&n, X, &incX, Y, &incY);
}
return sdot_(&n, X, &incX, Y, &incY);
}
}

double BlasConnector::dot( const int n, const double *X, const int incX, const double *Y, const int incY, base_device::AbacusDevice_t device_type)
{
if (device_type == base_device::AbacusDevice_t::CpuDevice) {
return ddot_(&n, X, &incX, Y, &incY);
}
return ddot_(&n, X, &incX, Y, &incY);
}
}

// C = a * A.? * B.? + b * C
void BlasConnector::gemm(const char transa, const char transb, const int m, const int n, const int k,
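For context, the blas_connector.cpp changes all target the same diagnostic: a non-void wrapper whose only return statement sits inside the CpuDevice branch lets control fall off the end of the function, which GCC and Clang report as "control reaches end of non-void function" (-Wreturn-type). Below is a minimal, self-contained sketch of the before/after pattern; Device and cpu_dot are stand-ins for base_device::AbacusDevice_t and sdot_, not the real BlasConnector API.

#include <cstddef>

enum class Device { Cpu, Gpu };   // stand-in for base_device::AbacusDevice_t

// Stand-in for the Fortran BLAS symbol sdot_ used in the real file.
static float cpu_dot(std::size_t n, const float* x, const float* y) {
    float s = 0.0f;
    for (std::size_t i = 0; i < n; ++i) { s += x[i] * y[i]; }
    return s;
}

// Before: the only return is inside the if, so the compiler warns that
// control can reach the end of a non-void function.
float dot_before(std::size_t n, const float* x, const float* y, Device dev) {
    if (dev == Device::Cpu) {
        return cpu_dot(n, x, y);
    }
}   // warning: control reaches end of non-void function [-Wreturn-type]

// After: an unconditional fallback return, as in the hunk above, guarantees
// that every control path returns a value and silences the warning.
float dot_after(std::size_t n, const float* x, const float* y, Device dev) {
    if (dev == Device::Cpu) {
        return cpu_dot(n, x, y);
    }
    return cpu_dot(n, x, y);
}

The trade-off of this fix is that an unsupported device_type silently falls back to the CPU routine instead of failing; an assert or an explicit error would be the stricter alternative.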
@@ -196,39 +196,39 @@ float BlasConnector::nrm2( const int n, const float *X, const int incX, base_dev
{
if (device_type == base_device::AbacusDevice_t::CpuDevice) {
return snrm2_( &n, X, &incX );
}
return snrm2_( &n, X, &incX );
}
}


double BlasConnector::nrm2( const int n, const double *X, const int incX, base_device::AbacusDevice_t device_type )
{
if (device_type == base_device::AbacusDevice_t::CpuDevice) {
return dnrm2_( &n, X, &incX );
}
return dnrm2_( &n, X, &incX );
}
}


double BlasConnector::nrm2( const int n, const std::complex<double> *X, const int incX, base_device::AbacusDevice_t device_type )
{
if (device_type == base_device::AbacusDevice_t::CpuDevice) {
return dznrm2_( &n, X, &incX );
}
return dznrm2_( &n, X, &incX );
}
}

// copies a into b
void BlasConnector::copy(const long n, const double *a, const int incx, double *b, const int incy, base_device::AbacusDevice_t device_type)
{
if (device_type == base_device::AbacusDevice_t::CpuDevice) {
dcopy_(&n, a, &incx, b, &incy);
}
}
}

void BlasConnector::copy(const long n, const std::complex<double> *a, const int incx, std::complex<double> *b, const int incy, base_device::AbacusDevice_t device_type)
{
if (device_type == base_device::AbacusDevice_t::CpuDevice) {
zcopy_(&n, a, &incx, b, &incy);
}
}
}
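The nrm2 overloads in this hunk get the same fallback-return treatment, while the void copy wrappers need no extra return. As a quick orientation for how these wrappers are called, here is an illustrative CPU-only usage sketch; it assumes the members are static and that the header lives at module_base/blas_connector.h, neither of which is confirmed by this diff.

#include <vector>
#include "module_base/blas_connector.h"   // assumed header path

int main() {
    const int n = 4;
    std::vector<double> x = {1.0, 2.0, 3.0, 4.0};
    std::vector<double> y = {4.0, 3.0, 2.0, 1.0};
    std::vector<double> z(n, 0.0);

    const auto cpu = base_device::AbacusDevice_t::CpuDevice;

    // Assumes static member functions callable as shown.
    double d   = BlasConnector::dot(n, x.data(), 1, y.data(), 1, cpu);          // 20.0
    double len = BlasConnector::nrm2(n, x.data(), 1, cpu);                       // sqrt(30)
    BlasConnector::copy(static_cast<long>(n), x.data(), 1, z.data(), 1, cpu);    // z == x

    return (d == 20.0 && len > 5.47 && z[3] == 4.0) ? 0 : 1;
}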
source/module_hsolver/kernels/cuda/math_kernel_op.cu (12 changes: 6 additions & 6 deletions)
@@ -12,7 +12,7 @@
namespace hsolver
{
const int warp_size = 32;
-const unsigned int full_mask = 0xffffffff;
+// const unsigned int full_mask = 0xffffffff;
const int thread_per_block = 256;
}
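The full_mask constant is commented out rather than deleted: with warp_reduce (apparently its only remaining user in this file, commented out below) gone, the constant becomes unused and the compiler flags it, e.g. as -Wunused-const-variable under Clang. A small sketch of the two common remedies, with commenting-out being the one taken here and [[maybe_unused]] an alternative this PR does not use:

namespace demo {   // hypothetical namespace, mirroring the constants above

// Option taken in this diff: comment the unused definition out entirely.
// const unsigned int full_mask = 0xffffffff;

// Alternative not used here: keep the definition and mark the lack of uses
// as intentional, which suppresses the unused-variable diagnostic (C++17).
[[maybe_unused]] const unsigned int full_mask = 0xffffffff;

}  // namespace demo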

@@ -65,11 +65,11 @@ void destoryBLAShandle(){
}
}

-template <typename FPTYPE>
-__forceinline__ __device__ void warp_reduce(FPTYPE& val) {
-    for (int offset = 16; offset > 0; offset >>= 1)
-        val += __shfl_down_sync(full_mask, val, offset);
-}
+// template <typename FPTYPE>
+// __forceinline__ __device__ void warp_reduce(FPTYPE& val) {
+//     for (int offset = 16; offset > 0; offset >>= 1)
+//         val += __shfl_down_sync(full_mask, val, offset);
+// }

template <typename Real>
__global__ void line_minimize_with_block(