diff --git a/Grid/algorithms/iterative/ImplicitlyRestartedBlockLanczos.h b/Grid/algorithms/iterative/ImplicitlyRestartedBlockLanczos.h index c52d9cd9..7a02c7b4 100644 --- a/Grid/algorithms/iterative/ImplicitlyRestartedBlockLanczos.h +++ b/Grid/algorithms/iterative/ImplicitlyRestartedBlockLanczos.h @@ -43,6 +43,18 @@ Author: Guido Cossu #include "cublas_v2.h" #endif +#if 0 +#define CUDA_COMPLEX cuDoubleComplex +#define CUDA_FLOAT double +#define MAKE_CUDA_COMPLEX make_cuDoubleComplex +#define CUDA_GEMM cublasZgemm +#else +#define CUDA_COMPLEX cuComplex +#define CUDA_FLOAT float +#define MAKE_CUDA_COMPLEX make_cuComplex +#define CUDA_GEMM cublasCgemm +#endif + namespace Grid { //////////////////////////////////////////////////////////////////////////////// @@ -134,7 +146,7 @@ private: ///////////////////////// #ifdef GRID_CUDA cudaError_t cudaStat; - cuDoubleComplex *w_acc, *evec_acc, *c_acc; + CUDA_COMPLEX *w_acc, *evec_acc, *c_acc; #endif int Nevec_acc; // Number of eigenvectors stored in the buffer evec_acc @@ -264,7 +276,7 @@ public: for (int col=0; col(&w_v[0]); + CUDA_COMPLEX *z = reinterpret_cast(&w_v[0]); // Glog << "col= "<[" << j << "," << i << "] = " @@ -320,9 +332,9 @@ public: } } #else - alpha = make_cuDoubleComplex(-1.0,0.0); - beta = make_cuDoubleComplex(1.0,0.0); - stat = cublasZgemm(handle, CUBLAS_OP_N, CUBLAS_OP_N, 12*sites, Nu, Nevec_acc, + alpha = MAKE_CUDA_COMPLEX(-1.0,0.0); + beta = MAKE_CUDA_COMPLEX(1.0,0.0); + stat = CUDA_GEMM(handle, CUBLAS_OP_N, CUBLAS_OP_N, 12*sites, Nu, Nevec_acc, &alpha, evec_acc, 12*sites, c_acc, Nevec_acc, &beta, @@ -334,7 +346,7 @@ public: for (int col=0; col(&w_v[0]); + CUDA_COMPLEX *z = reinterpret_cast(&w_v[0]); for (size_t row=0; rowNsimd(); const uint64_t sites = grid->lSites(); - cudaStat = cudaMallocManaged((void **)&w_acc, Nu*sites*12*sizeof(cuDoubleComplex)); + cudaStat = cudaMallocManaged((void **)&w_acc, Nu*sites*12*sizeof(CUDA_COMPLEX)); Glog << "w_acc= "<