1
0
mirror of https://github.com/paboyle/Grid.git synced 2025-04-24 04:35:55 +01:00

Confirmed double precision working

This commit is contained in:
Chulwoo Jung 2020-07-21 00:48:46 -04:00
parent c55d657736
commit 7e70df27e4

View File

@ -39,7 +39,7 @@ Author: Guido Cossu
#undef USE_LAPACK #undef USE_LAPACK
#define Glog std::cout << GridLogMessage #define Glog std::cout << GridLogMessage
#ifdef GRID_NVCC #ifdef GRID_CUDA
#include "cublas_v2.h" #include "cublas_v2.h"
#endif #endif
@ -132,7 +132,7 @@ private:
///////////////////////// /////////////////////////
// BLAS objects // BLAS objects
///////////////////////// /////////////////////////
#ifdef GRID_NVCC #ifdef GRID_CUDA
cudaError_t cudaStat; cudaError_t cudaStat;
cuDoubleComplex *w_acc, *evec_acc, *c_acc; cuDoubleComplex *w_acc, *evec_acc, *c_acc;
#endif #endif
@ -236,7 +236,7 @@ public:
void orthogonalize_blas(std::vector<Field>& w, std::vector<Field>& evec, int R, int do_print=0) void orthogonalize_blas(std::vector<Field>& w, std::vector<Field>& evec, int R, int do_print=0)
{ {
#ifdef GRID_NVCC #ifdef GRID_CUDA
Glog << "cuBLAS orthogonalize" << std::endl; Glog << "cuBLAS orthogonalize" << std::endl;
typedef typename Field::vector_object vobj; typedef typename Field::vector_object vobj;
@ -262,8 +262,10 @@ public:
} }
#else #else
for (int col=0; col<Nu; ++col) { for (int col=0; col<Nu; ++col) {
auto w_v = w[col].View(); // auto w_v = w[col].View();
autoView( w_v,w[col], AcceleratorWrite);
cuDoubleComplex *z = reinterpret_cast<cuDoubleComplex*>(&w_v[0]); cuDoubleComplex *z = reinterpret_cast<cuDoubleComplex*>(&w_v[0]);
// Glog << "col= "<<col <<" z= "<<z <<std::endl;
for (size_t row=0; row<sites*12; ++row) { for (size_t row=0; row<sites*12; ++row) {
w_acc[col*sites*12+row] = z[row]; w_acc[col*sites*12+row] = z[row];
} }
@ -286,8 +288,10 @@ public:
} }
#else #else
for (int col=0; col<Nevec_acc; ++col) { for (int col=0; col<Nevec_acc; ++col) {
auto evec_v = evec[b*Nevec_acc+col].View(); // auto evec_v = evec[b*Nevec_acc+col].View();
autoView( evec_v,evec[b*Nevec_acc+col], AcceleratorWrite);
cuDoubleComplex *z = reinterpret_cast<cuDoubleComplex*>(&evec_v[0]); cuDoubleComplex *z = reinterpret_cast<cuDoubleComplex*>(&evec_v[0]);
// Glog << "col= "<<col <<" z= "<<z <<std::endl;
for (size_t row=0; row<sites*12; ++row) { for (size_t row=0; row<sites*12; ++row) {
evec_acc[col*sites*12+row] = z[row]; evec_acc[col*sites*12+row] = z[row];
} }
@ -328,7 +332,8 @@ public:
} }
#if 1 #if 1
for (int col=0; col<Nu; ++col) { for (int col=0; col<Nu; ++col) {
auto w_v = w[col].View(); // auto w_v = w[col].View();
autoView( w_v,w[col], AcceleratorWrite);
cuDoubleComplex *z = reinterpret_cast<cuDoubleComplex*>(&w_v[0]); cuDoubleComplex *z = reinterpret_cast<cuDoubleComplex*>(&w_v[0]);
for (size_t row=0; row<sites*12; ++row) { for (size_t row=0; row<sites*12; ++row) {
z[row] = w_acc[col*sites*12+row]; z[row] = w_acc[col*sites*12+row];
@ -418,20 +423,23 @@ for( int i =0;i<total;i++){
std::vector<Field>& evec, std::vector<Field>& evec,
const std::vector<Field>& src, int& Nconv, LanczosType Impl) const std::vector<Field>& src, int& Nconv, LanczosType Impl)
{ {
#ifdef GRID_NVCC #ifdef GRID_CUDA
GridBase *grid = src[0].Grid(); GridBase *grid = src[0].Grid();
grid->show_decomposition(); grid->show_decomposition();
printf("GRID_CUDA\n");
// set eigenvector buffers for the cuBLAS calls // set eigenvector buffers for the cuBLAS calls
//const uint64_t nsimd = grid->Nsimd(); //const uint64_t nsimd = grid->Nsimd();
const uint64_t sites = grid->lSites(); const uint64_t sites = grid->lSites();
cudaStat = cudaMallocManaged((void **)&w_acc, Nu*sites*12*sizeof(cuDoubleComplex)); cudaStat = cudaMallocManaged((void **)&w_acc, Nu*sites*12*sizeof(cuDoubleComplex));
//Glog << cudaStat << std::endl; Glog << "w_acc= "<<w_acc << " "<< cudaStat << std::endl;
cudaStat = cudaMallocManaged((void **)&evec_acc, Nevec_acc*sites*12*sizeof(cuDoubleComplex)); cudaStat = cudaMallocManaged((void **)&evec_acc, Nevec_acc*sites*12*sizeof(cuDoubleComplex));
//Glog << cudaStat << std::endl; Glog << "evec_acc= "<<evec_acc << " "<< cudaStat << std::endl;
cudaStat = cudaMallocManaged((void **)&c_acc, Nu*Nevec_acc*sizeof(cuDoubleComplex)); cudaStat = cudaMallocManaged((void **)&c_acc, Nu*Nevec_acc*sizeof(cuDoubleComplex));
//Glog << cudaStat << std::endl; Glog << "c_acc= "<<c_acc << " "<< cudaStat << std::endl;
// exit(-42);
#endif #endif
switch (Impl) { switch (Impl) {
case LanczosType::irbl: case LanczosType::irbl:
@ -442,7 +450,7 @@ for( int i =0;i<total;i++){
calc_rbl(eval,evec,src,Nconv); calc_rbl(eval,evec,src,Nconv);
break; break;
} }
#ifdef GRID_NVCC #ifdef GRID_CUDA
// free eigenvector buffers for the cuBLAS calls // free eigenvector buffers for the cuBLAS calls
cudaFree(w_acc); cudaFree(w_acc);
cudaFree(evec_acc); cudaFree(evec_acc);
@ -943,11 +951,8 @@ if(!split_test){
Glog << "Gram Schmidt"<< std::endl; Glog << "Gram Schmidt"<< std::endl;
// re-orthogonalization for numerical stability // re-orthogonalization for numerical stability
#if 0 #if 0
for (int u=0; u<Nu; ++u) { orthogonalize(w,Nu,evec,R);
orthogonalize(w[u],evec,R);
}
#else #else
//orthogonalize(w,Nu,evec,R);
orthogonalize_blas(w,evec,R); orthogonalize_blas(w,evec,R);
#endif #endif
// QR part // QR part