mirror of
https://github.com/paboyle/Grid.git
synced 2025-04-25 13:15:55 +01:00
Confirmed double precision working
This commit is contained in:
parent
c55d657736
commit
7e70df27e4
@ -39,7 +39,7 @@ Author: Guido Cossu
|
|||||||
#undef USE_LAPACK
|
#undef USE_LAPACK
|
||||||
#define Glog std::cout << GridLogMessage
|
#define Glog std::cout << GridLogMessage
|
||||||
|
|
||||||
#ifdef GRID_NVCC
|
#ifdef GRID_CUDA
|
||||||
#include "cublas_v2.h"
|
#include "cublas_v2.h"
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
@ -132,7 +132,7 @@ private:
|
|||||||
/////////////////////////
|
/////////////////////////
|
||||||
// BLAS objects
|
// BLAS objects
|
||||||
/////////////////////////
|
/////////////////////////
|
||||||
#ifdef GRID_NVCC
|
#ifdef GRID_CUDA
|
||||||
cudaError_t cudaStat;
|
cudaError_t cudaStat;
|
||||||
cuDoubleComplex *w_acc, *evec_acc, *c_acc;
|
cuDoubleComplex *w_acc, *evec_acc, *c_acc;
|
||||||
#endif
|
#endif
|
||||||
@ -236,7 +236,7 @@ public:
|
|||||||
|
|
||||||
void orthogonalize_blas(std::vector<Field>& w, std::vector<Field>& evec, int R, int do_print=0)
|
void orthogonalize_blas(std::vector<Field>& w, std::vector<Field>& evec, int R, int do_print=0)
|
||||||
{
|
{
|
||||||
#ifdef GRID_NVCC
|
#ifdef GRID_CUDA
|
||||||
Glog << "cuBLAS orthogonalize" << std::endl;
|
Glog << "cuBLAS orthogonalize" << std::endl;
|
||||||
|
|
||||||
typedef typename Field::vector_object vobj;
|
typedef typename Field::vector_object vobj;
|
||||||
@ -262,8 +262,10 @@ public:
|
|||||||
}
|
}
|
||||||
#else
|
#else
|
||||||
for (int col=0; col<Nu; ++col) {
|
for (int col=0; col<Nu; ++col) {
|
||||||
auto w_v = w[col].View();
|
// auto w_v = w[col].View();
|
||||||
|
autoView( w_v,w[col], AcceleratorWrite);
|
||||||
cuDoubleComplex *z = reinterpret_cast<cuDoubleComplex*>(&w_v[0]);
|
cuDoubleComplex *z = reinterpret_cast<cuDoubleComplex*>(&w_v[0]);
|
||||||
|
// Glog << "col= "<<col <<" z= "<<z <<std::endl;
|
||||||
for (size_t row=0; row<sites*12; ++row) {
|
for (size_t row=0; row<sites*12; ++row) {
|
||||||
w_acc[col*sites*12+row] = z[row];
|
w_acc[col*sites*12+row] = z[row];
|
||||||
}
|
}
|
||||||
@ -286,8 +288,10 @@ public:
|
|||||||
}
|
}
|
||||||
#else
|
#else
|
||||||
for (int col=0; col<Nevec_acc; ++col) {
|
for (int col=0; col<Nevec_acc; ++col) {
|
||||||
auto evec_v = evec[b*Nevec_acc+col].View();
|
// auto evec_v = evec[b*Nevec_acc+col].View();
|
||||||
|
autoView( evec_v,evec[b*Nevec_acc+col], AcceleratorWrite);
|
||||||
cuDoubleComplex *z = reinterpret_cast<cuDoubleComplex*>(&evec_v[0]);
|
cuDoubleComplex *z = reinterpret_cast<cuDoubleComplex*>(&evec_v[0]);
|
||||||
|
// Glog << "col= "<<col <<" z= "<<z <<std::endl;
|
||||||
for (size_t row=0; row<sites*12; ++row) {
|
for (size_t row=0; row<sites*12; ++row) {
|
||||||
evec_acc[col*sites*12+row] = z[row];
|
evec_acc[col*sites*12+row] = z[row];
|
||||||
}
|
}
|
||||||
@ -328,7 +332,8 @@ public:
|
|||||||
}
|
}
|
||||||
#if 1
|
#if 1
|
||||||
for (int col=0; col<Nu; ++col) {
|
for (int col=0; col<Nu; ++col) {
|
||||||
auto w_v = w[col].View();
|
// auto w_v = w[col].View();
|
||||||
|
autoView( w_v,w[col], AcceleratorWrite);
|
||||||
cuDoubleComplex *z = reinterpret_cast<cuDoubleComplex*>(&w_v[0]);
|
cuDoubleComplex *z = reinterpret_cast<cuDoubleComplex*>(&w_v[0]);
|
||||||
for (size_t row=0; row<sites*12; ++row) {
|
for (size_t row=0; row<sites*12; ++row) {
|
||||||
z[row] = w_acc[col*sites*12+row];
|
z[row] = w_acc[col*sites*12+row];
|
||||||
@ -418,20 +423,23 @@ for( int i =0;i<total;i++){
|
|||||||
std::vector<Field>& evec,
|
std::vector<Field>& evec,
|
||||||
const std::vector<Field>& src, int& Nconv, LanczosType Impl)
|
const std::vector<Field>& src, int& Nconv, LanczosType Impl)
|
||||||
{
|
{
|
||||||
#ifdef GRID_NVCC
|
#ifdef GRID_CUDA
|
||||||
GridBase *grid = src[0].Grid();
|
GridBase *grid = src[0].Grid();
|
||||||
grid->show_decomposition();
|
grid->show_decomposition();
|
||||||
|
|
||||||
|
printf("GRID_CUDA\n");
|
||||||
|
|
||||||
// set eigenvector buffers for the cuBLAS calls
|
// set eigenvector buffers for the cuBLAS calls
|
||||||
//const uint64_t nsimd = grid->Nsimd();
|
//const uint64_t nsimd = grid->Nsimd();
|
||||||
const uint64_t sites = grid->lSites();
|
const uint64_t sites = grid->lSites();
|
||||||
|
|
||||||
cudaStat = cudaMallocManaged((void **)&w_acc, Nu*sites*12*sizeof(cuDoubleComplex));
|
cudaStat = cudaMallocManaged((void **)&w_acc, Nu*sites*12*sizeof(cuDoubleComplex));
|
||||||
//Glog << cudaStat << std::endl;
|
Glog << "w_acc= "<<w_acc << " "<< cudaStat << std::endl;
|
||||||
cudaStat = cudaMallocManaged((void **)&evec_acc, Nevec_acc*sites*12*sizeof(cuDoubleComplex));
|
cudaStat = cudaMallocManaged((void **)&evec_acc, Nevec_acc*sites*12*sizeof(cuDoubleComplex));
|
||||||
//Glog << cudaStat << std::endl;
|
Glog << "evec_acc= "<<evec_acc << " "<< cudaStat << std::endl;
|
||||||
cudaStat = cudaMallocManaged((void **)&c_acc, Nu*Nevec_acc*sizeof(cuDoubleComplex));
|
cudaStat = cudaMallocManaged((void **)&c_acc, Nu*Nevec_acc*sizeof(cuDoubleComplex));
|
||||||
//Glog << cudaStat << std::endl;
|
Glog << "c_acc= "<<c_acc << " "<< cudaStat << std::endl;
|
||||||
|
// exit(-42);
|
||||||
#endif
|
#endif
|
||||||
switch (Impl) {
|
switch (Impl) {
|
||||||
case LanczosType::irbl:
|
case LanczosType::irbl:
|
||||||
@ -442,7 +450,7 @@ for( int i =0;i<total;i++){
|
|||||||
calc_rbl(eval,evec,src,Nconv);
|
calc_rbl(eval,evec,src,Nconv);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
#ifdef GRID_NVCC
|
#ifdef GRID_CUDA
|
||||||
// free eigenvector buffers for the cuBLAS calls
|
// free eigenvector buffers for the cuBLAS calls
|
||||||
cudaFree(w_acc);
|
cudaFree(w_acc);
|
||||||
cudaFree(evec_acc);
|
cudaFree(evec_acc);
|
||||||
@ -943,11 +951,8 @@ if(!split_test){
|
|||||||
Glog << "Gram Schmidt"<< std::endl;
|
Glog << "Gram Schmidt"<< std::endl;
|
||||||
// re-orthogonalization for numerical stability
|
// re-orthogonalization for numerical stability
|
||||||
#if 0
|
#if 0
|
||||||
for (int u=0; u<Nu; ++u) {
|
orthogonalize(w,Nu,evec,R);
|
||||||
orthogonalize(w[u],evec,R);
|
|
||||||
}
|
|
||||||
#else
|
#else
|
||||||
//orthogonalize(w,Nu,evec,R);
|
|
||||||
orthogonalize_blas(w,evec,R);
|
orthogonalize_blas(w,evec,R);
|
||||||
#endif
|
#endif
|
||||||
// QR part
|
// QR part
|
||||||
|
Loading…
x
Reference in New Issue
Block a user