mirror of
https://github.com/paboyle/Grid.git
synced 2025-04-04 19:25:56 +01:00
Minor cleanup
This commit is contained in:
parent
82c1ecf60f
commit
dc6a38f177
@ -7,9 +7,8 @@
|
|||||||
Copyright (C) 2015
|
Copyright (C) 2015
|
||||||
|
|
||||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||||
Author: Chulwoo Jung
|
|
||||||
Author: Yong-Chull Jang <ypj@quark.phy.bnl.gov>
|
Author: Yong-Chull Jang <ypj@quark.phy.bnl.gov>
|
||||||
Author: Guido Cossu
|
Author: Chulwoo Jung <chulwoo@bnl.gov>
|
||||||
|
|
||||||
This program is free software; you can redistribute it and/or modify
|
This program is free software; you can redistribute it and/or modify
|
||||||
it under the terms of the GNU General Public License as published by
|
it under the terms of the GNU General Public License as published by
|
||||||
@ -185,10 +184,6 @@ public:
|
|||||||
{
|
{
|
||||||
RealD nn = norm2(v);
|
RealD nn = norm2(v);
|
||||||
nn = sqrt(nn);
|
nn = sqrt(nn);
|
||||||
#if 0
|
|
||||||
if(if_print && nn < 1e20)
|
|
||||||
Glog<<"normalize: "<<nn<<std::endl;
|
|
||||||
#endif
|
|
||||||
v = v * (1.0/nn);
|
v = v * (1.0/nn);
|
||||||
return nn;
|
return nn;
|
||||||
}
|
}
|
||||||
@ -227,19 +222,7 @@ public:
|
|||||||
for(int j=0; j<k; ++j){
|
for(int j=0; j<k; ++j){
|
||||||
for(int i=0; i<_Nu; ++i){
|
for(int i=0; i<_Nu; ++i){
|
||||||
ip = innerProduct(evec[j],w[i]);
|
ip = innerProduct(evec[j],w[i]);
|
||||||
#if 0
|
|
||||||
if(if_print)
|
|
||||||
if( norm(ip)/norm2(w[i]) > 1e-14)
|
|
||||||
Glog<<"orthogonalize before: "<<i<<" "<<j<<" of "<<k<<" "<< ip <<std::endl;
|
|
||||||
#endif
|
|
||||||
w[i] = w[i] - ip * evec[j];
|
w[i] = w[i] - ip * evec[j];
|
||||||
#if 0
|
|
||||||
if(if_print) {
|
|
||||||
ip = innerProduct(evec[j],w[i]);
|
|
||||||
if( norm(ip)/norm2(w[i]) > 1e-14)
|
|
||||||
Glog<<"orthogonalize after: "<<i<<" "<<j<<" of "<<k<<" "<< ip <<std::endl;
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
}}
|
}}
|
||||||
for(int i=0; i<_Nu; ++i)
|
for(int i=0; i<_Nu; ++i)
|
||||||
assert(normalize(w[i],if_print) !=0);
|
assert(normalize(w[i],if_print) !=0);
|
||||||
@ -265,14 +248,6 @@ public:
|
|||||||
// Glog << "nBatch, Nevec_acc, R, Nu = "
|
// Glog << "nBatch, Nevec_acc, R, Nu = "
|
||||||
// << Nbatch << "," << Nevec_acc << "," << R << "," << Nu << std::endl;
|
// << Nbatch << "," << Nevec_acc << "," << R << "," << Nu << std::endl;
|
||||||
|
|
||||||
#if 0 // a trivial test
|
|
||||||
for (int col=0; col<Nu; ++col) {
|
|
||||||
for (size_t row=0; row<sites*12; ++row) {
|
|
||||||
w_acc[col*sites*12+row].x = 1.0;
|
|
||||||
w_acc[col*sites*12+row].y = 0.0;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
#else
|
|
||||||
for (int col=0; col<Nu; ++col) {
|
for (int col=0; col<Nu; ++col) {
|
||||||
// auto w_v = w[col].View();
|
// auto w_v = w[col].View();
|
||||||
autoView( w_v,w[col], AcceleratorWrite);
|
autoView( w_v,w[col], AcceleratorWrite);
|
||||||
@ -282,7 +257,6 @@ public:
|
|||||||
w_acc[col*sites*12+row] = z[row];
|
w_acc[col*sites*12+row] = z[row];
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
#endif
|
|
||||||
cublasHandle_t handle;
|
cublasHandle_t handle;
|
||||||
cublasStatus_t stat;
|
cublasStatus_t stat;
|
||||||
|
|
||||||
@ -291,14 +265,6 @@ public:
|
|||||||
Glog << "cuBLAS Zgemm"<< std::endl;
|
Glog << "cuBLAS Zgemm"<< std::endl;
|
||||||
|
|
||||||
for (int b=0; b<Nbatch; ++b) {
|
for (int b=0; b<Nbatch; ++b) {
|
||||||
#if 0 // a trivial test
|
|
||||||
for (int col=0; col<Nevec_acc; ++col) {
|
|
||||||
for (size_t row=0; row<sites*12; ++row) {
|
|
||||||
evec_acc[col*sites*12+row].x = 1.0;
|
|
||||||
evec_acc[col*sites*12+row].y = 0.0;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
#else
|
|
||||||
for (int col=0; col<Nevec_acc; ++col) {
|
for (int col=0; col<Nevec_acc; ++col) {
|
||||||
// auto evec_v = evec[b*Nevec_acc+col].View();
|
// auto evec_v = evec[b*Nevec_acc+col].View();
|
||||||
autoView( evec_v,evec[b*Nevec_acc+col], AcceleratorWrite);
|
autoView( evec_v,evec[b*Nevec_acc+col], AcceleratorWrite);
|
||||||
@ -308,7 +274,6 @@ public:
|
|||||||
evec_acc[col*sites*12+row] = z[row];
|
evec_acc[col*sites*12+row] = z[row];
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
#endif
|
|
||||||
CUDA_COMPLEX alpha = MAKE_CUDA_COMPLEX(1.0,0.0);
|
CUDA_COMPLEX alpha = MAKE_CUDA_COMPLEX(1.0,0.0);
|
||||||
CUDA_COMPLEX beta = MAKE_CUDA_COMPLEX(0.0,0.0);
|
CUDA_COMPLEX beta = MAKE_CUDA_COMPLEX(0.0,0.0);
|
||||||
stat = CUDA_GEMM(handle, CUBLAS_OP_C, CUBLAS_OP_N, Nevec_acc, Nu, 12*sites,
|
stat = CUDA_GEMM(handle, CUBLAS_OP_C, CUBLAS_OP_N, Nevec_acc, Nu, 12*sites,
|
||||||
@ -319,19 +284,6 @@ public:
|
|||||||
//Glog << stat << std::endl;
|
//Glog << stat << std::endl;
|
||||||
|
|
||||||
grid->GlobalSumVector((CUDA_FLOAT*)c_acc,2*Nu*Nevec_acc);
|
grid->GlobalSumVector((CUDA_FLOAT*)c_acc,2*Nu*Nevec_acc);
|
||||||
#if 0
|
|
||||||
for (int i=0; i<Nu; ++i) {
|
|
||||||
for (size_t j=0; j<Nevec_acc; ++j) {
|
|
||||||
CUDA_COMPLEX z = c_acc[i*Nevec_acc+j];
|
|
||||||
MyComplex ip(z.x,z.y);
|
|
||||||
if (do_print) {
|
|
||||||
Glog << "<evec,w>[" << j << "," << i << "] = "
|
|
||||||
<< z.x << " + i " << z.y << std::endl;
|
|
||||||
}
|
|
||||||
w[i] = w[i] - ip * evec[b*Nevec_acc+j];
|
|
||||||
}
|
|
||||||
}
|
|
||||||
#else
|
|
||||||
alpha = MAKE_CUDA_COMPLEX(-1.0,0.0);
|
alpha = MAKE_CUDA_COMPLEX(-1.0,0.0);
|
||||||
beta = MAKE_CUDA_COMPLEX(1.0,0.0);
|
beta = MAKE_CUDA_COMPLEX(1.0,0.0);
|
||||||
stat = CUDA_GEMM(handle, CUBLAS_OP_N, CUBLAS_OP_N, 12*sites, Nu, Nevec_acc,
|
stat = CUDA_GEMM(handle, CUBLAS_OP_N, CUBLAS_OP_N, 12*sites, Nu, Nevec_acc,
|
||||||
@ -340,9 +292,7 @@ public:
|
|||||||
&beta,
|
&beta,
|
||||||
w_acc, 12*sites);
|
w_acc, 12*sites);
|
||||||
//Glog << stat << std::endl;
|
//Glog << stat << std::endl;
|
||||||
#endif
|
|
||||||
}
|
}
|
||||||
#if 1
|
|
||||||
for (int col=0; col<Nu; ++col) {
|
for (int col=0; col<Nu; ++col) {
|
||||||
// auto w_v = w[col].View();
|
// auto w_v = w[col].View();
|
||||||
autoView( w_v,w[col], AcceleratorWrite);
|
autoView( w_v,w[col], AcceleratorWrite);
|
||||||
@ -351,7 +301,6 @@ public:
|
|||||||
z[row] = w_acc[col*sites*12+row];
|
z[row] = w_acc[col*sites*12+row];
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
#endif
|
|
||||||
for (int i=0; i<Nu; ++i) {
|
for (int i=0; i<Nu; ++i) {
|
||||||
assert(normalize(w[i],do_print)!=0);
|
assert(normalize(w[i],do_print)!=0);
|
||||||
}
|
}
|
||||||
@ -368,56 +317,6 @@ public:
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
#if 0
|
|
||||||
void innerProductD (std::vector<ComplexD> &inner, std::vector<Field>& lhs, int llhs, std::vector<Field>& rhs, int lrhs)
|
|
||||||
{
|
|
||||||
typedef typename Field:vector_object vobj;
|
|
||||||
typedef typename vobj::scalar_type scalar_type;
|
|
||||||
typedef typename vobj::vector_typeD vector_type;
|
|
||||||
GridBase *grid = lhs[0]._grid;
|
|
||||||
assert(grid == rhs[0]._grid;
|
|
||||||
const int pad = 8;
|
|
||||||
int total = llhs*lrhs;
|
|
||||||
assert(inner.size()==total);
|
|
||||||
int sum_size=grid->SumArraySize();
|
|
||||||
|
|
||||||
// std::vector<ComplexD> inner(total);
|
|
||||||
Vector<ComplexD> sumarray(sum_size*pad*total);
|
|
||||||
|
|
||||||
parallel_for(int thr=0;thr<sum_size;thr++){
|
|
||||||
int nwork, mywork, myoff;
|
|
||||||
GridThread::GetWork(grid->oSites(),thr,mywork,myoff);
|
|
||||||
|
|
||||||
std::vector< decltype(innerProductD(lhs[0]._odata[0],rhs[0]._odata[0])) > vinner(total,zero); // private to thread; sub summation
|
|
||||||
for(int ss=myoff;ss<mywork+myoff; ss++){
|
|
||||||
for(int i=0; i<llhs; ++i){
|
|
||||||
for(int j=0; j<lrhs; ++j){
|
|
||||||
vinner[i*k+j] += innerProductD(lhs[i]._odata[ss],rhs[j]._odata[ss]);
|
|
||||||
}}
|
|
||||||
}
|
|
||||||
// All threads sum across SIMD; reduce serial work at end
|
|
||||||
// one write per cacheline with streaming store
|
|
||||||
for(int i=0; i<total; ++i){
|
|
||||||
ComplexD tmp = Reduce(TensorRemove(vinner[i])) ;
|
|
||||||
vstream(sumarray[(i*sum_size+thr)*pad],tmp);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
for( int i =0;i<total;i++){
|
|
||||||
inner[i]=0.0;
|
|
||||||
for(int j=0;j<sum_size;j++){
|
|
||||||
inner[i] += sumarray[(i*sum_size+j)*pad];
|
|
||||||
}
|
|
||||||
}
|
|
||||||
for( int i =0;i<total;i++){
|
|
||||||
ComplexD tmp=inner[i];
|
|
||||||
evec[0]._grid->GlobalSum(tmp);
|
|
||||||
inner[i]=tmp;
|
|
||||||
}
|
|
||||||
// return inner;
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
|
|
||||||
|
|
||||||
void orthogonalize_blockhead(Field& w, std::vector<Field>& evec, int k, int Nu)
|
void orthogonalize_blockhead(Field& w, std::vector<Field>& evec, int k, int Nu)
|
||||||
{
|
{
|
||||||
@ -839,14 +738,6 @@ cudaStat = cudaMallocManaged((void **)&evec_acc, Nevec_acc*sites*12*sizeof(CUDA_
|
|||||||
Glog << fname + " CONVERGED ; Summary :\n";
|
Glog << fname + " CONVERGED ; Summary :\n";
|
||||||
// Sort convered eigenpairs.
|
// Sort convered eigenpairs.
|
||||||
std::vector<Field> Btmp(Nstop,grid); // waste of space replicating
|
std::vector<Field> Btmp(Nstop,grid); // waste of space replicating
|
||||||
#if 0
|
|
||||||
for(int i=0; i<Nconv; ++i) Btmp[i]=0;
|
|
||||||
for(int i=0; i<Nconv; ++i)
|
|
||||||
for(int k = 0; k<Nr; ++k){
|
|
||||||
Btmp[i].Checkerboard() = evec[k].Checkerboard();
|
|
||||||
Btmp[i] += evec[k]*Qt(k,i);
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
|
|
||||||
for(int i=0; i<Nstop; ++i){
|
for(int i=0; i<Nstop; ++i){
|
||||||
Btmp[i]=0.;
|
Btmp[i]=0.;
|
||||||
@ -1021,25 +912,6 @@ if(split_test){
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
Glog << "LinAlg done "<< std::endl;
|
Glog << "LinAlg done "<< std::endl;
|
||||||
#if 0
|
|
||||||
Glog << "Gram Schmidt "<< std::endl;
|
|
||||||
// re-orthogonalization for numerical stability
|
|
||||||
if (b>0) {
|
|
||||||
for (int u=0; u<Nu; ++u) {
|
|
||||||
orthogonalize(w[u],evec,R);
|
|
||||||
}
|
|
||||||
for (int u=1; u<Nu; ++u) {
|
|
||||||
orthogonalize(w[u],w,u);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
//if (b>0) {
|
|
||||||
// orthogonalize_blockhead(w[0],evec,b,Nu);
|
|
||||||
// for (int u=1; u<Nu; ++u) {
|
|
||||||
// orthogonalize(w[u],w,u);
|
|
||||||
// }
|
|
||||||
//}
|
|
||||||
Glog << "Gram Schmidt done "<< std::endl;
|
|
||||||
#endif
|
|
||||||
|
|
||||||
if (b < Nm/Nu-1) {
|
if (b < Nm/Nu-1) {
|
||||||
for (int u=0; u<Nu; ++u) {
|
for (int u=0; u<Nu; ++u) {
|
||||||
@ -1118,7 +990,6 @@ if(split_test){
|
|||||||
}
|
}
|
||||||
//std::cout << BlockTriDiag << std::endl;
|
//std::cout << BlockTriDiag << std::endl;
|
||||||
//#ifdef USE_LAPACK
|
//#ifdef USE_LAPACK
|
||||||
#if 1
|
|
||||||
const int size = Nm;
|
const int size = Nm;
|
||||||
MKL_INT NN = Nk;
|
MKL_INT NN = Nk;
|
||||||
// double evals_tmp[NN];
|
// double evals_tmp[NN];
|
||||||
@ -1189,20 +1060,6 @@ if(split_test){
|
|||||||
grid->GlobalSumVector((double*)evec_tmp,2*NN*NN);
|
grid->GlobalSumVector((double*)evec_tmp,2*NN*NN);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
// Safer to sort instead of just reversing it,
|
|
||||||
// but the document of the routine says evals are sorted in increasing order.
|
|
||||||
// qr gives evals in decreasing order.
|
|
||||||
// for(int i=0;i<NN;i++){
|
|
||||||
// lmd [NN-1-i]=evals_tmp[i];
|
|
||||||
// for(int j=0;j<NN;j++){
|
|
||||||
// Qt((NN-1-i),j)=evec_tmp[i][j];
|
|
||||||
// }
|
|
||||||
// }
|
|
||||||
|
|
||||||
// MKL_Complex16 *eval_tmp = malloc(NN*sizeof(MKL_Complex16));
|
|
||||||
// MKL_Complex16 *evec_tmp = malloc(NN*NN*sizeof(MKL_Complex16));
|
|
||||||
// MKL_Complex16 *DD = malloc(NN*NN*sizeof(MKL_Complex16));
|
|
||||||
#endif
|
|
||||||
for (int i = 0; i < Nk; i++)
|
for (int i = 0; i < Nk; i++)
|
||||||
eval[Nk-1-i] = evals_tmp[i];
|
eval[Nk-1-i] = evals_tmp[i];
|
||||||
for (int i = 0; i < Nk; i++) {
|
for (int i = 0; i < Nk; i++) {
|
||||||
|
@ -4,9 +4,11 @@
|
|||||||
|
|
||||||
Source file: ./tests/Test_dwf_block_lanczos.cc
|
Source file: ./tests/Test_dwf_block_lanczos.cc
|
||||||
|
|
||||||
Copyright (C) 2015
|
Copyright (C) 2022
|
||||||
|
|
||||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||||
|
Author: Yong-Chull Jang <ypj@quark.phy.bnl.gov>
|
||||||
|
Author: Chulwoo Jung <chulwoo@bnl.gov>
|
||||||
|
|
||||||
This program is free software; you can redistribute it and/or modify
|
This program is free software; you can redistribute it and/or modify
|
||||||
it under the terms of the GNU General Public License as published by
|
it under the terms of the GNU General Public License as published by
|
||||||
|
Loading…
x
Reference in New Issue
Block a user