1
0
mirror of https://github.com/paboyle/Grid.git synced 2025-06-12 20:27:06 +01:00

Merge pull request #210 from grid-test-organisation/feature/gpu-port-develop

Cayley fermion functions for GPUs
This commit is contained in:
Peter Boyle
2019-05-18 19:06:20 +01:00
committed by GitHub
25 changed files with 516 additions and 242 deletions

View File

@ -76,13 +76,20 @@ int main (int argc, char ** argv)
std::cout << GridLogMessage<< "* Benchmarking DomainWallFermionR::Dhop "<<std::endl;
std::cout << GridLogMessage<< "*********************************************************" <<std::endl;
GridParallelRNG RNG5(FGrid);
GridParallelRNG RNG5(FGrid); RNG5.SeedFixedIntegers(seeds5);
LatticeFermion src(FGrid); random(RNG5,src);
LatticeFermion result(FGrid);
DomainWallFermionR Dw(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5);
double t0,t1;
typedef typename DomainWallFermionR::Coeff_t Coeff_t;
Vector<Coeff_t> diag = Dw.bs;
Vector<Coeff_t> upper= Dw.cs;
Vector<Coeff_t> lower= Dw.cs;
upper[Ls-1]=-Dw.mass*upper[Ls-1];
lower[0] =-Dw.mass*lower[0];
LatticeFermion r_eo(FGrid);
LatticeFermion src_e (FrbGrid);
LatticeFermion src_o (FrbGrid);
@ -99,13 +106,13 @@ int main (int argc, char ** argv)
r_o = Zero();
#define BENCH_DW(A,in,out) \
Dw.CayleyZeroCounters(); \
Dw. A (in,out); \
#define BENCH_DW(A,...) \
Dw. A (__VA_ARGS__); \
FGrid->Barrier(); \
Dw.CayleyZeroCounters(); \
t0=usecond(); \
for(int i=0;i<ncall;i++){ \
Dw. A (in,out); \
Dw. A (__VA_ARGS__); \
} \
t1=usecond(); \
FGrid->Barrier(); \
@ -114,9 +121,9 @@ int main (int argc, char ** argv)
std::cout<<GridLogMessage << "******************"<<std::endl;
#define BENCH_ZDW(A,in,out) \
zDw.CayleyZeroCounters(); \
zDw. A (in,out); \
FGrid->Barrier(); \
zDw.CayleyZeroCounters(); \
t0=usecond(); \
for(int i=0;i<ncall;i++){ \
zDw. A (in,out); \
@ -128,9 +135,9 @@ int main (int argc, char ** argv)
std::cout<<GridLogMessage << "******************"<<std::endl;
#define BENCH_DW_SSC(A,in,out) \
Dw.CayleyZeroCounters(); \
Dw. A (in,out); \
FGrid->Barrier(); \
Dw.CayleyZeroCounters(); \
t0=usecond(); \
for(int i=0;i<ncall;i++){ \
__SSC_START ; \
@ -143,23 +150,10 @@ int main (int argc, char ** argv)
std::cout<<GridLogMessage << "Called " #A " "<< (t1-t0)/ncall<<" us"<<std::endl;\
std::cout<<GridLogMessage << "******************"<<std::endl;
#define BENCH_DW_MEO(A,in,out) \
Dw.CayleyZeroCounters(); \
Dw. A (in,out,0); \
FGrid->Barrier(); \
t0=usecond(); \
for(int i=0;i<ncall;i++){ \
Dw. A (in,out,0); \
} \
t1=usecond(); \
FGrid->Barrier(); \
Dw.CayleyReport(); \
std::cout<<GridLogMessage << "Called " #A " "<< (t1-t0)/ncall<<" us"<<std::endl;\
std::cout<<GridLogMessage << "******************"<<std::endl;
BENCH_DW_MEO(Dhop ,src,result);
BENCH_DW_MEO(DhopEO ,src_o,r_e);
BENCH_DW(Dhop ,src,result,0);
BENCH_DW(DhopEO ,src_o,r_e,0);
BENCH_DW(Meooe ,src_o,r_e);
BENCH_DW(M5D ,src_o,src_o,r_e,lower,diag,upper);
BENCH_DW(Mooee ,src_o,r_o);
BENCH_DW(MooeeInv,src_o,r_o);
@ -173,7 +167,7 @@ int main (int argc, char ** argv)
std::cout << GridLogMessage<< "* Benchmarking DomainWallFermionVec5dR::Dhop "<<std::endl;
std::cout << GridLogMessage<< "*********************************************************" <<std::endl;
GridParallelRNG RNG5(sFGrid);
GridParallelRNG RNG5(sFGrid); RNG5.SeedFixedIntegers(seeds5);
LatticeFermion src(sFGrid); random(RNG5,src);
LatticeFermion sref(sFGrid);
LatticeFermion result(sFGrid);
@ -184,7 +178,7 @@ int main (int argc, char ** argv)
RealD b=1.5;// Scale factor b+c=2, b-c=1
RealD c=0.5;
std::vector<ComplexD> gamma(Ls,std::complex<double>(1.0,0.0));
Vector<ComplexD> gamma(Ls,std::complex<double>(1.0,0.0));
ZMobiusFermionVec5dR zDw(Umu,*sFGrid,*sFrbGrid,*sUGrid,*sUrbGrid,mass,M5,gamma,b,c);
std::cout<<GridLogMessage << "Calling Dhop "<<std::endl;
@ -207,8 +201,8 @@ int main (int argc, char ** argv)
r_e = Zero();
r_o = Zero();
BENCH_DW_MEO(Dhop ,src,result);
BENCH_DW_MEO(DhopEO ,src_o,r_e);
BENCH_DW(Dhop ,src,result,0);
BENCH_DW(DhopEO ,src_o,r_e,0);
BENCH_DW_SSC(Meooe ,src_o,r_e);
BENCH_DW(Mooee ,src_o,r_o);
BENCH_DW(MooeeInv,src_o,r_o);