1
0
mirror of https://github.com/paboyle/Grid.git synced 2024-11-09 23:45:36 +00:00

updating benchmarks for red black 4d for Ls vectorised code

This commit is contained in:
paboyle 2016-07-14 23:44:02 +01:00
parent adbc7c1188
commit 9db2c6525d
2 changed files with 112 additions and 107 deletions

View File

@ -45,9 +45,9 @@ struct scal {
};
bool overlapComms = false;
typedef WilsonFermion5D<DomainWallRedBlack5dImplR> WilsonFermion5DR;
typedef WilsonFermion5D<DomainWallRedBlack5dImplF> WilsonFermion5DF;
typedef WilsonFermion5D<DomainWallRedBlack5dImplD> WilsonFermion5DD;
typedef WilsonFermion5D<DomainWallVec5dImplR> WilsonFermion5DR;
typedef WilsonFermion5D<DomainWallVec5dImplF> WilsonFermion5DF;
typedef WilsonFermion5D<DomainWallVec5dImplD> WilsonFermion5DD;
int main (int argc, char ** argv)
@ -70,8 +70,8 @@ int main (int argc, char ** argv)
std::cout << GridLogMessage << "Making s innermost grids"<<std::endl;
GridCartesian * sUGrid = SpaceTimeGrid::makeFourDimDWFGrid(GridDefaultLatt(),GridDefaultMpi());
GridRedBlackCartesian * sUrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(sUGrid);
GridCartesian * sFGrid = SpaceTimeGrid::makeFiveDimDWFGrid(Ls,UGrid);
std::cout << GridLogMessage << "Making s innermost rb grids"<<std::endl;
GridRedBlackCartesian * sFrbGrid = SpaceTimeGrid::makeFiveDimDWFRedBlackGrid(Ls,UGrid);
std::vector<int> seeds4({1,2,3,4});
@ -86,6 +86,16 @@ int main (int argc, char ** argv)
LatticeFermion tmp(FGrid);
LatticeFermion err(FGrid);
/* src=zero;
std::vector<int> origin(5,0);
SpinColourVector f=zero;
for(int sp=0;sp<4;sp++){
for(int co=0;co<3;co++){
f()(sp)(co)=Complex(1.0,0.0);
}}
pokeSite(f,src,origin);
*/
ColourMatrix cm = Complex(1.0,0.0);
LatticeGaugeField Umu(UGrid);
@ -126,19 +136,16 @@ int main (int argc, char ** argv)
RealD mass=0.1;
RealD M5 =1.8;
typename DomainWallFermionR::ImplParams params;
params.overlapCommsCompute = overlapComms;
RealD NP = UGrid->_Nprocessors;
for(int doasm=1;doasm<2;doasm++){
QCD::WilsonKernelsStatic::AsmOpt=doasm;
DomainWallFermionR Dw(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5,params);
DomainWallFermionR Dw(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5);
std::cout<<GridLogMessage << "Calling Dw"<<std::endl;
int ncall =10;
int ncall =100;
if (1) {
double t0=usecond();
@ -164,11 +171,12 @@ int main (int argc, char ** argv)
if (1)
{
typedef WilsonFermion5D<DomainWallRedBlack5dImplR> WilsonFermion5DR;
typedef WilsonFermion5D<DomainWallVec5dImplR> WilsonFermion5DR;
LatticeFermion ssrc(sFGrid);
LatticeFermion sref(sFGrid);
LatticeFermion sresult(sFGrid);
WilsonFermion5DR sDw(1,Umu,*sFGrid,*sFrbGrid,*sUGrid,M5,params);
WilsonFermion5DR sDw(Umu,*sFGrid,*sFrbGrid,*sUGrid,*sUrbGrid,M5);
for(int x=0;x<latt4[0];x++){
for(int y=0;y<latt4[1];y++){
@ -180,7 +188,7 @@ int main (int argc, char ** argv)
peekSite(tmp,src,site);
pokeSite(tmp,ssrc,site);
}}}}}
std::cout<<"src norms "<< norm2(src)<<" " <<norm2(ssrc)<<std::endl;
double t0=usecond();
for(int i=0;i<ncall;i++){
__SSC_START;
@ -207,6 +215,7 @@ int main (int argc, char ** argv)
}
}
std::cout<<"res norms "<< norm2(result)<<" " <<norm2(sresult)<<std::endl;
RealF sum=0;
@ -220,9 +229,11 @@ int main (int argc, char ** argv)
peekSite(normal,result,site);
peekSite(simd,sresult,site);
sum=sum+norm2(normal-simd);
// std::cout << "site "<<x<<","<<y<<","<<z<<","<<t<<","<<s<<" "<<norm2(normal-simd)<<std::endl;
// std::cout << "site "<<x<<","<<y<<","<<z<<","<<t<<","<<s<<" "<<normal<<std::endl;
// std::cout << "site "<<x<<","<<y<<","<<z<<","<<t<<","<<s<<" "<<simd<<std::endl;
if (norm2(normal-simd) > 1.0e-6 ) {
std::cout << "site "<<x<<","<<y<<","<<z<<","<<t<<","<<s<<" "<<norm2(normal-simd)<<std::endl;
std::cout << "site "<<x<<","<<y<<","<<z<<","<<t<<","<<s<<" normal "<<normal<<std::endl;
std::cout << "site "<<x<<","<<y<<","<<z<<","<<t<<","<<s<<" simd "<<simd<<std::endl;
}
}}}}}
std::cout<<" difference between normal and simd is "<<sum<<std::endl;
@ -267,9 +278,9 @@ int main (int argc, char ** argv)
pickCheckerboard(Even,ssrc_e,sresult);
pickCheckerboard(Odd ,ssrc_o,sresult);
ssrc_e = ssrc_e - sr_e;
std::cout<<GridLogMessage << "sE norm diff "<< norm2(ssrc_e)<<std::endl;
std::cout<<GridLogMessage << "sE norm diff "<< norm2(ssrc_e)<< " vec nrm"<<norm2(sr_e) <<std::endl;
ssrc_o = ssrc_o - sr_o;
std::cout<<GridLogMessage << "sO norm diff "<< norm2(ssrc_o)<<std::endl;
std::cout<<GridLogMessage << "sO norm diff "<< norm2(ssrc_o)<< " vec nrm"<<norm2(sr_o) <<std::endl;
}

View File

@ -125,7 +125,6 @@ void benchDw(std::vector<int> & latt4, int Ls, int threads,int report )
ColourMatrix cm = Complex(1.0,0.0);
LatticeGaugeField Umu5d(FGrid);
// replicate across fifth dimension
@ -144,11 +143,10 @@ void benchDw(std::vector<int> & latt4, int Ls, int threads,int report )
}
#ifdef CHECK
if (1)
{
if (1) {
ref = zero;
for(int mu=0;mu<Nd;mu++){
tmp = U[mu]*Cshift(src,mu+1,1);
ref=ref + tmp - Gamma(Gmu[mu])*tmp;
@ -192,20 +190,19 @@ void benchDw(std::vector<int> & latt4, int Ls, int threads,int report )
Counter.Report();
}
if ( ! report )
{
double volume=Ls; for(int mu=0;mu<Nd;mu++) volume=volume*latt4[mu];
double flops=1344*volume*ncall;
std::cout <<"\t"<<NP<< "\t"<<flops/(t1-t0)<< "\t";
}
if ( ! report ) {
double volume=Ls; for(int mu=0;mu<Nd;mu++) volume=volume*latt4[mu];
double flops=1344*volume*ncall;
std::cout <<"\t"<<NP<< "\t"<<flops/(t1-t0)<< "\t";
}
#ifdef CHECK
err = ref-result;
RealD errd = norm2(err);
if ( errd> 1.0e-4 ) {
std::cout<<GridLogMessage << "oops !!! norm diff "<< norm2(err)<<std::endl;
exit(-1);
}
err = ref-result;
RealD errd = norm2(err);
if ( errd> 1.0e-4 ) {
std::cout<<GridLogMessage << "oops !!! norm diff "<< norm2(err)<<std::endl;
exit(-1);
}
#endif
LatticeFermion src_e (FrbGrid);
@ -231,10 +228,9 @@ void benchDw(std::vector<int> & latt4, int Ls, int threads,int report )
std::cout<< flops/(t1-t0);
}
}
}
#undef CHECK_SDW
#define CHECK_SDW
void benchsDw(std::vector<int> & latt4, int Ls, int threads, int report )
{
@ -242,7 +238,9 @@ void benchsDw(std::vector<int> & latt4, int Ls, int threads, int report )
GridRedBlackCartesian * UrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid);
GridCartesian * FGrid = SpaceTimeGrid::makeFiveDimGrid(Ls,UGrid);
GridRedBlackCartesian * FrbGrid = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,UGrid);
GridCartesian * sUGrid = SpaceTimeGrid::makeFourDimDWFGrid(latt4,GridDefaultMpi());
GridRedBlackCartesian * sUrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(sUGrid);
GridCartesian * sFGrid = SpaceTimeGrid::makeFiveDimDWFGrid(Ls,UGrid);
GridRedBlackCartesian * sFrbGrid = SpaceTimeGrid::makeFiveDimDWFRedBlackGrid(Ls,UGrid);
@ -276,93 +274,89 @@ void benchsDw(std::vector<int> & latt4, int Ls, int threads, int report )
}
}
RealD mass=0.1;
RealD M5 =1.8;
typedef WilsonFermion5D<DomainWallRedBlack5dImplR> WilsonFermion5DR;
LatticeFermion ssrc(sFGrid);
LatticeFermion sref(sFGrid);
LatticeFermion sresult(sFGrid);
WilsonFermion5DR sDw(1,Umu,*sFGrid,*sFrbGrid,*sUGrid,M5);
typedef WilsonFermion5D<DomainWallVec5dImplR> WilsonFermion5DR;
LatticeFermion ssrc(sFGrid);
LatticeFermion sref(sFGrid);
LatticeFermion sresult(sFGrid);
WilsonFermion5DR sDw(Umu,*sFGrid,*sFrbGrid,*sUGrid,*sUrbGrid,M5);
for(int x=0;x<latt4[0];x++){
for(int y=0;y<latt4[1];y++){
for(int z=0;z<latt4[2];z++){
for(int t=0;t<latt4[3];t++){
for(int s=0;s<Ls;s++){
std::vector<int> site({s,x,y,z,t});
SpinColourVector tmp;
peekSite(tmp,src,site);
pokeSite(tmp,ssrc,site);
}}}}}
for(int x=0;x<latt4[0];x++){
for(int y=0;y<latt4[1];y++){
for(int z=0;z<latt4[2];z++){
for(int t=0;t<latt4[3];t++){
for(int s=0;s<Ls;s++){
std::vector<int> site({s,x,y,z,t});
SpinColourVector tmp;
peekSite(tmp,src,site);
pokeSite(tmp,ssrc,site);
}}}}}
double t0=usecond();
sDw.Dhop(ssrc,sresult,0);
double t1=usecond();
double t0=usecond();
sDw.Dhop(ssrc,sresult,0);
double t1=usecond();
#ifdef TIMERS_OFF
int ncall =10;
int ncall =10;
#else
int ncall =1+(int) ((5.0*1000*1000)/(t1-t0));
int ncall =1+(int) ((5.0*1000*1000)/(t1-t0));
#endif
PerformanceCounter Counter(8);
Counter.Start();
t0=usecond();
for(int i=0;i<ncall;i++){
sDw.Dhop(ssrc,sresult,0);
}
t1=usecond();
Counter.Stop();
PerformanceCounter Counter(8);
Counter.Start();
t0=usecond();
for(int i=0;i<ncall;i++){
sDw.Dhop(ssrc,sresult,0);
}
t1=usecond();
Counter.Stop();
if ( report ) {
Counter.Report();
} else {
double volume=Ls; for(int mu=0;mu<Nd;mu++) volume=volume*latt4[mu];
double flops=1344*volume*ncall;
std::cout<<"\t"<< flops/(t1-t0);
}
if ( report ) {
Counter.Report();
} else {
double volume=Ls; for(int mu=0;mu<Nd;mu++) volume=volume*latt4[mu];
double flops=1344*volume*ncall;
std::cout<<"\t"<< flops/(t1-t0);
}
LatticeFermion sr_eo(sFGrid);
LatticeFermion serr(sFGrid);
LatticeFermion ssrc_e (sFrbGrid);
LatticeFermion ssrc_o (sFrbGrid);
LatticeFermion sr_e (sFrbGrid);
LatticeFermion sr_o (sFrbGrid);
LatticeFermion sr_eo(sFGrid);
LatticeFermion serr(sFGrid);
LatticeFermion ssrc_e (sFrbGrid);
LatticeFermion ssrc_o (sFrbGrid);
LatticeFermion sr_e (sFrbGrid);
LatticeFermion sr_o (sFrbGrid);
pickCheckerboard(Even,ssrc_e,ssrc);
pickCheckerboard(Odd,ssrc_o,ssrc);
setCheckerboard(sr_eo,ssrc_o);
setCheckerboard(sr_eo,ssrc_e);
sr_e = zero;
sr_o = zero;
pickCheckerboard(Even,ssrc_e,ssrc);
pickCheckerboard(Odd,ssrc_o,ssrc);
setCheckerboard(sr_eo,ssrc_o);
setCheckerboard(sr_eo,ssrc_e);
sr_e = zero;
sr_o = zero;
sDw.DhopEO(ssrc_o,sr_e,DaggerNo);
PerformanceCounter CounterSdw(8);
CounterSdw.Start();
t0=usecond();
for(int i=0;i<ncall;i++){
__SSC_START;
sDw.DhopEO(ssrc_o,sr_e,DaggerNo);
PerformanceCounter CounterSdw(8);
CounterSdw.Start();
t0=usecond();
for(int i=0;i<ncall;i++){
__SSC_START;
sDw.DhopEO(ssrc_o,sr_e,DaggerNo);
__SSC_STOP;
}
t1=usecond();
CounterSdw.Stop();
__SSC_STOP;
}
t1=usecond();
CounterSdw.Stop();
if ( report ) {
CounterSdw.Report();
} else {
double volume=Ls; for(int mu=0;mu<Nd;mu++) volume=volume*latt4[mu];
double flops=(1344.0*volume*ncall)/2;
std::cout<<"\t"<< flops/(t1-t0);
}
if ( report ) {
CounterSdw.Report();
} else {
double volume=Ls; for(int mu=0;mu<Nd;mu++) volume=volume*latt4[mu];
double flops=(1344.0*volume*ncall)/2;
std::cout<<"\t"<< flops/(t1-t0);
}
}