mirror of
https://github.com/paboyle/Grid.git
synced 2025-04-11 22:50:45 +01:00
updating benchmarks for red black 4d for Ls vectorised code
This commit is contained in:
parent
adbc7c1188
commit
9db2c6525d
@ -45,9 +45,9 @@ struct scal {
|
|||||||
};
|
};
|
||||||
|
|
||||||
bool overlapComms = false;
|
bool overlapComms = false;
|
||||||
typedef WilsonFermion5D<DomainWallRedBlack5dImplR> WilsonFermion5DR;
|
typedef WilsonFermion5D<DomainWallVec5dImplR> WilsonFermion5DR;
|
||||||
typedef WilsonFermion5D<DomainWallRedBlack5dImplF> WilsonFermion5DF;
|
typedef WilsonFermion5D<DomainWallVec5dImplF> WilsonFermion5DF;
|
||||||
typedef WilsonFermion5D<DomainWallRedBlack5dImplD> WilsonFermion5DD;
|
typedef WilsonFermion5D<DomainWallVec5dImplD> WilsonFermion5DD;
|
||||||
|
|
||||||
|
|
||||||
int main (int argc, char ** argv)
|
int main (int argc, char ** argv)
|
||||||
@ -70,8 +70,8 @@ int main (int argc, char ** argv)
|
|||||||
|
|
||||||
std::cout << GridLogMessage << "Making s innermost grids"<<std::endl;
|
std::cout << GridLogMessage << "Making s innermost grids"<<std::endl;
|
||||||
GridCartesian * sUGrid = SpaceTimeGrid::makeFourDimDWFGrid(GridDefaultLatt(),GridDefaultMpi());
|
GridCartesian * sUGrid = SpaceTimeGrid::makeFourDimDWFGrid(GridDefaultLatt(),GridDefaultMpi());
|
||||||
|
GridRedBlackCartesian * sUrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(sUGrid);
|
||||||
GridCartesian * sFGrid = SpaceTimeGrid::makeFiveDimDWFGrid(Ls,UGrid);
|
GridCartesian * sFGrid = SpaceTimeGrid::makeFiveDimDWFGrid(Ls,UGrid);
|
||||||
std::cout << GridLogMessage << "Making s innermost rb grids"<<std::endl;
|
|
||||||
GridRedBlackCartesian * sFrbGrid = SpaceTimeGrid::makeFiveDimDWFRedBlackGrid(Ls,UGrid);
|
GridRedBlackCartesian * sFrbGrid = SpaceTimeGrid::makeFiveDimDWFRedBlackGrid(Ls,UGrid);
|
||||||
|
|
||||||
std::vector<int> seeds4({1,2,3,4});
|
std::vector<int> seeds4({1,2,3,4});
|
||||||
@ -86,6 +86,16 @@ int main (int argc, char ** argv)
|
|||||||
LatticeFermion tmp(FGrid);
|
LatticeFermion tmp(FGrid);
|
||||||
LatticeFermion err(FGrid);
|
LatticeFermion err(FGrid);
|
||||||
|
|
||||||
|
/* src=zero;
|
||||||
|
std::vector<int> origin(5,0);
|
||||||
|
SpinColourVector f=zero;
|
||||||
|
for(int sp=0;sp<4;sp++){
|
||||||
|
for(int co=0;co<3;co++){
|
||||||
|
f()(sp)(co)=Complex(1.0,0.0);
|
||||||
|
}}
|
||||||
|
pokeSite(f,src,origin);
|
||||||
|
*/
|
||||||
|
|
||||||
ColourMatrix cm = Complex(1.0,0.0);
|
ColourMatrix cm = Complex(1.0,0.0);
|
||||||
|
|
||||||
LatticeGaugeField Umu(UGrid);
|
LatticeGaugeField Umu(UGrid);
|
||||||
@ -126,19 +136,16 @@ int main (int argc, char ** argv)
|
|||||||
RealD mass=0.1;
|
RealD mass=0.1;
|
||||||
RealD M5 =1.8;
|
RealD M5 =1.8;
|
||||||
|
|
||||||
typename DomainWallFermionR::ImplParams params;
|
|
||||||
params.overlapCommsCompute = overlapComms;
|
|
||||||
|
|
||||||
RealD NP = UGrid->_Nprocessors;
|
RealD NP = UGrid->_Nprocessors;
|
||||||
|
|
||||||
for(int doasm=1;doasm<2;doasm++){
|
for(int doasm=1;doasm<2;doasm++){
|
||||||
|
|
||||||
QCD::WilsonKernelsStatic::AsmOpt=doasm;
|
QCD::WilsonKernelsStatic::AsmOpt=doasm;
|
||||||
|
|
||||||
DomainWallFermionR Dw(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5,params);
|
DomainWallFermionR Dw(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5);
|
||||||
|
|
||||||
std::cout<<GridLogMessage << "Calling Dw"<<std::endl;
|
std::cout<<GridLogMessage << "Calling Dw"<<std::endl;
|
||||||
int ncall =10;
|
int ncall =100;
|
||||||
if (1) {
|
if (1) {
|
||||||
|
|
||||||
double t0=usecond();
|
double t0=usecond();
|
||||||
@ -164,11 +171,12 @@ int main (int argc, char ** argv)
|
|||||||
|
|
||||||
if (1)
|
if (1)
|
||||||
{
|
{
|
||||||
typedef WilsonFermion5D<DomainWallRedBlack5dImplR> WilsonFermion5DR;
|
typedef WilsonFermion5D<DomainWallVec5dImplR> WilsonFermion5DR;
|
||||||
LatticeFermion ssrc(sFGrid);
|
LatticeFermion ssrc(sFGrid);
|
||||||
LatticeFermion sref(sFGrid);
|
LatticeFermion sref(sFGrid);
|
||||||
LatticeFermion sresult(sFGrid);
|
LatticeFermion sresult(sFGrid);
|
||||||
WilsonFermion5DR sDw(1,Umu,*sFGrid,*sFrbGrid,*sUGrid,M5,params);
|
|
||||||
|
WilsonFermion5DR sDw(Umu,*sFGrid,*sFrbGrid,*sUGrid,*sUrbGrid,M5);
|
||||||
|
|
||||||
for(int x=0;x<latt4[0];x++){
|
for(int x=0;x<latt4[0];x++){
|
||||||
for(int y=0;y<latt4[1];y++){
|
for(int y=0;y<latt4[1];y++){
|
||||||
@ -180,7 +188,7 @@ int main (int argc, char ** argv)
|
|||||||
peekSite(tmp,src,site);
|
peekSite(tmp,src,site);
|
||||||
pokeSite(tmp,ssrc,site);
|
pokeSite(tmp,ssrc,site);
|
||||||
}}}}}
|
}}}}}
|
||||||
|
std::cout<<"src norms "<< norm2(src)<<" " <<norm2(ssrc)<<std::endl;
|
||||||
double t0=usecond();
|
double t0=usecond();
|
||||||
for(int i=0;i<ncall;i++){
|
for(int i=0;i<ncall;i++){
|
||||||
__SSC_START;
|
__SSC_START;
|
||||||
@ -207,6 +215,7 @@ int main (int argc, char ** argv)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
std::cout<<"res norms "<< norm2(result)<<" " <<norm2(sresult)<<std::endl;
|
||||||
|
|
||||||
|
|
||||||
RealF sum=0;
|
RealF sum=0;
|
||||||
@ -220,9 +229,11 @@ int main (int argc, char ** argv)
|
|||||||
peekSite(normal,result,site);
|
peekSite(normal,result,site);
|
||||||
peekSite(simd,sresult,site);
|
peekSite(simd,sresult,site);
|
||||||
sum=sum+norm2(normal-simd);
|
sum=sum+norm2(normal-simd);
|
||||||
// std::cout << "site "<<x<<","<<y<<","<<z<<","<<t<<","<<s<<" "<<norm2(normal-simd)<<std::endl;
|
if (norm2(normal-simd) > 1.0e-6 ) {
|
||||||
// std::cout << "site "<<x<<","<<y<<","<<z<<","<<t<<","<<s<<" "<<normal<<std::endl;
|
std::cout << "site "<<x<<","<<y<<","<<z<<","<<t<<","<<s<<" "<<norm2(normal-simd)<<std::endl;
|
||||||
// std::cout << "site "<<x<<","<<y<<","<<z<<","<<t<<","<<s<<" "<<simd<<std::endl;
|
std::cout << "site "<<x<<","<<y<<","<<z<<","<<t<<","<<s<<" normal "<<normal<<std::endl;
|
||||||
|
std::cout << "site "<<x<<","<<y<<","<<z<<","<<t<<","<<s<<" simd "<<simd<<std::endl;
|
||||||
|
}
|
||||||
}}}}}
|
}}}}}
|
||||||
std::cout<<" difference between normal and simd is "<<sum<<std::endl;
|
std::cout<<" difference between normal and simd is "<<sum<<std::endl;
|
||||||
|
|
||||||
@ -267,9 +278,9 @@ int main (int argc, char ** argv)
|
|||||||
pickCheckerboard(Even,ssrc_e,sresult);
|
pickCheckerboard(Even,ssrc_e,sresult);
|
||||||
pickCheckerboard(Odd ,ssrc_o,sresult);
|
pickCheckerboard(Odd ,ssrc_o,sresult);
|
||||||
ssrc_e = ssrc_e - sr_e;
|
ssrc_e = ssrc_e - sr_e;
|
||||||
std::cout<<GridLogMessage << "sE norm diff "<< norm2(ssrc_e)<<std::endl;
|
std::cout<<GridLogMessage << "sE norm diff "<< norm2(ssrc_e)<< " vec nrm"<<norm2(sr_e) <<std::endl;
|
||||||
ssrc_o = ssrc_o - sr_o;
|
ssrc_o = ssrc_o - sr_o;
|
||||||
std::cout<<GridLogMessage << "sO norm diff "<< norm2(ssrc_o)<<std::endl;
|
std::cout<<GridLogMessage << "sO norm diff "<< norm2(ssrc_o)<< " vec nrm"<<norm2(sr_o) <<std::endl;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@ -125,7 +125,6 @@ void benchDw(std::vector<int> & latt4, int Ls, int threads,int report )
|
|||||||
|
|
||||||
ColourMatrix cm = Complex(1.0,0.0);
|
ColourMatrix cm = Complex(1.0,0.0);
|
||||||
|
|
||||||
|
|
||||||
LatticeGaugeField Umu5d(FGrid);
|
LatticeGaugeField Umu5d(FGrid);
|
||||||
|
|
||||||
// replicate across fifth dimension
|
// replicate across fifth dimension
|
||||||
@ -144,11 +143,10 @@ void benchDw(std::vector<int> & latt4, int Ls, int threads,int report )
|
|||||||
}
|
}
|
||||||
|
|
||||||
#ifdef CHECK
|
#ifdef CHECK
|
||||||
if (1)
|
if (1) {
|
||||||
{
|
|
||||||
ref = zero;
|
ref = zero;
|
||||||
for(int mu=0;mu<Nd;mu++){
|
for(int mu=0;mu<Nd;mu++){
|
||||||
|
|
||||||
tmp = U[mu]*Cshift(src,mu+1,1);
|
tmp = U[mu]*Cshift(src,mu+1,1);
|
||||||
ref=ref + tmp - Gamma(Gmu[mu])*tmp;
|
ref=ref + tmp - Gamma(Gmu[mu])*tmp;
|
||||||
|
|
||||||
@ -192,20 +190,19 @@ void benchDw(std::vector<int> & latt4, int Ls, int threads,int report )
|
|||||||
Counter.Report();
|
Counter.Report();
|
||||||
}
|
}
|
||||||
|
|
||||||
if ( ! report )
|
if ( ! report ) {
|
||||||
{
|
double volume=Ls; for(int mu=0;mu<Nd;mu++) volume=volume*latt4[mu];
|
||||||
double volume=Ls; for(int mu=0;mu<Nd;mu++) volume=volume*latt4[mu];
|
double flops=1344*volume*ncall;
|
||||||
double flops=1344*volume*ncall;
|
std::cout <<"\t"<<NP<< "\t"<<flops/(t1-t0)<< "\t";
|
||||||
std::cout <<"\t"<<NP<< "\t"<<flops/(t1-t0)<< "\t";
|
}
|
||||||
}
|
|
||||||
|
|
||||||
#ifdef CHECK
|
#ifdef CHECK
|
||||||
err = ref-result;
|
err = ref-result;
|
||||||
RealD errd = norm2(err);
|
RealD errd = norm2(err);
|
||||||
if ( errd> 1.0e-4 ) {
|
if ( errd> 1.0e-4 ) {
|
||||||
std::cout<<GridLogMessage << "oops !!! norm diff "<< norm2(err)<<std::endl;
|
std::cout<<GridLogMessage << "oops !!! norm diff "<< norm2(err)<<std::endl;
|
||||||
exit(-1);
|
exit(-1);
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
LatticeFermion src_e (FrbGrid);
|
LatticeFermion src_e (FrbGrid);
|
||||||
@ -231,10 +228,9 @@ void benchDw(std::vector<int> & latt4, int Ls, int threads,int report )
|
|||||||
std::cout<< flops/(t1-t0);
|
std::cout<< flops/(t1-t0);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
#undef CHECK_SDW
|
#define CHECK_SDW
|
||||||
void benchsDw(std::vector<int> & latt4, int Ls, int threads, int report )
|
void benchsDw(std::vector<int> & latt4, int Ls, int threads, int report )
|
||||||
{
|
{
|
||||||
|
|
||||||
@ -242,7 +238,9 @@ void benchsDw(std::vector<int> & latt4, int Ls, int threads, int report )
|
|||||||
GridRedBlackCartesian * UrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid);
|
GridRedBlackCartesian * UrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid);
|
||||||
GridCartesian * FGrid = SpaceTimeGrid::makeFiveDimGrid(Ls,UGrid);
|
GridCartesian * FGrid = SpaceTimeGrid::makeFiveDimGrid(Ls,UGrid);
|
||||||
GridRedBlackCartesian * FrbGrid = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,UGrid);
|
GridRedBlackCartesian * FrbGrid = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,UGrid);
|
||||||
|
|
||||||
GridCartesian * sUGrid = SpaceTimeGrid::makeFourDimDWFGrid(latt4,GridDefaultMpi());
|
GridCartesian * sUGrid = SpaceTimeGrid::makeFourDimDWFGrid(latt4,GridDefaultMpi());
|
||||||
|
GridRedBlackCartesian * sUrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(sUGrid);
|
||||||
GridCartesian * sFGrid = SpaceTimeGrid::makeFiveDimDWFGrid(Ls,UGrid);
|
GridCartesian * sFGrid = SpaceTimeGrid::makeFiveDimDWFGrid(Ls,UGrid);
|
||||||
GridRedBlackCartesian * sFrbGrid = SpaceTimeGrid::makeFiveDimDWFRedBlackGrid(Ls,UGrid);
|
GridRedBlackCartesian * sFrbGrid = SpaceTimeGrid::makeFiveDimDWFRedBlackGrid(Ls,UGrid);
|
||||||
|
|
||||||
@ -276,93 +274,89 @@ void benchsDw(std::vector<int> & latt4, int Ls, int threads, int report )
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
RealD mass=0.1;
|
RealD mass=0.1;
|
||||||
RealD M5 =1.8;
|
RealD M5 =1.8;
|
||||||
|
|
||||||
typedef WilsonFermion5D<DomainWallRedBlack5dImplR> WilsonFermion5DR;
|
typedef WilsonFermion5D<DomainWallVec5dImplR> WilsonFermion5DR;
|
||||||
LatticeFermion ssrc(sFGrid);
|
LatticeFermion ssrc(sFGrid);
|
||||||
LatticeFermion sref(sFGrid);
|
LatticeFermion sref(sFGrid);
|
||||||
LatticeFermion sresult(sFGrid);
|
LatticeFermion sresult(sFGrid);
|
||||||
WilsonFermion5DR sDw(1,Umu,*sFGrid,*sFrbGrid,*sUGrid,M5);
|
WilsonFermion5DR sDw(Umu,*sFGrid,*sFrbGrid,*sUGrid,*sUrbGrid,M5);
|
||||||
|
|
||||||
for(int x=0;x<latt4[0];x++){
|
for(int x=0;x<latt4[0];x++){
|
||||||
for(int y=0;y<latt4[1];y++){
|
for(int y=0;y<latt4[1];y++){
|
||||||
for(int z=0;z<latt4[2];z++){
|
for(int z=0;z<latt4[2];z++){
|
||||||
for(int t=0;t<latt4[3];t++){
|
for(int t=0;t<latt4[3];t++){
|
||||||
for(int s=0;s<Ls;s++){
|
for(int s=0;s<Ls;s++){
|
||||||
std::vector<int> site({s,x,y,z,t});
|
std::vector<int> site({s,x,y,z,t});
|
||||||
SpinColourVector tmp;
|
SpinColourVector tmp;
|
||||||
peekSite(tmp,src,site);
|
peekSite(tmp,src,site);
|
||||||
pokeSite(tmp,ssrc,site);
|
pokeSite(tmp,ssrc,site);
|
||||||
}}}}}
|
}}}}}
|
||||||
|
|
||||||
double t0=usecond();
|
double t0=usecond();
|
||||||
sDw.Dhop(ssrc,sresult,0);
|
sDw.Dhop(ssrc,sresult,0);
|
||||||
double t1=usecond();
|
double t1=usecond();
|
||||||
|
|
||||||
#ifdef TIMERS_OFF
|
#ifdef TIMERS_OFF
|
||||||
int ncall =10;
|
int ncall =10;
|
||||||
#else
|
#else
|
||||||
int ncall =1+(int) ((5.0*1000*1000)/(t1-t0));
|
int ncall =1+(int) ((5.0*1000*1000)/(t1-t0));
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
PerformanceCounter Counter(8);
|
PerformanceCounter Counter(8);
|
||||||
Counter.Start();
|
Counter.Start();
|
||||||
t0=usecond();
|
t0=usecond();
|
||||||
for(int i=0;i<ncall;i++){
|
for(int i=0;i<ncall;i++){
|
||||||
sDw.Dhop(ssrc,sresult,0);
|
sDw.Dhop(ssrc,sresult,0);
|
||||||
}
|
}
|
||||||
t1=usecond();
|
t1=usecond();
|
||||||
Counter.Stop();
|
Counter.Stop();
|
||||||
|
|
||||||
|
if ( report ) {
|
||||||
|
Counter.Report();
|
||||||
|
} else {
|
||||||
|
double volume=Ls; for(int mu=0;mu<Nd;mu++) volume=volume*latt4[mu];
|
||||||
|
double flops=1344*volume*ncall;
|
||||||
|
std::cout<<"\t"<< flops/(t1-t0);
|
||||||
|
}
|
||||||
|
|
||||||
if ( report ) {
|
LatticeFermion sr_eo(sFGrid);
|
||||||
Counter.Report();
|
LatticeFermion serr(sFGrid);
|
||||||
} else {
|
|
||||||
|
LatticeFermion ssrc_e (sFrbGrid);
|
||||||
double volume=Ls; for(int mu=0;mu<Nd;mu++) volume=volume*latt4[mu];
|
LatticeFermion ssrc_o (sFrbGrid);
|
||||||
double flops=1344*volume*ncall;
|
LatticeFermion sr_e (sFrbGrid);
|
||||||
std::cout<<"\t"<< flops/(t1-t0);
|
LatticeFermion sr_o (sFrbGrid);
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
LatticeFermion sr_eo(sFGrid);
|
|
||||||
LatticeFermion serr(sFGrid);
|
|
||||||
|
|
||||||
LatticeFermion ssrc_e (sFrbGrid);
|
|
||||||
LatticeFermion ssrc_o (sFrbGrid);
|
|
||||||
LatticeFermion sr_e (sFrbGrid);
|
|
||||||
LatticeFermion sr_o (sFrbGrid);
|
|
||||||
|
|
||||||
pickCheckerboard(Even,ssrc_e,ssrc);
|
pickCheckerboard(Even,ssrc_e,ssrc);
|
||||||
pickCheckerboard(Odd,ssrc_o,ssrc);
|
pickCheckerboard(Odd,ssrc_o,ssrc);
|
||||||
|
|
||||||
setCheckerboard(sr_eo,ssrc_o);
|
setCheckerboard(sr_eo,ssrc_o);
|
||||||
setCheckerboard(sr_eo,ssrc_e);
|
setCheckerboard(sr_eo,ssrc_e);
|
||||||
|
|
||||||
sr_e = zero;
|
|
||||||
sr_o = zero;
|
|
||||||
|
|
||||||
|
sr_e = zero;
|
||||||
|
sr_o = zero;
|
||||||
|
|
||||||
|
sDw.DhopEO(ssrc_o,sr_e,DaggerNo);
|
||||||
|
PerformanceCounter CounterSdw(8);
|
||||||
|
CounterSdw.Start();
|
||||||
|
t0=usecond();
|
||||||
|
for(int i=0;i<ncall;i++){
|
||||||
|
__SSC_START;
|
||||||
sDw.DhopEO(ssrc_o,sr_e,DaggerNo);
|
sDw.DhopEO(ssrc_o,sr_e,DaggerNo);
|
||||||
PerformanceCounter CounterSdw(8);
|
__SSC_STOP;
|
||||||
CounterSdw.Start();
|
}
|
||||||
t0=usecond();
|
t1=usecond();
|
||||||
for(int i=0;i<ncall;i++){
|
CounterSdw.Stop();
|
||||||
__SSC_START;
|
|
||||||
sDw.DhopEO(ssrc_o,sr_e,DaggerNo);
|
|
||||||
__SSC_STOP;
|
|
||||||
}
|
|
||||||
t1=usecond();
|
|
||||||
CounterSdw.Stop();
|
|
||||||
|
|
||||||
if ( report ) {
|
if ( report ) {
|
||||||
CounterSdw.Report();
|
CounterSdw.Report();
|
||||||
} else {
|
} else {
|
||||||
|
double volume=Ls; for(int mu=0;mu<Nd;mu++) volume=volume*latt4[mu];
|
||||||
double volume=Ls; for(int mu=0;mu<Nd;mu++) volume=volume*latt4[mu];
|
double flops=(1344.0*volume*ncall)/2;
|
||||||
double flops=(1344.0*volume*ncall)/2;
|
std::cout<<"\t"<< flops/(t1-t0);
|
||||||
std::cout<<"\t"<< flops/(t1-t0);
|
}
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user