1
0
mirror of https://github.com/paboyle/Grid.git synced 2025-06-17 23:37:06 +01:00

Merge branch 'develop' into feature/hirep

This commit is contained in:
Guido Cossu
2016-08-30 18:21:36 +01:00
503 changed files with 84592 additions and 1780 deletions

View File

@ -25,7 +25,7 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk>
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#include <Grid.h>
#include <Grid/Grid.h>
using namespace std;
using namespace Grid;
@ -196,5 +196,126 @@ int main (int argc, char ** argv)
std::cout<<GridLogMessage << "===================================================================================================="<<std::endl;
std::cout<<GridLogMessage << "= Benchmarking sequential persistent halo exchange in "<<nmu<<" dimensions"<<std::endl;
std::cout<<GridLogMessage << "===================================================================================================="<<std::endl;
std::cout<<GridLogMessage << " L "<<"\t\t"<<" Ls "<<"\t\t"<<"bytes"<<"\t\t"<<"MB/s uni"<<"\t\t"<<"MB/s bidi"<<std::endl;
for(int lat=4;lat<=32;lat+=2){
for(int Ls=1;Ls<=16;Ls*=2){
std::vector<int> latt_size ({lat,lat,lat,lat});
GridCartesian Grid(latt_size,simd_layout,mpi_layout);
std::vector<std::vector<HalfSpinColourVectorD> > xbuf(8,std::vector<HalfSpinColourVectorD>(lat*lat*lat*Ls));
std::vector<std::vector<HalfSpinColourVectorD> > rbuf(8,std::vector<HalfSpinColourVectorD>(lat*lat*lat*Ls));
int ncomm;
int bytes=lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD);
std::vector<CartesianCommunicator::CommsRequest_t> empty;
std::vector<std::vector<CartesianCommunicator::CommsRequest_t> > requests_fwd(Nd,empty);
std::vector<std::vector<CartesianCommunicator::CommsRequest_t> > requests_bwd(Nd,empty);
for(int mu=0;mu<4;mu++){
ncomm=0;
if (mpi_layout[mu]>1 ) {
ncomm++;
int comm_proc;
int xmit_to_rank;
int recv_from_rank;
comm_proc=1;
Grid.ShiftedRanks(mu,comm_proc,xmit_to_rank,recv_from_rank);
Grid.SendToRecvFromInit(requests_fwd[mu],
(void *)&xbuf[mu][0],
xmit_to_rank,
(void *)&rbuf[mu][0],
recv_from_rank,
bytes);
comm_proc = mpi_layout[mu]-1;
Grid.ShiftedRanks(mu,comm_proc,xmit_to_rank,recv_from_rank);
Grid.SendToRecvFromInit(requests_bwd[mu],
(void *)&xbuf[mu+4][0],
xmit_to_rank,
(void *)&rbuf[mu+4][0],
recv_from_rank,
bytes);
}
}
{
double start=usecond();
for(int i=0;i<Nloop;i++){
for(int mu=0;mu<4;mu++){
if (mpi_layout[mu]>1 ) {
Grid.SendToRecvFromBegin(requests_fwd[mu]);
Grid.SendToRecvFromComplete(requests_fwd[mu]);
Grid.SendToRecvFromBegin(requests_bwd[mu]);
Grid.SendToRecvFromComplete(requests_bwd[mu]);
}
}
Grid.Barrier();
}
double stop=usecond();
double dbytes = bytes;
double xbytes = Nloop*dbytes*2.0*ncomm;
double rbytes = xbytes;
double bidibytes = xbytes+rbytes;
double time = stop-start;
std::cout<<GridLogMessage << lat<<"\t\t"<<Ls<<"\t\t"<<bytes<<"\t\t"<<xbytes/time<<"\t\t"<<bidibytes/time<<std::endl;
}
{
double start=usecond();
for(int i=0;i<Nloop;i++){
for(int mu=0;mu<4;mu++){
if (mpi_layout[mu]>1 ) {
Grid.SendToRecvFromBegin(requests_fwd[mu]);
Grid.SendToRecvFromBegin(requests_bwd[mu]);
Grid.SendToRecvFromComplete(requests_fwd[mu]);
Grid.SendToRecvFromComplete(requests_bwd[mu]);
}
}
Grid.Barrier();
}
double stop=usecond();
double dbytes = bytes;
double xbytes = Nloop*dbytes*2.0*ncomm;
double rbytes = xbytes;
double bidibytes = xbytes+rbytes;
double time = stop-start;
std::cout<<GridLogMessage << lat<<"\t\t"<<Ls<<"\t\t"<<bytes<<"\t\t"<<xbytes/time<<"\t\t"<<bidibytes/time<<std::endl;
}
}
}
Grid_finalize();
}

View File

@ -26,8 +26,7 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#include <Grid.h>
#include <PerfCount.h>
#include <Grid/Grid.h>
using namespace std;
using namespace Grid;
@ -46,9 +45,9 @@ struct scal {
};
bool overlapComms = false;
typedef WilsonFermion5D<DomainWallRedBlack5dImplR> WilsonFermion5DR;
typedef WilsonFermion5D<DomainWallRedBlack5dImplF> WilsonFermion5DF;
typedef WilsonFermion5D<DomainWallRedBlack5dImplD> WilsonFermion5DD;
typedef WilsonFermion5D<DomainWallVec5dImplR> WilsonFermion5DR;
typedef WilsonFermion5D<DomainWallVec5dImplF> WilsonFermion5DF;
typedef WilsonFermion5D<DomainWallVec5dImplD> WilsonFermion5DD;
int main (int argc, char ** argv)
@ -71,8 +70,8 @@ int main (int argc, char ** argv)
std::cout << GridLogMessage << "Making s innermost grids"<<std::endl;
GridCartesian * sUGrid = SpaceTimeGrid::makeFourDimDWFGrid(GridDefaultLatt(),GridDefaultMpi());
GridRedBlackCartesian * sUrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(sUGrid);
GridCartesian * sFGrid = SpaceTimeGrid::makeFiveDimDWFGrid(Ls,UGrid);
std::cout << GridLogMessage << "Making s innermost rb grids"<<std::endl;
GridRedBlackCartesian * sFrbGrid = SpaceTimeGrid::makeFiveDimDWFRedBlackGrid(Ls,UGrid);
std::vector<int> seeds4({1,2,3,4});
@ -87,6 +86,16 @@ int main (int argc, char ** argv)
LatticeFermion tmp(FGrid);
LatticeFermion err(FGrid);
/* src=zero;
std::vector<int> origin(5,0);
SpinColourVector f=zero;
for(int sp=0;sp<4;sp++){
for(int co=0;co<3;co++){
f()(sp)(co)=Complex(1.0,0.0);
}}
pokeSite(f,src,origin);
*/
ColourMatrix cm = Complex(1.0,0.0);
LatticeGaugeField Umu(UGrid);
@ -127,19 +136,16 @@ int main (int argc, char ** argv)
RealD mass=0.1;
RealD M5 =1.8;
typename DomainWallFermionR::ImplParams params;
params.overlapCommsCompute = overlapComms;
RealD NP = UGrid->_Nprocessors;
for(int doasm=1;doasm<2;doasm++){
QCD::WilsonKernelsStatic::AsmOpt=doasm;
DomainWallFermionR Dw(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5,params);
DomainWallFermionR Dw(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5);
std::cout<<GridLogMessage << "Calling Dw"<<std::endl;
int ncall =10;
int ncall =100;
if (1) {
double t0=usecond();
@ -165,11 +171,12 @@ int main (int argc, char ** argv)
if (1)
{
typedef WilsonFermion5D<DomainWallRedBlack5dImplR> WilsonFermion5DR;
typedef WilsonFermion5D<DomainWallVec5dImplR> WilsonFermion5DR;
LatticeFermion ssrc(sFGrid);
LatticeFermion sref(sFGrid);
LatticeFermion sresult(sFGrid);
WilsonFermion5DR sDw(1,Umu,*sFGrid,*sFrbGrid,*sUGrid,M5,params);
WilsonFermion5DR sDw(Umu,*sFGrid,*sFrbGrid,*sUGrid,*sUrbGrid,M5);
for(int x=0;x<latt4[0];x++){
for(int y=0;y<latt4[1];y++){
@ -181,7 +188,7 @@ int main (int argc, char ** argv)
peekSite(tmp,src,site);
pokeSite(tmp,ssrc,site);
}}}}}
std::cout<<"src norms "<< norm2(src)<<" " <<norm2(ssrc)<<std::endl;
double t0=usecond();
for(int i=0;i<ncall;i++){
__SSC_START;
@ -208,6 +215,7 @@ int main (int argc, char ** argv)
}
}
std::cout<<"res norms "<< norm2(result)<<" " <<norm2(sresult)<<std::endl;
RealF sum=0;
@ -221,9 +229,11 @@ int main (int argc, char ** argv)
peekSite(normal,result,site);
peekSite(simd,sresult,site);
sum=sum+norm2(normal-simd);
// std::cout << "site "<<x<<","<<y<<","<<z<<","<<t<<","<<s<<" "<<norm2(normal-simd)<<std::endl;
// std::cout << "site "<<x<<","<<y<<","<<z<<","<<t<<","<<s<<" "<<normal<<std::endl;
// std::cout << "site "<<x<<","<<y<<","<<z<<","<<t<<","<<s<<" "<<simd<<std::endl;
if (norm2(normal-simd) > 1.0e-6 ) {
std::cout << "site "<<x<<","<<y<<","<<z<<","<<t<<","<<s<<" "<<norm2(normal-simd)<<std::endl;
std::cout << "site "<<x<<","<<y<<","<<z<<","<<t<<","<<s<<" normal "<<normal<<std::endl;
std::cout << "site "<<x<<","<<y<<","<<z<<","<<t<<","<<s<<" simd "<<simd<<std::endl;
}
}}}}}
std::cout<<" difference between normal and simd is "<<sum<<std::endl;
@ -268,9 +278,9 @@ int main (int argc, char ** argv)
pickCheckerboard(Even,ssrc_e,sresult);
pickCheckerboard(Odd ,ssrc_o,sresult);
ssrc_e = ssrc_e - sr_e;
std::cout<<GridLogMessage << "sE norm diff "<< norm2(ssrc_e)<<std::endl;
std::cout<<GridLogMessage << "sE norm diff "<< norm2(ssrc_e)<< " vec nrm"<<norm2(sr_e) <<std::endl;
ssrc_o = ssrc_o - sr_o;
std::cout<<GridLogMessage << "sO norm diff "<< norm2(ssrc_o)<<std::endl;
std::cout<<GridLogMessage << "sO norm diff "<< norm2(ssrc_o)<< " vec nrm"<<norm2(sr_o) <<std::endl;
}

View File

@ -26,8 +26,7 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#include <Grid.h>
#include <PerfCount.h>
#include <Grid/Grid.h>
using namespace std;
using namespace Grid;

View File

@ -26,8 +26,7 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#include <Grid.h>
#include <PerfCount.h>
#include <Grid/Grid.h>
using namespace std;
using namespace Grid;
@ -126,7 +125,6 @@ void benchDw(std::vector<int> & latt4, int Ls, int threads,int report )
ColourMatrix cm = Complex(1.0,0.0);
LatticeGaugeField Umu5d(FGrid);
// replicate across fifth dimension
@ -145,11 +143,10 @@ void benchDw(std::vector<int> & latt4, int Ls, int threads,int report )
}
#ifdef CHECK
if (1)
{
if (1) {
ref = zero;
for(int mu=0;mu<Nd;mu++){
tmp = U[mu]*Cshift(src,mu+1,1);
ref=ref + tmp - Gamma(Gmu[mu])*tmp;
@ -193,20 +190,19 @@ void benchDw(std::vector<int> & latt4, int Ls, int threads,int report )
Counter.Report();
}
if ( ! report )
{
double volume=Ls; for(int mu=0;mu<Nd;mu++) volume=volume*latt4[mu];
double flops=1344*volume*ncall;
std::cout <<"\t"<<NP<< "\t"<<flops/(t1-t0)<< "\t";
}
if ( ! report ) {
double volume=Ls; for(int mu=0;mu<Nd;mu++) volume=volume*latt4[mu];
double flops=1344*volume*ncall;
std::cout <<"\t"<<NP<< "\t"<<flops/(t1-t0)<< "\t";
}
#ifdef CHECK
err = ref-result;
RealD errd = norm2(err);
if ( errd> 1.0e-4 ) {
std::cout<<GridLogMessage << "oops !!! norm diff "<< norm2(err)<<std::endl;
exit(-1);
}
err = ref-result;
RealD errd = norm2(err);
if ( errd> 1.0e-4 ) {
std::cout<<GridLogMessage << "oops !!! norm diff "<< norm2(err)<<std::endl;
exit(-1);
}
#endif
LatticeFermion src_e (FrbGrid);
@ -232,10 +228,9 @@ void benchDw(std::vector<int> & latt4, int Ls, int threads,int report )
std::cout<< flops/(t1-t0);
}
}
}
#undef CHECK_SDW
#define CHECK_SDW
void benchsDw(std::vector<int> & latt4, int Ls, int threads, int report )
{
@ -243,7 +238,9 @@ void benchsDw(std::vector<int> & latt4, int Ls, int threads, int report )
GridRedBlackCartesian * UrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid);
GridCartesian * FGrid = SpaceTimeGrid::makeFiveDimGrid(Ls,UGrid);
GridRedBlackCartesian * FrbGrid = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,UGrid);
GridCartesian * sUGrid = SpaceTimeGrid::makeFourDimDWFGrid(latt4,GridDefaultMpi());
GridRedBlackCartesian * sUrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(sUGrid);
GridCartesian * sFGrid = SpaceTimeGrid::makeFiveDimDWFGrid(Ls,UGrid);
GridRedBlackCartesian * sFrbGrid = SpaceTimeGrid::makeFiveDimDWFRedBlackGrid(Ls,UGrid);
@ -277,93 +274,89 @@ void benchsDw(std::vector<int> & latt4, int Ls, int threads, int report )
}
}
RealD mass=0.1;
RealD M5 =1.8;
typedef WilsonFermion5D<DomainWallRedBlack5dImplR> WilsonFermion5DR;
LatticeFermion ssrc(sFGrid);
LatticeFermion sref(sFGrid);
LatticeFermion sresult(sFGrid);
WilsonFermion5DR sDw(1,Umu,*sFGrid,*sFrbGrid,*sUGrid,M5);
typedef WilsonFermion5D<DomainWallVec5dImplR> WilsonFermion5DR;
LatticeFermion ssrc(sFGrid);
LatticeFermion sref(sFGrid);
LatticeFermion sresult(sFGrid);
WilsonFermion5DR sDw(Umu,*sFGrid,*sFrbGrid,*sUGrid,*sUrbGrid,M5);
for(int x=0;x<latt4[0];x++){
for(int y=0;y<latt4[1];y++){
for(int z=0;z<latt4[2];z++){
for(int t=0;t<latt4[3];t++){
for(int s=0;s<Ls;s++){
std::vector<int> site({s,x,y,z,t});
SpinColourVector tmp;
peekSite(tmp,src,site);
pokeSite(tmp,ssrc,site);
}}}}}
for(int x=0;x<latt4[0];x++){
for(int y=0;y<latt4[1];y++){
for(int z=0;z<latt4[2];z++){
for(int t=0;t<latt4[3];t++){
for(int s=0;s<Ls;s++){
std::vector<int> site({s,x,y,z,t});
SpinColourVector tmp;
peekSite(tmp,src,site);
pokeSite(tmp,ssrc,site);
}}}}}
double t0=usecond();
sDw.Dhop(ssrc,sresult,0);
double t1=usecond();
double t0=usecond();
sDw.Dhop(ssrc,sresult,0);
double t1=usecond();
#ifdef TIMERS_OFF
int ncall =10;
int ncall =10;
#else
int ncall =1+(int) ((5.0*1000*1000)/(t1-t0));
int ncall =1+(int) ((5.0*1000*1000)/(t1-t0));
#endif
PerformanceCounter Counter(8);
Counter.Start();
t0=usecond();
for(int i=0;i<ncall;i++){
sDw.Dhop(ssrc,sresult,0);
}
t1=usecond();
Counter.Stop();
PerformanceCounter Counter(8);
Counter.Start();
t0=usecond();
for(int i=0;i<ncall;i++){
sDw.Dhop(ssrc,sresult,0);
}
t1=usecond();
Counter.Stop();
if ( report ) {
Counter.Report();
} else {
double volume=Ls; for(int mu=0;mu<Nd;mu++) volume=volume*latt4[mu];
double flops=1344*volume*ncall;
std::cout<<"\t"<< flops/(t1-t0);
}
if ( report ) {
Counter.Report();
} else {
double volume=Ls; for(int mu=0;mu<Nd;mu++) volume=volume*latt4[mu];
double flops=1344*volume*ncall;
std::cout<<"\t"<< flops/(t1-t0);
}
LatticeFermion sr_eo(sFGrid);
LatticeFermion serr(sFGrid);
LatticeFermion ssrc_e (sFrbGrid);
LatticeFermion ssrc_o (sFrbGrid);
LatticeFermion sr_e (sFrbGrid);
LatticeFermion sr_o (sFrbGrid);
LatticeFermion sr_eo(sFGrid);
LatticeFermion serr(sFGrid);
LatticeFermion ssrc_e (sFrbGrid);
LatticeFermion ssrc_o (sFrbGrid);
LatticeFermion sr_e (sFrbGrid);
LatticeFermion sr_o (sFrbGrid);
pickCheckerboard(Even,ssrc_e,ssrc);
pickCheckerboard(Odd,ssrc_o,ssrc);
setCheckerboard(sr_eo,ssrc_o);
setCheckerboard(sr_eo,ssrc_e);
sr_e = zero;
sr_o = zero;
pickCheckerboard(Even,ssrc_e,ssrc);
pickCheckerboard(Odd,ssrc_o,ssrc);
setCheckerboard(sr_eo,ssrc_o);
setCheckerboard(sr_eo,ssrc_e);
sr_e = zero;
sr_o = zero;
sDw.DhopEO(ssrc_o,sr_e,DaggerNo);
PerformanceCounter CounterSdw(8);
CounterSdw.Start();
t0=usecond();
for(int i=0;i<ncall;i++){
__SSC_START;
sDw.DhopEO(ssrc_o,sr_e,DaggerNo);
PerformanceCounter CounterSdw(8);
CounterSdw.Start();
t0=usecond();
for(int i=0;i<ncall;i++){
__SSC_START;
sDw.DhopEO(ssrc_o,sr_e,DaggerNo);
__SSC_STOP;
}
t1=usecond();
CounterSdw.Stop();
__SSC_STOP;
}
t1=usecond();
CounterSdw.Stop();
if ( report ) {
CounterSdw.Report();
} else {
double volume=Ls; for(int mu=0;mu<Nd;mu++) volume=volume*latt4[mu];
double flops=(1344.0*volume*ncall)/2;
std::cout<<"\t"<< flops/(t1-t0);
}
if ( report ) {
CounterSdw.Report();
} else {
double volume=Ls; for(int mu=0;mu<Nd;mu++) volume=volume*latt4[mu];
double flops=(1344.0*volume*ncall)/2;
std::cout<<"\t"<< flops/(t1-t0);
}
}

View File

@ -26,7 +26,7 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#include <Grid.h>
#include <Grid/Grid.h>
using namespace std;
using namespace Grid;

View File

@ -26,7 +26,7 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#include <Grid.h>
#include <Grid/Grid.h>
using namespace std;
using namespace Grid;

View File

@ -26,7 +26,7 @@ Author: Peter Boyle <peterboyle@Peters-MacBook-Pro-2.local>
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#include <Grid.h>
#include <Grid/Grid.h>
using namespace std;
using namespace Grid;

View File

@ -26,7 +26,7 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#include <Grid.h>
#include <Grid/Grid.h>
using namespace std;
using namespace Grid;

View File

@ -25,8 +25,7 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#include <Grid.h>
#include <PerfCount.h>
#include <Grid/Grid.h>
using namespace Grid;

View File

@ -1,5 +1,5 @@
# additional include paths necessary to compile the C++ library
AM_CXXFLAGS = -I$(top_srcdir)/lib
AM_CXXFLAGS = -I$(top_srcdir)/include
AM_LDFLAGS = -L$(top_builddir)/lib
#