mirror of
https://github.com/paboyle/Grid.git
synced 2025-06-21 17:22:03 +01:00
Update for new stencil compression options
This commit is contained in:
@ -166,18 +166,18 @@ int main (int argc, char ** argv)
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
std::cout<<GridLogMessage << "===================================================================================================="<<std::endl;
|
std::cout<<GridLogMessage << "===================================================================================================="<<std::endl;
|
||||||
std::cout<<GridLogMessage << "= Benchmarking concurrent STENCIL halo exchange in "<<nmu<<" dimensions"<<std::endl;
|
std::cout<<GridLogMessage << "= Benchmarking sequential STENCIL halo exchange in "<<nmu<<" dimensions"<<std::endl;
|
||||||
std::cout<<GridLogMessage << "===================================================================================================="<<std::endl;
|
std::cout<<GridLogMessage << "===================================================================================================="<<std::endl;
|
||||||
header();
|
header();
|
||||||
|
|
||||||
for(int lat=8;lat<=maxlat;lat+=4){
|
for(int lat=8;lat<=maxlat;lat+=4){
|
||||||
for(int Ls=8;Ls<=8;Ls*=2){
|
for(int Ls=8;Ls<=8;Ls*=2){
|
||||||
|
|
||||||
Coordinate latt_size ({lat*mpi_layout[0],
|
Coordinate latt_size ({lat*mpi_layout[0],
|
||||||
lat*mpi_layout[1],
|
lat*mpi_layout[1],
|
||||||
lat*mpi_layout[2],
|
lat*mpi_layout[2],
|
||||||
lat*mpi_layout[3]});
|
lat*mpi_layout[3]});
|
||||||
|
|
||||||
GridCartesian Grid(latt_size,simd_layout,mpi_layout);
|
GridCartesian Grid(latt_size,simd_layout,mpi_layout);
|
||||||
RealD Nrank = Grid._Nprocessors;
|
RealD Nrank = Grid._Nprocessors;
|
||||||
@ -193,101 +193,6 @@ int main (int argc, char ** argv)
|
|||||||
rbuf[d] = (HalfSpinColourVectorD *)Grid.ShmBufferMalloc(bytes);
|
rbuf[d] = (HalfSpinColourVectorD *)Grid.ShmBufferMalloc(bytes);
|
||||||
}
|
}
|
||||||
|
|
||||||
int ncomm;
|
|
||||||
|
|
||||||
double dbytes;
|
|
||||||
for(int i=0;i<Nloop;i++){
|
|
||||||
double start=usecond();
|
|
||||||
|
|
||||||
dbytes=0;
|
|
||||||
ncomm=0;
|
|
||||||
|
|
||||||
std::vector<CommsRequest_t> requests;
|
|
||||||
|
|
||||||
for(int mu=0;mu<4;mu++){
|
|
||||||
|
|
||||||
|
|
||||||
if (mpi_layout[mu]>1 ) {
|
|
||||||
|
|
||||||
ncomm++;
|
|
||||||
int comm_proc=1;
|
|
||||||
int xmit_to_rank;
|
|
||||||
int recv_from_rank;
|
|
||||||
Grid.ShiftedRanks(mu,comm_proc,xmit_to_rank,recv_from_rank);
|
|
||||||
dbytes+=
|
|
||||||
Grid.StencilSendToRecvFromBegin(requests,
|
|
||||||
(void *)&xbuf[mu][0],
|
|
||||||
xmit_to_rank,1,
|
|
||||||
(void *)&rbuf[mu][0],
|
|
||||||
recv_from_rank,1,
|
|
||||||
bytes,bytes,mu);
|
|
||||||
|
|
||||||
comm_proc = mpi_layout[mu]-1;
|
|
||||||
|
|
||||||
Grid.ShiftedRanks(mu,comm_proc,xmit_to_rank,recv_from_rank);
|
|
||||||
dbytes+=
|
|
||||||
Grid.StencilSendToRecvFromBegin(requests,
|
|
||||||
(void *)&xbuf[mu+4][0],
|
|
||||||
xmit_to_rank,1,
|
|
||||||
(void *)&rbuf[mu+4][0],
|
|
||||||
recv_from_rank,1,
|
|
||||||
bytes,bytes,mu+4);
|
|
||||||
|
|
||||||
}
|
|
||||||
}
|
|
||||||
Grid.StencilSendToRecvFromComplete(requests,0);
|
|
||||||
Grid.Barrier();
|
|
||||||
double stop=usecond();
|
|
||||||
t_time[i] = stop-start; // microseconds
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
timestat.statistics(t_time);
|
|
||||||
|
|
||||||
dbytes=dbytes*ppn;
|
|
||||||
double xbytes = dbytes*0.5;
|
|
||||||
// double rbytes = dbytes*0.5;
|
|
||||||
double bidibytes = dbytes;
|
|
||||||
|
|
||||||
std::cout<<GridLogMessage << std::setw(4) << lat<<"\t"<<Ls<<"\t"
|
|
||||||
<<std::setw(11) << bytes<< std::fixed << std::setprecision(1) << std::setw(7)
|
|
||||||
<<std::right<< xbytes/timestat.mean<<" "<< xbytes*timestat.err/(timestat.mean*timestat.mean)<< " "
|
|
||||||
<<xbytes/timestat.max <<" "<< xbytes/timestat.min
|
|
||||||
<< "\t\t"<<std::setw(7)<< bidibytes/timestat.mean<< " " << bidibytes*timestat.err/(timestat.mean*timestat.mean) << " "
|
|
||||||
<< bidibytes/timestat.max << " " << bidibytes/timestat.min << std::endl;
|
|
||||||
|
|
||||||
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
std::cout<<GridLogMessage << "===================================================================================================="<<std::endl;
|
|
||||||
std::cout<<GridLogMessage << "= Benchmarking sequential STENCIL halo exchange in "<<nmu<<" dimensions"<<std::endl;
|
|
||||||
std::cout<<GridLogMessage << "===================================================================================================="<<std::endl;
|
|
||||||
header();
|
|
||||||
|
|
||||||
for(int lat=8;lat<=maxlat;lat+=4){
|
|
||||||
for(int Ls=8;Ls<=8;Ls*=2){
|
|
||||||
|
|
||||||
Coordinate latt_size ({lat*mpi_layout[0],
|
|
||||||
lat*mpi_layout[1],
|
|
||||||
lat*mpi_layout[2],
|
|
||||||
lat*mpi_layout[3]});
|
|
||||||
|
|
||||||
GridCartesian Grid(latt_size,simd_layout,mpi_layout);
|
|
||||||
RealD Nrank = Grid._Nprocessors;
|
|
||||||
RealD Nnode = Grid.NodeCount();
|
|
||||||
RealD ppn = Nrank/Nnode;
|
|
||||||
|
|
||||||
std::vector<HalfSpinColourVectorD *> xbuf(8);
|
|
||||||
std::vector<HalfSpinColourVectorD *> rbuf(8);
|
|
||||||
Grid.ShmBufferFreeAll();
|
|
||||||
uint64_t bytes=lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD);
|
|
||||||
for(int d=0;d<8;d++){
|
|
||||||
xbuf[d] = (HalfSpinColourVectorD *)Grid.ShmBufferMalloc(bytes);
|
|
||||||
rbuf[d] = (HalfSpinColourVectorD *)Grid.ShmBufferMalloc(bytes);
|
|
||||||
}
|
|
||||||
|
|
||||||
int ncomm;
|
int ncomm;
|
||||||
double dbytes;
|
double dbytes;
|
||||||
for(int i=0;i<Nloop;i++){
|
for(int i=0;i<Nloop;i++){
|
||||||
@ -296,45 +201,34 @@ int main (int argc, char ** argv)
|
|||||||
std::vector<CommsRequest_t> requests;
|
std::vector<CommsRequest_t> requests;
|
||||||
dbytes=0;
|
dbytes=0;
|
||||||
ncomm=0;
|
ncomm=0;
|
||||||
for(int mu=0;mu<4;mu++){
|
|
||||||
|
for(int dir=0;dir<8;dir++) {
|
||||||
|
|
||||||
|
double tbytes;
|
||||||
|
int mu =dir % 4;
|
||||||
|
|
||||||
if (mpi_layout[mu]>1 ) {
|
if (mpi_layout[mu]>1 ) {
|
||||||
|
|
||||||
ncomm++;
|
ncomm++;
|
||||||
int comm_proc=1;
|
|
||||||
int xmit_to_rank;
|
int xmit_to_rank;
|
||||||
int recv_from_rank;
|
int recv_from_rank;
|
||||||
|
if ( dir == mu ) {
|
||||||
|
int comm_proc=1;
|
||||||
|
Grid.ShiftedRanks(mu,comm_proc,xmit_to_rank,recv_from_rank);
|
||||||
|
} else {
|
||||||
|
int comm_proc = mpi_layout[mu]-1;
|
||||||
|
Grid.ShiftedRanks(mu,comm_proc,xmit_to_rank,recv_from_rank);
|
||||||
|
}
|
||||||
|
int tid = omp_get_thread_num();
|
||||||
|
tbytes= Grid.StencilSendToRecvFrom((void *)&xbuf[dir][0], xmit_to_rank,1,
|
||||||
|
(void *)&rbuf[dir][0], recv_from_rank,1, bytes,tid);
|
||||||
|
|
||||||
Grid.ShiftedRanks(mu,comm_proc,xmit_to_rank,recv_from_rank);
|
dbytes+=tbytes;
|
||||||
dbytes+=
|
|
||||||
Grid.StencilSendToRecvFromBegin(requests,
|
|
||||||
(void *)&xbuf[mu][0],
|
|
||||||
xmit_to_rank,1,
|
|
||||||
(void *)&rbuf[mu][0],
|
|
||||||
recv_from_rank,1,
|
|
||||||
bytes,bytes,mu);
|
|
||||||
Grid.StencilSendToRecvFromComplete(requests,mu);
|
|
||||||
requests.resize(0);
|
|
||||||
|
|
||||||
comm_proc = mpi_layout[mu]-1;
|
|
||||||
|
|
||||||
Grid.ShiftedRanks(mu,comm_proc,xmit_to_rank,recv_from_rank);
|
|
||||||
dbytes+=
|
|
||||||
Grid.StencilSendToRecvFromBegin(requests,
|
|
||||||
(void *)&xbuf[mu+4][0],
|
|
||||||
xmit_to_rank,1,
|
|
||||||
(void *)&rbuf[mu+4][0],
|
|
||||||
recv_from_rank,1,
|
|
||||||
bytes,bytes,mu+4);
|
|
||||||
Grid.StencilSendToRecvFromComplete(requests,mu+4);
|
|
||||||
requests.resize(0);
|
|
||||||
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
Grid.Barrier();
|
Grid.Barrier();
|
||||||
double stop=usecond();
|
double stop=usecond();
|
||||||
t_time[i] = stop-start; // microseconds
|
t_time[i] = stop-start; // microseconds
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
timestat.statistics(t_time);
|
timestat.statistics(t_time);
|
||||||
|
@ -32,18 +32,18 @@
|
|||||||
using namespace std;
|
using namespace std;
|
||||||
using namespace Grid;
|
using namespace Grid;
|
||||||
|
|
||||||
template<class d>
|
////////////////////////
|
||||||
struct scal {
|
/// Move to domains ////
|
||||||
d internal;
|
////////////////////////
|
||||||
|
|
||||||
|
Gamma::Algebra Gmu [] = {
|
||||||
|
Gamma::Algebra::GammaX,
|
||||||
|
Gamma::Algebra::GammaY,
|
||||||
|
Gamma::Algebra::GammaZ,
|
||||||
|
Gamma::Algebra::GammaT
|
||||||
};
|
};
|
||||||
|
|
||||||
Gamma::Algebra Gmu [] = {
|
void Benchmark(int Ls, Coordinate Dirichlet,bool Sloppy);
|
||||||
Gamma::Algebra::GammaX,
|
|
||||||
Gamma::Algebra::GammaY,
|
|
||||||
Gamma::Algebra::GammaZ,
|
|
||||||
Gamma::Algebra::GammaT
|
|
||||||
};
|
|
||||||
|
|
||||||
|
|
||||||
int main (int argc, char ** argv)
|
int main (int argc, char ** argv)
|
||||||
{
|
{
|
||||||
@ -52,39 +52,108 @@ int main (int argc, char ** argv)
|
|||||||
|
|
||||||
int threads = GridThread::GetThreads();
|
int threads = GridThread::GetThreads();
|
||||||
|
|
||||||
Coordinate latt4 = GridDefaultLatt();
|
int Ls=16;
|
||||||
int Ls=8;
|
for(int i=0;i<argc;i++) {
|
||||||
for(int i=0;i<argc;i++)
|
|
||||||
if(std::string(argv[i]) == "-Ls"){
|
if(std::string(argv[i]) == "-Ls"){
|
||||||
std::stringstream ss(argv[i+1]); ss >> Ls;
|
std::stringstream ss(argv[i+1]); ss >> Ls;
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
//////////////////
|
||||||
|
// With comms
|
||||||
|
//////////////////
|
||||||
|
Coordinate Dirichlet(Nd+1,0);
|
||||||
|
|
||||||
|
std::cout << "\n\n\n\n\n\n" <<std::endl;
|
||||||
|
std::cout << GridLogMessage<< "++++++++++++++++++++++++++++++++++++++++++++++++" <<std::endl;
|
||||||
|
std::cout << GridLogMessage<< " Testing with full communication " <<std::endl;
|
||||||
|
std::cout << GridLogMessage<< "++++++++++++++++++++++++++++++++++++++++++++++++" <<std::endl;
|
||||||
|
|
||||||
|
Benchmark(Ls,Dirichlet,false);
|
||||||
|
|
||||||
|
std::cout << "\n\n\n\n\n\n" <<std::endl;
|
||||||
|
std::cout << GridLogMessage<< "++++++++++++++++++++++++++++++++++++++++++++++++" <<std::endl;
|
||||||
|
std::cout << GridLogMessage<< " Testing with sloppy communication " <<std::endl;
|
||||||
|
std::cout << GridLogMessage<< "++++++++++++++++++++++++++++++++++++++++++++++++" <<std::endl;
|
||||||
|
|
||||||
|
Benchmark(Ls,Dirichlet,true);
|
||||||
|
|
||||||
|
//////////////////
|
||||||
|
// Domain decomposed
|
||||||
|
//////////////////
|
||||||
|
/*
|
||||||
|
Coordinate latt4 = GridDefaultLatt();
|
||||||
|
Coordinate mpi = GridDefaultMpi();
|
||||||
|
Coordinate CommDim(Nd);
|
||||||
|
Coordinate shm;
|
||||||
|
GlobalSharedMemory::GetShmDims(mpi,shm);
|
||||||
|
|
||||||
|
|
||||||
|
std::cout << "\n\n\n\n\n\n" <<std::endl;
|
||||||
|
std::cout << GridLogMessage<< "++++++++++++++++++++++++++++++++++++++++++++++++" <<std::endl;
|
||||||
|
// std::cout << GridLogMessage<< " Testing without internode communication " <<std::endl;
|
||||||
|
std::cout << GridLogMessage<< "++++++++++++++++++++++++++++++++++++++++++++++++" <<std::endl;
|
||||||
|
|
||||||
|
for(int d=0;d<Nd;d++) CommDim[d]= (mpi[d]/shm[d])>1 ? 1 : 0;
|
||||||
|
Dirichlet[0] = 0;
|
||||||
|
Dirichlet[1] = CommDim[0]*latt4[0]/mpi[0] * shm[0];
|
||||||
|
Dirichlet[2] = CommDim[1]*latt4[1]/mpi[1] * shm[1];
|
||||||
|
Dirichlet[3] = CommDim[2]*latt4[2]/mpi[2] * shm[2];
|
||||||
|
Dirichlet[4] = CommDim[3]*latt4[3]/mpi[3] * shm[3];
|
||||||
|
|
||||||
|
Benchmark(Ls,Dirichlet,false);
|
||||||
|
|
||||||
|
std::cout << "\n\n\n\n\n\n" <<std::endl;
|
||||||
|
|
||||||
|
std::cout << GridLogMessage<< "++++++++++++++++++++++++++++++++++++++++++++++++" <<std::endl;
|
||||||
|
std::cout << GridLogMessage<< " Testing with sloppy communication " <<std::endl;
|
||||||
|
std::cout << GridLogMessage<< "++++++++++++++++++++++++++++++++++++++++++++++++" <<std::endl;
|
||||||
|
|
||||||
|
for(int d=0;d<Nd;d++) CommDim[d]= mpi[d]>1 ? 1 : 0;
|
||||||
|
|
||||||
|
Benchmark(Ls,Dirichlet,true);
|
||||||
|
*/
|
||||||
|
|
||||||
|
Grid_finalize();
|
||||||
|
exit(0);
|
||||||
|
}
|
||||||
|
void Benchmark(int Ls, Coordinate Dirichlet,bool sloppy)
|
||||||
|
{
|
||||||
|
Coordinate latt4 = GridDefaultLatt();
|
||||||
GridLogLayout();
|
GridLogLayout();
|
||||||
|
|
||||||
long unsigned int single_site_flops = 8*Nc*(7+16*Nc);
|
long unsigned int single_site_flops = 8*Nc*(7+16*Nc);
|
||||||
|
|
||||||
|
std::vector<int> seeds4({1,2,3,4});
|
||||||
|
std::vector<int> seeds5({5,6,7,8});
|
||||||
|
#undef SINGLE
|
||||||
|
#ifdef SINGLE
|
||||||
|
typedef vComplexF Simd;
|
||||||
|
typedef LatticeFermionF FermionField;
|
||||||
|
typedef LatticeGaugeFieldF GaugeField;
|
||||||
|
typedef LatticeColourMatrixF ColourMatrixField;
|
||||||
|
typedef DomainWallFermionF FermionAction;
|
||||||
|
#else
|
||||||
|
typedef vComplexD Simd;
|
||||||
|
typedef LatticeFermionD FermionField;
|
||||||
|
typedef LatticeGaugeFieldD GaugeField;
|
||||||
|
typedef LatticeColourMatrixD ColourMatrixField;
|
||||||
|
typedef DomainWallFermionD FermionAction;
|
||||||
|
#endif
|
||||||
|
|
||||||
GridCartesian * UGrid = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(), GridDefaultSimd(Nd,vComplex::Nsimd()),GridDefaultMpi());
|
GridCartesian * UGrid = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(), GridDefaultSimd(Nd,Simd::Nsimd()),GridDefaultMpi());
|
||||||
GridRedBlackCartesian * UrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid);
|
GridRedBlackCartesian * UrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid);
|
||||||
GridCartesian * FGrid = SpaceTimeGrid::makeFiveDimGrid(Ls,UGrid);
|
GridCartesian * FGrid = SpaceTimeGrid::makeFiveDimGrid(Ls,UGrid);
|
||||||
GridRedBlackCartesian * FrbGrid = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,UGrid);
|
GridRedBlackCartesian * FrbGrid = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,UGrid);
|
||||||
|
|
||||||
std::cout << GridLogMessage << "Making s innermost grids"<<std::endl;
|
|
||||||
GridCartesian * sUGrid = SpaceTimeGrid::makeFourDimDWFGrid(GridDefaultLatt(),GridDefaultMpi());
|
|
||||||
GridRedBlackCartesian * sUrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(sUGrid);
|
|
||||||
GridCartesian * sFGrid = SpaceTimeGrid::makeFiveDimDWFGrid(Ls,UGrid);
|
|
||||||
GridRedBlackCartesian * sFrbGrid = SpaceTimeGrid::makeFiveDimDWFRedBlackGrid(Ls,UGrid);
|
|
||||||
|
|
||||||
std::vector<int> seeds4({1,2,3,4});
|
|
||||||
std::vector<int> seeds5({5,6,7,8});
|
|
||||||
|
|
||||||
std::cout << GridLogMessage << "Initialising 4d RNG" << std::endl;
|
std::cout << GridLogMessage << "Initialising 4d RNG" << std::endl;
|
||||||
GridParallelRNG RNG4(UGrid); RNG4.SeedUniqueString(std::string("The 4D RNG"));
|
GridParallelRNG RNG4(UGrid); RNG4.SeedUniqueString(std::string("The 4D RNG"));
|
||||||
|
|
||||||
std::cout << GridLogMessage << "Initialising 5d RNG" << std::endl;
|
std::cout << GridLogMessage << "Initialising 5d RNG" << std::endl;
|
||||||
GridParallelRNG RNG5(FGrid); RNG5.SeedUniqueString(std::string("The 5D RNG"));
|
GridParallelRNG RNG5(FGrid); RNG5.SeedUniqueString(std::string("The 5D RNG"));
|
||||||
std::cout << GridLogMessage << "Initialised RNGs" << std::endl;
|
|
||||||
|
|
||||||
LatticeFermion src (FGrid); random(RNG5,src);
|
|
||||||
|
FermionField src (FGrid); random(RNG5,src);
|
||||||
#if 0
|
#if 0
|
||||||
src = Zero();
|
src = Zero();
|
||||||
{
|
{
|
||||||
@ -100,46 +169,39 @@ int main (int argc, char ** argv)
|
|||||||
src = src*N2;
|
src = src*N2;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
FermionField result(FGrid); result=Zero();
|
||||||
LatticeFermion result(FGrid); result=Zero();
|
FermionField ref(FGrid); ref=Zero();
|
||||||
LatticeFermion ref(FGrid); ref=Zero();
|
FermionField tmp(FGrid);
|
||||||
LatticeFermion tmp(FGrid);
|
FermionField err(FGrid);
|
||||||
LatticeFermion err(FGrid);
|
|
||||||
|
|
||||||
std::cout << GridLogMessage << "Drawing gauge field" << std::endl;
|
std::cout << GridLogMessage << "Drawing gauge field" << std::endl;
|
||||||
LatticeGaugeField Umu(UGrid);
|
GaugeField Umu(UGrid);
|
||||||
|
GaugeField UmuCopy(UGrid);
|
||||||
SU<Nc>::HotConfiguration(RNG4,Umu);
|
SU<Nc>::HotConfiguration(RNG4,Umu);
|
||||||
|
// SU<Nc>::ColdConfiguration(Umu);
|
||||||
|
UmuCopy=Umu;
|
||||||
std::cout << GridLogMessage << "Random gauge initialised " << std::endl;
|
std::cout << GridLogMessage << "Random gauge initialised " << std::endl;
|
||||||
#if 0
|
|
||||||
Umu=1.0;
|
////////////////////////////////////
|
||||||
for(int mu=0;mu<Nd;mu++){
|
// Apply BCs
|
||||||
LatticeColourMatrix ttmp(UGrid);
|
////////////////////////////////////
|
||||||
ttmp = PeekIndex<LorentzIndex>(Umu,mu);
|
Coordinate Block(4);
|
||||||
// if (mu !=2 ) ttmp = 0;
|
for(int d=0;d<4;d++) Block[d]= Dirichlet[d+1];
|
||||||
// ttmp = ttmp* pow(10.0,mu);
|
|
||||||
PokeIndex<LorentzIndex>(Umu,ttmp,mu);
|
std::cout << GridLogMessage << "Applying BCs for Dirichlet Block5 " << Dirichlet << std::endl;
|
||||||
}
|
std::cout << GridLogMessage << "Applying BCs for Dirichlet Block4 " << Block << std::endl;
|
||||||
std::cout << GridLogMessage << "Forced to diagonal " << std::endl;
|
|
||||||
#endif
|
DirichletFilter<GaugeField> Filter(Block);
|
||||||
|
Filter.applyFilter(Umu);
|
||||||
|
|
||||||
////////////////////////////////////
|
////////////////////////////////////
|
||||||
// Naive wilson implementation
|
// Naive wilson implementation
|
||||||
////////////////////////////////////
|
////////////////////////////////////
|
||||||
// replicate across fifth dimension
|
std::vector<ColourMatrixField> U(4,UGrid);
|
||||||
LatticeGaugeField Umu5d(FGrid);
|
|
||||||
std::vector<LatticeColourMatrix> U(4,FGrid);
|
|
||||||
{
|
|
||||||
autoView( Umu5d_v, Umu5d, CpuWrite);
|
|
||||||
autoView( Umu_v , Umu , CpuRead);
|
|
||||||
for(int ss=0;ss<Umu.Grid()->oSites();ss++){
|
|
||||||
for(int s=0;s<Ls;s++){
|
|
||||||
Umu5d_v[Ls*ss+s] = Umu_v[ss];
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
for(int mu=0;mu<Nd;mu++){
|
for(int mu=0;mu<Nd;mu++){
|
||||||
U[mu] = PeekIndex<LorentzIndex>(Umu5d,mu);
|
U[mu] = PeekIndex<LorentzIndex>(Umu,mu);
|
||||||
}
|
}
|
||||||
|
|
||||||
std::cout << GridLogMessage << "Setting up Cshift based reference " << std::endl;
|
std::cout << GridLogMessage << "Setting up Cshift based reference " << std::endl;
|
||||||
|
|
||||||
if (1)
|
if (1)
|
||||||
@ -147,10 +209,28 @@ int main (int argc, char ** argv)
|
|||||||
ref = Zero();
|
ref = Zero();
|
||||||
for(int mu=0;mu<Nd;mu++){
|
for(int mu=0;mu<Nd;mu++){
|
||||||
|
|
||||||
tmp = U[mu]*Cshift(src,mu+1,1);
|
tmp = Cshift(src,mu+1,1);
|
||||||
|
{
|
||||||
|
autoView( tmp_v , tmp , CpuWrite);
|
||||||
|
autoView( U_v , U[mu] , CpuRead);
|
||||||
|
for(int ss=0;ss<U[mu].Grid()->oSites();ss++){
|
||||||
|
for(int s=0;s<Ls;s++){
|
||||||
|
tmp_v[Ls*ss+s] = U_v[ss]*tmp_v[Ls*ss+s];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
ref=ref + tmp - Gamma(Gmu[mu])*tmp;
|
ref=ref + tmp - Gamma(Gmu[mu])*tmp;
|
||||||
|
|
||||||
tmp =adj(U[mu])*src;
|
{
|
||||||
|
autoView( tmp_v , tmp , CpuWrite);
|
||||||
|
autoView( U_v , U[mu] , CpuRead);
|
||||||
|
autoView( src_v, src , CpuRead);
|
||||||
|
for(int ss=0;ss<U[mu].Grid()->oSites();ss++){
|
||||||
|
for(int s=0;s<Ls;s++){
|
||||||
|
tmp_v[Ls*ss+s] = adj(U_v[ss])*src_v[Ls*ss+s];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
tmp =Cshift(tmp,mu+1,-1);
|
tmp =Cshift(tmp,mu+1,-1);
|
||||||
ref=ref + tmp + Gamma(Gmu[mu])*tmp;
|
ref=ref + tmp + Gamma(Gmu[mu])*tmp;
|
||||||
}
|
}
|
||||||
@ -167,11 +247,9 @@ int main (int argc, char ** argv)
|
|||||||
std::cout << GridLogMessage<< "* Kernel options --dslash-generic, --dslash-unroll, --dslash-asm" <<std::endl;
|
std::cout << GridLogMessage<< "* Kernel options --dslash-generic, --dslash-unroll, --dslash-asm" <<std::endl;
|
||||||
std::cout << GridLogMessage<< "*****************************************************************" <<std::endl;
|
std::cout << GridLogMessage<< "*****************************************************************" <<std::endl;
|
||||||
std::cout << GridLogMessage<< "*****************************************************************" <<std::endl;
|
std::cout << GridLogMessage<< "*****************************************************************" <<std::endl;
|
||||||
std::cout << GridLogMessage<< "* Benchmarking DomainWallFermionD::Dhop "<<std::endl;
|
std::cout << GridLogMessage<< "* Benchmarking DomainWallFermionR::Dhop "<<std::endl;
|
||||||
std::cout << GridLogMessage<< "* Vectorising space-time by "<<vComplex::Nsimd()<<std::endl;
|
std::cout << GridLogMessage<< "* Vectorising space-time by "<<Simd::Nsimd()<<std::endl;
|
||||||
std::cout << GridLogMessage<< "* VComplex size is "<<sizeof(vComplex)<< " B"<<std::endl;
|
std::cout << GridLogMessage<< "* VComplex size is "<<sizeof(Simd)<< " B"<<std::endl;
|
||||||
if ( sizeof(Real)==4 ) std::cout << GridLogMessage<< "* SINGLE precision "<<std::endl;
|
|
||||||
if ( sizeof(Real)==8 ) std::cout << GridLogMessage<< "* DOUBLE precision "<<std::endl;
|
|
||||||
#ifdef GRID_OMP
|
#ifdef GRID_OMP
|
||||||
if ( WilsonKernelsStatic::Comms == WilsonKernelsStatic::CommsAndCompute ) std::cout << GridLogMessage<< "* Using Overlapped Comms/Compute" <<std::endl;
|
if ( WilsonKernelsStatic::Comms == WilsonKernelsStatic::CommsAndCompute ) std::cout << GridLogMessage<< "* Using Overlapped Comms/Compute" <<std::endl;
|
||||||
if ( WilsonKernelsStatic::Comms == WilsonKernelsStatic::CommsThenCompute) std::cout << GridLogMessage<< "* Using sequential comms compute" <<std::endl;
|
if ( WilsonKernelsStatic::Comms == WilsonKernelsStatic::CommsThenCompute) std::cout << GridLogMessage<< "* Using sequential comms compute" <<std::endl;
|
||||||
@ -181,8 +259,14 @@ int main (int argc, char ** argv)
|
|||||||
if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptInlineAsm ) std::cout << GridLogMessage<< "* Using Asm Nc=3 WilsonKernels" <<std::endl;
|
if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptInlineAsm ) std::cout << GridLogMessage<< "* Using Asm Nc=3 WilsonKernels" <<std::endl;
|
||||||
std::cout << GridLogMessage<< "*****************************************************************" <<std::endl;
|
std::cout << GridLogMessage<< "*****************************************************************" <<std::endl;
|
||||||
|
|
||||||
DomainWallFermionD Dw(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5);
|
FermionAction::ImplParams p;
|
||||||
int ncall =1000;
|
p.dirichlet=Dirichlet;
|
||||||
|
FermionAction Dw(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5,p);
|
||||||
|
Dw.SloppyComms(sloppy);
|
||||||
|
Dw.ImportGauge(Umu);
|
||||||
|
|
||||||
|
int ncall =300;
|
||||||
|
RealD n2e;
|
||||||
|
|
||||||
if (1) {
|
if (1) {
|
||||||
FGrid->Barrier();
|
FGrid->Barrier();
|
||||||
@ -198,8 +282,8 @@ int main (int argc, char ** argv)
|
|||||||
double volume=Ls; for(int mu=0;mu<Nd;mu++) volume=volume*latt4[mu];
|
double volume=Ls; for(int mu=0;mu<Nd;mu++) volume=volume*latt4[mu];
|
||||||
double flops=single_site_flops*volume*ncall;
|
double flops=single_site_flops*volume*ncall;
|
||||||
|
|
||||||
auto nsimd = vComplex::Nsimd();
|
auto nsimd = Simd::Nsimd();
|
||||||
auto simdwidth = sizeof(vComplex);
|
auto simdwidth = sizeof(Simd);
|
||||||
|
|
||||||
// RF: Nd Wilson * Ls, Nd gauge * Ls, Nc colors
|
// RF: Nd Wilson * Ls, Nd gauge * Ls, Nc colors
|
||||||
double data_rf = volume * ((2*Nd+1)*Nd*Nc + 2*Nd*Nc*Nc) * simdwidth / nsimd * ncall / (1024.*1024.*1024.);
|
double data_rf = volume * ((2*Nd+1)*Nd*Nc + 2*Nd*Nc*Nc) * simdwidth / nsimd * ncall / (1024.*1024.*1024.);
|
||||||
@ -208,28 +292,27 @@ int main (int argc, char ** argv)
|
|||||||
double data_mem = (volume * (2*Nd+1)*Nd*Nc + (volume/Ls) *2*Nd*Nc*Nc) * simdwidth / nsimd * ncall / (1024.*1024.*1024.);
|
double data_mem = (volume * (2*Nd+1)*Nd*Nc + (volume/Ls) *2*Nd*Nc*Nc) * simdwidth / nsimd * ncall / (1024.*1024.*1024.);
|
||||||
|
|
||||||
std::cout<<GridLogMessage << "Called Dw "<<ncall<<" times in "<<t1-t0<<" us"<<std::endl;
|
std::cout<<GridLogMessage << "Called Dw "<<ncall<<" times in "<<t1-t0<<" us"<<std::endl;
|
||||||
// std::cout<<GridLogMessage << "norm result "<< norm2(result)<<std::endl;
|
|
||||||
// std::cout<<GridLogMessage << "norm ref "<< norm2(ref)<<std::endl;
|
|
||||||
std::cout<<GridLogMessage << "mflop/s = "<< flops/(t1-t0)<<std::endl;
|
std::cout<<GridLogMessage << "mflop/s = "<< flops/(t1-t0)<<std::endl;
|
||||||
std::cout<<GridLogMessage << "mflop/s per rank = "<< flops/(t1-t0)/NP<<std::endl;
|
std::cout<<GridLogMessage << "mflop/s per rank = "<< flops/(t1-t0)/NP<<std::endl;
|
||||||
std::cout<<GridLogMessage << "mflop/s per node = "<< flops/(t1-t0)/NN<<std::endl;
|
std::cout<<GridLogMessage << "mflop/s per node = "<< flops/(t1-t0)/NN<<std::endl;
|
||||||
std::cout<<GridLogMessage << "RF GiB/s (base 2) = "<< 1000000. * data_rf/((t1-t0))<<std::endl;
|
|
||||||
std::cout<<GridLogMessage << "mem GiB/s (base 2) = "<< 1000000. * data_mem/((t1-t0))<<std::endl;
|
|
||||||
err = ref-result;
|
err = ref-result;
|
||||||
std::cout<<GridLogMessage << "norm diff "<< norm2(err)<<std::endl;
|
n2e = norm2(err);
|
||||||
//exit(0);
|
std::cout<<GridLogMessage << "norm diff "<< n2e<< " Line "<<__LINE__ <<std::endl;
|
||||||
|
|
||||||
if(( norm2(err)>1.0e-4) ) {
|
if(( n2e>1.0e-4) ) {
|
||||||
/*
|
|
||||||
std::cout << "RESULT\n " << result<<std::endl;
|
|
||||||
std::cout << "REF \n " << ref <<std::endl;
|
|
||||||
std::cout << "ERR \n " << err <<std::endl;
|
|
||||||
*/
|
|
||||||
std::cout<<GridLogMessage << "WRONG RESULT" << std::endl;
|
std::cout<<GridLogMessage << "WRONG RESULT" << std::endl;
|
||||||
FGrid->Barrier();
|
FGrid->Barrier();
|
||||||
|
std::cout<<GridLogMessage << "RESULT" << std::endl;
|
||||||
|
// std::cout << result<<std::endl;
|
||||||
|
std::cout << norm2(result)<<std::endl;
|
||||||
|
std::cout<<GridLogMessage << "REF" << std::endl;
|
||||||
|
std::cout << norm2(ref)<<std::endl;
|
||||||
|
std::cout<<GridLogMessage << "ERR" << std::endl;
|
||||||
|
std::cout << norm2(err)<<std::endl;
|
||||||
|
FGrid->Barrier();
|
||||||
exit(-1);
|
exit(-1);
|
||||||
}
|
}
|
||||||
assert (norm2(err)< 1.0e-4 );
|
assert (n2e< 1.0e-4 );
|
||||||
}
|
}
|
||||||
|
|
||||||
if (1)
|
if (1)
|
||||||
@ -238,16 +321,30 @@ int main (int argc, char ** argv)
|
|||||||
for(int mu=0;mu<Nd;mu++){
|
for(int mu=0;mu<Nd;mu++){
|
||||||
|
|
||||||
// ref = src - Gamma(Gamma::Algebra::GammaX)* src ; // 1+gamma_x
|
// ref = src - Gamma(Gamma::Algebra::GammaX)* src ; // 1+gamma_x
|
||||||
tmp = U[mu]*Cshift(src,mu+1,1);
|
tmp = Cshift(src,mu+1,1);
|
||||||
{
|
{
|
||||||
autoView( ref_v, ref, CpuWrite);
|
autoView( ref_v, ref, CpuWrite);
|
||||||
autoView( tmp_v, tmp, CpuRead);
|
autoView( tmp_v, tmp, CpuRead);
|
||||||
for(int i=0;i<ref_v.size();i++){
|
autoView( U_v , U[mu] , CpuRead);
|
||||||
ref_v[i]+= tmp_v[i] + Gamma(Gmu[mu])*tmp_v[i]; ;
|
for(int ss=0;ss<U[mu].Grid()->oSites();ss++){
|
||||||
|
for(int s=0;s<Ls;s++){
|
||||||
|
int i=s+Ls*ss;
|
||||||
|
ref_v[i]+= U_v[ss]*(tmp_v[i] + Gamma(Gmu[mu])*tmp_v[i]); ;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
tmp =adj(U[mu])*src;
|
{
|
||||||
|
autoView( tmp_v , tmp , CpuWrite);
|
||||||
|
autoView( U_v , U[mu] , CpuRead);
|
||||||
|
autoView( src_v, src , CpuRead);
|
||||||
|
for(int ss=0;ss<U[mu].Grid()->oSites();ss++){
|
||||||
|
for(int s=0;s<Ls;s++){
|
||||||
|
tmp_v[Ls*ss+s] = adj(U_v[ss])*src_v[Ls*ss+s];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// tmp =adj(U[mu])*src;
|
||||||
tmp =Cshift(tmp,mu+1,-1);
|
tmp =Cshift(tmp,mu+1,-1);
|
||||||
{
|
{
|
||||||
autoView( ref_v, ref, CpuWrite);
|
autoView( ref_v, ref, CpuWrite);
|
||||||
@ -259,27 +356,27 @@ int main (int argc, char ** argv)
|
|||||||
}
|
}
|
||||||
ref = -0.5*ref;
|
ref = -0.5*ref;
|
||||||
}
|
}
|
||||||
// dump=1;
|
|
||||||
Dw.Dhop(src,result,1);
|
Dw.Dhop(src,result,DaggerYes);
|
||||||
|
|
||||||
|
std::cout << GridLogMessage << "----------------------------------------------------------------" << std::endl;
|
||||||
std::cout << GridLogMessage << "Compare to naive wilson implementation Dag to verify correctness" << std::endl;
|
std::cout << GridLogMessage << "Compare to naive wilson implementation Dag to verify correctness" << std::endl;
|
||||||
|
std::cout << GridLogMessage << "----------------------------------------------------------------" << std::endl;
|
||||||
|
|
||||||
std::cout<<GridLogMessage << "Called DwDag"<<std::endl;
|
std::cout<<GridLogMessage << "Called DwDag"<<std::endl;
|
||||||
std::cout<<GridLogMessage << "norm dag result "<< norm2(result)<<std::endl;
|
std::cout<<GridLogMessage << "norm dag result "<< norm2(result)<<std::endl;
|
||||||
std::cout<<GridLogMessage << "norm dag ref "<< norm2(ref)<<std::endl;
|
std::cout<<GridLogMessage << "norm dag ref "<< norm2(ref)<<std::endl;
|
||||||
err = ref-result;
|
err = ref-result;
|
||||||
std::cout<<GridLogMessage << "norm dag diff "<< norm2(err)<<std::endl;
|
n2e= norm2(err);
|
||||||
if((norm2(err)>1.0e-4)){
|
std::cout<<GridLogMessage << "norm dag diff "<< n2e<< " Line "<<__LINE__ <<std::endl;
|
||||||
/*
|
|
||||||
std::cout<< "DAG RESULT\n " <<ref << std::endl;
|
|
||||||
std::cout<< "DAG sRESULT\n " <<result << std::endl;
|
|
||||||
std::cout<< "DAG ERR \n " << err <<std::endl;
|
|
||||||
*/
|
|
||||||
}
|
|
||||||
LatticeFermion src_e (FrbGrid);
|
|
||||||
LatticeFermion src_o (FrbGrid);
|
|
||||||
LatticeFermion r_e (FrbGrid);
|
|
||||||
LatticeFermion r_o (FrbGrid);
|
|
||||||
LatticeFermion r_eo (FGrid);
|
|
||||||
|
|
||||||
|
assert((n2e)<1.0e-4);
|
||||||
|
|
||||||
|
FermionField src_e (FrbGrid);
|
||||||
|
FermionField src_o (FrbGrid);
|
||||||
|
FermionField r_e (FrbGrid);
|
||||||
|
FermionField r_o (FrbGrid);
|
||||||
|
FermionField r_eo (FGrid);
|
||||||
|
|
||||||
std::cout<<GridLogMessage << "Calling Deo and Doe and //assert Deo+Doe == Dunprec"<<std::endl;
|
std::cout<<GridLogMessage << "Calling Deo and Doe and //assert Deo+Doe == Dunprec"<<std::endl;
|
||||||
pickCheckerboard(Even,src_e,src);
|
pickCheckerboard(Even,src_e,src);
|
||||||
@ -291,10 +388,8 @@ int main (int argc, char ** argv)
|
|||||||
|
|
||||||
// S-direction is INNERMOST and takes no part in the parity.
|
// S-direction is INNERMOST and takes no part in the parity.
|
||||||
std::cout << GridLogMessage<< "*********************************************************" <<std::endl;
|
std::cout << GridLogMessage<< "*********************************************************" <<std::endl;
|
||||||
std::cout << GridLogMessage<< "* Benchmarking DomainWallFermionD::DhopEO "<<std::endl;
|
std::cout << GridLogMessage<< "* Benchmarking DomainWallFermion::DhopEO "<<std::endl;
|
||||||
std::cout << GridLogMessage<< "* Vectorising space-time by "<<vComplex::Nsimd()<<std::endl;
|
std::cout << GridLogMessage<< "* Vectorising space-time by "<<Simd::Nsimd()<<std::endl;
|
||||||
if ( sizeof(Real)==4 ) std::cout << GridLogMessage<< "* SINGLE precision "<<std::endl;
|
|
||||||
if ( sizeof(Real)==8 ) std::cout << GridLogMessage<< "* DOUBLE precision "<<std::endl;
|
|
||||||
#ifdef GRID_OMP
|
#ifdef GRID_OMP
|
||||||
if ( WilsonKernelsStatic::Comms == WilsonKernelsStatic::CommsAndCompute ) std::cout << GridLogMessage<< "* Using Overlapped Comms/Compute" <<std::endl;
|
if ( WilsonKernelsStatic::Comms == WilsonKernelsStatic::CommsAndCompute ) std::cout << GridLogMessage<< "* Using Overlapped Comms/Compute" <<std::endl;
|
||||||
if ( WilsonKernelsStatic::Comms == WilsonKernelsStatic::CommsThenCompute) std::cout << GridLogMessage<< "* Using sequential comms compute" <<std::endl;
|
if ( WilsonKernelsStatic::Comms == WilsonKernelsStatic::CommsThenCompute) std::cout << GridLogMessage<< "* Using sequential comms compute" <<std::endl;
|
||||||
@ -308,13 +403,7 @@ int main (int argc, char ** argv)
|
|||||||
Dw.DhopEO(src_o,r_e,DaggerNo);
|
Dw.DhopEO(src_o,r_e,DaggerNo);
|
||||||
double t0=usecond();
|
double t0=usecond();
|
||||||
for(int i=0;i<ncall;i++){
|
for(int i=0;i<ncall;i++){
|
||||||
#ifdef CUDA_PROFILE
|
|
||||||
if(i==10) cudaProfilerStart();
|
|
||||||
#endif
|
|
||||||
Dw.DhopEO(src_o,r_e,DaggerNo);
|
Dw.DhopEO(src_o,r_e,DaggerNo);
|
||||||
#ifdef CUDA_PROFILE
|
|
||||||
if(i==20) cudaProfilerStop();
|
|
||||||
#endif
|
|
||||||
}
|
}
|
||||||
double t1=usecond();
|
double t1=usecond();
|
||||||
FGrid->Barrier();
|
FGrid->Barrier();
|
||||||
@ -338,14 +427,9 @@ int main (int argc, char ** argv)
|
|||||||
setCheckerboard(r_eo,r_e);
|
setCheckerboard(r_eo,r_e);
|
||||||
|
|
||||||
err = r_eo-result;
|
err = r_eo-result;
|
||||||
std::cout<<GridLogMessage << "norm diff "<< norm2(err)<<std::endl;
|
n2e= norm2(err);
|
||||||
if((norm2(err)>1.0e-4)){
|
std::cout<<GridLogMessage << "norm diff "<< n2e<<std::endl;
|
||||||
/*
|
assert(n2e<1.0e-4);
|
||||||
std::cout<< "Deo RESULT\n " <<r_eo << std::endl;
|
|
||||||
std::cout<< "Deo REF\n " <<result << std::endl;
|
|
||||||
std::cout<< "Deo ERR \n " << err <<std::endl;
|
|
||||||
*/
|
|
||||||
}
|
|
||||||
|
|
||||||
pickCheckerboard(Even,src_e,err);
|
pickCheckerboard(Even,src_e,err);
|
||||||
pickCheckerboard(Odd,src_o,err);
|
pickCheckerboard(Odd,src_o,err);
|
||||||
@ -354,6 +438,4 @@ int main (int argc, char ** argv)
|
|||||||
|
|
||||||
assert(norm2(src_e)<1.0e-4);
|
assert(norm2(src_e)<1.0e-4);
|
||||||
assert(norm2(src_o)<1.0e-4);
|
assert(norm2(src_o)<1.0e-4);
|
||||||
Grid_finalize();
|
|
||||||
exit(0);
|
|
||||||
}
|
}
|
||||||
|
Reference in New Issue
Block a user