mirror of
https://github.com/paboyle/Grid.git
synced 2024-11-09 23:45:36 +00:00
Merge branch 'develop' into release/v0.6.0
This commit is contained in:
commit
604f0ea2f6
@ -193,6 +193,7 @@ int main (int argc, char ** argv)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
Nloop=100;
|
Nloop=100;
|
||||||
std::cout<<GridLogMessage << "===================================================================================================="<<std::endl;
|
std::cout<<GridLogMessage << "===================================================================================================="<<std::endl;
|
||||||
std::cout<<GridLogMessage << "= Benchmarking concurrent STENCIL halo exchange in "<<nmu<<" dimensions"<<std::endl;
|
std::cout<<GridLogMessage << "= Benchmarking concurrent STENCIL halo exchange in "<<nmu<<" dimensions"<<std::endl;
|
||||||
@ -271,5 +272,90 @@ int main (int argc, char ** argv)
|
|||||||
std::cout<<GridLogMessage << lat<<"\t\t"<<Ls<<"\t\t"<<bytes<<"\t\t"<<xbytes/time<<"\t\t"<<bidibytes/time<<std::endl;
|
std::cout<<GridLogMessage << lat<<"\t\t"<<Ls<<"\t\t"<<bytes<<"\t\t"<<xbytes/time<<"\t\t"<<bidibytes/time<<std::endl;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
Nloop=100;
|
||||||
|
std::cout<<GridLogMessage << "===================================================================================================="<<std::endl;
|
||||||
|
std::cout<<GridLogMessage << "= Benchmarking sequential STENCIL halo exchange in "<<nmu<<" dimensions"<<std::endl;
|
||||||
|
std::cout<<GridLogMessage << "===================================================================================================="<<std::endl;
|
||||||
|
std::cout<<GridLogMessage << " L "<<"\t\t"<<" Ls "<<"\t\t"<<"bytes"<<"\t\t"<<"MB/s uni"<<"\t\t"<<"MB/s bidi"<<std::endl;
|
||||||
|
|
||||||
|
for(int lat=4;lat<=maxlat;lat+=2){
|
||||||
|
for(int Ls=1;Ls<=16;Ls*=2){
|
||||||
|
|
||||||
|
std::vector<int> latt_size ({lat*mpi_layout[0],
|
||||||
|
lat*mpi_layout[1],
|
||||||
|
lat*mpi_layout[2],
|
||||||
|
lat*mpi_layout[3]});
|
||||||
|
|
||||||
|
GridCartesian Grid(latt_size,simd_layout,mpi_layout);
|
||||||
|
|
||||||
|
std::vector<HalfSpinColourVectorD *> xbuf(8);
|
||||||
|
std::vector<HalfSpinColourVectorD *> rbuf(8);
|
||||||
|
Grid.ShmBufferFreeAll();
|
||||||
|
for(int d=0;d<8;d++){
|
||||||
|
xbuf[d] = (HalfSpinColourVectorD *)Grid.ShmBufferMalloc(lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD));
|
||||||
|
rbuf[d] = (HalfSpinColourVectorD *)Grid.ShmBufferMalloc(lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD));
|
||||||
|
}
|
||||||
|
|
||||||
|
int ncomm;
|
||||||
|
int bytes=lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD);
|
||||||
|
|
||||||
|
double start=usecond();
|
||||||
|
for(int i=0;i<Nloop;i++){
|
||||||
|
|
||||||
|
std::vector<CartesianCommunicator::CommsRequest_t> requests;
|
||||||
|
|
||||||
|
ncomm=0;
|
||||||
|
for(int mu=0;mu<4;mu++){
|
||||||
|
|
||||||
|
if (mpi_layout[mu]>1 ) {
|
||||||
|
|
||||||
|
ncomm++;
|
||||||
|
int comm_proc=1;
|
||||||
|
int xmit_to_rank;
|
||||||
|
int recv_from_rank;
|
||||||
|
|
||||||
|
Grid.ShiftedRanks(mu,comm_proc,xmit_to_rank,recv_from_rank);
|
||||||
|
Grid.StencilSendToRecvFromBegin(requests,
|
||||||
|
(void *)&xbuf[mu][0],
|
||||||
|
xmit_to_rank,
|
||||||
|
(void *)&rbuf[mu][0],
|
||||||
|
recv_from_rank,
|
||||||
|
bytes);
|
||||||
|
// Grid.StencilSendToRecvFromComplete(requests);
|
||||||
|
// requests.resize(0);
|
||||||
|
|
||||||
|
comm_proc = mpi_layout[mu]-1;
|
||||||
|
|
||||||
|
Grid.ShiftedRanks(mu,comm_proc,xmit_to_rank,recv_from_rank);
|
||||||
|
Grid.StencilSendToRecvFromBegin(requests,
|
||||||
|
(void *)&xbuf[mu+4][0],
|
||||||
|
xmit_to_rank,
|
||||||
|
(void *)&rbuf[mu+4][0],
|
||||||
|
recv_from_rank,
|
||||||
|
bytes);
|
||||||
|
Grid.StencilSendToRecvFromComplete(requests);
|
||||||
|
requests.resize(0);
|
||||||
|
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Grid.Barrier();
|
||||||
|
|
||||||
|
}
|
||||||
|
double stop=usecond();
|
||||||
|
|
||||||
|
double dbytes = bytes;
|
||||||
|
double xbytes = Nloop*dbytes*2.0*ncomm;
|
||||||
|
double rbytes = xbytes;
|
||||||
|
double bidibytes = xbytes+rbytes;
|
||||||
|
|
||||||
|
double time = stop-start; // microseconds
|
||||||
|
|
||||||
|
std::cout<<GridLogMessage << lat<<"\t\t"<<Ls<<"\t\t"<<bytes<<"\t\t"<<xbytes/time<<"\t\t"<<bidibytes/time<<std::endl;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
Grid_finalize();
|
Grid_finalize();
|
||||||
}
|
}
|
||||||
|
@ -57,7 +57,7 @@ int main (int argc, char ** argv)
|
|||||||
std::cout<<GridLogMessage << "Grid is setup to use "<<threads<<" threads"<<std::endl;
|
std::cout<<GridLogMessage << "Grid is setup to use "<<threads<<" threads"<<std::endl;
|
||||||
|
|
||||||
std::vector<int> latt4 = GridDefaultLatt();
|
std::vector<int> latt4 = GridDefaultLatt();
|
||||||
const int Ls=16;
|
const int Ls=8;
|
||||||
GridCartesian * UGrid = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(), GridDefaultSimd(Nd,vComplex::Nsimd()),GridDefaultMpi());
|
GridCartesian * UGrid = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(), GridDefaultSimd(Nd,vComplex::Nsimd()),GridDefaultMpi());
|
||||||
GridRedBlackCartesian * UrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid);
|
GridRedBlackCartesian * UrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid);
|
||||||
GridCartesian * FGrid = SpaceTimeGrid::makeFiveDimGrid(Ls,UGrid);
|
GridCartesian * FGrid = SpaceTimeGrid::makeFiveDimGrid(Ls,UGrid);
|
||||||
@ -138,7 +138,7 @@ int main (int argc, char ** argv)
|
|||||||
|
|
||||||
int ncall =100;
|
int ncall =100;
|
||||||
if (1) {
|
if (1) {
|
||||||
|
FGrid->Barrier();
|
||||||
Dw.ZeroCounters();
|
Dw.ZeroCounters();
|
||||||
double t0=usecond();
|
double t0=usecond();
|
||||||
for(int i=0;i<ncall;i++){
|
for(int i=0;i<ncall;i++){
|
||||||
@ -147,6 +147,7 @@ int main (int argc, char ** argv)
|
|||||||
__SSC_STOP;
|
__SSC_STOP;
|
||||||
}
|
}
|
||||||
double t1=usecond();
|
double t1=usecond();
|
||||||
|
FGrid->Barrier();
|
||||||
|
|
||||||
double volume=Ls; for(int mu=0;mu<Nd;mu++) volume=volume*latt4[mu];
|
double volume=Ls; for(int mu=0;mu<Nd;mu++) volume=volume*latt4[mu];
|
||||||
double flops=1344*volume*ncall;
|
double flops=1344*volume*ncall;
|
||||||
@ -158,7 +159,7 @@ int main (int argc, char ** argv)
|
|||||||
std::cout<<GridLogMessage << "mflop/s per rank = "<< flops/(t1-t0)/NP<<std::endl;
|
std::cout<<GridLogMessage << "mflop/s per rank = "<< flops/(t1-t0)/NP<<std::endl;
|
||||||
err = ref-result;
|
err = ref-result;
|
||||||
std::cout<<GridLogMessage << "norm diff "<< norm2(err)<<std::endl;
|
std::cout<<GridLogMessage << "norm diff "<< norm2(err)<<std::endl;
|
||||||
assert (norm2(err)< 1.0e-5 );
|
assert (norm2(err)< 1.0e-4 );
|
||||||
Dw.Report();
|
Dw.Report();
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -193,6 +194,7 @@ int main (int argc, char ** argv)
|
|||||||
pokeSite(tmp,ssrc,site);
|
pokeSite(tmp,ssrc,site);
|
||||||
}}}}}
|
}}}}}
|
||||||
std::cout<<GridLogMessage<< "src norms "<< norm2(src)<<" " <<norm2(ssrc)<<std::endl;
|
std::cout<<GridLogMessage<< "src norms "<< norm2(src)<<" " <<norm2(ssrc)<<std::endl;
|
||||||
|
FGrid->Barrier();
|
||||||
double t0=usecond();
|
double t0=usecond();
|
||||||
sDw.ZeroCounters();
|
sDw.ZeroCounters();
|
||||||
for(int i=0;i<ncall;i++){
|
for(int i=0;i<ncall;i++){
|
||||||
@ -201,6 +203,7 @@ int main (int argc, char ** argv)
|
|||||||
__SSC_STOP;
|
__SSC_STOP;
|
||||||
}
|
}
|
||||||
double t1=usecond();
|
double t1=usecond();
|
||||||
|
FGrid->Barrier();
|
||||||
double volume=Ls; for(int mu=0;mu<Nd;mu++) volume=volume*latt4[mu];
|
double volume=Ls; for(int mu=0;mu<Nd;mu++) volume=volume*latt4[mu];
|
||||||
double flops=1344*volume*ncall;
|
double flops=1344*volume*ncall;
|
||||||
|
|
||||||
@ -211,12 +214,12 @@ int main (int argc, char ** argv)
|
|||||||
|
|
||||||
if(0){
|
if(0){
|
||||||
for(int i=0;i< PerformanceCounter::NumTypes(); i++ ){
|
for(int i=0;i< PerformanceCounter::NumTypes(); i++ ){
|
||||||
sDw.Dhop(ssrc,sresult,0);
|
sDw.Dhop(ssrc,sresult,0);
|
||||||
PerformanceCounter Counter(i);
|
PerformanceCounter Counter(i);
|
||||||
Counter.Start();
|
Counter.Start();
|
||||||
sDw.Dhop(ssrc,sresult,0);
|
sDw.Dhop(ssrc,sresult,0);
|
||||||
Counter.Stop();
|
Counter.Stop();
|
||||||
Counter.Report();
|
Counter.Report();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -240,7 +243,7 @@ int main (int argc, char ** argv)
|
|||||||
}
|
}
|
||||||
}}}}}
|
}}}}}
|
||||||
std::cout<<GridLogMessage<<" difference between normal and simd is "<<sum<<std::endl;
|
std::cout<<GridLogMessage<<" difference between normal and simd is "<<sum<<std::endl;
|
||||||
assert (sum< 1.0e-5 );
|
assert (sum< 1.0e-4 );
|
||||||
|
|
||||||
|
|
||||||
if (1) {
|
if (1) {
|
||||||
@ -271,6 +274,7 @@ int main (int argc, char ** argv)
|
|||||||
if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptInlineAsm ) std::cout << GridLogMessage<< "* Using Asm Nc=3 WilsonKernels" <<std::endl;
|
if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptInlineAsm ) std::cout << GridLogMessage<< "* Using Asm Nc=3 WilsonKernels" <<std::endl;
|
||||||
std::cout << GridLogMessage<< "*********************************************************" <<std::endl;
|
std::cout << GridLogMessage<< "*********************************************************" <<std::endl;
|
||||||
|
|
||||||
|
FGrid->Barrier();
|
||||||
sDw.ZeroCounters();
|
sDw.ZeroCounters();
|
||||||
sDw.stat.init("DhopEO");
|
sDw.stat.init("DhopEO");
|
||||||
double t0=usecond();
|
double t0=usecond();
|
||||||
@ -278,6 +282,7 @@ int main (int argc, char ** argv)
|
|||||||
sDw.DhopEO(ssrc_o, sr_e, DaggerNo);
|
sDw.DhopEO(ssrc_o, sr_e, DaggerNo);
|
||||||
}
|
}
|
||||||
double t1=usecond();
|
double t1=usecond();
|
||||||
|
FGrid->Barrier();
|
||||||
sDw.stat.print();
|
sDw.stat.print();
|
||||||
|
|
||||||
double volume=Ls; for(int mu=0;mu<Nd;mu++) volume=volume*latt4[mu];
|
double volume=Ls; for(int mu=0;mu<Nd;mu++) volume=volume*latt4[mu];
|
||||||
@ -301,7 +306,7 @@ int main (int argc, char ** argv)
|
|||||||
|
|
||||||
error+= norm2(ssrc_o);
|
error+= norm2(ssrc_o);
|
||||||
std::cout<<GridLogMessage << "sO norm diff "<< norm2(ssrc_o)<< " vec nrm"<<norm2(sr_o) <<std::endl;
|
std::cout<<GridLogMessage << "sO norm diff "<< norm2(ssrc_o)<< " vec nrm"<<norm2(sr_o) <<std::endl;
|
||||||
if(error>1.0e-5) {
|
if(error>1.0e-4) {
|
||||||
setCheckerboard(ssrc,ssrc_o);
|
setCheckerboard(ssrc,ssrc_o);
|
||||||
setCheckerboard(ssrc,ssrc_e);
|
setCheckerboard(ssrc,ssrc_e);
|
||||||
std::cout<< ssrc << std::endl;
|
std::cout<< ssrc << std::endl;
|
||||||
@ -337,7 +342,7 @@ int main (int argc, char ** argv)
|
|||||||
std::cout<<GridLogMessage << "norm ref "<< norm2(ref)<<std::endl;
|
std::cout<<GridLogMessage << "norm ref "<< norm2(ref)<<std::endl;
|
||||||
err = ref-result;
|
err = ref-result;
|
||||||
std::cout<<GridLogMessage << "norm diff "<< norm2(err)<<std::endl;
|
std::cout<<GridLogMessage << "norm diff "<< norm2(err)<<std::endl;
|
||||||
assert(norm2(err)<1.0e-5);
|
assert(norm2(err)<1.0e-4);
|
||||||
LatticeFermion src_e (FrbGrid);
|
LatticeFermion src_e (FrbGrid);
|
||||||
LatticeFermion src_o (FrbGrid);
|
LatticeFermion src_o (FrbGrid);
|
||||||
LatticeFermion r_e (FrbGrid);
|
LatticeFermion r_e (FrbGrid);
|
||||||
@ -363,11 +368,13 @@ int main (int argc, char ** argv)
|
|||||||
std::cout << GridLogMessage<< "*********************************************************" <<std::endl;
|
std::cout << GridLogMessage<< "*********************************************************" <<std::endl;
|
||||||
{
|
{
|
||||||
Dw.ZeroCounters();
|
Dw.ZeroCounters();
|
||||||
|
FGrid->Barrier();
|
||||||
double t0=usecond();
|
double t0=usecond();
|
||||||
for(int i=0;i<ncall;i++){
|
for(int i=0;i<ncall;i++){
|
||||||
Dw.DhopEO(src_o,r_e,DaggerNo);
|
Dw.DhopEO(src_o,r_e,DaggerNo);
|
||||||
}
|
}
|
||||||
double t1=usecond();
|
double t1=usecond();
|
||||||
|
FGrid->Barrier();
|
||||||
|
|
||||||
double volume=Ls; for(int mu=0;mu<Nd;mu++) volume=volume*latt4[mu];
|
double volume=Ls; for(int mu=0;mu<Nd;mu++) volume=volume*latt4[mu];
|
||||||
double flops=(1344.0*volume*ncall)/2;
|
double flops=(1344.0*volume*ncall)/2;
|
||||||
@ -389,14 +396,14 @@ int main (int argc, char ** argv)
|
|||||||
|
|
||||||
err = r_eo-result;
|
err = r_eo-result;
|
||||||
std::cout<<GridLogMessage << "norm diff "<< norm2(err)<<std::endl;
|
std::cout<<GridLogMessage << "norm diff "<< norm2(err)<<std::endl;
|
||||||
assert(norm2(err)<1.0e-5);
|
assert(norm2(err)<1.0e-4);
|
||||||
|
|
||||||
pickCheckerboard(Even,src_e,err);
|
pickCheckerboard(Even,src_e,err);
|
||||||
pickCheckerboard(Odd,src_o,err);
|
pickCheckerboard(Odd,src_o,err);
|
||||||
std::cout<<GridLogMessage << "norm diff even "<< norm2(src_e)<<std::endl;
|
std::cout<<GridLogMessage << "norm diff even "<< norm2(src_e)<<std::endl;
|
||||||
std::cout<<GridLogMessage << "norm diff odd "<< norm2(src_o)<<std::endl;
|
std::cout<<GridLogMessage << "norm diff odd "<< norm2(src_o)<<std::endl;
|
||||||
assert(norm2(src_e)<1.0e-5);
|
assert(norm2(src_e)<1.0e-4);
|
||||||
assert(norm2(src_o)<1.0e-5);
|
assert(norm2(src_o)<1.0e-4);
|
||||||
|
|
||||||
Grid_finalize();
|
Grid_finalize();
|
||||||
}
|
}
|
||||||
|
@ -69,8 +69,8 @@ int main (int argc, char ** argv)
|
|||||||
std::cout<<GridLogMessage << "Volume \t\t\tProcs \t Dw \t eoDw \t sDw \t eosDw (Mflop/s) "<<std::endl;
|
std::cout<<GridLogMessage << "Volume \t\t\tProcs \t Dw \t eoDw \t sDw \t eosDw (Mflop/s) "<<std::endl;
|
||||||
std::cout<<GridLogMessage << "=========================================================================="<<std::endl;
|
std::cout<<GridLogMessage << "=========================================================================="<<std::endl;
|
||||||
|
|
||||||
int Lmax=32;
|
int Lmax=16;
|
||||||
int dmin=0;
|
int dmin=2;
|
||||||
if ( getenv("LMAX") ) Lmax=atoi(getenv("LMAX"));
|
if ( getenv("LMAX") ) Lmax=atoi(getenv("LMAX"));
|
||||||
if ( getenv("DMIN") ) dmin=atoi(getenv("DMIN"));
|
if ( getenv("DMIN") ) dmin=atoi(getenv("DMIN"));
|
||||||
for (int L=8;L<=Lmax;L*=2){
|
for (int L=8;L<=Lmax;L*=2){
|
||||||
|
@ -369,7 +369,7 @@ void Grid_init(int *argc,char ***argv)
|
|||||||
|
|
||||||
void Grid_finalize(void)
|
void Grid_finalize(void)
|
||||||
{
|
{
|
||||||
#if defined (GRID_COMMS_MPI) || defined (GRID_COMMS_MPI3)
|
#if defined (GRID_COMMS_MPI) || defined (GRID_COMMS_MPI3)
|
||||||
MPI_Finalize();
|
MPI_Finalize();
|
||||||
Grid_unquiesce_nodes();
|
Grid_unquiesce_nodes();
|
||||||
#endif
|
#endif
|
||||||
|
@ -93,7 +93,7 @@ void GridLogConfigure(std::vector<std::string> &logstreams) {
|
|||||||
////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////
|
||||||
void Grid_quiesce_nodes(void) {
|
void Grid_quiesce_nodes(void) {
|
||||||
int me = 0;
|
int me = 0;
|
||||||
#if defined(GRID_COMMS_MPI) || defined(GRID_COMMS_MPI3)
|
#if defined(GRID_COMMS_MPI) || defined(GRID_COMMS_MPI3) || defined(GRID_COMMS_MPI3L)
|
||||||
MPI_Comm_rank(MPI_COMM_WORLD, &me);
|
MPI_Comm_rank(MPI_COMM_WORLD, &me);
|
||||||
#endif
|
#endif
|
||||||
#ifdef GRID_COMMS_SHMEM
|
#ifdef GRID_COMMS_SHMEM
|
||||||
|
@ -154,7 +154,7 @@ class ConjugateGradient : public OperatorFunction<Field> {
|
|||||||
<< LinalgTimer.Elapsed();
|
<< LinalgTimer.Elapsed();
|
||||||
std::cout << std::endl;
|
std::cout << std::endl;
|
||||||
|
|
||||||
if (ErrorOnNoConverge) assert(true_residual / Tolerance < 1000.0);
|
if (ErrorOnNoConverge) assert(true_residual / Tolerance < 10000.0);
|
||||||
|
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
@ -116,7 +116,7 @@ class NerscHmcRunnerTemplate {
|
|||||||
NoSmearing<Gimpl> SmearingPolicy;
|
NoSmearing<Gimpl> SmearingPolicy;
|
||||||
typedef MinimumNorm2<GaugeField, NoSmearing<Gimpl>, RepresentationsPolicy >
|
typedef MinimumNorm2<GaugeField, NoSmearing<Gimpl>, RepresentationsPolicy >
|
||||||
IntegratorType; // change here to change the algorithm
|
IntegratorType; // change here to change the algorithm
|
||||||
IntegratorParameters MDpar(20, 1.0);
|
IntegratorParameters MDpar(40, 1.0);
|
||||||
IntegratorType MDynamics(UGrid, MDpar, TheAction, SmearingPolicy);
|
IntegratorType MDynamics(UGrid, MDpar, TheAction, SmearingPolicy);
|
||||||
|
|
||||||
// Checkpoint strategy
|
// Checkpoint strategy
|
||||||
|
@ -68,7 +68,7 @@ class HmcRunner : public NerscHmcRunner {
|
|||||||
TwoFlavourPseudoFermionAction<ImplPolicy> Nf2(FermOp, CG, CG);
|
TwoFlavourPseudoFermionAction<ImplPolicy> Nf2(FermOp, CG, CG);
|
||||||
|
|
||||||
// Set smearing (true/false), default: false
|
// Set smearing (true/false), default: false
|
||||||
Nf2.is_smeared = true;
|
Nf2.is_smeared = false;
|
||||||
|
|
||||||
// Collect actions
|
// Collect actions
|
||||||
ActionLevel<LatticeGaugeField> Level1(1);
|
ActionLevel<LatticeGaugeField> Level1(1);
|
||||||
|
Loading…
Reference in New Issue
Block a user