mirror of
https://github.com/paboyle/Grid.git
synced 2025-04-03 02:35:55 +01:00
Merge branch 'develop' of https://github.com/paboyle/Grid into develop
This commit is contained in:
commit
c897878776
@ -35,24 +35,25 @@ using namespace Grid::QCD;
|
|||||||
int main (int argc, char ** argv)
|
int main (int argc, char ** argv)
|
||||||
{
|
{
|
||||||
Grid_init(&argc,&argv);
|
Grid_init(&argc,&argv);
|
||||||
#define LMAX (40)
|
#define LMAX (16)
|
||||||
|
#define LMIN (16)
|
||||||
#define LINC (4)
|
#define LINC (4)
|
||||||
|
|
||||||
int64_t Nloop=20;
|
int64_t Nloop=2000;
|
||||||
|
|
||||||
std::vector<int> simd_layout = GridDefaultSimd(Nd,vComplex::Nsimd());
|
std::vector<int> simd_layout = GridDefaultSimd(Nd,vComplex::Nsimd());
|
||||||
std::vector<int> mpi_layout = GridDefaultMpi();
|
std::vector<int> mpi_layout = GridDefaultMpi();
|
||||||
|
|
||||||
int64_t threads = GridThread::GetThreads();
|
int64_t threads = GridThread::GetThreads();
|
||||||
std::cout<<GridLogMessage << "Grid is setup to use "<<threads<<" threads"<<std::endl;
|
std::cout<<GridLogMessage << "Grid is setup to use "<<threads<<" threads"<<std::endl;
|
||||||
|
#if 1
|
||||||
std::cout<<GridLogMessage << "===================================================================================================="<<std::endl;
|
std::cout<<GridLogMessage << "===================================================================================================="<<std::endl;
|
||||||
std::cout<<GridLogMessage << "= Benchmarking SU3xSU3 x= x*y"<<std::endl;
|
std::cout<<GridLogMessage << "= Benchmarking SU3xSU3 x= x*y"<<std::endl;
|
||||||
std::cout<<GridLogMessage << "===================================================================================================="<<std::endl;
|
std::cout<<GridLogMessage << "===================================================================================================="<<std::endl;
|
||||||
std::cout<<GridLogMessage << " L "<<"\t\t"<<"bytes"<<"\t\t\t"<<"GB/s\t\t GFlop/s"<<std::endl;
|
std::cout<<GridLogMessage << " L "<<"\t\t"<<"bytes"<<"\t\t\t"<<"GB/s\t\t GFlop/s"<<std::endl;
|
||||||
std::cout<<GridLogMessage << "----------------------------------------------------------"<<std::endl;
|
std::cout<<GridLogMessage << "----------------------------------------------------------"<<std::endl;
|
||||||
|
|
||||||
for(int lat=2;lat<=LMAX;lat+=LINC){
|
for(int lat=4;lat<=LMAX;lat+=LINC){
|
||||||
|
|
||||||
std::vector<int> latt_size ({lat*mpi_layout[0],lat*mpi_layout[1],lat*mpi_layout[2],lat*mpi_layout[3]});
|
std::vector<int> latt_size ({lat*mpi_layout[0],lat*mpi_layout[1],lat*mpi_layout[2],lat*mpi_layout[3]});
|
||||||
int64_t vol = latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3];
|
int64_t vol = latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3];
|
||||||
@ -147,30 +148,31 @@ int main (int argc, char ** argv)
|
|||||||
std::cout<<GridLogMessage << "----------------------------------------------------------"<<std::endl;
|
std::cout<<GridLogMessage << "----------------------------------------------------------"<<std::endl;
|
||||||
|
|
||||||
for(int lat=2;lat<=LMAX;lat+=LINC){
|
for(int lat=2;lat<=LMAX;lat+=LINC){
|
||||||
|
|
||||||
std::vector<int> latt_size ({lat*mpi_layout[0],lat*mpi_layout[1],lat*mpi_layout[2],lat*mpi_layout[3]});
|
std::vector<int> latt_size ({lat*mpi_layout[0],lat*mpi_layout[1],lat*mpi_layout[2],lat*mpi_layout[3]});
|
||||||
int64_t vol = latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3];
|
int64_t vol = latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3];
|
||||||
|
|
||||||
GridCartesian Grid(latt_size,simd_layout,mpi_layout);
|
GridCartesian Grid(latt_size,simd_layout,mpi_layout);
|
||||||
GridParallelRNG pRNG(&Grid); pRNG.SeedFixedIntegers(std::vector<int>({45,12,81,9}));
|
GridParallelRNG pRNG(&Grid); pRNG.SeedFixedIntegers(std::vector<int>({45,12,81,9}));
|
||||||
|
|
||||||
LatticeColourMatrix z(&Grid); random(pRNG,z);
|
LatticeColourMatrix z(&Grid); random(pRNG,z);
|
||||||
LatticeColourMatrix x(&Grid); random(pRNG,x);
|
LatticeColourMatrix x(&Grid); random(pRNG,x);
|
||||||
LatticeColourMatrix y(&Grid); random(pRNG,y);
|
LatticeColourMatrix y(&Grid); random(pRNG,y);
|
||||||
|
|
||||||
double start=usecond();
|
double start=usecond();
|
||||||
for(int64_t i=0;i<Nloop;i++){
|
for(int64_t i=0;i<Nloop;i++){
|
||||||
mac(z,x,y);
|
mac(z,x,y);
|
||||||
}
|
|
||||||
double stop=usecond();
|
|
||||||
double time = (stop-start)/Nloop*1000.0;
|
|
||||||
|
|
||||||
double bytes=3*vol*Nc*Nc*sizeof(Complex);
|
|
||||||
double flops=Nc*Nc*(6+8+8)*vol;
|
|
||||||
std::cout<<GridLogMessage<<std::setprecision(3) << lat<<"\t\t"<<bytes<<" \t\t"<<bytes/time<<"\t\t" << flops/time<<std::endl;
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
double stop=usecond();
|
||||||
|
double time = (stop-start)/Nloop*1000.0;
|
||||||
|
|
||||||
|
double bytes=3*vol*Nc*Nc*sizeof(Complex);
|
||||||
|
double flops=Nc*Nc*(6+8+8)*vol;
|
||||||
|
std::cout<<GridLogMessage<<std::setprecision(3) << lat<<"\t\t"<<bytes<<" \t\t"<<bytes/time<<"\t\t" << flops/time<<std::endl;
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
#endif
|
||||||
|
|
||||||
std::cout<<GridLogMessage << "===================================================================================================="<<std::endl;
|
std::cout<<GridLogMessage << "===================================================================================================="<<std::endl;
|
||||||
std::cout<<GridLogMessage << "= Benchmarking SU3xSU3 CovShiftForward(z,x,y)"<<std::endl;
|
std::cout<<GridLogMessage << "= Benchmarking SU3xSU3 CovShiftForward(z,x,y)"<<std::endl;
|
||||||
@ -178,7 +180,7 @@ int main (int argc, char ** argv)
|
|||||||
std::cout<<GridLogMessage << " L "<<"\t\t"<<"bytes"<<"\t\t\t"<<"GB/s\t\t GFlop/s"<<std::endl;
|
std::cout<<GridLogMessage << " L "<<"\t\t"<<"bytes"<<"\t\t\t"<<"GB/s\t\t GFlop/s"<<std::endl;
|
||||||
std::cout<<GridLogMessage << "----------------------------------------------------------"<<std::endl;
|
std::cout<<GridLogMessage << "----------------------------------------------------------"<<std::endl;
|
||||||
|
|
||||||
for(int lat=2;lat<=LMAX;lat+=LINC){
|
for(int lat=LMIN;lat<=LMAX;lat+=LINC){
|
||||||
|
|
||||||
std::vector<int> latt_size ({lat*mpi_layout[0],lat*mpi_layout[1],lat*mpi_layout[2],lat*mpi_layout[3]});
|
std::vector<int> latt_size ({lat*mpi_layout[0],lat*mpi_layout[1],lat*mpi_layout[2],lat*mpi_layout[3]});
|
||||||
int64_t vol = latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3];
|
int64_t vol = latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3];
|
||||||
@ -190,7 +192,7 @@ int main (int argc, char ** argv)
|
|||||||
LatticeColourMatrix x(&Grid); random(pRNG,x);
|
LatticeColourMatrix x(&Grid); random(pRNG,x);
|
||||||
LatticeColourMatrix y(&Grid); random(pRNG,y);
|
LatticeColourMatrix y(&Grid); random(pRNG,y);
|
||||||
|
|
||||||
for(int mu=0;mu<=4;mu++){
|
for(int mu=0;mu<4;mu++){
|
||||||
double start=usecond();
|
double start=usecond();
|
||||||
for(int64_t i=0;i<Nloop;i++){
|
for(int64_t i=0;i<Nloop;i++){
|
||||||
z = PeriodicBC::CovShiftForward(x,mu,y);
|
z = PeriodicBC::CovShiftForward(x,mu,y);
|
||||||
@ -198,11 +200,12 @@ int main (int argc, char ** argv)
|
|||||||
double stop=usecond();
|
double stop=usecond();
|
||||||
double time = (stop-start)/Nloop*1000.0;
|
double time = (stop-start)/Nloop*1000.0;
|
||||||
|
|
||||||
|
|
||||||
double bytes=3*vol*Nc*Nc*sizeof(Complex);
|
double bytes=3*vol*Nc*Nc*sizeof(Complex);
|
||||||
double flops=Nc*Nc*(6+8+8)*vol;
|
double flops=Nc*Nc*(6+8+8)*vol;
|
||||||
std::cout<<GridLogMessage<<std::setprecision(3) << lat<<"\t\t"<<bytes<<" \t\t"<<bytes/time<<"\t\t" << flops/time<<std::endl;
|
std::cout<<GridLogMessage<<std::setprecision(3) << lat<<"\t\t"<<bytes<<" \t\t"<<bytes/time<<"\t\t" << flops/time<<std::endl;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
Grid_finalize();
|
Grid_finalize();
|
||||||
}
|
}
|
||||||
|
@ -54,13 +54,13 @@ template<class vobj> Lattice<vobj> Cshift(const Lattice<vobj> &rhs,int dimension
|
|||||||
|
|
||||||
|
|
||||||
if ( !comm_dim ) {
|
if ( !comm_dim ) {
|
||||||
// std::cout << "Cshift_local" <<std::endl;
|
//std::cout << "CSHIFT: Cshift_local" <<std::endl;
|
||||||
Cshift_local(ret,rhs,dimension,shift); // Handles checkerboarding
|
Cshift_local(ret,rhs,dimension,shift); // Handles checkerboarding
|
||||||
} else if ( splice_dim ) {
|
} else if ( splice_dim ) {
|
||||||
// std::cout << "Cshift_comms_simd" <<std::endl;
|
//std::cout << "CSHIFT: Cshift_comms_simd call - splice_dim = " << splice_dim << " shift " << shift << " dimension = " << dimension << std::endl;
|
||||||
Cshift_comms_simd(ret,rhs,dimension,shift);
|
Cshift_comms_simd(ret,rhs,dimension,shift);
|
||||||
} else {
|
} else {
|
||||||
// std::cout << "Cshift_comms" <<std::endl;
|
//std::cout << "CSHIFT: Cshift_comms" <<std::endl;
|
||||||
Cshift_comms(ret,rhs,dimension,shift);
|
Cshift_comms(ret,rhs,dimension,shift);
|
||||||
}
|
}
|
||||||
return ret;
|
return ret;
|
||||||
@ -91,9 +91,12 @@ template<class vobj> void Cshift_comms_simd(Lattice<vobj>& ret,const Lattice<vob
|
|||||||
sshift[0] = rhs._grid->CheckerBoardShiftForCB(rhs.checkerboard,dimension,shift,Even);
|
sshift[0] = rhs._grid->CheckerBoardShiftForCB(rhs.checkerboard,dimension,shift,Even);
|
||||||
sshift[1] = rhs._grid->CheckerBoardShiftForCB(rhs.checkerboard,dimension,shift,Odd);
|
sshift[1] = rhs._grid->CheckerBoardShiftForCB(rhs.checkerboard,dimension,shift,Odd);
|
||||||
|
|
||||||
|
//std::cout << "Cshift_comms_simd dim "<<dimension<<"cb "<<rhs.checkerboard<<"shift "<<shift<<" sshift " << sshift[0]<<" "<<sshift[1]<<std::endl;
|
||||||
if ( sshift[0] == sshift[1] ) {
|
if ( sshift[0] == sshift[1] ) {
|
||||||
|
//std::cout << "Single pass Cshift_comms" <<std::endl;
|
||||||
Cshift_comms_simd(ret,rhs,dimension,shift,0x3);
|
Cshift_comms_simd(ret,rhs,dimension,shift,0x3);
|
||||||
} else {
|
} else {
|
||||||
|
//std::cout << "Two pass Cshift_comms" <<std::endl;
|
||||||
Cshift_comms_simd(ret,rhs,dimension,shift,0x1);// if checkerboard is unfavourable take two passes
|
Cshift_comms_simd(ret,rhs,dimension,shift,0x1);// if checkerboard is unfavourable take two passes
|
||||||
Cshift_comms_simd(ret,rhs,dimension,shift,0x2);// both with block stride loop iteration
|
Cshift_comms_simd(ret,rhs,dimension,shift,0x2);// both with block stride loop iteration
|
||||||
}
|
}
|
||||||
@ -175,6 +178,10 @@ template<class vobj> void Cshift_comms_simd(Lattice<vobj> &ret,const Lattice<vo
|
|||||||
int simd_layout = grid->_simd_layout[dimension];
|
int simd_layout = grid->_simd_layout[dimension];
|
||||||
int comm_dim = grid->_processors[dimension] >1 ;
|
int comm_dim = grid->_processors[dimension] >1 ;
|
||||||
|
|
||||||
|
//std::cout << "Cshift_comms_simd dim "<< dimension << " fd "<<fd<<" rd "<<rd
|
||||||
|
// << " ld "<<ld<<" pd " << pd<<" simd_layout "<<simd_layout
|
||||||
|
// << " comm_dim " << comm_dim << " cbmask " << cbmask <<std::endl;
|
||||||
|
|
||||||
assert(comm_dim==1);
|
assert(comm_dim==1);
|
||||||
assert(simd_layout==2);
|
assert(simd_layout==2);
|
||||||
assert(shift>=0);
|
assert(shift>=0);
|
||||||
|
@ -257,7 +257,11 @@ public:
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Lattice(Lattice&& r){ // move constructor
|
||||||
|
_grid = r._grid;
|
||||||
|
checkerboard = r.checkerboard;
|
||||||
|
_odata=std::move(r._odata);
|
||||||
|
}
|
||||||
|
|
||||||
virtual ~Lattice(void) = default;
|
virtual ~Lattice(void) = default;
|
||||||
|
|
||||||
@ -286,6 +290,24 @@ public:
|
|||||||
}
|
}
|
||||||
return *this;
|
return *this;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
strong_inline Lattice<vobj> & operator = (const Lattice<vobj> & r){
|
||||||
|
_grid = r._grid;
|
||||||
|
checkerboard = r.checkerboard;
|
||||||
|
_odata.resize(_grid->oSites());// essential
|
||||||
|
|
||||||
|
parallel_for(int ss=0;ss<_grid->oSites();ss++){
|
||||||
|
_odata[ss]=r._odata[ss];
|
||||||
|
}
|
||||||
|
return *this;
|
||||||
|
}
|
||||||
|
strong_inline Lattice<vobj> & operator = (Lattice<vobj> && r)
|
||||||
|
{
|
||||||
|
_grid = r._grid;
|
||||||
|
checkerboard = r.checkerboard;
|
||||||
|
_odata =std::move(r._odata);
|
||||||
|
return *this;
|
||||||
|
}
|
||||||
|
|
||||||
// *=,+=,-= operators inherit behvour from correspond */+/- operation
|
// *=,+=,-= operators inherit behvour from correspond */+/- operation
|
||||||
template<class T> strong_inline Lattice<vobj> &operator *=(const T &r) {
|
template<class T> strong_inline Lattice<vobj> &operator *=(const T &r) {
|
||||||
|
Loading…
x
Reference in New Issue
Block a user