
Merge branch 'develop' of https://github.com/paboyle/Grid into develop

paboyle committed 2018-04-26 23:00:46 +01:00 (commit 3e0eff6468)
9 changed files with 249 additions and 148 deletions

View File

@@ -55,7 +55,7 @@ int main (int argc, char ** argv)
   std::cout<<GridLogMessage << "===================================================================================================="<<std::endl;
   std::cout<<GridLogMessage << " L "<<"\t\t"<<"bytes"<<"\t\t\t"<<"GB/s"<<"\t\t"<<"Gflop/s"<<"\t\t seconds"<<std::endl;
   std::cout<<GridLogMessage << "----------------------------------------------------------"<<std::endl;
-  uint64_t lmax=96;
+  uint64_t lmax=64;
 #define NLOOP (10*lmax*lmax*lmax*lmax/vol)
   for(int lat=8;lat<=lmax;lat+=8){

View File

@@ -35,7 +35,7 @@ using namespace Grid::QCD;
 int main (int argc, char ** argv)
 {
   Grid_init(&argc,&argv);
-#define LMAX (16)
+#define LMAX (32)
 #define LMIN (16)
 #define LINC (4)
@@ -46,14 +46,14 @@ int main (int argc, char ** argv)
   int64_t threads = GridThread::GetThreads();
   std::cout<<GridLogMessage << "Grid is setup to use "<<threads<<" threads"<<std::endl;
-#if 1
   std::cout<<GridLogMessage << "===================================================================================================="<<std::endl;
   std::cout<<GridLogMessage << "= Benchmarking SU3xSU3 x= x*y"<<std::endl;
   std::cout<<GridLogMessage << "===================================================================================================="<<std::endl;
   std::cout<<GridLogMessage << " L "<<"\t\t"<<"bytes"<<"\t\t\t"<<"GB/s\t\t GFlop/s"<<std::endl;
   std::cout<<GridLogMessage << "----------------------------------------------------------"<<std::endl;
-  for(int lat=4;lat<=LMAX;lat+=LINC){
+  for(int lat=LMIN;lat<=LMAX;lat+=LINC){
     std::vector<int> latt_size ({lat*mpi_layout[0],lat*mpi_layout[1],lat*mpi_layout[2],lat*mpi_layout[3]});
     int64_t vol = latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3];
@@ -85,7 +85,7 @@ int main (int argc, char ** argv)
   std::cout<<GridLogMessage << " L "<<"\t\t"<<"bytes"<<"\t\t\t"<<"GB/s\t\t GFlop/s"<<std::endl;
   std::cout<<GridLogMessage << "----------------------------------------------------------"<<std::endl;
-  for(int lat=2;lat<=LMAX;lat+=LINC){
+  for(int lat=LMIN;lat<=LMAX;lat+=LINC){
     std::vector<int> latt_size ({lat*mpi_layout[0],lat*mpi_layout[1],lat*mpi_layout[2],lat*mpi_layout[3]});
     int64_t vol = latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3];
@@ -116,7 +116,7 @@ int main (int argc, char ** argv)
   std::cout<<GridLogMessage << " L "<<"\t\t"<<"bytes"<<"\t\t\t"<<"GB/s\t\t GFlop/s"<<std::endl;
   std::cout<<GridLogMessage << "----------------------------------------------------------"<<std::endl;
-  for(int lat=2;lat<=LMAX;lat+=LINC){
+  for(int lat=LMIN;lat<=LMAX;lat+=LINC){
     std::vector<int> latt_size ({lat*mpi_layout[0],lat*mpi_layout[1],lat*mpi_layout[2],lat*mpi_layout[3]});
     int64_t vol = latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3];
@@ -147,7 +147,7 @@ int main (int argc, char ** argv)
   std::cout<<GridLogMessage << " L "<<"\t\t"<<"bytes"<<"\t\t\t"<<"GB/s\t\t GFlop/s"<<std::endl;
   std::cout<<GridLogMessage << "----------------------------------------------------------"<<std::endl;
-  for(int lat=2;lat<=LMAX;lat+=LINC){
+  for(int lat=LMIN;lat<=LMAX;lat+=LINC){
     std::vector<int> latt_size ({lat*mpi_layout[0],lat*mpi_layout[1],lat*mpi_layout[2],lat*mpi_layout[3]});
     int64_t vol = latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3];
@@ -171,8 +171,6 @@ int main (int argc, char ** argv)
     std::cout<<GridLogMessage<<std::setprecision(3) << lat<<"\t\t"<<bytes<<" \t\t"<<bytes/time<<"\t\t" << flops/time<<std::endl;
   }
-#endif
   std::cout<<GridLogMessage << "===================================================================================================="<<std::endl;
   std::cout<<GridLogMessage << "= Benchmarking SU3xSU3 CovShiftForward(z,x,y)"<<std::endl;
@@ -193,19 +191,63 @@ int main (int argc, char ** argv)
     LatticeColourMatrix y(&Grid); random(pRNG,y);
     for(int mu=0;mu<4;mu++){
+      double start=usecond();
+      for(int64_t i=0;i<Nloop;i++){
+        z = PeriodicBC::CovShiftForward(x,mu,y);
+      }
+      double stop=usecond();
+      double time = (stop-start)/Nloop*1000.0;
+      double bytes=3*vol*Nc*Nc*sizeof(Complex);
+      double flops=Nc*Nc*(6+8+8)*vol;
+      std::cout<<GridLogMessage<<std::setprecision(3) << lat<<"\t\t"<<bytes<<" \t\t"<<bytes/time<<"\t\t" << flops/time<<std::endl;
+    }
+  }
+#if 1
+  std::cout<<GridLogMessage << "===================================================================================================="<<std::endl;
+  std::cout<<GridLogMessage << "= Benchmarking SU3xSU3 z= x * Cshift(y)"<<std::endl;
+  std::cout<<GridLogMessage << "===================================================================================================="<<std::endl;
+  std::cout<<GridLogMessage << " L "<<"\t\t"<<"bytes"<<"\t\t\t"<<"GB/s\t\t GFlop/s"<<std::endl;
+  std::cout<<GridLogMessage << "----------------------------------------------------------"<<std::endl;
+  for(int lat=LMIN;lat<=LMAX;lat+=LINC){
+    std::vector<int> latt_size ({lat*mpi_layout[0],lat*mpi_layout[1],lat*mpi_layout[2],lat*mpi_layout[3]});
+    int64_t vol = latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3];
+    GridCartesian Grid(latt_size,simd_layout,mpi_layout);
+    GridParallelRNG pRNG(&Grid); pRNG.SeedFixedIntegers(std::vector<int>({45,12,81,9}));
+    LatticeColourMatrix z(&Grid); random(pRNG,z);
+    LatticeColourMatrix x(&Grid); random(pRNG,x);
+    LatticeColourMatrix y(&Grid); random(pRNG,y);
+    LatticeColourMatrix tmp(&Grid);
+    for(int mu=0;mu<4;mu++){
+      double tshift=0;
+      double tmult =0;
       double start=usecond();
       for(int64_t i=0;i<Nloop;i++){
-        z = PeriodicBC::CovShiftForward(x,mu,y);
+        tshift-=usecond();
+        tmp = Cshift(y,mu,-1);
+        tshift+=usecond();
+        tmult-=usecond();
+        z = x*tmp;
+        tmult+=usecond();
       }
       double stop=usecond();
-      double time = (stop-start)/Nloop*1000.0;
+      double time = (stop-start)/Nloop;
+      tshift = tshift/Nloop;
+      tmult = tmult /Nloop;
       double bytes=3*vol*Nc*Nc*sizeof(Complex);
       double flops=Nc*Nc*(6+8+8)*vol;
+      std::cout<<GridLogMessage<<std::setprecision(3) << "total us "<<time<<" shift "<<tshift <<" mult "<<tmult<<std::endl;
+      time = time * 1000; // convert to NS for GB/s
       std::cout<<GridLogMessage<<std::setprecision(3) << lat<<"\t\t"<<bytes<<" \t\t"<<bytes/time<<"\t\t" << flops/time<<std::endl;
     }
   }
+#endif
   Grid_finalize();
 }
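The added z = x * Cshift(y) benchmark separates the shift cost from the matrix-multiply cost by accumulating signed usecond() deltas around each phase inside the timing loop, then reporting the per-iteration averages alongside the total. Below is a minimal sketch of that accumulate-around-each-phase pattern, using std::chrono and hypothetical do_shift()/do_mult() stand-ins for Cshift and the SU(3) multiply; it is an illustration of the timing idiom, not Grid code.

    #include <chrono>
    #include <cstdio>

    // Hypothetical stand-ins for the two phases being timed.
    static void do_shift() { /* ... */ }
    static void do_mult()  { /* ... */ }

    // Microsecond wall clock, analogous in role to Grid's usecond().
    static double now_usec() {
      using namespace std::chrono;
      return duration<double, std::micro>(steady_clock::now().time_since_epoch()).count();
    }

    int main() {
      const int Nloop = 1000;
      double tshift = 0, tmult = 0;
      double start = now_usec();
      for (int i = 0; i < Nloop; i++) {
        tshift -= now_usec();  // subtract on entry ...
        do_shift();
        tshift += now_usec();  // ... add on exit: accumulates time spent in the shift
        tmult -= now_usec();
        do_mult();
        tmult += now_usec();
      }
      double total = (now_usec() - start) / Nloop;  // average microseconds per iteration
      std::printf("total us %g shift %g mult %g\n", total, tshift / Nloop, tmult / Nloop);
      return 0;
    }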

View File

@@ -277,7 +277,9 @@ public:
       uint8_t *cp = (uint8_t *)ptr;
       if ( ptr ) {
         // One touch per 4k page, static OMP loop to catch same loop order
+#ifdef GRID_OMP
 #pragma omp parallel for schedule(static)
+#endif
         for(size_type n=0;n<bytes;n+=4096){
           cp[n]=0;
         }
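For context, the guarded pragma implements first-touch placement: each 4 KiB page is written once, and with static scheduling the page is faulted in, and therefore placed, on the NUMA node of the thread that will later iterate over the same index range; the new #ifdef simply keeps non-OpenMP builds compiling. A minimal sketch of the idea, assuming 4 KiB pages and using the standard _OPENMP guard rather than Grid's GRID_OMP macro:

    #include <cstddef>
    #include <cstdint>

    // First-touch page placement sketch: one write per (assumed) 4 KiB page.
    // With schedule(static) the same threads touch the same index ranges that
    // later static loops will traverse, so pages land on the right NUMA node.
    void first_touch(void *ptr, std::size_t bytes) {
      uint8_t *cp = static_cast<uint8_t *>(ptr);
      if (ptr == nullptr) return;
    #ifdef _OPENMP
    #pragma omp parallel for schedule(static)
    #endif
      for (long long n = 0; n < (long long)bytes; n += 4096) {
        cp[n] = 0;  // the touching thread's node takes ownership of this page
      }
    }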

View File

@@ -45,31 +45,33 @@ Gather_plane_simple (const Lattice<vobj> &rhs,commVector<vobj> &buffer,int dimen
   int so=plane*rhs._grid->_ostride[dimension]; // base offset for start of plane
   int e1=rhs._grid->_slice_nblock[dimension];
   int e2=rhs._grid->_slice_block[dimension];
+  int ent = 0;
+  static std::vector<std::pair<int,int> > table; table.resize(e1*e2);
   int stride=rhs._grid->_slice_stride[dimension];
   if ( cbmask == 0x3 ) {
-    parallel_for_nest2(int n=0;n<e1;n++){
+    for(int n=0;n<e1;n++){
      for(int b=0;b<e2;b++){
        int o = n*stride;
        int bo = n*e2;
-       buffer[off+bo+b]=rhs._odata[so+o+b];
+       table[ent++] = std::pair<int,int>(off+bo+b,so+o+b);
      }
     }
   } else {
     int bo=0;
-    std::vector<std::pair<int,int> > table;
     for(int n=0;n<e1;n++){
      for(int b=0;b<e2;b++){
        int o = n*stride;
        int ocb=1<<rhs._grid->CheckerBoardFromOindex(o+b);
        if ( ocb &cbmask ) {
-         table.push_back(std::pair<int,int> (bo++,o+b));
+         table[ent++]=std::pair<int,int> (off+bo++,so+o+b);
        }
      }
     }
-    parallel_for(int i=0;i<table.size();i++){
-      buffer[off+table[i].first]=rhs._odata[so+table[i].second];
-    }
+  }
+  parallel_for(int i=0;i<ent;i++){
+    buffer[table[i].first]=rhs._odata[table[i].second];
   }
 }
@@ -140,31 +142,35 @@ template<class vobj> void Scatter_plane_simple (Lattice<vobj> &rhs,commVector<vo
   int e1=rhs._grid->_slice_nblock[dimension];
   int e2=rhs._grid->_slice_block[dimension];
   int stride=rhs._grid->_slice_stride[dimension];
+  static std::vector<std::pair<int,int> > table; table.resize(e1*e2);
+  int ent =0;
   if ( cbmask ==0x3 ) {
-    parallel_for_nest2(int n=0;n<e1;n++){
+    for(int n=0;n<e1;n++){
      for(int b=0;b<e2;b++){
        int o   =n*rhs._grid->_slice_stride[dimension];
        int bo  =n*rhs._grid->_slice_block[dimension];
-       rhs._odata[so+o+b]=buffer[bo+b];
+       table[ent++] = std::pair<int,int>(so+o+b,bo);
      }
     }
   } else {
-    std::vector<std::pair<int,int> > table;
     int bo=0;
     for(int n=0;n<e1;n++){
      for(int b=0;b<e2;b++){
        int o   =n*rhs._grid->_slice_stride[dimension];
        int ocb=1<<rhs._grid->CheckerBoardFromOindex(o+b);// Could easily be a table lookup
        if ( ocb & cbmask ) {
-         table.push_back(std::pair<int,int> (so+o+b,bo++));
+         table[ent++]=std::pair<int,int> (so+o+b,bo++);
        }
      }
     }
-    parallel_for(int i=0;i<table.size();i++){
-      // std::cout << "Rcv"<< table[i].first << " " << table[i].second << " " <<buffer[table[i].second]<<std::endl;
-      rhs._odata[table[i].first]=buffer[table[i].second];
-    }
+  }
+  parallel_for(int i=0;i<ent;i++){
+    rhs._odata[table[i].first]=buffer[table[i].second];
   }
 }
@@ -228,29 +234,32 @@ template<class vobj> void Copy_plane(Lattice<vobj>& lhs,const Lattice<vobj> &rhs
   int e1=rhs._grid->_slice_nblock[dimension]; // clearly loop invariant for icpc
   int e2=rhs._grid->_slice_block[dimension];
   int stride = rhs._grid->_slice_stride[dimension];
+  static std::vector<std::pair<int,int> > table; table.resize(e1*e2);
+  int ent=0;
   if(cbmask == 0x3 ){
-    parallel_for_nest2(int n=0;n<e1;n++){
+    for(int n=0;n<e1;n++){
      for(int b=0;b<e2;b++){
        int o =n*stride+b;
-       //lhs._odata[lo+o]=rhs._odata[ro+o];
-       vstream(lhs._odata[lo+o],rhs._odata[ro+o]);
+       table[ent++] = std::pair<int,int>(lo+o,ro+o);
      }
     }
   } else {
-    parallel_for_nest2(int n=0;n<e1;n++){
+    for(int n=0;n<e1;n++){
      for(int b=0;b<e2;b++){
        int o =n*stride+b;
        int ocb=1<<lhs._grid->CheckerBoardFromOindex(o);
        if ( ocb&cbmask ) {
-         //lhs._odata[lo+o]=rhs._odata[ro+o];
-         vstream(lhs._odata[lo+o],rhs._odata[ro+o]);
+         table[ent++] = std::pair<int,int>(lo+o,ro+o);
        }
      }
     }
   }
+  parallel_for(int i=0;i<ent;i++){
+    lhs._odata[table[i].first]=rhs._odata[table[i].second];
+  }
 }
 template<class vobj> void Copy_plane_permute(Lattice<vobj>& lhs,const Lattice<vobj> &rhs, int dimension,int lplane,int rplane,int cbmask,int permute_type)
@@ -269,16 +278,28 @@ template<class vobj> void Copy_plane_permute(Lattice<vobj>& lhs,const Lattice<vo
   int e2=rhs._grid->_slice_block [dimension];
   int stride = rhs._grid->_slice_stride[dimension];
-  parallel_for_nest2(int n=0;n<e1;n++){
-  for(int b=0;b<e2;b++){
+  static std::vector<std::pair<int,int> > table; table.resize(e1*e2);
+  int ent=0;
+  double t_tab,t_perm;
+  if ( cbmask == 0x3 ) {
+    for(int n=0;n<e1;n++){
+    for(int b=0;b<e2;b++){
+      int o  =n*stride;
+      table[ent++] = std::pair<int,int>(lo+o+b,ro+o+b);
+    }}
+  } else {
+    for(int n=0;n<e1;n++){
+    for(int b=0;b<e2;b++){
      int o  =n*stride;
      int ocb=1<<lhs._grid->CheckerBoardFromOindex(o+b);
-     if ( ocb&cbmask ) {
-       permute(lhs._odata[lo+o+b],rhs._odata[ro+o+b],permute_type);
-     }
-  }}
+     if ( ocb&cbmask ) table[ent++] = std::pair<int,int>(lo+o+b,ro+o+b);
+    }}
+  }
+  parallel_for(int i=0;i<ent;i++){
+    permute(lhs._odata[table[i].first],rhs._odata[table[i].second],permute_type);
+  }
 }
 //////////////////////////////////////////////////////
@@ -291,6 +312,8 @@ template<class vobj> void Cshift_local(Lattice<vobj>& ret,const Lattice<vobj> &r
   sshift[0] = rhs._grid->CheckerBoardShiftForCB(rhs.checkerboard,dimension,shift,Even);
   sshift[1] = rhs._grid->CheckerBoardShiftForCB(rhs.checkerboard,dimension,shift,Odd);
+  double t_local;
   if ( sshift[0] == sshift[1] ) {
     Cshift_local(ret,rhs,dimension,shift,0x3);
   } else {
@@ -299,7 +322,7 @@ template<class vobj> void Cshift_local(Lattice<vobj>& ret,const Lattice<vobj> &r
   }
 }
-template<class vobj> Lattice<vobj> Cshift_local(Lattice<vobj> &ret,const Lattice<vobj> &rhs,int dimension,int shift,int cbmask)
+template<class vobj> void Cshift_local(Lattice<vobj> &ret,const Lattice<vobj> &rhs,int dimension,int shift,int cbmask)
 {
   GridBase *grid = rhs._grid;
   int fd = grid->_fdimensions[dimension];
@@ -325,11 +348,7 @@ template<class vobj> Lattice<vobj> Cshift_local(Lattice<vobj> &ret,const Lattice
     int sshift = grid->CheckerBoardShiftForCB(rhs.checkerboard,dimension,shift,cb);
     int sx = (x+sshift)%rd;
-    // FIXME : This must change where we have a
-    // Rotate slice.
-    // Document how this works ; why didn't I do this when I first wrote it...
     // wrap is whether sshift > rd.
     //  num is sshift mod rd.
     //
@@ -365,10 +384,8 @@ template<class vobj> Lattice<vobj> Cshift_local(Lattice<vobj> &ret,const Lattice
     if ( permute_slice ) Copy_plane_permute(ret,rhs,dimension,x,sx,cbmask,permute_type_dist);
     else                 Copy_plane(ret,rhs,dimension,x,sx,cbmask);
   }
-  return ret;
 }
 }
 #endif
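The common thread in these Cshift changes is a two-pass structure: a cheap serial pass records (destination, source) index pairs into a table that is reused across calls, and one flat parallel_for then performs the copy or permute, so the checkerboard-masked branch is parallelised as well instead of running serially. A minimal sketch of that gather-table pattern on a plain std::vector, with OpenMP standing in for Grid's parallel_for macro and a hypothetical keep() predicate playing the role of the checkerboard mask test:

    #include <cstddef>
    #include <utility>
    #include <vector>

    // Build-table-then-parallel-copy sketch (illustrative only; buffer is
    // assumed to be at least as large as the number of kept elements).
    template <class T, class Pred>
    void gather_with_table(std::vector<T> &buffer, const std::vector<T> &data, Pred keep) {
      static std::vector<std::pair<int, int>> table;  // reused across calls, as in the diff
      table.resize(data.size());
      int ent = 0;
      for (std::size_t i = 0; i < data.size(); i++) {   // serial pass: decide what moves where
        if (keep(i)) { table[ent] = std::make_pair(ent, (int)i); ent++; }
      }
    #ifdef _OPENMP
    #pragma omp parallel for
    #endif
      for (int i = 0; i < ent; i++)                     // flat parallel copy over the table
        buffer[table[i].first] = data[table[i].second];
    }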

View File

@@ -256,13 +256,42 @@ public:
       _odata[ss]=r._odata[ss];
     }
   }
   Lattice(Lattice&& r){ // move constructor
     _grid = r._grid;
     checkerboard = r.checkerboard;
     _odata=std::move(r._odata);
   }
+  inline Lattice<vobj> & operator = (Lattice<vobj> && r)
+  {
+    _grid = r._grid;
+    checkerboard = r.checkerboard;
+    _odata =std::move(r._odata);
+    return *this;
+  }
+  inline Lattice<vobj> & operator = (const Lattice<vobj> & r){
+    _grid = r._grid;
+    checkerboard = r.checkerboard;
+    _odata.resize(_grid->oSites());// essential
+    parallel_for(int ss=0;ss<_grid->oSites();ss++){
+      _odata[ss]=r._odata[ss];
+    }
+    return *this;
+  }
+  template<class robj> strong_inline Lattice<vobj> & operator = (const Lattice<robj> & r){
+    this->checkerboard = r.checkerboard;
+    conformable(*this,r);
+    parallel_for(int ss=0;ss<_grid->oSites();ss++){
+      this->_odata[ss]=r._odata[ss];
+    }
+    return *this;
+  }
   virtual ~Lattice(void) = default;
   void reset(GridBase* grid) {
@@ -281,33 +310,6 @@ public:
     return *this;
   }
-  template<class robj> strong_inline Lattice<vobj> & operator = (const Lattice<robj> & r){
-    this->checkerboard = r.checkerboard;
-    conformable(*this,r);
-    parallel_for(int ss=0;ss<_grid->oSites();ss++){
-      this->_odata[ss]=r._odata[ss];
-    }
-    return *this;
-  }
-  strong_inline Lattice<vobj> & operator = (const Lattice<vobj> & r){
-    _grid = r._grid;
-    checkerboard = r.checkerboard;
-    _odata.resize(_grid->oSites());// essential
-    parallel_for(int ss=0;ss<_grid->oSites();ss++){
-      _odata[ss]=r._odata[ss];
-    }
-    return *this;
-  }
-  strong_inline Lattice<vobj> & operator = (Lattice<vobj> && r)
-  {
-    _grid = r._grid;
-    checkerboard = r.checkerboard;
-    _odata =std::move(r._odata);
-    return *this;
-  }
   // *=,+=,-= operators inherit behvour from correspond */+/- operation
   template<class T> strong_inline Lattice<vobj> &operator *=(const T &r) {
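The reordering above groups the assignment operators with the constructors; the important asymmetry is that copy assignment resizes _odata and copies every site (via parallel_for), while move assignment just steals the storage. A toy analogue of that split, with a plain std::vector standing in for the SIMD-vectorised site array, purely to illustrate the copy-versus-move distinction:

    #include <cstddef>
    #include <utility>
    #include <vector>

    // Toy analogue of the Lattice assignment pair: deep copy vs. storage-stealing move.
    struct Toy {
      std::vector<double> odata;   // stands in for Lattice::_odata
      int checkerboard = 0;

      Toy() = default;
      Toy(const Toy &) = default;
      Toy(Toy &&) = default;

      Toy &operator=(const Toy &r) {           // deep copy: resize then per-site copy
        checkerboard = r.checkerboard;
        odata.resize(r.odata.size());          // "essential", as the diff comments
        for (std::size_t ss = 0; ss < odata.size(); ss++) odata[ss] = r.odata[ss];
        return *this;
      }
      Toy &operator=(Toy &&r) {                // move: no element traversal at all
        checkerboard = r.checkerboard;
        odata = std::move(r.odata);
        return *this;
      }
    };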

View File

@@ -110,11 +110,11 @@ class BinaryIO {
       lsites = 1;
     }
-#pragma omp parallel
+PARALLEL_REGION
     {
       uint32_t nersc_csum_thr = 0;
-#pragma omp for
+PARALLEL_FOR_LOOP_INTERN
       for (uint64_t local_site = 0; local_site < lsites; local_site++)
       {
         uint32_t *site_buf = (uint32_t *)&fbuf[local_site];
@@ -124,7 +124,7 @@ class BinaryIO {
         }
       }
-#pragma omp critical
+PARALLEL_CRITICAL
       {
         nersc_csum += nersc_csum_thr;
       }
@@ -146,14 +146,14 @@ class BinaryIO {
     std::vector<int> local_start =grid->LocalStarts();
     std::vector<int> global_vol =grid->FullDimensions();
-#pragma omp parallel
+PARALLEL_REGION
     {
       std::vector<int> coor(nd);
       uint32_t scidac_csuma_thr=0;
       uint32_t scidac_csumb_thr=0;
       uint32_t site_crc=0;
-#pragma omp for
+PARALLEL_FOR_LOOP_INTERN
       for(uint64_t local_site=0;local_site<lsites;local_site++){
         uint32_t * site_buf = (uint32_t *)&fbuf[local_site];
@@ -183,7 +183,7 @@ class BinaryIO {
         scidac_csumb_thr ^= site_crc<<gsite31 | site_crc>>(32-gsite31);
       }
-#pragma omp critical
+PARALLEL_CRITICAL
       {
         scidac_csuma^= scidac_csuma_thr;
         scidac_csumb^= scidac_csumb_thr;
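The substitution here swaps raw OpenMP pragmas for Grid's portability macros so the per-thread checksum reduction still compiles in single-threaded builds. Below is a hedged sketch of how macros of that shape can be defined and used; the names mirror the diff, but the actual definitions live in Grid's threading header and may differ:

    #include <cstdint>

    #ifdef _OPENMP
    #define PARALLEL_REGION          _Pragma("omp parallel")
    #define PARALLEL_FOR_LOOP_INTERN _Pragma("omp for")
    #define PARALLEL_CRITICAL        _Pragma("omp critical")
    #else
    #define PARALLEL_REGION
    #define PARALLEL_FOR_LOOP_INTERN
    #define PARALLEL_CRITICAL
    #endif

    // Per-thread partial checksum merged into the shared total under a critical
    // section -- the same shape as the NERSC/SciDAC checksum loops above.
    uint32_t checksum(const uint32_t *buf, long nwords) {
      uint32_t csum = 0;
      PARALLEL_REGION
      {
        uint32_t csum_thr = 0;           // private partial sum
        PARALLEL_FOR_LOOP_INTERN
        for (long i = 0; i < nwords; i++) csum_thr += buf[i];
        PARALLEL_CRITICAL
        { csum += csum_thr; }            // serialised merge into the shared result
      }
      return csum;
    }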

View File

@@ -114,18 +114,26 @@ class Integrator {
       // input U actually not used in the fundamental case
       // Fundamental updates, include smearing
       for (int a = 0; a < as[level].actions.size(); ++a) {
+        double start_full = usecond();
         Field force(U._grid);
         conformable(U._grid, Mom._grid);
         Field& Us = Smearer.get_U(as[level].actions.at(a)->is_smeared);
+        double start_force = usecond();
         as[level].actions.at(a)->deriv(Us, force); // deriv should NOT include Ta
         std::cout << GridLogIntegrator << "Smearing (on/off): " << as[level].actions.at(a)->is_smeared << std::endl;
         if (as[level].actions.at(a)->is_smeared) Smearer.smeared_force(force);
         force = FieldImplementation::projectForce(force); // Ta for gauge fields
+        double end_force = usecond();
         Real force_abs = std::sqrt(norm2(force)/U._grid->gSites());
-        std::cout << GridLogIntegrator << "Force average: " << force_abs << std::endl;
+        std::cout << GridLogIntegrator << "["<<level<<"]["<<a<<"] Force average: " << force_abs << std::endl;
         Mom -= force * ep;
+        double end_full = usecond();
+        double time_full = (end_full - start_full) / 1e3;
+        double time_force = (end_force - start_force) / 1e3;
+        std::cout << GridLogIntegrator << "["<<level<<"]["<<a<<"] P update elapsed time: " << time_full << " ms (force: " << time_force << " ms)" << std::endl;
       }
       // Force from the other representations

View File

@@ -6,30 +6,33 @@
 #ifndef GAUGE_CONFIG_
 #define GAUGE_CONFIG_
-namespace Grid {
+namespace Grid
+{
-namespace QCD {
+namespace QCD
+{
 //trivial class for no smearing
-template< class Impl >
-class NoSmearing {
+template <class Impl>
+class NoSmearing
+{
 public:
   INHERIT_FIELD_TYPES(Impl);
-  Field* ThinField;
+  Field *ThinField;
-  NoSmearing(): ThinField(NULL) {}
+  NoSmearing() : ThinField(NULL) {}
-  void set_Field(Field& U) { ThinField = &U; }
+  void set_Field(Field &U) { ThinField = &U; }
-  void smeared_force(Field&) const {}
+  void smeared_force(Field &) const {}
-  Field& get_SmearedU() { return *ThinField; }
+  Field &get_SmearedU() { return *ThinField; }
-  Field& get_U(bool smeared = false) {
+  Field &get_U(bool smeared = false)
+  {
     return *ThinField;
   }
 };
 /*!
@@ -44,32 +47,36 @@ public:
    It stores a list of smeared configurations.
 */
 template <class Gimpl>
-class SmearedConfiguration {
- public:
+class SmearedConfiguration
+{
+public:
   INHERIT_GIMPL_TYPES(Gimpl);
 private:
   const unsigned int smearingLevels;
   Smear_Stout<Gimpl> StoutSmearing;
   std::vector<GaugeField> SmearedSet;
   // Member functions
   //====================================================================
-  void fill_smearedSet(GaugeField& U) {
-    ThinLinks = &U; // attach the smearing routine to the field U
+  void fill_smearedSet(GaugeField &U)
+  {
+    ThinLinks = &U; // attach the smearing routine to the field U
     // check the pointer is not null
     if (ThinLinks == NULL)
       std::cout << GridLogError
                 << "[SmearedConfiguration] Error in ThinLinks pointer\n";
-    if (smearingLevels > 0) {
+    if (smearingLevels > 0)
+    {
      std::cout << GridLogDebug
               << "[SmearedConfiguration] Filling SmearedSet\n";
      GaugeField previous_u(ThinLinks->_grid);
      previous_u = *ThinLinks;
-     for (int smearLvl = 0; smearLvl < smearingLevels; ++smearLvl) {
+     for (int smearLvl = 0; smearLvl < smearingLevels; ++smearLvl)
+     {
        StoutSmearing.smear(SmearedSet[smearLvl], previous_u);
        previous_u = SmearedSet[smearLvl];
@@ -81,9 +88,10 @@ class SmearedConfiguration {
     }
   }
   //====================================================================
-  GaugeField AnalyticSmearedForce(const GaugeField& SigmaKPrime,
-                                  const GaugeField& GaugeK) const {
-    GridBase* grid = GaugeK._grid;
+  GaugeField AnalyticSmearedForce(const GaugeField &SigmaKPrime,
+                                  const GaugeField &GaugeK) const
+  {
+    GridBase *grid = GaugeK._grid;
     GaugeField C(grid), SigmaK(grid), iLambda(grid);
     GaugeLinkField iLambda_mu(grid);
     GaugeLinkField iQ(grid), e_iQ(grid);
@@ -94,7 +102,8 @@ class SmearedConfiguration {
     SigmaK = zero;
     iLambda = zero;
-    for (int mu = 0; mu < Nd; mu++) {
+    for (int mu = 0; mu < Nd; mu++)
+    {
      Cmu = peekLorentz(C, mu);
      GaugeKmu = peekLorentz(GaugeK, mu);
      SigmaKPrime_mu = peekLorentz(SigmaKPrime, mu);
@@ -104,20 +113,22 @@ class SmearedConfiguration {
      pokeLorentz(iLambda, iLambda_mu, mu);
     }
     StoutSmearing.derivative(SigmaK, iLambda,
                              GaugeK); // derivative of SmearBase
     return SigmaK;
   }
   /*! @brief Returns smeared configuration at level 'Level' */
-  const GaugeField& get_smeared_conf(int Level) const {
+  const GaugeField &get_smeared_conf(int Level) const
+  {
     return SmearedSet[Level];
   }
   //====================================================================
-  void set_iLambda(GaugeLinkField& iLambda, GaugeLinkField& e_iQ,
-                   const GaugeLinkField& iQ, const GaugeLinkField& Sigmap,
-                   const GaugeLinkField& GaugeK) const {
-    GridBase* grid = iQ._grid;
+  void set_iLambda(GaugeLinkField &iLambda, GaugeLinkField &e_iQ,
+                   const GaugeLinkField &iQ, const GaugeLinkField &Sigmap,
+                   const GaugeLinkField &GaugeK) const
+  {
+    GridBase *grid = iQ._grid;
     GaugeLinkField iQ2(grid), iQ3(grid), B1(grid), B2(grid), USigmap(grid);
     GaugeLinkField unity(grid);
     unity = 1.0;
@@ -206,15 +217,15 @@ class SmearedConfiguration {
   }
   //====================================================================
 public:
-  GaugeField*
-      ThinLinks; /*!< @brief Pointer to the thin
-                    links configuration */
+  GaugeField *
+      ThinLinks; /* Pointer to the thin links configuration */
-  /*! @brief Standard constructor */
+  /* Standard constructor */
-  SmearedConfiguration(GridCartesian* UGrid, unsigned int Nsmear,
-                       Smear_Stout<Gimpl>& Stout)
-      : smearingLevels(Nsmear), StoutSmearing(Stout), ThinLinks(NULL) {
+  SmearedConfiguration(GridCartesian *UGrid, unsigned int Nsmear,
+                       Smear_Stout<Gimpl> &Stout)
+      : smearingLevels(Nsmear), StoutSmearing(Stout), ThinLinks(NULL)
+  {
     for (unsigned int i = 0; i < smearingLevels; ++i)
      SmearedSet.push_back(*(new GaugeField(UGrid)));
   }
@@ -223,21 +234,29 @@ class SmearedConfiguration {
   SmearedConfiguration()
      : smearingLevels(0), StoutSmearing(), SmearedSet(), ThinLinks(NULL) {}
   // attach the smeared routines to the thin links U and fill the smeared set
-  void set_Field(GaugeField& U) { fill_smearedSet(U); }
+  void set_Field(GaugeField &U)
+  {
+    double start = usecond();
+    fill_smearedSet(U);
+    double end = usecond();
+    double time = (end - start)/ 1e3;
+    std::cout << GridLogMessage << "Smearing in " << time << " ms" << std::endl;
+  }
   //====================================================================
-  void smeared_force(GaugeField& SigmaTilde) const {
-    if (smearingLevels > 0) {
+  void smeared_force(GaugeField &SigmaTilde) const
+  {
+    if (smearingLevels > 0)
+    {
+      double start = usecond();
      GaugeField force = SigmaTilde; // actually = U*SigmaTilde
      GaugeLinkField tmp_mu(SigmaTilde._grid);
-     for (int mu = 0; mu < Nd; mu++) {
+     for (int mu = 0; mu < Nd; mu++)
+     {
        // to get just SigmaTilde
-       tmp_mu = adj(peekLorentz(SmearedSet[smearingLevels - 1], mu)) *
-                peekLorentz(force, mu);
+       tmp_mu = adj(peekLorentz(SmearedSet[smearingLevels - 1], mu)) * peekLorentz(force, mu);
        pokeLorentz(force, tmp_mu, mu);
      }
@@ -246,33 +265,43 @@ class SmearedConfiguration {
      force = AnalyticSmearedForce(force, *ThinLinks);
-     for (int mu = 0; mu < Nd; mu++) {
+     for (int mu = 0; mu < Nd; mu++)
+     {
        tmp_mu = peekLorentz(*ThinLinks, mu) * peekLorentz(force, mu);
        pokeLorentz(SigmaTilde, tmp_mu, mu);
      }
-    } // if smearingLevels = 0 do nothing
+      double end = usecond();
+      double time = (end - start)/ 1e3;
+      std::cout << GridLogMessage << "Smearing force in " << time << " ms" << std::endl;
+    } // if smearingLevels = 0 do nothing
   }
   //====================================================================
-  GaugeField& get_SmearedU() { return SmearedSet[smearingLevels - 1]; }
+  GaugeField &get_SmearedU() { return SmearedSet[smearingLevels - 1]; }
-  GaugeField& get_U(bool smeared = false) {
+  GaugeField &get_U(bool smeared = false)
+  {
    // get the config, thin links by default
-   if (smeared) {
-     if (smearingLevels) {
+   if (smeared)
+   {
+     if (smearingLevels)
+     {
       RealD impl_plaq =
          WilsonLoops<Gimpl>::avgPlaquette(SmearedSet[smearingLevels - 1]);
      std::cout << GridLogDebug << "getting Usmr Plaq: " << impl_plaq
              << std::endl;
      return get_SmearedU();
+     }
-     } else {
+     else
+     {
      RealD impl_plaq = WilsonLoops<Gimpl>::avgPlaquette(*ThinLinks);
      std::cout << GridLogDebug << "getting Thin Plaq: " << impl_plaq
             << std::endl;
      return *ThinLinks;
      }
-   } else {
+   }
+   else
+   {
     RealD impl_plaq = WilsonLoops<Gimpl>::avgPlaquette(*ThinLinks);
     std::cout << GridLogDebug << "getting Thin Plaq: " << impl_plaq
           << std::endl;

View File

@@ -49,6 +49,8 @@ int main (int argc, char ** argv)
   const int Ls=8;
+  std::cout << GridLogMessage << "::::: NB: to enable a quick bit reproducibility check use the --checksums flag. " << std::endl;
   GridCartesian         * UGrid   = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(), GridDefaultSimd(Nd,vComplexD::Nsimd()),GridDefaultMpi());
   GridRedBlackCartesian * UrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid);
   GridCartesian         * FGrid   = SpaceTimeGrid::makeFiveDimGrid(Ls,UGrid);
@@ -90,24 +92,23 @@ int main (int argc, char ** argv)
   SchurDiagMooeeOperator<DomainWallFermionD,LatticeFermionD> HermOpEO(Ddwf);
   SchurDiagMooeeOperator<DomainWallFermionF,LatticeFermionF> HermOpEO_f(Ddwf_f);
-  std::cout << "Starting mixed CG" << std::endl;
+  std::cout << GridLogMessage << "::::::::::::: Starting mixed CG" << std::endl;
   MixedPrecisionConjugateGradient<LatticeFermionD,LatticeFermionF> mCG(1.0e-8, 10000, 50, FrbGrid_f, HermOpEO_f, HermOpEO);
   mCG(src_o,result_o);
-  std::cout << "Starting regular CG" << std::endl;
+  std::cout << GridLogMessage << "::::::::::::: Starting regular CG" << std::endl;
   ConjugateGradient<LatticeFermionD> CG(1.0e-8,10000);
   CG(HermOpEO,src_o,result_o_2);
   LatticeFermionD diff_o(FrbGrid);
   RealD diff = axpy_norm(diff_o, -1.0, result_o, result_o_2);
-  std::cout << "Diff between mixed and regular CG: " << diff << std::endl;
+  std::cout << GridLogMessage << "::::::::::::: Diff between mixed and regular CG: " << diff << std::endl;
 #ifdef HAVE_LIME
   if( GridCmdOptionExists(argv,argv+argc,"--checksums") ){
     std::string file1("./Propagator1");
-    std::string file2("./Propagator2");
     emptyUserRecord record;
     uint32_t nersc_csum;
     uint32_t scidac_csuma;
@@ -121,12 +122,12 @@ int main (int argc, char ** argv)
     BinaryIO::writeLatticeObject<vFermionD,FermionD>(result_o,file1,munge, 0, format,
                                                      nersc_csum,scidac_csuma,scidac_csumb);
-    std::cout << " Mixed checksums "<<std::hex << scidac_csuma << " "<<scidac_csumb<<std::endl;
+    std::cout << GridLogMessage << " Mixed checksums "<<std::hex << scidac_csuma << " "<<scidac_csumb<<std::endl;
     BinaryIO::writeLatticeObject<vFermionD,FermionD>(result_o_2,file1,munge, 0, format,
                                                      nersc_csum,scidac_csuma,scidac_csumb);
-    std::cout << " CG checksums "<<std::hex << scidac_csuma << " "<<scidac_csumb<<std::endl;
+    std::cout << GridLogMessage << " CG checksums "<<std::hex << scidac_csuma << " "<<scidac_csumb<<std::endl;
   }
 #endif