1
0
mirror of https://github.com/paboyle/Grid.git synced 2025-06-13 04:37:05 +01:00

Merge branch 'develop' into feature/hadrons

This commit is contained in:
2018-05-01 14:07:32 +01:00
15 changed files with 694 additions and 104 deletions

View File

@ -479,15 +479,13 @@ until convergence
Field B(grid); B.checkerboard = evec[0].checkerboard;
// power of two search pattern; not every evalue in eval2 is assessed.
int allconv =1;
for(int jj = 1; jj<=Nstop; jj*=2){
int j = Nstop-jj;
RealD e = eval2_copy[j]; // Discard the evalue
basisRotateJ(B,evec,Qt,j,0,Nk,Nm);
if( _Tester.TestConvergence(j,eresid,B,e,evalMaxApprox) ) {
if ( j > Nconv ) {
Nconv=j+1;
jj=Nstop; // Terminate the scan
}
if( !_Tester.TestConvergence(j,eresid,B,e,evalMaxApprox) ) {
allconv=0;
}
}
// Do evec[0] for good measure
@ -495,8 +493,10 @@ until convergence
int j=0;
RealD e = eval2_copy[0];
basisRotateJ(B,evec,Qt,j,0,Nk,Nm);
_Tester.TestConvergence(j,eresid,B,e,evalMaxApprox);
if( !_Tester.TestConvergence(j,eresid,B,e,evalMaxApprox) ) allconv=0;
}
if ( allconv ) Nconv = Nstop;
// test if we converged, if so, terminate
std::cout<<GridLogIRL<<" #modes converged: >= "<<Nconv<<"/"<<Nstop<<std::endl;
// if( Nconv>=Nstop || beta_k < betastp){

View File

@ -48,6 +48,7 @@ struct LanczosParams : Serializable {
struct LocalCoherenceLanczosParams : Serializable {
public:
GRID_SERIALIZABLE_CLASS_MEMBERS(LocalCoherenceLanczosParams,
bool, saveEvecs,
bool, doFine,
bool, doFineRead,
bool, doCoarse,

View File

@ -277,7 +277,9 @@ public:
uint8_t *cp = (uint8_t *)ptr;
if ( ptr ) {
// One touch per 4k page, static OMP loop to catch same loop order
#ifdef GRID_OMP
#pragma omp parallel for schedule(static)
#endif
for(size_type n=0;n<bytes;n+=4096){
cp[n]=0;
}

View File

@ -45,31 +45,33 @@ Gather_plane_simple (const Lattice<vobj> &rhs,commVector<vobj> &buffer,int dimen
int so=plane*rhs._grid->_ostride[dimension]; // base offset for start of plane
int e1=rhs._grid->_slice_nblock[dimension];
int e2=rhs._grid->_slice_block[dimension];
int ent = 0;
static std::vector<std::pair<int,int> > table; table.resize(e1*e2);
int stride=rhs._grid->_slice_stride[dimension];
if ( cbmask == 0x3 ) {
parallel_for_nest2(int n=0;n<e1;n++){
for(int n=0;n<e1;n++){
for(int b=0;b<e2;b++){
int o = n*stride;
int bo = n*e2;
buffer[off+bo+b]=rhs._odata[so+o+b];
table[ent++] = std::pair<int,int>(off+bo+b,so+o+b);
}
}
} else {
int bo=0;
std::vector<std::pair<int,int> > table;
for(int n=0;n<e1;n++){
for(int b=0;b<e2;b++){
int o = n*stride;
int ocb=1<<rhs._grid->CheckerBoardFromOindex(o+b);
if ( ocb &cbmask ) {
table.push_back(std::pair<int,int> (bo++,o+b));
table[ent++]=std::pair<int,int> (off+bo++,so+o+b);
}
}
}
parallel_for(int i=0;i<table.size();i++){
buffer[off+table[i].first]=rhs._odata[so+table[i].second];
}
}
parallel_for(int i=0;i<ent;i++){
buffer[table[i].first]=rhs._odata[table[i].second];
}
}
@ -140,31 +142,35 @@ template<class vobj> void Scatter_plane_simple (Lattice<vobj> &rhs,commVector<vo
int e1=rhs._grid->_slice_nblock[dimension];
int e2=rhs._grid->_slice_block[dimension];
int stride=rhs._grid->_slice_stride[dimension];
static std::vector<std::pair<int,int> > table; table.resize(e1*e2);
int ent =0;
if ( cbmask ==0x3 ) {
parallel_for_nest2(int n=0;n<e1;n++){
for(int n=0;n<e1;n++){
for(int b=0;b<e2;b++){
int o =n*rhs._grid->_slice_stride[dimension];
int bo =n*rhs._grid->_slice_block[dimension];
rhs._odata[so+o+b]=buffer[bo+b];
table[ent++] = std::pair<int,int>(so+o+b,bo+b);
}
}
} else {
std::vector<std::pair<int,int> > table;
int bo=0;
for(int n=0;n<e1;n++){
for(int b=0;b<e2;b++){
int o =n*rhs._grid->_slice_stride[dimension];
int ocb=1<<rhs._grid->CheckerBoardFromOindex(o+b);// Could easily be a table lookup
if ( ocb & cbmask ) {
table.push_back(std::pair<int,int> (so+o+b,bo++));
table[ent++]=std::pair<int,int> (so+o+b,bo++);
}
}
}
parallel_for(int i=0;i<table.size();i++){
// std::cout << "Rcv"<< table[i].first << " " << table[i].second << " " <<buffer[table[i].second]<<std::endl;
rhs._odata[table[i].first]=buffer[table[i].second];
}
}
parallel_for(int i=0;i<ent;i++){
rhs._odata[table[i].first]=buffer[table[i].second];
}
}
@ -228,29 +234,32 @@ template<class vobj> void Copy_plane(Lattice<vobj>& lhs,const Lattice<vobj> &rhs
int e1=rhs._grid->_slice_nblock[dimension]; // clearly loop invariant for icpc
int e2=rhs._grid->_slice_block[dimension];
int stride = rhs._grid->_slice_stride[dimension];
static std::vector<std::pair<int,int> > table; table.resize(e1*e2);
int ent=0;
if(cbmask == 0x3 ){
parallel_for_nest2(int n=0;n<e1;n++){
for(int n=0;n<e1;n++){
for(int b=0;b<e2;b++){
int o =n*stride+b;
//lhs._odata[lo+o]=rhs._odata[ro+o];
vstream(lhs._odata[lo+o],rhs._odata[ro+o]);
table[ent++] = std::pair<int,int>(lo+o,ro+o);
}
}
} else {
parallel_for_nest2(int n=0;n<e1;n++){
for(int n=0;n<e1;n++){
for(int b=0;b<e2;b++){
int o =n*stride+b;
int ocb=1<<lhs._grid->CheckerBoardFromOindex(o);
if ( ocb&cbmask ) {
//lhs._odata[lo+o]=rhs._odata[ro+o];
vstream(lhs._odata[lo+o],rhs._odata[ro+o]);
table[ent++] = std::pair<int,int>(lo+o,ro+o);
}
}
}
}
parallel_for(int i=0;i<ent;i++){
lhs._odata[table[i].first]=rhs._odata[table[i].second];
}
}
template<class vobj> void Copy_plane_permute(Lattice<vobj>& lhs,const Lattice<vobj> &rhs, int dimension,int lplane,int rplane,int cbmask,int permute_type)
@ -269,16 +278,28 @@ template<class vobj> void Copy_plane_permute(Lattice<vobj>& lhs,const Lattice<vo
int e2=rhs._grid->_slice_block [dimension];
int stride = rhs._grid->_slice_stride[dimension];
parallel_for_nest2(int n=0;n<e1;n++){
for(int b=0;b<e2;b++){
static std::vector<std::pair<int,int> > table; table.resize(e1*e2);
int ent=0;
double t_tab,t_perm;
if ( cbmask == 0x3 ) {
for(int n=0;n<e1;n++){
for(int b=0;b<e2;b++){
int o =n*stride;
table[ent++] = std::pair<int,int>(lo+o+b,ro+o+b);
}}
} else {
for(int n=0;n<e1;n++){
for(int b=0;b<e2;b++){
int o =n*stride;
int ocb=1<<lhs._grid->CheckerBoardFromOindex(o+b);
if ( ocb&cbmask ) {
permute(lhs._odata[lo+o+b],rhs._odata[ro+o+b],permute_type);
}
if ( ocb&cbmask ) table[ent++] = std::pair<int,int>(lo+o+b,ro+o+b);
}}
}
}}
parallel_for(int i=0;i<ent;i++){
permute(lhs._odata[table[i].first],rhs._odata[table[i].second],permute_type);
}
}
//////////////////////////////////////////////////////
@ -291,6 +312,8 @@ template<class vobj> void Cshift_local(Lattice<vobj>& ret,const Lattice<vobj> &r
sshift[0] = rhs._grid->CheckerBoardShiftForCB(rhs.checkerboard,dimension,shift,Even);
sshift[1] = rhs._grid->CheckerBoardShiftForCB(rhs.checkerboard,dimension,shift,Odd);
double t_local;
if ( sshift[0] == sshift[1] ) {
Cshift_local(ret,rhs,dimension,shift,0x3);
} else {
@ -299,7 +322,7 @@ template<class vobj> void Cshift_local(Lattice<vobj>& ret,const Lattice<vobj> &r
}
}
template<class vobj> Lattice<vobj> Cshift_local(Lattice<vobj> &ret,const Lattice<vobj> &rhs,int dimension,int shift,int cbmask)
template<class vobj> void Cshift_local(Lattice<vobj> &ret,const Lattice<vobj> &rhs,int dimension,int shift,int cbmask)
{
GridBase *grid = rhs._grid;
int fd = grid->_fdimensions[dimension];
@ -325,11 +348,7 @@ template<class vobj> Lattice<vobj> Cshift_local(Lattice<vobj> &ret,const Lattice
int sshift = grid->CheckerBoardShiftForCB(rhs.checkerboard,dimension,shift,cb);
int sx = (x+sshift)%rd;
// FIXME : This must change where we have a
// Rotate slice.
// Document how this works ; why didn't I do this when I first wrote it...
// wrap is whether sshift > rd.
// num is sshift mod rd.
//
@ -365,10 +384,8 @@ template<class vobj> Lattice<vobj> Cshift_local(Lattice<vobj> &ret,const Lattice
if ( permute_slice ) Copy_plane_permute(ret,rhs,dimension,x,sx,cbmask,permute_type_dist);
else Copy_plane(ret,rhs,dimension,x,sx,cbmask);
}
return ret;
}
}
#endif

View File

@ -256,13 +256,42 @@ public:
_odata[ss]=r._odata[ss];
}
}
Lattice(Lattice&& r){ // move constructor
_grid = r._grid;
checkerboard = r.checkerboard;
_odata=std::move(r._odata);
}
inline Lattice<vobj> & operator = (Lattice<vobj> && r)
{
_grid = r._grid;
checkerboard = r.checkerboard;
_odata =std::move(r._odata);
return *this;
}
inline Lattice<vobj> & operator = (const Lattice<vobj> & r){
_grid = r._grid;
checkerboard = r.checkerboard;
_odata.resize(_grid->oSites());// essential
parallel_for(int ss=0;ss<_grid->oSites();ss++){
_odata[ss]=r._odata[ss];
}
return *this;
}
template<class robj> strong_inline Lattice<vobj> & operator = (const Lattice<robj> & r){
this->checkerboard = r.checkerboard;
conformable(*this,r);
parallel_for(int ss=0;ss<_grid->oSites();ss++){
this->_odata[ss]=r._odata[ss];
}
return *this;
}
virtual ~Lattice(void) = default;
void reset(GridBase* grid) {
@ -281,33 +310,6 @@ public:
return *this;
}
template<class robj> strong_inline Lattice<vobj> & operator = (const Lattice<robj> & r){
this->checkerboard = r.checkerboard;
conformable(*this,r);
parallel_for(int ss=0;ss<_grid->oSites();ss++){
this->_odata[ss]=r._odata[ss];
}
return *this;
}
strong_inline Lattice<vobj> & operator = (const Lattice<vobj> & r){
_grid = r._grid;
checkerboard = r.checkerboard;
_odata.resize(_grid->oSites());// essential
parallel_for(int ss=0;ss<_grid->oSites();ss++){
_odata[ss]=r._odata[ss];
}
return *this;
}
strong_inline Lattice<vobj> & operator = (Lattice<vobj> && r)
{
_grid = r._grid;
checkerboard = r.checkerboard;
_odata =std::move(r._odata);
return *this;
}
// *=,+=,-= operators inherit behvour from correspond */+/- operation
template<class T> strong_inline Lattice<vobj> &operator *=(const T &r) {

View File

@ -179,7 +179,7 @@ namespace Grid {
return ret;
}
#define DECLARE_RELATIONAL(op,functor) \
#define DECLARE_RELATIONAL_EQ(op,functor) \
template<class vsimd,IfSimd<vsimd> = 0>\
inline vInteger operator op (const vsimd & lhs, const vsimd & rhs)\
{\
@ -198,11 +198,6 @@ namespace Grid {
typedef typename vsimd::scalar_type scalar;\
return Comparison(functor<scalar,scalar>(),lhs,rhs);\
}\
template<class vsimd,IfSimd<vsimd> = 0>\
inline vInteger operator op(const iScalar<vsimd> &lhs,const iScalar<vsimd> &rhs)\
{ \
return lhs._internal op rhs._internal; \
} \
template<class vsimd>\
inline vInteger operator op(const iScalar<vsimd> &lhs,const typename vsimd::scalar_type &rhs) \
{ \
@ -212,14 +207,21 @@ namespace Grid {
inline vInteger operator op(const typename vsimd::scalar_type &lhs,const iScalar<vsimd> &rhs) \
{ \
return lhs op rhs._internal; \
}
} \
#define DECLARE_RELATIONAL(op,functor) \
DECLARE_RELATIONAL_EQ(op,functor) \
template<class vsimd>\
inline vInteger operator op(const iScalar<vsimd> &lhs,const iScalar<vsimd> &rhs)\
{ \
return lhs._internal op rhs._internal; \
}
DECLARE_RELATIONAL(<,slt);
DECLARE_RELATIONAL(<=,sle);
DECLARE_RELATIONAL(>,sgt);
DECLARE_RELATIONAL(>=,sge);
DECLARE_RELATIONAL(==,seq);
DECLARE_RELATIONAL_EQ(==,seq);
DECLARE_RELATIONAL(!=,sne);
#undef DECLARE_RELATIONAL

View File

@ -110,11 +110,11 @@ class BinaryIO {
lsites = 1;
}
#pragma omp parallel
PARALLEL_REGION
{
uint32_t nersc_csum_thr = 0;
#pragma omp for
PARALLEL_FOR_LOOP_INTERN
for (uint64_t local_site = 0; local_site < lsites; local_site++)
{
uint32_t *site_buf = (uint32_t *)&fbuf[local_site];
@ -124,7 +124,7 @@ class BinaryIO {
}
}
#pragma omp critical
PARALLEL_CRITICAL
{
nersc_csum += nersc_csum_thr;
}
@ -146,14 +146,14 @@ class BinaryIO {
std::vector<int> local_start =grid->LocalStarts();
std::vector<int> global_vol =grid->FullDimensions();
#pragma omp parallel
PARALLEL_REGION
{
std::vector<int> coor(nd);
uint32_t scidac_csuma_thr=0;
uint32_t scidac_csumb_thr=0;
uint32_t site_crc=0;
#pragma omp for
PARALLEL_FOR_LOOP_INTERN
for(uint64_t local_site=0;local_site<lsites;local_site++){
uint32_t * site_buf = (uint32_t *)&fbuf[local_site];
@ -183,7 +183,7 @@ class BinaryIO {
scidac_csumb_thr ^= site_crc<<gsite31 | site_crc>>(32-gsite31);
}
#pragma omp critical
PARALLEL_CRITICAL
{
scidac_csuma^= scidac_csuma_thr;
scidac_csumb^= scidac_csumb_thr;

View File

@ -40,7 +40,7 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
#define PARALLEL_FOR_LOOP _Pragma("omp parallel for schedule(static)")
#define PARALLEL_FOR_LOOP_INTERN _Pragma("omp for schedule(static)")
#define PARALLEL_NESTED_LOOP2 _Pragma("omp parallel for collapse(2)")
#define PARALLEL_NESTED_LOOP2 _Pragma("omp parallel for schedule(static) collapse(2)")
#define PARALLEL_REGION _Pragma("omp parallel")
#define PARALLEL_CRITICAL _Pragma("omp critical")
#else