1
0
mirror of https://github.com/paboyle/Grid.git synced 2025-06-10 19:36:56 +01:00

Sizable improvement in multigrid for unsquared.

6000 matmuls CG unprec
2000 matmuls CG prec (4000 eo muls)
1050 matmuls PGCR on 16^3 x 32 x 8 m=.01

Substantial effort on timing and logging infrastructure
This commit is contained in:
Peter Boyle
2015-07-24 01:31:13 +09:00
parent 11c99d5e66
commit d1afebf71e
67 changed files with 945 additions and 753 deletions

View File

@ -132,18 +132,18 @@ inline void CBFromExpression(int &cb,const T1& lat) // Lattice leaf
assert(cb==lat.checkerboard);
}
cb=lat.checkerboard;
// std::cout<<"Lattice leaf cb "<<cb<<std::endl;
// std::cout<<GridLogMessage<<"Lattice leaf cb "<<cb<<std::endl;
}
template<class T1,typename std::enable_if<!is_lattice<T1>::value, T1>::type * = nullptr >
inline void CBFromExpression(int &cb,const T1& notlat) // non-lattice leaf
{
// std::cout<<"Non lattice leaf cb"<<cb<<std::endl;
// std::cout<<GridLogMessage<<"Non lattice leaf cb"<<cb<<std::endl;
}
template <typename Op, typename T1>
inline void CBFromExpression(int &cb,const LatticeUnaryExpression<Op,T1 > &expr)
{
CBFromExpression(cb,std::get<0>(expr.second));// recurse
// std::cout<<"Unary node cb "<<cb<<std::endl;
// std::cout<<GridLogMessage<<"Unary node cb "<<cb<<std::endl;
}
template <typename Op, typename T1, typename T2>
@ -151,7 +151,7 @@ inline void CBFromExpression(int &cb,const LatticeBinaryExpression<Op,T1,T2> &ex
{
CBFromExpression(cb,std::get<0>(expr.second));// recurse
CBFromExpression(cb,std::get<1>(expr.second));
// std::cout<<"Binary node cb "<<cb<<std::endl;
// std::cout<<GridLogMessage<<"Binary node cb "<<cb<<std::endl;
}
template <typename Op, typename T1, typename T2, typename T3>
inline void CBFromExpression( int &cb,const LatticeTrinaryExpression<Op,T1,T2,T3 > &expr)
@ -159,7 +159,7 @@ inline void CBFromExpression( int &cb,const LatticeTrinaryExpression<Op,T1,T2,T3
CBFromExpression(cb,std::get<0>(expr.second));// recurse
CBFromExpression(cb,std::get<1>(expr.second));
CBFromExpression(cb,std::get<2>(expr.second));
// std::cout<<"Trinary node cb "<<cb<<std::endl;
// std::cout<<GridLogMessage<<"Trinary node cb "<<cb<<std::endl;
}
////////////////////////////////////////////
@ -370,7 +370,7 @@ using namespace Grid;
tmp.func(eval(0,v1),eval(0,v2));
auto var = v1+v2;
std::cout<<typeid(var).name()<<std::endl;
std::cout<<GridLogMessage<<typeid(var).name()<<std::endl;
v3=v1+v2;
v3=v1+v2+v1*v2;

View File

@ -221,7 +221,7 @@ PARALLEL_FOR_LOOP
template<class robj> strong_inline Lattice<vobj> & operator = (const Lattice<robj> & r){
this->checkerboard = r.checkerboard;
conformable(*this,r);
std::cout<<"Lattice operator ="<<std::endl;
std::cout<<GridLogMessage<<"Lattice operator ="<<std::endl;
PARALLEL_FOR_LOOP
for(int ss=0;ss<_grid->oSites();ss++){
this->_odata[ss]=r._odata[ss];

View File

@ -125,7 +125,7 @@ template<class vobj> inline void sliceSum(const Lattice<vobj> &Data,std::vector<
assert(grid!=NULL);
// FIXME
std::cout<<"WARNING ! SliceSum is unthreaded "<<grid->SumArraySize()<<" threads "<<std::endl;
std::cout<<GridLogMessage<<"WARNING ! SliceSum is unthreaded "<<grid->SumArraySize()<<" threads "<<std::endl;
const int Nd = grid->_ndimension;
const int Nsimd = grid->Nsimd();

View File

@ -233,7 +233,8 @@ namespace Grid {
int words=sizeof(scalar_object)/sizeof(scalar_type);
std::vector<scalar_object> buf(Nsimd);
PARALLEL_FOR_LOOP
for(int ss=0;ss<osites;ss++){
for(int si=0;si<Nsimd;si++){

View File

@ -23,7 +23,7 @@ inline void subdivides(GridBase *coarse,GridBase *fine)
template<class vobj> inline void pickCheckerboard(int cb,Lattice<vobj> &half,const Lattice<vobj> &full){
half.checkerboard = cb;
int ssh=0;
PARALLEL_FOR_LOOP
//PARALLEL_FOR_LOOP
for(int ss=0;ss<full._grid->oSites();ss++){
std::vector<int> coor;
int cbos;
@ -40,7 +40,7 @@ PARALLEL_FOR_LOOP
template<class vobj> inline void setCheckerboard(Lattice<vobj> &full,const Lattice<vobj> &half){
int cb = half.checkerboard;
int ssh=0;
PARALLEL_FOR_LOOP
//PARALLEL_FOR_LOOP
for(int ss=0;ss<full._grid->oSites();ss++){
std::vector<int> coor;
int cbos;
@ -158,6 +158,7 @@ template<class vobj,class CComplex>
fine_inner = localInnerProduct(fineX,fineY);
blockSum(coarse_inner,fine_inner);
PARALLEL_FOR_LOOP
for(int ss=0;ss<coarse->oSites();ss++){
CoarseInner._odata[ss] = coarse_inner._odata[ss];
}