mirror of
				https://github.com/paboyle/Grid.git
				synced 2025-11-03 21:44:33 +00:00 
			
		
		
		
	Got unpreconditioned conjugate gradient to run and converge on a random (uniform random,
not even SU(3) for now) gauge field. Convergence history is correctly independent of decomposition on 1, 2, 4, 8, and 16 MPI tasks. Found a couple of SIMD bugs which required fixing, and enhanced the Grid_simd.cc test suite. Implemented the Mdag, M, MdagM, Meooe, and Mooee Schur-type operators in the Wilson dop.
This commit is contained in:
		@@ -102,7 +102,7 @@ template <class arg> struct name\
 | 
			
		||||
 | 
			
		||||
GridUnopClass(UnarySub,-a);
 | 
			
		||||
GridUnopClass(UnaryAdj,adj(a));
 | 
			
		||||
GridUnopClass(UnaryConj,conj(a));
 | 
			
		||||
GridUnopClass(UnaryConj,conjugate(a));
 | 
			
		||||
GridUnopClass(UnaryTrace,trace(a));
 | 
			
		||||
GridUnopClass(UnaryTranspose,transpose(a));
 | 
			
		||||
 | 
			
		||||
@@ -178,7 +178,7 @@ template <typename T1,typename T2,typename T3> inline auto op(const T1 &pred,con
 | 
			
		||||
 | 
			
		||||
GRID_DEF_UNOP(operator -,UnarySub);
 | 
			
		||||
GRID_DEF_UNOP(adj,UnaryAdj);
 | 
			
		||||
GRID_DEF_UNOP(conj,UnaryConj);
 | 
			
		||||
GRID_DEF_UNOP(conjugate,UnaryConj);
 | 
			
		||||
GRID_DEF_UNOP(trace,UnaryTrace);
 | 
			
		||||
GRID_DEF_UNOP(transpose,UnaryTranspose);
 | 
			
		||||
 | 
			
		||||
 
 | 
			
		||||
@@ -144,14 +144,44 @@ PARALLEL_FOR_LOOP
 | 
			
		||||
  }
 | 
			
		||||
  
 | 
			
		||||
  template<class sobj,class vobj> strong_inline
 | 
			
		||||
  void axpy(Lattice<vobj> &ret,sobj a,const Lattice<vobj> &lhs,const Lattice<vobj> &rhs){
 | 
			
		||||
    conformable(lhs,rhs);
 | 
			
		||||
  void axpy(Lattice<vobj> &ret,sobj a,const Lattice<vobj> &x,const Lattice<vobj> &y){
 | 
			
		||||
    conformable(x,y);
 | 
			
		||||
#pragma omp parallel for
 | 
			
		||||
    for(int ss=0;ss<lhs._grid->oSites();ss++){
 | 
			
		||||
      vobj tmp = a*lhs._odata[ss];
 | 
			
		||||
      vstream(ret._odata[ss],tmp+rhs._odata[ss]);
 | 
			
		||||
    for(int ss=0;ss<x._grid->oSites();ss++){
 | 
			
		||||
      vobj tmp = a*x._odata[ss]+y._odata[ss];
 | 
			
		||||
      vstream(ret._odata[ss],tmp);
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
  template<class sobj,class vobj> strong_inline
 | 
			
		||||
  void axpby(Lattice<vobj> &ret,sobj a,sobj b,const Lattice<vobj> &x,const Lattice<vobj> &y){
 | 
			
		||||
    conformable(x,y);
 | 
			
		||||
#pragma omp parallel for
 | 
			
		||||
    for(int ss=0;ss<x._grid->oSites();ss++){
 | 
			
		||||
      vobj tmp = a*x._odata[ss]+b*y._odata[ss];
 | 
			
		||||
      vstream(ret._odata[ss],tmp);
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  template<class sobj,class vobj> strong_inline
 | 
			
		||||
  RealD axpy_norm(Lattice<vobj> &ret,sobj a,const Lattice<vobj> &x,const Lattice<vobj> &y){
 | 
			
		||||
    conformable(x,y);
 | 
			
		||||
#pragma omp parallel for
 | 
			
		||||
    for(int ss=0;ss<x._grid->oSites();ss++){
 | 
			
		||||
      vobj tmp = a*x._odata[ss]+y._odata[ss];
 | 
			
		||||
      vstream(ret._odata[ss],tmp);
 | 
			
		||||
    }
 | 
			
		||||
    return norm2(ret);
 | 
			
		||||
  }
 | 
			
		||||
  template<class sobj,class vobj> strong_inline
 | 
			
		||||
  RealD axpby_norm(Lattice<vobj> &ret,sobj a,sobj b,const Lattice<vobj> &x,const Lattice<vobj> &y){
 | 
			
		||||
    conformable(x,y);
 | 
			
		||||
#pragma omp parallel for
 | 
			
		||||
    for(int ss=0;ss<x._grid->oSites();ss++){
 | 
			
		||||
      vobj tmp = a*x._odata[ss]+b*y._odata[ss];
 | 
			
		||||
      vstream(ret._odata[ss],tmp);
 | 
			
		||||
    }
 | 
			
		||||
    return norm2(ret); // FIXME implement parallel norm in ss loop
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
}
 | 
			
		||||
#endif
 | 
			
		||||
 
 | 
			
		||||
@@ -9,7 +9,7 @@ namespace Grid {
 | 
			
		||||
// Functionality:
 | 
			
		||||
//     -=,+=,*=,()
 | 
			
		||||
//     add,+,sub,-,mult,mac,*
 | 
			
		||||
//     adj,conj
 | 
			
		||||
//     adj,conjugate
 | 
			
		||||
//     real,imag
 | 
			
		||||
//     transpose,transposeIndex  
 | 
			
		||||
//     trace,traceIndex
 | 
			
		||||
 
 | 
			
		||||
@@ -18,11 +18,11 @@ PARALLEL_FOR_LOOP
 | 
			
		||||
        return ret;
 | 
			
		||||
    };
 | 
			
		||||
 | 
			
		||||
    template<class vobj> inline Lattice<vobj> conj(const Lattice<vobj> &lhs){
 | 
			
		||||
    template<class vobj> inline Lattice<vobj> conjugate(const Lattice<vobj> &lhs){
 | 
			
		||||
        Lattice<vobj> ret(lhs._grid);
 | 
			
		||||
PARALLEL_FOR_LOOP
 | 
			
		||||
        for(int ss=0;ss<lhs._grid->oSites();ss++){
 | 
			
		||||
            ret._odata[ss] = conj(lhs._odata[ss]);
 | 
			
		||||
            ret._odata[ss] = conjugate(lhs._odata[ss]);
 | 
			
		||||
        }
 | 
			
		||||
        return ret;
 | 
			
		||||
    };
 | 
			
		||||
 
 | 
			
		||||
		Reference in New Issue
	
	Block a user