1
0
mirror of https://github.com/paboyle/Grid.git synced 2025-06-13 20:57:06 +01:00

Got unpreconditioned conjugate gradient to run and converge on a random (uniform random,

not even SU(3) for now) gauge field. Convergence history is correctly independent of decomposition
on 1,2,4,8,16 mpi tasks.
Found a couple of simd bugs which required fixing, and enhanced the Grid_simd.cc test suite.
Implemented the Mdag, M, MdagM, Meooe, Mooee Schur-type stuff in the wilson dop.
This commit is contained in:
Peter Boyle
2015-05-19 13:57:35 +01:00
parent 6f387b4916
commit a6e1ea216d
33 changed files with 566 additions and 316 deletions

View File

@ -102,7 +102,7 @@ template <class arg> struct name\
// Unary-operation functor declarations. Each GridUnopClass(Name, expr) line
// presumably expands to a struct `Name` that evaluates `expr` on its operand
// `a` (the macro body is not visible here -- TODO confirm).
// NOTE(review): UnaryConj is listed twice, once with conj(a) and once with
// conjugate(a). This looks like the removed and added lines of a diff hunk
// shown without +/- markers; only one of the two should exist in the real file.
GridUnopClass(UnarySub,-a);
GridUnopClass(UnaryAdj,adj(a));
GridUnopClass(UnaryConj,conj(a));
GridUnopClass(UnaryConj,conjugate(a));
GridUnopClass(UnaryTrace,trace(a));
GridUnopClass(UnaryTranspose,transpose(a));
@ -178,7 +178,7 @@ template <typename T1,typename T2,typename T3> inline auto op(const T1 &pred,con
// Unary-operator entry points. Each GRID_DEF_UNOP(fn, Functor) line presumably
// defines the free function/operator `fn` in terms of the named functor
// (the macro body is not visible here -- TODO confirm).
// NOTE(review): both `conj` and `conjugate` are bound to UnaryConj below. This
// looks like the removed and added lines of a diff hunk shown without +/-
// markers, i.e. a rename from conj to conjugate.
GRID_DEF_UNOP(operator -,UnarySub);
GRID_DEF_UNOP(adj,UnaryAdj);
GRID_DEF_UNOP(conj,UnaryConj);
GRID_DEF_UNOP(conjugate,UnaryConj);
GRID_DEF_UNOP(trace,UnaryTrace);
GRID_DEF_UNOP(transpose,UnaryTranspose);

View File

@ -144,14 +144,44 @@ PARALLEL_FOR_LOOP
}
// axpy: writes a*x + y into ret, one outer site (index ss) at a time, inside
// an OpenMP parallel-for loop over x._grid->oSites() sites.
// NOTE(review): this span holds two versions of the same function interleaved
// (one using lhs/rhs, one using x/y) -- apparently the removed and added lines
// of a diff hunk shown without +/- markers. It is not compilable as shown.
// The lhs/rhs version adds rhs inside the vstream call; the x/y version forms
// the full sum in tmp first and then streams tmp.
template<class sobj,class vobj> strong_inline
void axpy(Lattice<vobj> &ret,sobj a,const Lattice<vobj> &lhs,const Lattice<vobj> &rhs){
conformable(lhs,rhs);
void axpy(Lattice<vobj> &ret,sobj a,const Lattice<vobj> &x,const Lattice<vobj> &y){
conformable(x,y);
#pragma omp parallel for
for(int ss=0;ss<lhs._grid->oSites();ss++){
vobj tmp = a*lhs._odata[ss];
vstream(ret._odata[ss],tmp+rhs._odata[ss]);
for(int ss=0;ss<x._grid->oSites();ss++){
vobj tmp = a*x._odata[ss]+y._odata[ss];
vstream(ret._odata[ss],tmp);
}
}
// axpby: ret = a*x + b*y, evaluated independently for every outer site.
//   ret  - destination lattice; written through vstream, not checked against x/y here
//   a, b - scalar coefficients applied to x and y respectively
//   x, y - input lattices; conformable(x,y) is called on them before the loop
template<class sobj,class vobj> strong_inline
void axpby(Lattice<vobj> &ret,sobj a,sobj b,const Lattice<vobj> &x,const Lattice<vobj> &y){
  conformable(x,y);
  // Sites are independent of each other, so the loop is shared across OpenMP threads.
#pragma omp parallel for
  for(int site=0;site<x._grid->oSites();++site){
    vobj combined = a*x._odata[site]+b*y._odata[site];
    vstream(ret._odata[site],combined);
  }
}
// axpy_norm: ret = a*x + y for every outer site, then returns norm2(ret).
//   ret  - destination lattice; written through vstream, not checked against x/y here
//   a    - scalar coefficient applied to x
//   x, y - input lattices; conformable(x,y) is called on them before the loop
// The norm is taken in a separate pass over ret once the site loop has finished.
template<class sobj,class vobj> strong_inline
RealD axpy_norm(Lattice<vobj> &ret,sobj a,const Lattice<vobj> &x,const Lattice<vobj> &y){
  conformable(x,y);
  // Sites are independent of each other, so the loop is shared across OpenMP threads.
#pragma omp parallel for
  for(int site=0;site<x._grid->oSites();++site){
    vobj combined = a*x._odata[site]+y._odata[site];
    vstream(ret._odata[site],combined);
  }
  return norm2(ret);
}
// axpby_norm: ret = a*x + b*y for every outer site, then returns norm2(ret).
//   ret  - destination lattice; written through vstream, not checked against x/y here
//   a, b - scalar coefficients applied to x and y respectively
//   x, y - input lattices; conformable(x,y) is called on them before the loop
template<class sobj,class vobj> strong_inline
RealD axpby_norm(Lattice<vobj> &ret,sobj a,sobj b,const Lattice<vobj> &x,const Lattice<vobj> &y){
  conformable(x,y);
  // Sites are independent of each other, so the loop is shared across OpenMP threads.
#pragma omp parallel for
  for(int site=0;site<x._grid->oSites();++site){
    vobj combined = a*x._odata[site]+b*y._odata[site];
    vstream(ret._odata[site],combined);
  }
  // FIXME implement parallel norm inside the site loop instead of a second pass over ret
  return norm2(ret);
}
}
#endif

View File

@ -9,7 +9,7 @@ namespace Grid {
// Functionality:
// -=,+=,*=,()
// add,+,sub,-,mult,mac,*
// adj,conj
// adj,conjugate
// real,imag
// transpose,transposeIndex
// trace,traceIndex

View File

@ -18,11 +18,11 @@ PARALLEL_FOR_LOOP
return ret;
};
// Site-wise complex conjugate of a lattice: builds a new Lattice on lhs._grid
// and fills each outer site with the conjugate of the matching site of lhs.
// NOTE(review): this span holds two versions interleaved -- the signature and
// the loop body each appear once as `conj` and once as `conjugate`. This is
// apparently the removed and added lines of a diff hunk shown without +/-
// markers (a rename from conj to conjugate); it is not compilable as shown.
template<class vobj> inline Lattice<vobj> conj(const Lattice<vobj> &lhs){
template<class vobj> inline Lattice<vobj> conjugate(const Lattice<vobj> &lhs){
Lattice<vobj> ret(lhs._grid);
PARALLEL_FOR_LOOP
for(int ss=0;ss<lhs._grid->oSites();ss++){
ret._odata[ss] = conj(lhs._odata[ss]);
ret._odata[ss] = conjugate(lhs._odata[ss]);
}
return ret;
};