1
0
mirror of https://github.com/paboyle/Grid.git synced 2025-06-17 15:27:06 +01:00

Got unpreconditioned conjugate gradient to run and converge on a random (uniform random,

not even SU(3) for now) gauge field. Convergence history is correctly indepdendent of decomposition
on 1,2,4,8,16 mpi tasks.
Found a couple of simd bugs which required fixed and enhanced the Grid_simd.cc test suite.
Implemented the Mdag, M, MdagM, Meooe Mooee schur type stuff in the wilson dop.
This commit is contained in:
Peter Boyle
2015-05-19 13:57:35 +01:00
parent 6f387b4916
commit a6e1ea216d
33 changed files with 566 additions and 316 deletions

View File

@ -92,13 +92,13 @@ namespace Grid {
};
///////////////////////////////////////////////
// mult, sub, add, adj,conj, mac functions
// mult, sub, add, adj,conjugate, mac functions
///////////////////////////////////////////////
friend inline void mult(vRealF * __restrict__ y,const vRealF * __restrict__ l,const vRealF *__restrict__ r) {*y = (*l) * (*r);}
friend inline void sub (vRealF * __restrict__ y,const vRealF * __restrict__ l,const vRealF *__restrict__ r) {*y = (*l) - (*r);}
friend inline void add (vRealF * __restrict__ y,const vRealF * __restrict__ l,const vRealF *__restrict__ r) {*y = (*l) + (*r);}
friend inline vRealF adj(const vRealF &in) { return in; }
friend inline vRealF conj(const vRealF &in){ return in; }
friend inline vRealF conjugate(const vRealF &in){ return in; }
friend inline void mac (vRealF &y,const vRealF a,const vRealF x){
#if defined (AVX1) || defined (SSE4)
@ -133,11 +133,12 @@ namespace Grid {
// all subtypes; may not be a good assumption, but could
// add the vector width as a template param for BG/Q for example
////////////////////////////////////////////////////////////////////
/*
friend inline void permute(vRealF &y,vRealF b,int perm)
{
Gpermute<vRealF>(y,b,perm);
}
/*
friend inline void merge(vRealF &y,std::vector<RealF *> &extracted)
{
Gmerge<vRealF,RealF >(y,extracted);
@ -155,7 +156,6 @@ namespace Grid {
Gextract<vRealF,RealF>(y,extracted);
}
*/
/////////////////////////////////////////////////////
// Broadcast a value across Nsimd copies.
@ -243,33 +243,26 @@ friend inline void vstore(const vRealF &ret, float *a){
}
friend inline RealF Reduce(const vRealF & in)
{
#if defined (SSE4)
// FIXME Hack
const RealF * ptr = (const RealF *) &in;
RealF ret = 0;
for(int i=0;i<vRealF::Nsimd();i++){
ret = ret+ptr[i];
}
return ret;
#ifdef SSE4
vRealF v1,v2;
permute(v1,in,0); // sse 128; quad single
v1=v1+in;
permute(v2,v1,1);
v1=v1+v2;
return v1.v[0];
#endif
#if defined (AVX1) || defined(AVX2)
__attribute__ ((aligned(32))) float c_[16];
__m256 tmp = _mm256_permute2f128_ps(in.v,in.v,0x01);
__m256 hadd = _mm256_hadd_ps(in.v,tmp);
tmp = _mm256_permute2f128_ps(hadd,hadd,0x01);
hadd = _mm256_hadd_ps(tmp,tmp);
_mm256_store_ps(c_,hadd);
return (float)c_[0];
#if defined(AVX1) || defined (AVX2)
vRealF v1,v2;
permute(v1,in,0); // avx 256; octo-double
v1=v1+in;
permute(v2,v1,1);
v1=v1+v2;
permute(v2,v1,2);
v1=v1+v2;
return v1.v[0];
#endif
#ifdef AVX512
return _mm512_reduce_add_ps(in.v);
/*
__attribute__ ((aligned(64))) float c_[16];
_mm512_store_ps(c_,in.v);
return c_[0]+c_[1]+c_[2]+c_[3]+c_[4]+c_[5]+c_[6]+c_[7]
+c_[8]+c_[9]+c_[10]+c_[11]+c_[12]+c_[13]+c_[14]+c_[15];
*/
#endif
#ifdef QPX
#endif
@ -291,7 +284,7 @@ friend inline void vstore(const vRealF &ret, float *a){
public:
static inline int Nsimd(void) { return sizeof(fvec)/sizeof(float);}
};
inline vRealF innerProduct(const vRealF & l, const vRealF & r) { return conj(l)*r; }
inline vRealF innerProduct(const vRealF & l, const vRealF & r) { return conjugate(l)*r; }
inline void zeroit(vRealF &z){ vzero(z);}
inline vRealF outerProduct(const vRealF &l, const vRealF& r)