1
0
mirror of https://github.com/paboyle/Grid.git synced 2025-06-17 23:37:06 +01:00

Got unpreconditioned conjugate gradient to run and converge on a random (uniform random,

not even SU(3) for now) gauge field. Convergence history is correctly indepdendent of decomposition
on 1,2,4,8,16 mpi tasks.
Found a couple of simd bugs which required fixed and enhanced the Grid_simd.cc test suite.
Implemented the Mdag, M, MdagM, Meooe Mooee schur type stuff in the wilson dop.
This commit is contained in:
Peter Boyle
2015-05-19 13:57:35 +01:00
parent 6f387b4916
commit a6e1ea216d
33 changed files with 566 additions and 316 deletions

View File

@ -32,7 +32,7 @@ namespace Grid {
friend inline void mult(vComplexD * __restrict__ y,const vComplexD * __restrict__ l,const vComplexD *__restrict__ r) {*y = (*l) * (*r);}
friend inline void sub (vComplexD * __restrict__ y,const vComplexD * __restrict__ l,const vComplexD *__restrict__ r) {*y = (*l) - (*r);}
friend inline void add (vComplexD * __restrict__ y,const vComplexD * __restrict__ l,const vComplexD *__restrict__ r) {*y = (*l) + (*r);}
friend inline vComplexD adj(const vComplexD &in){ return conj(in); }
friend inline vComplexD adj(const vComplexD &in){ return conjugate(in); }
//////////////////////////////////
// Initialise to 1,0,i
@ -166,11 +166,11 @@ namespace Grid {
// all subtypes; may not be a good assumption, but could
// add the vector width as a template param for BG/Q for example
////////////////////////////////////////////////////////////////////
/*
friend inline void permute(vComplexD &y,vComplexD b,int perm)
{
Gpermute<vComplexD>(y,b,perm);
}
/*
friend inline void merge(vComplexD &y,std::vector<ComplexD *> &extracted)
{
Gmerge<vComplexD,ComplexD >(y,extracted);
@ -269,7 +269,7 @@ friend inline void vstore(const vComplexD &ret, ComplexD *a){
////////////////////////
// Conjugate
////////////////////////
friend inline vComplexD conj(const vComplexD &in){
friend inline vComplexD conjugate(const vComplexD &in){
vComplexD ret ; vzero(ret);
#if defined (AVX1)|| defined (AVX2)
// addsubps 0, inv=>0+in.v[3] 0-in.v[2], 0+in.v[1], 0-in.v[0], ...
@ -345,17 +345,17 @@ friend inline void vstore(const vComplexD &ret, ComplexD *a){
// REDUCE FIXME must be a cleaner implementation
friend inline ComplexD Reduce(const vComplexD & in)
{
#if defined SSE4
return ComplexD(in.v[0], in.v[1]); // inefficient
#ifdef SSE4
return ComplexD(in.v[0],in.v[1]);
#endif
#if defined(AVX1) || defined (AVX2)
vComplexD v1;
permute(v1,in,0); // sse 128; paired complex single
v1=v1+in;
return ComplexD(v1.v[0],v1.v[1]);
#endif
#if defined (AVX1) || defined(AVX2)
// return std::complex<double>(_mm256_mask_reduce_add_pd(0x55, in.v),_mm256_mask_reduce_add_pd(0xAA, in.v));
__attribute__ ((aligned(32))) double c_[4];
_mm256_store_pd(c_,in.v);
return ComplexD(c_[0]+c_[2],c_[1]+c_[3]);
#endif
#ifdef AVX512
return ComplexD(_mm512_mask_reduce_add_pd(0x55, in.v),_mm512_mask_reduce_add_pd(0xAA, in.v));
return ComplexD(_mm512_mask_reduce_add_pd(0x55, in.v),_mm512_mask_reduce_add_pd(0xAA, in.v));
#endif
#ifdef QPX
#endif
@ -387,7 +387,7 @@ friend inline void vstore(const vComplexD &ret, ComplexD *a){
};
inline vComplexD innerProduct(const vComplexD & l, const vComplexD & r) { return conj(l)*r; }
inline vComplexD innerProduct(const vComplexD & l, const vComplexD & r) { return conjugate(l)*r; }
typedef vComplexD vDComplex;