1
0
mirror of https://github.com/paboyle/Grid.git synced 2024-11-14 01:35:36 +00:00

Reworking to keep intel compiler happy

This commit is contained in:
Peter Boyle 2015-05-19 21:29:07 +01:00
parent ee8cf77071
commit efc0d1e0b9
8 changed files with 62 additions and 24 deletions

View File

@ -16,7 +16,7 @@ Grid_wilson_cg_unprec_LDADD = -lGrid
Grid_comms_SOURCES = Grid_comms.cc Grid_comms_SOURCES = Grid_comms.cc
Grid_comms_LDADD = -lGrid Grid_comms_LDADD = -lGrid
Grid_su3_SOURCES = Grid_su3.cc Grid_su3_SOURCES = Grid_su3.cc Grid_su3_test.cc
Grid_su3_LDADD = -lGrid Grid_su3_LDADD = -lGrid
Grid_memory_bandwidth_SOURCES = Grid_memory_bandwidth.cc Grid_memory_bandwidth_SOURCES = Grid_memory_bandwidth.cc

View File

@ -25,7 +25,7 @@ namespace Grid {
///////////////////////////////////////////////////////////////////////////////////////////// /////////////////////////////////////////////////////////////////////////////////////////////
template<class Field> class HermitianOperatorBase : public LinearOperatorBase<Field> { template<class Field> class HermitianOperatorBase : public LinearOperatorBase<Field> {
public: public:
virtual void OpAndNorm(const Field &in, Field &out,double &n1,double &n2); virtual void OpAndNorm(const Field &in, Field &out,double &n1,double &n2)=0;
void AdjOp(const Field &in, Field &out) { void AdjOp(const Field &in, Field &out) {
Op(in,out); Op(in,out);
}; };

View File

@ -31,7 +31,7 @@ public:
bigfloat(const double d) { mpf_init_set_d(x, d); } bigfloat(const double d) { mpf_init_set_d(x, d); }
bigfloat(const char *str) { mpf_init_set_str(x, (char*)str, 10); } bigfloat(const char *str) { mpf_init_set_str(x, (char*)str, 10); }
~bigfloat(void) { mpf_clear(x); } ~bigfloat(void) { mpf_clear(x); }
operator const double (void) const { return (double)mpf_get_d(x); } operator double (void) const { return (double)mpf_get_d(x); }
static void setDefaultPrecision(unsigned long dprec) { static void setDefaultPrecision(unsigned long dprec) {
unsigned long bprec = (unsigned long)(3.321928094 * (double)dprec); unsigned long bprec = (unsigned long)(3.321928094 * (double)dprec);
mpf_set_default_prec(bprec); mpf_set_default_prec(bprec);

View File

@ -345,20 +345,30 @@ friend inline void vstore(const vComplexD &ret, ComplexD *a){
// REDUCE FIXME must be a cleaner implementation // REDUCE FIXME must be a cleaner implementation
friend inline ComplexD Reduce(const vComplexD & in) friend inline ComplexD Reduce(const vComplexD & in)
{ {
vComplexD v1,v2;
union {
zvec v;
double f[sizeof(zvec)/sizeof(double)];
} conv;
#ifdef SSE4 #ifdef SSE4
return ComplexD(in.v[0],in.v[1]); v1=in;
#endif #endif
#if defined(AVX1) || defined (AVX2) #if defined(AVX1) || defined (AVX2)
vComplexD v1;
permute(v1,in,0); // sse 128; paired complex single permute(v1,in,0); // sse 128; paired complex single
v1=v1+in; v1=v1+in;
return ComplexD(v1.v[0],v1.v[1]);
#endif #endif
#ifdef AVX512 #ifdef AVX512
return ComplexD(_mm512_mask_reduce_add_pd(0x55, in.v),_mm512_mask_reduce_add_pd(0xAA, in.v)); permute(v1,in,0); // sse 128; paired complex single
v1=v1+in;
permute(v2,v1,1); // avx 256; quad complex single
v1=v1+v2;
#endif #endif
#ifdef QPX #ifdef QPX
#error
#endif #endif
conv.v = v1.v;
return ComplexD(conv.f[0],conv.f[1]);
} }
// Unary negation // Unary negation

View File

@ -234,26 +234,34 @@ namespace Grid {
} }
friend inline ComplexF Reduce(const vComplexF & in) friend inline ComplexF Reduce(const vComplexF & in)
{ {
vComplexF v1,v2;
union {
cvec v;
float f[sizeof(cvec)/sizeof(float)];
} conv;
#ifdef SSE4 #ifdef SSE4
vComplexF v1;
permute(v1,in,0); // sse 128; paired complex single permute(v1,in,0); // sse 128; paired complex single
v1=v1+in; v1=v1+in;
return ComplexF(v1.v[0],v1.v[1]);
#endif #endif
#if defined(AVX1) || defined (AVX2) #if defined(AVX1) || defined (AVX2)
vComplexF v1,v2;
permute(v1,in,0); // sse 128; paired complex single permute(v1,in,0); // sse 128; paired complex single
v1=v1+in; v1=v1+in;
permute(v2,v1,1); // avx 256; quad complex single permute(v2,v1,1); // avx 256; quad complex single
v1=v1+v2; v1=v1+v2;
return ComplexF(v1.v[0],v1.v[1]);
#endif #endif
#ifdef AVX512 #ifdef AVX512
return ComplexF(_mm512_mask_reduce_add_ps(0x5555, in.v),_mm512_mask_reduce_add_ps(0xAAAA, in.v)); permute(v1,in,0); // avx512 octo-complex single
v1=v1+in;
permute(v2,v1,1);
v1=v1+v2;
permute(v2,v1,2);
v1=v1+v2;
#endif #endif
#ifdef QPX #ifdef QPX
#error #error
#endif #endif
conv.v = v1.v;
return ComplexF(conv.f[0],conv.f[1]);
} }
friend inline vComplexF operator * (const ComplexF &a, vComplexF b){ friend inline vComplexF operator * (const ComplexF &a, vComplexF b){

View File

@ -210,25 +210,33 @@ namespace Grid {
friend inline RealD Reduce(const vRealD & in) friend inline RealD Reduce(const vRealD & in)
{ {
vRealD v1,v2;
union {
dvec v;
double f[sizeof(dvec)/sizeof(double)];
} conv;
#ifdef SSE4 #ifdef SSE4
vRealD v1;
permute(v1,in,0); // sse 128; paired real double permute(v1,in,0); // sse 128; paired real double
v1=v1+in; v1=v1+in;
return RealD(v1.v[0]);
#endif #endif
#if defined(AVX1) || defined (AVX2) #if defined(AVX1) || defined (AVX2)
vRealD v1,v2;
permute(v1,in,0); // avx 256; quad double permute(v1,in,0); // avx 256; quad double
v1=v1+in; v1=v1+in;
permute(v2,v1,1); permute(v2,v1,1);
v1=v1+v2; v1=v1+v2;
return v1.v[0];
#endif #endif
#ifdef AVX512 #ifdef AVX512
return _mm512_reduce_add_pd(in.v); permute(v1,in,0); // avx 512; octo-double
v1=v1+in;
permute(v2,v1,1);
v1=v1+v2;
permute(v2,v1,2);
v1=v1+v2;
#endif #endif
#ifdef QPX #ifdef QPX
#endif #endif
conv.v=v1.v;
return conv.f[0];
} }
// *=,+=,-= operators // *=,+=,-= operators

View File

@ -243,29 +243,39 @@ friend inline void vstore(const vRealF &ret, float *a){
} }
friend inline RealF Reduce(const vRealF & in) friend inline RealF Reduce(const vRealF & in)
{ {
#ifdef SSE4
vRealF v1,v2; vRealF v1,v2;
union {
fvec v;
float f[sizeof(fvec)/sizeof(double)];
} conv;
#ifdef SSE4
permute(v1,in,0); // sse 128; quad single permute(v1,in,0); // sse 128; quad single
v1=v1+in; v1=v1+in;
permute(v2,v1,1); permute(v2,v1,1);
v1=v1+v2; v1=v1+v2;
return v1.v[0];
#endif #endif
#if defined(AVX1) || defined (AVX2) #if defined(AVX1) || defined (AVX2)
vRealF v1,v2;
permute(v1,in,0); // avx 256; octo-double permute(v1,in,0); // avx 256; octo-double
v1=v1+in; v1=v1+in;
permute(v2,v1,1); permute(v2,v1,1);
v1=v1+v2; v1=v1+v2;
permute(v2,v1,2); permute(v2,v1,2);
v1=v1+v2; v1=v1+v2;
return v1.v[0];
#endif #endif
#ifdef AVX512 #ifdef AVX512
return _mm512_reduce_add_ps(in.v); permute(v1,in,0); // avx 256; octo-double
v1=v1+in;
permute(v2,v1,1);
v1=v1+v2;
permute(v2,v1,2);
v1=v1+v2;
permute(v2,v1,3);
v1=v1+v2;
#endif #endif
#ifdef QPX #ifdef QPX
#endif #endif
conv.v=v1.v;
return conv.f[0];
} }
// *=,+=,-= operators // *=,+=,-= operators

View File

@ -13,7 +13,7 @@ int main (int argc, char ** argv)
std::vector<int> simd_layout = GridDefaultSimd(4,vComplexF::Nsimd()); std::vector<int> simd_layout = GridDefaultSimd(4,vComplexF::Nsimd());
std::vector<int> mpi_layout = GridDefaultMpi(); std::vector<int> mpi_layout = GridDefaultMpi();
std::vector<int> latt_size ({16,16,16,32}); std::vector<int> latt_size = GridDefaultLatt();
std::vector<int> clatt_size ({4,4,4,8}); std::vector<int> clatt_size ({4,4,4,8});
int orthodir=3; int orthodir=3;
int orthosz =latt_size[orthodir]; int orthosz =latt_size[orthodir];
@ -44,13 +44,15 @@ int main (int argc, char ** argv)
// (1+2+3)=6 = N(N-1)/2 terms // (1+2+3)=6 = N(N-1)/2 terms
LatticeComplex Plaq(&Fine); LatticeComplex Plaq(&Fine);
LatticeComplex cPlaq(&Coarse); LatticeComplex cPlaq(&Coarse);
Plaq = zero; Plaq = zero;
#if 1
for(int mu=1;mu<Nd;mu++){ for(int mu=1;mu<Nd;mu++){
for(int nu=0;nu<mu;nu++){ for(int nu=0;nu<mu;nu++){
Plaq = Plaq + trace(U[mu]*Cshift(U[nu],mu,1)*adj(Cshift(U[mu],nu,1))*adj(U[nu])); Plaq = Plaq + trace(U[mu]*Cshift(U[nu],mu,1)*adj(Cshift(U[mu],nu,1))*adj(U[nu]));
} }
} }
#endif
double vol = Fine.gSites(); double vol = Fine.gSites();
Complex PlaqScale(1.0/vol/6.0/3.0); Complex PlaqScale(1.0/vol/6.0/3.0);
std::cout <<"PlaqScale" << PlaqScale<<std::endl; std::cout <<"PlaqScale" << PlaqScale<<std::endl;