mirror of
				https://github.com/paboyle/Grid.git
				synced 2025-11-04 05:54:32 +00:00 
			
		
		
		
	Reworking to keep intel compiler happy
This commit is contained in:
		@@ -16,7 +16,7 @@ Grid_wilson_cg_unprec_LDADD = -lGrid
 | 
			
		||||
Grid_comms_SOURCES = Grid_comms.cc
 | 
			
		||||
Grid_comms_LDADD = -lGrid
 | 
			
		||||
 | 
			
		||||
Grid_su3_SOURCES = Grid_su3.cc
 | 
			
		||||
Grid_su3_SOURCES = Grid_su3.cc Grid_su3_test.cc
 | 
			
		||||
Grid_su3_LDADD = -lGrid
 | 
			
		||||
 | 
			
		||||
Grid_memory_bandwidth_SOURCES = Grid_memory_bandwidth.cc
 | 
			
		||||
 
 | 
			
		||||
@@ -25,7 +25,7 @@ namespace Grid {
 | 
			
		||||
  /////////////////////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
    template<class Field> class HermitianOperatorBase : public LinearOperatorBase<Field> {
 | 
			
		||||
    public:
 | 
			
		||||
      virtual void OpAndNorm(const Field &in, Field &out,double &n1,double &n2);
 | 
			
		||||
      virtual void OpAndNorm(const Field &in, Field &out,double &n1,double &n2)=0;
 | 
			
		||||
      void AdjOp(const Field &in, Field &out) {
 | 
			
		||||
	Op(in,out);
 | 
			
		||||
      };
 | 
			
		||||
 
 | 
			
		||||
@@ -31,7 +31,7 @@ public:
 | 
			
		||||
  bigfloat(const double d) { mpf_init_set_d(x, d); }  
 | 
			
		||||
  bigfloat(const char *str) { mpf_init_set_str(x, (char*)str, 10); }
 | 
			
		||||
  ~bigfloat(void) { mpf_clear(x); }
 | 
			
		||||
  operator const double (void) const { return (double)mpf_get_d(x); }
 | 
			
		||||
  operator double (void) const { return (double)mpf_get_d(x); }
 | 
			
		||||
  static void setDefaultPrecision(unsigned long dprec) {
 | 
			
		||||
    unsigned long bprec =  (unsigned long)(3.321928094 * (double)dprec);
 | 
			
		||||
    mpf_set_default_prec(bprec);
 | 
			
		||||
 
 | 
			
		||||
@@ -345,20 +345,30 @@ friend inline void vstore(const vComplexD &ret, ComplexD *a){
 | 
			
		||||
// REDUCE FIXME must be a cleaner implementation
 | 
			
		||||
       friend inline ComplexD Reduce(const vComplexD & in)
 | 
			
		||||
       { 
 | 
			
		||||
	 vComplexD v1,v2;
 | 
			
		||||
	 union { 
 | 
			
		||||
	   zvec v;
 | 
			
		||||
	   double f[sizeof(zvec)/sizeof(double)];
 | 
			
		||||
	 } conv;
 | 
			
		||||
	   
 | 
			
		||||
#ifdef SSE4
 | 
			
		||||
	 return ComplexD(in.v[0],in.v[1]);
 | 
			
		||||
	 v1=in;
 | 
			
		||||
#endif
 | 
			
		||||
#if defined(AVX1) || defined (AVX2)
 | 
			
		||||
	 vComplexD v1;
 | 
			
		||||
	 permute(v1,in,0); // sse 128; paired complex single
 | 
			
		||||
	 v1=v1+in;
 | 
			
		||||
	 return ComplexD(v1.v[0],v1.v[1]);
 | 
			
		||||
#endif
 | 
			
		||||
#ifdef AVX512
 | 
			
		||||
	 return ComplexD(_mm512_mask_reduce_add_pd(0x55, in.v),_mm512_mask_reduce_add_pd(0xAA, in.v));
 | 
			
		||||
	 permute(v1,in,0); // sse 128; paired complex single
 | 
			
		||||
	 v1=v1+in;
 | 
			
		||||
	 permute(v2,v1,1); // avx 256; quad complex single
 | 
			
		||||
	 v1=v1+v2;
 | 
			
		||||
#endif 
 | 
			
		||||
#ifdef QPX
 | 
			
		||||
#error
 | 
			
		||||
#endif
 | 
			
		||||
	 conv.v = v1.v;
 | 
			
		||||
	 return ComplexD(conv.f[0],conv.f[1]);
 | 
			
		||||
        }
 | 
			
		||||
        
 | 
			
		||||
        // Unary negation
 | 
			
		||||
 
 | 
			
		||||
@@ -234,26 +234,34 @@ namespace Grid {
 | 
			
		||||
	}
 | 
			
		||||
	friend inline ComplexF Reduce(const vComplexF & in)
 | 
			
		||||
	{
 | 
			
		||||
	 vComplexF v1,v2;
 | 
			
		||||
	 union { 
 | 
			
		||||
	   cvec v;
 | 
			
		||||
	   float f[sizeof(cvec)/sizeof(float)];
 | 
			
		||||
	 } conv;
 | 
			
		||||
#ifdef SSE4
 | 
			
		||||
	 vComplexF v1;
 | 
			
		||||
	 permute(v1,in,0); // sse 128; paired complex single
 | 
			
		||||
	 v1=v1+in;
 | 
			
		||||
	 return ComplexF(v1.v[0],v1.v[1]);
 | 
			
		||||
#endif
 | 
			
		||||
#if defined(AVX1) || defined (AVX2)
 | 
			
		||||
	 vComplexF v1,v2;
 | 
			
		||||
	 permute(v1,in,0); // sse 128; paired complex single
 | 
			
		||||
	 v1=v1+in;
 | 
			
		||||
	 permute(v2,v1,1); // avx 256; quad complex single
 | 
			
		||||
	 v1=v1+v2;
 | 
			
		||||
	 return ComplexF(v1.v[0],v1.v[1]);
 | 
			
		||||
#endif
 | 
			
		||||
#ifdef AVX512
 | 
			
		||||
            return ComplexF(_mm512_mask_reduce_add_ps(0x5555, in.v),_mm512_mask_reduce_add_ps(0xAAAA, in.v));
 | 
			
		||||
	 permute(v1,in,0); // avx512 octo-complex single
 | 
			
		||||
	 v1=v1+in;
 | 
			
		||||
	 permute(v2,v1,1); 
 | 
			
		||||
	 v1=v1+v2;
 | 
			
		||||
	 permute(v2,v1,2); 
 | 
			
		||||
	 v1=v1+v2;
 | 
			
		||||
#endif
 | 
			
		||||
#ifdef QPX
 | 
			
		||||
#error
 | 
			
		||||
#endif
 | 
			
		||||
	 conv.v = v1.v;
 | 
			
		||||
	 return ComplexF(conv.f[0],conv.f[1]);
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
        friend inline vComplexF operator * (const ComplexF &a, vComplexF b){
 | 
			
		||||
 
 | 
			
		||||
@@ -210,25 +210,33 @@ namespace Grid {
 | 
			
		||||
 | 
			
		||||
       friend inline RealD Reduce(const vRealD & in)
 | 
			
		||||
       {
 | 
			
		||||
	 vRealD v1,v2;
 | 
			
		||||
	 union { 
 | 
			
		||||
	   dvec v;
 | 
			
		||||
	   double f[sizeof(dvec)/sizeof(double)];
 | 
			
		||||
	 } conv;
 | 
			
		||||
#ifdef SSE4
 | 
			
		||||
	 vRealD v1;
 | 
			
		||||
	 permute(v1,in,0); // sse 128; paired real double
 | 
			
		||||
	 v1=v1+in;
 | 
			
		||||
	 return RealD(v1.v[0]);
 | 
			
		||||
#endif
 | 
			
		||||
#if defined(AVX1) || defined (AVX2)
 | 
			
		||||
	 vRealD v1,v2;
 | 
			
		||||
	 permute(v1,in,0); // avx 256; quad double
 | 
			
		||||
	 v1=v1+in;
 | 
			
		||||
	 permute(v2,v1,1); 
 | 
			
		||||
	 v1=v1+v2;
 | 
			
		||||
	 return v1.v[0];
 | 
			
		||||
#endif
 | 
			
		||||
#ifdef AVX512
 | 
			
		||||
            return _mm512_reduce_add_pd(in.v);
 | 
			
		||||
	 permute(v1,in,0); // avx 512; octo-double
 | 
			
		||||
	 v1=v1+in;
 | 
			
		||||
	 permute(v2,v1,1); 
 | 
			
		||||
	 v1=v1+v2;
 | 
			
		||||
	 permute(v2,v1,2); 
 | 
			
		||||
	 v1=v1+v2;
 | 
			
		||||
#endif
 | 
			
		||||
#ifdef QPX
 | 
			
		||||
#endif
 | 
			
		||||
	 conv.v=v1.v;
 | 
			
		||||
	 return conv.f[0];
 | 
			
		||||
       }
 | 
			
		||||
 | 
			
		||||
        // *=,+=,-= operators
 | 
			
		||||
 
 | 
			
		||||
@@ -243,29 +243,39 @@ friend inline void vstore(const vRealF &ret, float *a){
 | 
			
		||||
        }
 | 
			
		||||
       friend inline RealF Reduce(const vRealF & in)
 | 
			
		||||
       {
 | 
			
		||||
#ifdef SSE4
 | 
			
		||||
	 vRealF v1,v2;
 | 
			
		||||
	 union { 
 | 
			
		||||
	   fvec v;
 | 
			
		||||
	   float f[sizeof(fvec)/sizeof(double)];
 | 
			
		||||
	 } conv;
 | 
			
		||||
#ifdef SSE4
 | 
			
		||||
	 permute(v1,in,0); // sse 128; quad single
 | 
			
		||||
	 v1=v1+in;
 | 
			
		||||
	 permute(v2,v1,1); 
 | 
			
		||||
	 v1=v1+v2;
 | 
			
		||||
	 return v1.v[0];
 | 
			
		||||
#endif
 | 
			
		||||
#if defined(AVX1) || defined (AVX2)
 | 
			
		||||
	 vRealF v1,v2;
 | 
			
		||||
	 permute(v1,in,0); // avx 256; octo-double
 | 
			
		||||
	 v1=v1+in;
 | 
			
		||||
	 permute(v2,v1,1); 
 | 
			
		||||
	 v1=v1+v2;
 | 
			
		||||
	 permute(v2,v1,2); 
 | 
			
		||||
	 v1=v1+v2;
 | 
			
		||||
	 return v1.v[0];
 | 
			
		||||
#endif
 | 
			
		||||
#ifdef AVX512
 | 
			
		||||
            return _mm512_reduce_add_ps(in.v);
 | 
			
		||||
	 permute(v1,in,0); // avx 256; octo-double
 | 
			
		||||
	 v1=v1+in;
 | 
			
		||||
	 permute(v2,v1,1); 
 | 
			
		||||
	 v1=v1+v2;
 | 
			
		||||
	 permute(v2,v1,2); 
 | 
			
		||||
	 v1=v1+v2;
 | 
			
		||||
	 permute(v2,v1,3); 
 | 
			
		||||
	 v1=v1+v2;
 | 
			
		||||
#endif
 | 
			
		||||
#ifdef QPX
 | 
			
		||||
#endif
 | 
			
		||||
	 conv.v=v1.v;
 | 
			
		||||
	 return conv.f[0];
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
        // *=,+=,-= operators
 | 
			
		||||
 
 | 
			
		||||
@@ -13,7 +13,7 @@ int main (int argc, char ** argv)
 | 
			
		||||
 | 
			
		||||
  std::vector<int> simd_layout = GridDefaultSimd(4,vComplexF::Nsimd());
 | 
			
		||||
  std::vector<int> mpi_layout  = GridDefaultMpi();
 | 
			
		||||
  std::vector<int> latt_size  ({16,16,16,32});
 | 
			
		||||
  std::vector<int> latt_size   = GridDefaultLatt();
 | 
			
		||||
  std::vector<int> clatt_size  ({4,4,4,8});
 | 
			
		||||
  int orthodir=3;
 | 
			
		||||
  int orthosz =latt_size[orthodir];
 | 
			
		||||
@@ -44,13 +44,15 @@ int main (int argc, char ** argv)
 | 
			
		||||
  // (1+2+3)=6 = N(N-1)/2 terms
 | 
			
		||||
  LatticeComplex Plaq(&Fine);
 | 
			
		||||
  LatticeComplex cPlaq(&Coarse);
 | 
			
		||||
 | 
			
		||||
  Plaq = zero;
 | 
			
		||||
#if 1
 | 
			
		||||
  for(int mu=1;mu<Nd;mu++){
 | 
			
		||||
    for(int nu=0;nu<mu;nu++){
 | 
			
		||||
      Plaq = Plaq + trace(U[mu]*Cshift(U[nu],mu,1)*adj(Cshift(U[mu],nu,1))*adj(U[nu]));
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
  
 | 
			
		||||
#endif
 | 
			
		||||
  double vol = Fine.gSites();
 | 
			
		||||
  Complex PlaqScale(1.0/vol/6.0/3.0);
 | 
			
		||||
  std::cout <<"PlaqScale" << PlaqScale<<std::endl;
 | 
			
		||||
 
 | 
			
		||||
		Reference in New Issue
	
	Block a user