mirror of
https://github.com/paboyle/Grid.git
synced 2024-11-14 01:35:36 +00:00
Reworking to keep intel compiler happy
This commit is contained in:
parent
ee8cf77071
commit
efc0d1e0b9
@ -16,7 +16,7 @@ Grid_wilson_cg_unprec_LDADD = -lGrid
|
|||||||
Grid_comms_SOURCES = Grid_comms.cc
|
Grid_comms_SOURCES = Grid_comms.cc
|
||||||
Grid_comms_LDADD = -lGrid
|
Grid_comms_LDADD = -lGrid
|
||||||
|
|
||||||
Grid_su3_SOURCES = Grid_su3.cc
|
Grid_su3_SOURCES = Grid_su3.cc Grid_su3_test.cc
|
||||||
Grid_su3_LDADD = -lGrid
|
Grid_su3_LDADD = -lGrid
|
||||||
|
|
||||||
Grid_memory_bandwidth_SOURCES = Grid_memory_bandwidth.cc
|
Grid_memory_bandwidth_SOURCES = Grid_memory_bandwidth.cc
|
||||||
|
@ -25,7 +25,7 @@ namespace Grid {
|
|||||||
/////////////////////////////////////////////////////////////////////////////////////////////
|
/////////////////////////////////////////////////////////////////////////////////////////////
|
||||||
template<class Field> class HermitianOperatorBase : public LinearOperatorBase<Field> {
|
template<class Field> class HermitianOperatorBase : public LinearOperatorBase<Field> {
|
||||||
public:
|
public:
|
||||||
virtual void OpAndNorm(const Field &in, Field &out,double &n1,double &n2);
|
virtual void OpAndNorm(const Field &in, Field &out,double &n1,double &n2)=0;
|
||||||
void AdjOp(const Field &in, Field &out) {
|
void AdjOp(const Field &in, Field &out) {
|
||||||
Op(in,out);
|
Op(in,out);
|
||||||
};
|
};
|
||||||
|
@ -31,7 +31,7 @@ public:
|
|||||||
bigfloat(const double d) { mpf_init_set_d(x, d); }
|
bigfloat(const double d) { mpf_init_set_d(x, d); }
|
||||||
bigfloat(const char *str) { mpf_init_set_str(x, (char*)str, 10); }
|
bigfloat(const char *str) { mpf_init_set_str(x, (char*)str, 10); }
|
||||||
~bigfloat(void) { mpf_clear(x); }
|
~bigfloat(void) { mpf_clear(x); }
|
||||||
operator const double (void) const { return (double)mpf_get_d(x); }
|
operator double (void) const { return (double)mpf_get_d(x); }
|
||||||
static void setDefaultPrecision(unsigned long dprec) {
|
static void setDefaultPrecision(unsigned long dprec) {
|
||||||
unsigned long bprec = (unsigned long)(3.321928094 * (double)dprec);
|
unsigned long bprec = (unsigned long)(3.321928094 * (double)dprec);
|
||||||
mpf_set_default_prec(bprec);
|
mpf_set_default_prec(bprec);
|
||||||
|
@ -345,20 +345,30 @@ friend inline void vstore(const vComplexD &ret, ComplexD *a){
|
|||||||
// REDUCE FIXME must be a cleaner implementation
|
// REDUCE FIXME must be a cleaner implementation
|
||||||
friend inline ComplexD Reduce(const vComplexD & in)
|
friend inline ComplexD Reduce(const vComplexD & in)
|
||||||
{
|
{
|
||||||
|
vComplexD v1,v2;
|
||||||
|
union {
|
||||||
|
zvec v;
|
||||||
|
double f[sizeof(zvec)/sizeof(double)];
|
||||||
|
} conv;
|
||||||
|
|
||||||
#ifdef SSE4
|
#ifdef SSE4
|
||||||
return ComplexD(in.v[0],in.v[1]);
|
v1=in;
|
||||||
#endif
|
#endif
|
||||||
#if defined(AVX1) || defined (AVX2)
|
#if defined(AVX1) || defined (AVX2)
|
||||||
vComplexD v1;
|
|
||||||
permute(v1,in,0); // sse 128; paired complex single
|
permute(v1,in,0); // sse 128; paired complex single
|
||||||
v1=v1+in;
|
v1=v1+in;
|
||||||
return ComplexD(v1.v[0],v1.v[1]);
|
|
||||||
#endif
|
#endif
|
||||||
#ifdef AVX512
|
#ifdef AVX512
|
||||||
return ComplexD(_mm512_mask_reduce_add_pd(0x55, in.v),_mm512_mask_reduce_add_pd(0xAA, in.v));
|
permute(v1,in,0); // avx512; quad complex double
|
||||||
|
v1=v1+in;
|
||||||
|
permute(v2,v1,1); // avx512; second permute-and-add stage
|
||||||
|
v1=v1+v2;
|
||||||
#endif
|
#endif
|
||||||
#ifdef QPX
|
#ifdef QPX
|
||||||
|
#error
|
||||||
#endif
|
#endif
|
||||||
|
conv.v = v1.v;
|
||||||
|
return ComplexD(conv.f[0],conv.f[1]);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Unary negation
|
// Unary negation
|
||||||
|
@ -234,26 +234,34 @@ namespace Grid {
|
|||||||
}
|
}
|
||||||
friend inline ComplexF Reduce(const vComplexF & in)
|
friend inline ComplexF Reduce(const vComplexF & in)
|
||||||
{
|
{
|
||||||
|
vComplexF v1,v2;
|
||||||
|
union {
|
||||||
|
cvec v;
|
||||||
|
float f[sizeof(cvec)/sizeof(float)];
|
||||||
|
} conv;
|
||||||
#ifdef SSE4
|
#ifdef SSE4
|
||||||
vComplexF v1;
|
|
||||||
permute(v1,in,0); // sse 128; paired complex single
|
permute(v1,in,0); // sse 128; paired complex single
|
||||||
v1=v1+in;
|
v1=v1+in;
|
||||||
return ComplexF(v1.v[0],v1.v[1]);
|
|
||||||
#endif
|
#endif
|
||||||
#if defined(AVX1) || defined (AVX2)
|
#if defined(AVX1) || defined (AVX2)
|
||||||
vComplexF v1,v2;
|
|
||||||
permute(v1,in,0); // sse 128; paired complex single
|
permute(v1,in,0); // sse 128; paired complex single
|
||||||
v1=v1+in;
|
v1=v1+in;
|
||||||
permute(v2,v1,1); // avx 256; quad complex single
|
permute(v2,v1,1); // avx 256; quad complex single
|
||||||
v1=v1+v2;
|
v1=v1+v2;
|
||||||
return ComplexF(v1.v[0],v1.v[1]);
|
|
||||||
#endif
|
#endif
|
||||||
#ifdef AVX512
|
#ifdef AVX512
|
||||||
return ComplexF(_mm512_mask_reduce_add_ps(0x5555, in.v),_mm512_mask_reduce_add_ps(0xAAAA, in.v));
|
permute(v1,in,0); // avx512 octo-complex single
|
||||||
|
v1=v1+in;
|
||||||
|
permute(v2,v1,1);
|
||||||
|
v1=v1+v2;
|
||||||
|
permute(v2,v1,2);
|
||||||
|
v1=v1+v2;
|
||||||
#endif
|
#endif
|
||||||
#ifdef QPX
|
#ifdef QPX
|
||||||
#error
|
#error
|
||||||
#endif
|
#endif
|
||||||
|
conv.v = v1.v;
|
||||||
|
return ComplexF(conv.f[0],conv.f[1]);
|
||||||
}
|
}
|
||||||
|
|
||||||
friend inline vComplexF operator * (const ComplexF &a, vComplexF b){
|
friend inline vComplexF operator * (const ComplexF &a, vComplexF b){
|
||||||
|
@ -210,25 +210,33 @@ namespace Grid {
|
|||||||
|
|
||||||
friend inline RealD Reduce(const vRealD & in)
|
friend inline RealD Reduce(const vRealD & in)
|
||||||
{
|
{
|
||||||
|
vRealD v1,v2;
|
||||||
|
union {
|
||||||
|
dvec v;
|
||||||
|
double f[sizeof(dvec)/sizeof(double)];
|
||||||
|
} conv;
|
||||||
#ifdef SSE4
|
#ifdef SSE4
|
||||||
vRealD v1;
|
|
||||||
permute(v1,in,0); // sse 128; paired real double
|
permute(v1,in,0); // sse 128; paired real double
|
||||||
v1=v1+in;
|
v1=v1+in;
|
||||||
return RealD(v1.v[0]);
|
|
||||||
#endif
|
#endif
|
||||||
#if defined(AVX1) || defined (AVX2)
|
#if defined(AVX1) || defined (AVX2)
|
||||||
vRealD v1,v2;
|
|
||||||
permute(v1,in,0); // avx 256; quad double
|
permute(v1,in,0); // avx 256; quad double
|
||||||
v1=v1+in;
|
v1=v1+in;
|
||||||
permute(v2,v1,1);
|
permute(v2,v1,1);
|
||||||
v1=v1+v2;
|
v1=v1+v2;
|
||||||
return v1.v[0];
|
|
||||||
#endif
|
#endif
|
||||||
#ifdef AVX512
|
#ifdef AVX512
|
||||||
return _mm512_reduce_add_pd(in.v);
|
permute(v1,in,0); // avx 512; octo-double
|
||||||
|
v1=v1+in;
|
||||||
|
permute(v2,v1,1);
|
||||||
|
v1=v1+v2;
|
||||||
|
permute(v2,v1,2);
|
||||||
|
v1=v1+v2;
|
||||||
#endif
|
#endif
|
||||||
#ifdef QPX
|
#ifdef QPX
|
||||||
#endif
|
#endif
|
||||||
|
conv.v=v1.v;
|
||||||
|
return conv.f[0];
|
||||||
}
|
}
|
||||||
|
|
||||||
// *=,+=,-= operators
|
// *=,+=,-= operators
|
||||||
|
@ -243,29 +243,39 @@ friend inline void vstore(const vRealF &ret, float *a){
|
|||||||
}
|
}
|
||||||
friend inline RealF Reduce(const vRealF & in)
|
friend inline RealF Reduce(const vRealF & in)
|
||||||
{
|
{
|
||||||
#ifdef SSE4
|
|
||||||
vRealF v1,v2;
|
vRealF v1,v2;
|
||||||
|
union {
|
||||||
|
fvec v;
|
||||||
|
float f[sizeof(fvec)/sizeof(float)]; // element count must use the float width so f[] overlays all of v
|
||||||
|
} conv;
|
||||||
|
#ifdef SSE4
|
||||||
permute(v1,in,0); // sse 128; quad single
|
permute(v1,in,0); // sse 128; quad single
|
||||||
v1=v1+in;
|
v1=v1+in;
|
||||||
permute(v2,v1,1);
|
permute(v2,v1,1);
|
||||||
v1=v1+v2;
|
v1=v1+v2;
|
||||||
return v1.v[0];
|
|
||||||
#endif
|
#endif
|
||||||
#if defined(AVX1) || defined (AVX2)
|
#if defined(AVX1) || defined (AVX2)
|
||||||
vRealF v1,v2;
|
|
||||||
permute(v1,in,0); // avx 256; octo-single
|
permute(v1,in,0); // avx 256; octo-single
|
||||||
v1=v1+in;
|
v1=v1+in;
|
||||||
permute(v2,v1,1);
|
permute(v2,v1,1);
|
||||||
v1=v1+v2;
|
v1=v1+v2;
|
||||||
permute(v2,v1,2);
|
permute(v2,v1,2);
|
||||||
v1=v1+v2;
|
v1=v1+v2;
|
||||||
return v1.v[0];
|
|
||||||
#endif
|
#endif
|
||||||
#ifdef AVX512
|
#ifdef AVX512
|
||||||
return _mm512_reduce_add_ps(in.v);
|
permute(v1,in,0); // avx 512; 16 single
|
||||||
|
v1=v1+in;
|
||||||
|
permute(v2,v1,1);
|
||||||
|
v1=v1+v2;
|
||||||
|
permute(v2,v1,2);
|
||||||
|
v1=v1+v2;
|
||||||
|
permute(v2,v1,3);
|
||||||
|
v1=v1+v2;
|
||||||
#endif
|
#endif
|
||||||
#ifdef QPX
|
#ifdef QPX
|
||||||
#endif
|
#endif
|
||||||
|
conv.v=v1.v;
|
||||||
|
return conv.f[0];
|
||||||
}
|
}
|
||||||
|
|
||||||
// *=,+=,-= operators
|
// *=,+=,-= operators
|
||||||
|
@ -13,7 +13,7 @@ int main (int argc, char ** argv)
|
|||||||
|
|
||||||
std::vector<int> simd_layout = GridDefaultSimd(4,vComplexF::Nsimd());
|
std::vector<int> simd_layout = GridDefaultSimd(4,vComplexF::Nsimd());
|
||||||
std::vector<int> mpi_layout = GridDefaultMpi();
|
std::vector<int> mpi_layout = GridDefaultMpi();
|
||||||
std::vector<int> latt_size ({16,16,16,32});
|
std::vector<int> latt_size = GridDefaultLatt();
|
||||||
std::vector<int> clatt_size ({4,4,4,8});
|
std::vector<int> clatt_size ({4,4,4,8});
|
||||||
int orthodir=3;
|
int orthodir=3;
|
||||||
int orthosz =latt_size[orthodir];
|
int orthosz =latt_size[orthodir];
|
||||||
@ -44,13 +44,15 @@ int main (int argc, char ** argv)
|
|||||||
// (1+2+3)=6 = N(N-1)/2 terms
|
// (1+2+3)=6 = N(N-1)/2 terms
|
||||||
LatticeComplex Plaq(&Fine);
|
LatticeComplex Plaq(&Fine);
|
||||||
LatticeComplex cPlaq(&Coarse);
|
LatticeComplex cPlaq(&Coarse);
|
||||||
|
|
||||||
Plaq = zero;
|
Plaq = zero;
|
||||||
|
#if 1
|
||||||
for(int mu=1;mu<Nd;mu++){
|
for(int mu=1;mu<Nd;mu++){
|
||||||
for(int nu=0;nu<mu;nu++){
|
for(int nu=0;nu<mu;nu++){
|
||||||
Plaq = Plaq + trace(U[mu]*Cshift(U[nu],mu,1)*adj(Cshift(U[mu],nu,1))*adj(U[nu]));
|
Plaq = Plaq + trace(U[mu]*Cshift(U[nu],mu,1)*adj(Cshift(U[mu],nu,1))*adj(U[nu]));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
#endif
|
||||||
double vol = Fine.gSites();
|
double vol = Fine.gSites();
|
||||||
Complex PlaqScale(1.0/vol/6.0/3.0);
|
Complex PlaqScale(1.0/vol/6.0/3.0);
|
||||||
std::cout <<"PlaqScale" << PlaqScale<<std::endl;
|
std::cout <<"PlaqScale" << PlaqScale<<std::endl;
|
||||||
|
Loading…
Reference in New Issue
Block a user