mirror of
https://github.com/paboyle/Grid.git
synced 2025-04-25 13:15:55 +01:00
Verified
This commit is contained in:
parent
bf7e3f20d4
commit
1c30e9a961
@ -115,7 +115,7 @@ int main (int argc, char ** argv)
|
|||||||
ImprovedStaggeredFermionR Ds(Umu,Umu,Grid,RBGrid,mass,c1,c2,u0,params);
|
ImprovedStaggeredFermionR Ds(Umu,Umu,Grid,RBGrid,mass,c1,c2,u0,params);
|
||||||
|
|
||||||
std::cout<<GridLogMessage << "Calling Ds"<<std::endl;
|
std::cout<<GridLogMessage << "Calling Ds"<<std::endl;
|
||||||
int ncall=100000;
|
int ncall=1000;
|
||||||
double t0=usecond();
|
double t0=usecond();
|
||||||
for(int i=0;i<ncall;i++){
|
for(int i=0;i<ncall;i++){
|
||||||
Ds.Dhop(src,result,0);
|
Ds.Dhop(src,result,0);
|
||||||
|
@ -338,12 +338,12 @@ void ImprovedStaggeredFermion<Impl>::DhopInternal(StencilImpl &st, LebesgueOrder
|
|||||||
if (dag == DaggerYes) {
|
if (dag == DaggerYes) {
|
||||||
PARALLEL_FOR_LOOP
|
PARALLEL_FOR_LOOP
|
||||||
for (int sss = 0; sss < in._grid->oSites(); sss++) {
|
for (int sss = 0; sss < in._grid->oSites(); sss++) {
|
||||||
Kernels::DhopSiteDag(st, lo, U, UUU, st.CommBuf(), sss, sss, in, out);
|
Kernels::DhopSiteDag(st, lo, U, UUU, st.CommBuf(), 1, sss, in, out);
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
PARALLEL_FOR_LOOP
|
PARALLEL_FOR_LOOP
|
||||||
for (int sss = 0; sss < in._grid->oSites(); sss++) {
|
for (int sss = 0; sss < in._grid->oSites(); sss++) {
|
||||||
Kernels::DhopSite(st, lo, U, UUU, st.CommBuf(), sss, sss, in, out);
|
Kernels::DhopSite(st, lo, U, UUU, st.CommBuf(), 1, sss, in, out);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
@ -228,9 +228,7 @@ void ImprovedStaggeredFermion5D<Impl>::DhopInternal(StencilImpl & st, LebesgueOr
|
|||||||
const FermionField &in, FermionField &out,int dag)
|
const FermionField &in, FermionField &out,int dag)
|
||||||
{
|
{
|
||||||
Compressor compressor;
|
Compressor compressor;
|
||||||
|
|
||||||
int LLs = in._grid->_rdimensions[0];
|
int LLs = in._grid->_rdimensions[0];
|
||||||
|
|
||||||
st.HaloExchange(in,compressor);
|
st.HaloExchange(in,compressor);
|
||||||
|
|
||||||
// Dhop takes the 4d grid from U, and makes a 5d index for fermion
|
// Dhop takes the 4d grid from U, and makes a 5d index for fermion
|
||||||
@ -241,28 +239,11 @@ void ImprovedStaggeredFermion5D<Impl>::DhopInternal(StencilImpl & st, LebesgueOr
|
|||||||
Kernels::DhopSiteDag(st, lo, U, UUU, st.CommBuf(), LLs, sU,in, out);
|
Kernels::DhopSiteDag(st, lo, U, UUU, st.CommBuf(), LLs, sU,in, out);
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
#if 1
|
|
||||||
PARALLEL_FOR_LOOP
|
PARALLEL_FOR_LOOP
|
||||||
for (int ss = 0; ss < U._grid->oSites(); ss++) {
|
for (int ss = 0; ss < U._grid->oSites(); ss++) {
|
||||||
int sU=ss;
|
int sU=ss;
|
||||||
Kernels::DhopSite(st,lo,U,UUU,st.CommBuf(),LLs,sU,in,out);
|
Kernels::DhopSite(st,lo,U,UUU,st.CommBuf(),LLs,sU,in,out);
|
||||||
}
|
}
|
||||||
#else
|
|
||||||
#pragma omp parallel
|
|
||||||
{
|
|
||||||
for(int i=0;i<10;i++){
|
|
||||||
int len = U._grid->oSites();
|
|
||||||
int me,mywork,myoff;
|
|
||||||
GridThread::GetWorkBarrier(len,me, mywork,myoff);
|
|
||||||
for (int ss = myoff; ss < myoff+mywork; ss++) {
|
|
||||||
int sU=ss;
|
|
||||||
int sF=LLs*sU;
|
|
||||||
Kernels::DhopSite(st,lo,U,UUU,st.CommBuf(),LLs,sU,in,out);
|
|
||||||
}
|
|
||||||
GridThread::ThreadBarrier();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -186,32 +186,31 @@ template <class Impl>
|
|||||||
void StaggeredKernels<Impl>::DhopSiteDag(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, DoubledGaugeField &UUU,
|
void StaggeredKernels<Impl>::DhopSiteDag(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, DoubledGaugeField &UUU,
|
||||||
SiteSpinor *buf, int LLs, int sU,
|
SiteSpinor *buf, int LLs, int sU,
|
||||||
const FermionField &in, FermionField &out) {
|
const FermionField &in, FermionField &out) {
|
||||||
int dag(1);
|
|
||||||
SiteSpinor naik;
|
SiteSpinor naik;
|
||||||
SiteSpinor naive;
|
SiteSpinor naive;
|
||||||
int oneLink =0;
|
int oneLink =0;
|
||||||
int threeLink=1;
|
int threeLink=1;
|
||||||
Real scale;
|
int dag=1;
|
||||||
if(dag) scale = -1.0;
|
|
||||||
else scale = 1.0;
|
|
||||||
|
|
||||||
switch(Opt) {
|
switch(Opt) {
|
||||||
#ifdef AVX512
|
#ifdef AVX512
|
||||||
|
//FIXME; move the sign into the Asm routine
|
||||||
case OptInlineAsm:
|
case OptInlineAsm:
|
||||||
DhopSiteAsm(st,lo,U,UUU,buf,LLs,sU,in,out);
|
DhopSiteAsm(st,lo,U,UUU,buf,LLs,sU,in,out);
|
||||||
|
for(int s=0;s<LLs;s++) {
|
||||||
|
int sF=s+LLs*sU;
|
||||||
|
out._odata[sF]=-out._odata[sF];
|
||||||
|
}
|
||||||
break;
|
break;
|
||||||
#endif
|
#endif
|
||||||
case OptHandUnroll:
|
case OptHandUnroll:
|
||||||
DhopSiteDepthHand(st,lo,U,UUU,buf,LLs,sU,in,out,dag);
|
DhopSiteHand(st,lo,U,UUU,buf,LLs,sU,in,out,dag);
|
||||||
break;
|
break;
|
||||||
case OptGeneric:
|
case OptGeneric:
|
||||||
for(int s=0;s<LLs;s++){
|
for(int s=0;s<LLs;s++){
|
||||||
|
|
||||||
int sF=s+LLs*sU;
|
int sF=s+LLs*sU;
|
||||||
|
|
||||||
DhopSiteDepth(st,lo,U,buf,sF,sU,in,naive,oneLink);
|
DhopSiteDepth(st,lo,U,buf,sF,sU,in,naive,oneLink);
|
||||||
DhopSiteDepth(st,lo,UUU,buf,sF,sU,in,naik,threeLink);
|
DhopSiteDepth(st,lo,UUU,buf,sF,sU,in,naik,threeLink);
|
||||||
out._odata[sF] =scale*(naive+naik);
|
out._odata[sF] =-naive-naik;
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
@ -223,17 +222,13 @@ void StaggeredKernels<Impl>::DhopSiteDag(StencilImpl &st, LebesgueOrder &lo, Dou
|
|||||||
template <class Impl>
|
template <class Impl>
|
||||||
void StaggeredKernels<Impl>::DhopSite(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, DoubledGaugeField &UUU,
|
void StaggeredKernels<Impl>::DhopSite(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, DoubledGaugeField &UUU,
|
||||||
SiteSpinor *buf, int LLs,
|
SiteSpinor *buf, int LLs,
|
||||||
int sU, const FermionField &in, FermionField &out) {
|
int sU, const FermionField &in, FermionField &out)
|
||||||
|
{
|
||||||
int dag(0);
|
|
||||||
|
|
||||||
int oneLink =0;
|
int oneLink =0;
|
||||||
int threeLink=1;
|
int threeLink=1;
|
||||||
SiteSpinor naik;
|
SiteSpinor naik;
|
||||||
SiteSpinor naive;
|
SiteSpinor naive;
|
||||||
static int once;
|
int dag=0;
|
||||||
int sF=LLs*sU;
|
|
||||||
|
|
||||||
switch(Opt) {
|
switch(Opt) {
|
||||||
#ifdef AVX512
|
#ifdef AVX512
|
||||||
case OptInlineAsm:
|
case OptInlineAsm:
|
||||||
@ -241,22 +236,23 @@ void StaggeredKernels<Impl>::DhopSite(StencilImpl &st, LebesgueOrder &lo, Double
|
|||||||
break;
|
break;
|
||||||
#endif
|
#endif
|
||||||
case OptHandUnroll:
|
case OptHandUnroll:
|
||||||
DhopSiteDepthHand(st,lo,U,UUU,buf,LLs,sU,in,out,dag);
|
DhopSiteHand(st,lo,U,UUU,buf,LLs,sU,in,out,dag);
|
||||||
break;
|
break;
|
||||||
case OptGeneric:
|
case OptGeneric:
|
||||||
|
|
||||||
for(int s=0;s<LLs;s++){
|
for(int s=0;s<LLs;s++){
|
||||||
|
int sF=LLs*sU+s;
|
||||||
|
// assert(sF<in._odata.size());
|
||||||
|
// assert(sU< U._odata.size());
|
||||||
|
// assert(sF>=0); assert(sU>=0);
|
||||||
DhopSiteDepth(st,lo,U,buf,sF,sU,in,naive,oneLink);
|
DhopSiteDepth(st,lo,U,buf,sF,sU,in,naive,oneLink);
|
||||||
DhopSiteDepth(st,lo,UUU,buf,sF,sU,in,naik,threeLink);
|
DhopSiteDepth(st,lo,UUU,buf,sF,sU,in,naik,threeLink);
|
||||||
out._odata[sF] =naive+naik;
|
out._odata[sF] =naive+naik;
|
||||||
}
|
}
|
||||||
|
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
assert(0);
|
assert(0);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
};
|
};
|
||||||
|
|
||||||
template <class Impl>
|
template <class Impl>
|
||||||
|
@ -57,11 +57,11 @@ public:
|
|||||||
int sF, int sU, const FermionField &in, SiteSpinor &out,int threeLink);
|
int sF, int sU, const FermionField &in, SiteSpinor &out,int threeLink);
|
||||||
|
|
||||||
|
|
||||||
void DhopSiteDepthHandLocal(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, SiteSpinor * buf,
|
void DhopSiteDepthHand(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, SiteSpinor * buf,
|
||||||
int sF, int sU, const FermionField &in, SiteSpinor&out,int threeLink);
|
int sF, int sU, const FermionField &in, SiteSpinor&out,int threeLink);
|
||||||
|
|
||||||
void DhopSiteDepthHand(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, DoubledGaugeField &UUU,SiteSpinor * buf,
|
void DhopSiteHand(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, DoubledGaugeField &UUU,SiteSpinor * buf,
|
||||||
int Lls, int sU, const FermionField &in, FermionField &out, int dag);
|
int LLs, int sU, const FermionField &in, FermionField &out, int dag);
|
||||||
|
|
||||||
void DhopSiteAsm(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U,DoubledGaugeField &UUU, SiteSpinor * buf,
|
void DhopSiteAsm(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U,DoubledGaugeField &UUU, SiteSpinor * buf,
|
||||||
int LLs, int sU, const FermionField &in, FermionField &out);
|
int LLs, int sU, const FermionField &in, FermionField &out);
|
||||||
|
@ -762,6 +762,14 @@ template <> void StaggeredKernels<StaggeredVec5dImplD>::DhopSiteAsm(StencilImpl
|
|||||||
VPERM0(Chi_11,Chi_11) \
|
VPERM0(Chi_11,Chi_11) \
|
||||||
VPERM0(Chi_12,Chi_12) );
|
VPERM0(Chi_12,Chi_12) );
|
||||||
|
|
||||||
|
#define PERMUTE01 \
|
||||||
|
if ( p0 ) { PERMUTE_DIR3; }\
|
||||||
|
if ( p1 ) { PERMUTE_DIR2; }
|
||||||
|
|
||||||
|
#define PERMUTE23 \
|
||||||
|
if ( p2 ) { PERMUTE_DIR1; }\
|
||||||
|
if ( p3 ) { PERMUTE_DIR0; }
|
||||||
|
|
||||||
// This is the single precision 5th direction vectorised kernel
|
// This is the single precision 5th direction vectorised kernel
|
||||||
|
|
||||||
#include <simd/Intel512single.h>
|
#include <simd/Intel512single.h>
|
||||||
@ -790,30 +798,45 @@ template <> void StaggeredKernels<StaggeredImplF>::DhopSiteAsm(StencilImpl &st,
|
|||||||
int sF=s+LLs*sU;
|
int sF=s+LLs*sU;
|
||||||
// Xp, Yp, Zp, Tp
|
// Xp, Yp, Zp, Tp
|
||||||
PREPARE(Xp,Yp,Zp,Tp,0,U);
|
PREPARE(Xp,Yp,Zp,Tp,0,U);
|
||||||
LOAD_CHI(addr0,addr1,addr2,addr3);
|
LOAD_CHIa(addr0,addr1);
|
||||||
MULT_LS(gauge0,gauge1,gauge2,gauge3);
|
PERMUTE01;
|
||||||
|
MULT_XYZT(gauge0,gauge1);
|
||||||
|
LOAD_CHIa(addr2,addr3);
|
||||||
|
PERMUTE23;
|
||||||
|
MULT_ADD_XYZT(gauge2,gauge3);
|
||||||
|
|
||||||
PREPARE(Xm,Ym,Zm,Tm,0,U);
|
PREPARE(Xm,Ym,Zm,Tm,0,U);
|
||||||
LOAD_CHI(addr0,addr1,addr2,addr3);
|
LOAD_CHIa(addr0,addr1);
|
||||||
MULT_ADD_LS(gauge0,gauge1,gauge2,gauge3);
|
PERMUTE01;
|
||||||
|
MULT_ADD_XYZT(gauge0,gauge1);
|
||||||
|
LOAD_CHIa(addr2,addr3);
|
||||||
|
PERMUTE23;
|
||||||
|
MULT_ADD_XYZT(gauge2,gauge3);
|
||||||
|
|
||||||
PREPARE(Xp,Yp,Zp,Tp,8,UUU);
|
PREPARE(Xp,Yp,Zp,Tp,8,UUU);
|
||||||
LOAD_CHI(addr0,addr1,addr2,addr3);
|
LOAD_CHIa(addr0,addr1);
|
||||||
MULT_ADD_LS(gauge0,gauge1,gauge2,gauge3);
|
PERMUTE01;
|
||||||
|
MULT_ADD_XYZT(gauge0,gauge1);
|
||||||
|
LOAD_CHIa(addr2,addr3);
|
||||||
|
PERMUTE23;
|
||||||
|
MULT_ADD_XYZT(gauge2,gauge3);
|
||||||
|
|
||||||
PREPARE(Xm,Ym,Zm,Tm,8,UUU);
|
PREPARE(Xm,Ym,Zm,Tm,8,UUU);
|
||||||
LOAD_CHI(addr0,addr1,addr2,addr3);
|
LOAD_CHIa(addr0,addr1);
|
||||||
MULT_ADD_LS(gauge0,gauge1,gauge2,gauge3);
|
PERMUTE01;
|
||||||
|
MULT_ADD_XYZT(gauge0,gauge1);
|
||||||
|
LOAD_CHIa(addr2,addr3);
|
||||||
|
PERMUTE23;
|
||||||
|
MULT_ADD_XYZT(gauge2,gauge3);
|
||||||
|
|
||||||
addr0 = (uint64_t) &out._odata[sF];
|
addr0 = (uint64_t) &out._odata[sF];
|
||||||
REDUCE(addr0);
|
REDUCEa(addr0);
|
||||||
}
|
}
|
||||||
#else
|
#else
|
||||||
assert(0);
|
assert(0);
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
#include <simd/Intel512double.h>
|
#include <simd/Intel512double.h>
|
||||||
template <> void StaggeredKernels<StaggeredImplD>::DhopSiteAsm(StencilImpl &st, LebesgueOrder &lo,
|
template <> void StaggeredKernels<StaggeredImplD>::DhopSiteAsm(StencilImpl &st, LebesgueOrder &lo,
|
||||||
DoubledGaugeField &U,
|
DoubledGaugeField &U,
|
||||||
@ -840,23 +863,39 @@ template <> void StaggeredKernels<StaggeredImplD>::DhopSiteAsm(StencilImpl &st,
|
|||||||
int sF=s+LLs*sU;
|
int sF=s+LLs*sU;
|
||||||
// Xp, Yp, Zp, Tp
|
// Xp, Yp, Zp, Tp
|
||||||
PREPARE(Xp,Yp,Zp,Tp,0,U);
|
PREPARE(Xp,Yp,Zp,Tp,0,U);
|
||||||
LOAD_CHI(addr0,addr1,addr2,addr3);
|
LOAD_CHIa(addr0,addr1);
|
||||||
MULT_LS(gauge0,gauge1,gauge2,gauge3);
|
PERMUTE01;
|
||||||
|
MULT_XYZT(gauge0,gauge1);
|
||||||
|
LOAD_CHIa(addr2,addr3);
|
||||||
|
PERMUTE23;
|
||||||
|
MULT_ADD_XYZT(gauge2,gauge3);
|
||||||
|
|
||||||
PREPARE(Xm,Ym,Zm,Tm,0,U);
|
PREPARE(Xm,Ym,Zm,Tm,0,U);
|
||||||
LOAD_CHI(addr0,addr1,addr2,addr3);
|
LOAD_CHIa(addr0,addr1);
|
||||||
MULT_ADD_LS(gauge0,gauge1,gauge2,gauge3);
|
PERMUTE01;
|
||||||
|
MULT_ADD_XYZT(gauge0,gauge1);
|
||||||
|
LOAD_CHIa(addr2,addr3);
|
||||||
|
PERMUTE23;
|
||||||
|
MULT_ADD_XYZT(gauge2,gauge3);
|
||||||
|
|
||||||
PREPARE(Xp,Yp,Zp,Tp,8,UUU);
|
PREPARE(Xp,Yp,Zp,Tp,8,UUU);
|
||||||
LOAD_CHI(addr0,addr1,addr2,addr3);
|
LOAD_CHIa(addr0,addr1);
|
||||||
MULT_ADD_LS(gauge0,gauge1,gauge2,gauge3);
|
PERMUTE01;
|
||||||
|
MULT_ADD_XYZT(gauge0,gauge1);
|
||||||
|
LOAD_CHIa(addr2,addr3);
|
||||||
|
PERMUTE23;
|
||||||
|
MULT_ADD_XYZT(gauge2,gauge3);
|
||||||
|
|
||||||
PREPARE(Xm,Ym,Zm,Tm,8,UUU);
|
PREPARE(Xm,Ym,Zm,Tm,8,UUU);
|
||||||
LOAD_CHI(addr0,addr1,addr2,addr3);
|
LOAD_CHIa(addr0,addr1);
|
||||||
MULT_ADD_LS(gauge0,gauge1,gauge2,gauge3);
|
PERMUTE01;
|
||||||
|
MULT_ADD_XYZT(gauge0,gauge1);
|
||||||
|
LOAD_CHIa(addr2,addr3);
|
||||||
|
PERMUTE23;
|
||||||
|
MULT_ADD_XYZT(gauge2,gauge3);
|
||||||
|
|
||||||
addr0 = (uint64_t) &out._odata[sF];
|
addr0 = (uint64_t) &out._odata[sF];
|
||||||
REDUCE(addr0);
|
REDUCEa(addr0);
|
||||||
}
|
}
|
||||||
#else
|
#else
|
||||||
assert(0);
|
assert(0);
|
||||||
|
@ -91,10 +91,10 @@ namespace QCD {
|
|||||||
|
|
||||||
|
|
||||||
template <class Impl>
|
template <class Impl>
|
||||||
void StaggeredKernels<Impl>::DhopSiteDepthHand(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U,DoubledGaugeField &UUU,
|
void StaggeredKernels<Impl>::DhopSiteHand(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U,DoubledGaugeField &UUU,
|
||||||
SiteSpinor *buf, int LLs,
|
SiteSpinor *buf, int LLs,
|
||||||
int sU, const FermionField &in, FermionField &out, int dag) {
|
int sU, const FermionField &in, FermionField &out, int dag)
|
||||||
|
{
|
||||||
SiteSpinor naik;
|
SiteSpinor naik;
|
||||||
SiteSpinor naive;
|
SiteSpinor naive;
|
||||||
int oneLink =0;
|
int oneLink =0;
|
||||||
@ -105,20 +105,17 @@ void StaggeredKernels<Impl>::DhopSiteDepthHand(StencilImpl &st, LebesgueOrder &l
|
|||||||
if(dag) scale = -1.0;
|
if(dag) scale = -1.0;
|
||||||
|
|
||||||
for(int s=0;s<LLs;s++){
|
for(int s=0;s<LLs;s++){
|
||||||
|
|
||||||
int sF=s+LLs*sU;
|
int sF=s+LLs*sU;
|
||||||
DhopSiteDepthHandLocal(st,lo,U,buf,sF,sU,in,naive,oneLink);
|
DhopSiteDepthHand(st,lo,U,buf,sF,sU,in,naive,oneLink);
|
||||||
DhopSiteDepthHandLocal(st,lo,UUU,buf,sF,sU,in,naik,threeLink);
|
DhopSiteDepthHand(st,lo,UUU,buf,sF,sU,in,naik,threeLink);
|
||||||
out._odata[sF] =scale*(naive+naik);
|
out._odata[sF] =scale*(naive+naik);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
template <class Impl>
|
template <class Impl>
|
||||||
void StaggeredKernels<Impl>::DhopSiteDepthHandLocal(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U,
|
void StaggeredKernels<Impl>::DhopSiteDepthHand(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U,
|
||||||
SiteSpinor *buf, int sF,
|
SiteSpinor *buf, int sF,
|
||||||
int sU, const FermionField &in, SiteSpinor &out,int threeLink) {
|
int sU, const FermionField &in, SiteSpinor &out,int threeLink)
|
||||||
{
|
{
|
||||||
typedef typename Simd::scalar_type S;
|
typedef typename Simd::scalar_type S;
|
||||||
typedef typename Simd::vector_type V;
|
typedef typename Simd::vector_type V;
|
||||||
@ -301,7 +298,6 @@ void StaggeredKernels<Impl>::DhopSiteDepthHandLocal(StencilImpl &st, LebesgueOrd
|
|||||||
vstream(out()()(2),even_2+odd_2);
|
vstream(out()()(2),even_2+odd_2);
|
||||||
|
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
|
||||||
FermOpStaggeredTemplateInstantiate(StaggeredKernels);
|
FermOpStaggeredTemplateInstantiate(StaggeredKernels);
|
||||||
FermOpStaggeredVec5dTemplateInstantiate(StaggeredKernels);
|
FermOpStaggeredVec5dTemplateInstantiate(StaggeredKernels);
|
||||||
|
@ -57,34 +57,33 @@ int main (int argc, char ** argv)
|
|||||||
std::cout<<GridLogMessage << "Grid is setup to use "<<threads<<" threads"<<std::endl;
|
std::cout<<GridLogMessage << "Grid is setup to use "<<threads<<" threads"<<std::endl;
|
||||||
|
|
||||||
std::vector<int> seeds({1,2,3,4});
|
std::vector<int> seeds({1,2,3,4});
|
||||||
/*
|
|
||||||
GridParallelRNG pRNG4(UGrid);
|
GridParallelRNG pRNG4(UGrid);
|
||||||
GridParallelRNG pRNG5(FGrid);
|
GridParallelRNG pRNG5(FGrid);
|
||||||
pRNG4.SeedFixedIntegers(seeds);
|
pRNG4.SeedFixedIntegers(seeds);
|
||||||
pRNG5.SeedFixedIntegers(seeds);
|
pRNG5.SeedFixedIntegers(seeds);
|
||||||
*/
|
|
||||||
typedef typename ImprovedStaggeredFermion5DR::FermionField FermionField;
|
typedef typename ImprovedStaggeredFermion5DR::FermionField FermionField;
|
||||||
typedef typename ImprovedStaggeredFermion5DR::ComplexField ComplexField;
|
typedef typename ImprovedStaggeredFermion5DR::ComplexField ComplexField;
|
||||||
typename ImprovedStaggeredFermion5DR::ImplParams params;
|
typename ImprovedStaggeredFermion5DR::ImplParams params;
|
||||||
|
|
||||||
FermionField src (FGrid); src=zero;
|
FermionField src (FGrid);
|
||||||
|
random(pRNG5,src);
|
||||||
// random(pRNG5,src);
|
|
||||||
/*
|
/*
|
||||||
std::vector<int> site({0,0,0,0,0});
|
std::vector<int> site({0,1,2,0,0});
|
||||||
ColourVector cv = zero;
|
ColourVector cv = zero;
|
||||||
cv()()(0)=1.0;
|
cv()()(0)=1.0;
|
||||||
src = zero;
|
src = zero;
|
||||||
pokeSite(cv,src,site);
|
pokeSite(cv,src,site);
|
||||||
*/
|
*/
|
||||||
|
|
||||||
FermionField result(FGrid); result=zero;
|
FermionField result(FGrid); result=zero;
|
||||||
FermionField tmp(FGrid); tmp=zero;
|
FermionField tmp(FGrid); tmp=zero;
|
||||||
FermionField err(FGrid); tmp=zero;
|
FermionField err(FGrid); tmp=zero;
|
||||||
FermionField phi (FGrid); phi=1.0;//random(pRNG5,phi);
|
FermionField phi (FGrid); random(pRNG5,phi);
|
||||||
FermionField chi (FGrid); chi=1.0;//random(pRNG5,chi);
|
FermionField chi (FGrid); random(pRNG5,chi);
|
||||||
|
|
||||||
LatticeGaugeField Umu(UGrid); Umu=1.0; //SU3::HotConfiguration(pRNG4,Umu);
|
LatticeGaugeField Umu(UGrid);
|
||||||
|
SU3::HotConfiguration(pRNG4,Umu);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
for(int mu=1;mu<4;mu++){
|
for(int mu=1;mu<4;mu++){
|
||||||
@ -127,7 +126,6 @@ int main (int argc, char ** argv)
|
|||||||
std::cout<<GridLogMessage << "norm result "<< norm2(result)<<std::endl;
|
std::cout<<GridLogMessage << "norm result "<< norm2(result)<<std::endl;
|
||||||
std::cout<<GridLogMessage << "mflop/s = "<< flops/(t1-t0)<<std::endl;
|
std::cout<<GridLogMessage << "mflop/s = "<< flops/(t1-t0)<<std::endl;
|
||||||
|
|
||||||
|
|
||||||
std::cout<<GridLogMessage << "Calling vectorised staggered operator"<<std::endl;
|
std::cout<<GridLogMessage << "Calling vectorised staggered operator"<<std::endl;
|
||||||
|
|
||||||
QCD::StaggeredKernelsStatic::Opt=QCD::StaggeredKernelsStatic::OptInlineAsm;
|
QCD::StaggeredKernelsStatic::Opt=QCD::StaggeredKernelsStatic::OptInlineAsm;
|
||||||
|
Loading…
x
Reference in New Issue
Block a user