1
0
mirror of https://github.com/paboyle/Grid.git synced 2025-06-11 11:56:56 +01:00

Merge commit '899ca41cb8c8f47771bfd37cd895cbc2184e5560'

This commit is contained in:
2015-11-16 18:16:25 +00:00
55 changed files with 5608 additions and 8801 deletions

View File

@ -86,16 +86,16 @@ Test_dwf_hdcr_SOURCES=Test_dwf_hdcr.cc
Test_dwf_hdcr_LDADD=-lGrid
#Test_dwf_lanczos_SOURCES=Test_dwf_lanczos.cc
#Test_dwf_lanczos_LDADD=-lGrid
Test_dwf_lanczos_SOURCES=Test_dwf_lanczos.cc
Test_dwf_lanczos_LDADD=-lGrid
Test_gamma_SOURCES=Test_gamma.cc
Test_gamma_LDADD=-lGrid
#Test_gparity_SOURCES=Test_gparity.cc
#Test_gparity_LDADD=-lGrid
Test_gparity_SOURCES=Test_gparity.cc
Test_gparity_LDADD=-lGrid
#Test_gpwilson_even_odd_SOURCES=Test_gpwilson_even_odd.cc
@ -190,6 +190,10 @@ Test_stencil_SOURCES=Test_stencil.cc
Test_stencil_LDADD=-lGrid
Test_synthetic_lanczos_SOURCES=Test_synthetic_lanczos.cc
Test_synthetic_lanczos_LDADD=-lGrid
Test_wilson_cg_prec_SOURCES=Test_wilson_cg_prec.cc
Test_wilson_cg_prec_LDADD=-lGrid

View File

@ -54,27 +54,27 @@ int main (int argc, char ** argv)
TComplex cm;
for(int dir=0;dir<Nd;dir++){
if ( dir!=1 ) continue;
// if ( dir!=1 ) continue;
for(int shift=0;shift<latt_size[dir];shift++){
std::cout<<GridLogMessage<<"Shifting by "<<shift<<" in direction"<<dir<<std::endl;
// std::cout<<GridLogMessage<<"Even grid"<<std::endl;
std::cout<<GridLogMessage<<"Even grid"<<std::endl;
ShiftUe = Cshift(Ue,dir,shift); // Shift everything cb by cb
// std::cout<<GridLogMessage << "\tShiftUe " <<norm2(ShiftUe)<<std::endl;
std::cout<<GridLogMessage << "\tShiftUe " <<norm2(ShiftUe)<<std::endl;
// std::cout<<GridLogMessage<<"Odd grid"<<std::endl;
std::cout<<GridLogMessage<<"Odd grid"<<std::endl;
ShiftUo = Cshift(Uo,dir,shift);
// std::cout<<GridLogMessage << "\tShiftUo " <<norm2(ShiftUo)<<std::endl;
std::cout<<GridLogMessage << "\tShiftUo " <<norm2(ShiftUo)<<std::endl;
// std::cout<<GridLogMessage<<"Recombined Even/Odd grids"<<std::endl;
std::cout<<GridLogMessage<<"Recombined Even/Odd grids"<<std::endl;
setCheckerboard(rbShiftU,ShiftUe);
setCheckerboard(rbShiftU,ShiftUo);
// std::cout<<GridLogMessage << "\trbShiftU " <<norm2(rbShiftU)<<std::endl;
std::cout<<GridLogMessage << "\trbShiftU " <<norm2(rbShiftU)<<std::endl;
// std::cout<<GridLogMessage<<"Full grid shift"<<std::endl;
std::cout<<GridLogMessage<<"Full grid shift"<<std::endl;
ShiftU = Cshift(U,dir,shift); // Shift everything
// std::cout<<GridLogMessage << "\tShiftU " <<norm2(rbShiftU)<<std::endl;
std::cout<<GridLogMessage << "\tShiftU " <<norm2(rbShiftU)<<std::endl;
std::vector<int> coor(4);
@ -105,18 +105,18 @@ int main (int argc, char ** argv)
Fine.CoorFromIndex(peer,index,latt_size);
if (nrm > 0){
std::cerr<<"FAIL shift "<< shift<<" in dir "<< dir
std::cout<<"FAIL shift "<< shift<<" in dir "<< dir
<<" ["<<coor[0]<<","<<coor[1]<<","<<coor[2]<<","<<coor[3]<<"] = "
<< cm()()()<<" expect "<<scm<<" "<<nrm<<std::endl;
std::cerr<<"Got "<<index<<" " << peer[0]<<","<<peer[1]<<","<<peer[2]<<","<<peer[3]<<std::endl;
std::cout<<"Got "<<index<<" " << peer[0]<<","<<peer[1]<<","<<peer[2]<<","<<peer[3]<<std::endl;
index=real(scm);
Fine.CoorFromIndex(peer,index,latt_size);
std::cerr<<"Expect "<<index<<" " << peer[0]<<","<<peer[1]<<","<<peer[2]<<","<<peer[3]<<std::endl;
std::cout<<"Expect "<<index<<" " << peer[0]<<","<<peer[1]<<","<<peer[2]<<","<<peer[3]<<std::endl;
exit(-1);
}
}}}}
int exx=0;
std::cout<<GridLogMessage << "Checking the checkerboard shift"<<std::endl;
for(coor[3]=0;coor[3]<latt_size[3];coor[3]++){
for(coor[2]=0;coor[2]<latt_size[2];coor[2]++){
@ -144,20 +144,21 @@ int main (int argc, char ** argv)
Fine.CoorFromIndex(peer,index,latt_size);
if (nrm > 0){
std::cerr<<"FAIL shift "<< shift<<" in dir "<< dir
std::cout<<"FAIL shift "<< shift<<" in dir "<< dir
<<" ["<<coor[0]<<","<<coor[1]<<","<<coor[2]<<","<<coor[3]<<"] = "
<< cm()()()<<" expect "<<scm<<" "<<nrm<<std::endl;
std::cerr<<"Got "<<index<<" " << peer[0]<<","<<peer[1]<<","<<peer[2]<<","<<peer[3]<<std::endl;
std::cout<<"Got "<<index<<" " << peer[0]<<","<<peer[1]<<","<<peer[2]<<","<<peer[3]<<std::endl;
index=real(scm);
Fine.CoorFromIndex(peer,index,latt_size);
std::cerr<<"Expect "<<index<<" " << peer[0]<<","<<peer[1]<<","<<peer[2]<<","<<peer[3]<<std::endl;
exit(-1);
} else if (0) {
std::cout<<"Expect "<<index<<" " << peer[0]<<","<<peer[1]<<","<<peer[2]<<","<<peer[3]<<std::endl;
exx=1;
} else if (1) {
std::cout<<GridLogMessage<<"PASS shift "<< shift<<" in dir "<< dir
<<" ["<<coor[0]<<","<<coor[1]<<","<<coor[2]<<","<<coor[3]<<"] = "
<< cm()()()<<" expect "<<scm<<" "<<nrm<<std::endl;
}
}}}}
if (exx) exit(-1);
}
}

View File

@ -35,21 +35,26 @@ int main (int argc, char ** argv)
MdagMLinearOperator<DomainWallFermionR,LatticeFermion> HermOp(Ddwf);
const int Nk = 10;
const int Np = 1;
RealD enorm = 1.0;
RealD vthrs = 1;
const int Nit= 1000;
const int Nk = 30;
const int Np = 10;
const int Nm = Nk+Np;
const int MaxIt= 10000;
RealD resid = 1.0e-8;
ImplicitlyRestartedLanczos<LatticeFermion> IRL(HermOp,PolyX,
Nk,Np,enorm,vthrs,Nit);
std::vector<double> Coeffs(1,1.0);
Polynomial<LatticeFermion> PolyX(Coeffs);
ImplicitlyRestartedLanczos<LatticeFermion> IRL(HermOp,PolyX,Nk,Nm,resid,MaxIt);
std::vector<RealD> eval(Nk);
std::vector<LatticeFermion> evec(Nk,FGrid);
std::vector<RealD> eval(Nm);
std::vector<LatticeFermion> evec(Nm,FGrid);
for(int i=0;i<Nm;i++){
std::cout << i<<" / "<< Nm<< " grid pointer "<<evec[i]._grid<<std::endl;
};
int Nconv;
IRL.calc(eval,evec,
src,
Nsbt,
Nconv);

View File

@ -298,7 +298,7 @@ int main (int argc, char ** argv)
c = scm()(1,1)(1,2);
scm()(1,1)(2,1) = c;
pokeIndex<ColourIndex> (c_m,c,0,0);
// pokeIndex<ColourIndex> (c_m,c,0,0);
}
FooBar = Bar;

View File

@ -8,6 +8,10 @@ int main (int argc, char ** argv)
{
Grid_init(&argc,&argv);
// typedef LatticeColourMatrix Field;
typedef LatticeComplex Field;
typedef typename Field::vector_object vobj;
typedef typename vobj::scalar_object sobj;
std::vector<int> latt_size = GridDefaultLatt();
std::vector<int> simd_layout = GridDefaultSimd(Nd,vComplex::Nsimd());
@ -18,23 +22,40 @@ int main (int argc, char ** argv)
GridCartesian Fine(latt_size,simd_layout,mpi_layout);
GridRedBlackCartesian rbFine(latt_size,simd_layout,mpi_layout);
GridParallelRNG fRNG(&Fine);
// fRNG.SeedRandomDevice();
std::vector<int> seeds({1,2,3,4});
fRNG.SeedFixedIntegers(seeds);
LatticeColourMatrix Foo(&Fine);
LatticeColourMatrix Bar(&Fine);
LatticeColourMatrix Check(&Fine);
LatticeColourMatrix Diff(&Fine);
Field Foo(&Fine);
Field Bar(&Fine);
Field Check(&Fine);
Field Diff(&Fine);
LatticeComplex lex(&Fine);
lex = zero;
random(fRNG,Foo);
gaussian(fRNG,Bar);
/*
Integer stride =1000;
{
double nrm;
LatticeComplex coor(&Fine);
for(int d=0;d<Nd;d++){
LatticeCoordinate(coor,d);
lex = lex + coor*stride;
stride=stride/10;
}
Foo=lex;
}
*/
for(int dir=0;dir<4;dir++){
for(int disp=0;disp<Fine._fdimensions[dir];disp++){
std::cout<<GridLogMessage << "Using stencil to shift dim "<<dir<< " by "<<disp<<std::endl;
std::cout<< std::fixed <<GridLogMessage << "Using stencil to shift dim "<<dir<< " by "<<disp<<std::endl;
// start to test the Cartesian npoint stencil infrastructure
int npoint=1;
std::vector<int> directions(npoint,dir);
@ -48,8 +69,8 @@ int main (int argc, char ** argv)
ocoor[dir]=(ocoor[dir]+disp)%Fine._rdimensions[dir];
}
std::vector<vColourMatrix,alignedAllocator<vColourMatrix> > comm_buf(myStencil._unified_buffer_size);
SimpleCompressor<vColourMatrix> compress;
std::vector<vobj,alignedAllocator<vobj> > comm_buf(myStencil._unified_buffer_size);
SimpleCompressor<vobj> compress;
myStencil.HaloExchange(Foo,comm_buf,compress);
Bar = Cshift(Foo,dir,disp);
@ -75,9 +96,114 @@ int main (int argc, char ** argv)
Real nrm = norm2(Diff);
std::cout<<GridLogMessage<<"N2diff ="<<nrm<<" "<<nrmC<<" " <<nrmB<<std::endl;
Real snrmC =0;
Real snrmB =0;
Real snrm =0;
std::vector<int> coor(4);
for(coor[3]=0;coor[3]<latt_size[3]/mpi_layout[3];coor[3]++){
for(coor[2]=0;coor[2]<latt_size[2]/mpi_layout[2];coor[2]++){
for(coor[1]=0;coor[1]<latt_size[1]/mpi_layout[1];coor[1]++){
for(coor[0]=0;coor[0]<latt_size[0]/mpi_layout[0];coor[0]++){
RealD diff;
sobj check,bar;
peekSite(check,Check,coor);
peekSite(bar,Bar,coor);
sobj ddiff;
ddiff = check -bar;
diff =norm2(ddiff);
if ( diff > 0){
std::cout <<"Coor (" << coor[0]<<","<<coor[1]<<","<<coor[2]<<","<<coor[3]
<<") " <<check<<" vs "<<bar<<std::endl;
}
}}}}
}
}
std::cout<<GridLogMessage<<"Testing RedBlack\n ";
Field EFoo(&rbFine);
Field OFoo(&rbFine);
Field ECheck(&rbFine);
Field OCheck(&rbFine);
pickCheckerboard(Even,EFoo,Foo);
pickCheckerboard(Odd ,OFoo,Foo);
for(int dir=0;dir<4;dir++){
for(int disp=0;disp<rbFine._fdimensions[dir];disp++){
std::cout<<GridLogMessage << "Using stencil to shift rb dim "<<dir<< " by "<<disp<<std::endl;
// start to test the Cartesian npoint stencil infrastructure
int npoint=1;
std::vector<int> directions(npoint,dir);
std::vector<int> displacements(npoint,disp);
CartesianStencil EStencil(&rbFine,npoint,Even,directions,displacements);
CartesianStencil OStencil(&rbFine,npoint,Odd,directions,displacements);
std::vector<int> ocoor(4);
for(int o=0;o<Fine.oSites();o++){
Fine.oCoorFromOindex(ocoor,o);
ocoor[dir]=(ocoor[dir]+disp)%Fine._rdimensions[dir];
}
std::vector<vobj,alignedAllocator<vobj> > Ecomm_buf(EStencil._unified_buffer_size);
std::vector<vobj,alignedAllocator<vobj> > Ocomm_buf(OStencil._unified_buffer_size);
SimpleCompressor<vobj> compress;
EStencil.HaloExchange(EFoo,Ecomm_buf,compress);
OStencil.HaloExchange(OFoo,Ocomm_buf,compress);
Bar = Cshift(Foo,dir,disp);
if ( disp & 0x1 ) {
ECheck.checkerboard = Even;
OCheck.checkerboard = Odd;
} else {
ECheck.checkerboard = Odd;
OCheck.checkerboard = Even;
}
// Implement a stencil code that should agree with that darn cshift!
for(int i=0;i<OCheck._grid->oSites();i++){
int permute_type;
StencilEntry *SE;
SE = EStencil.GetEntry(permute_type,0,i);
std::cout << "Even source "<< i<<" -> " <<SE->_offset << " "<< SE->_is_local<<std::endl;
if ( SE->_is_local && SE->_permute )
permute(OCheck._odata[i],EFoo._odata[SE->_offset],permute_type);
else if (SE->_is_local)
OCheck._odata[i] = EFoo._odata[SE->_offset];
else
OCheck._odata[i] = Ecomm_buf[SE->_offset];
}
for(int i=0;i<ECheck._grid->oSites();i++){
int permute_type;
StencilEntry *SE;
SE = OStencil.GetEntry(permute_type,0,i);
std::cout << "ODD source "<< i<<" -> " <<SE->_offset << " "<< SE->_is_local<<std::endl;
if ( SE->_is_local && SE->_permute )
permute(ECheck._odata[i],OFoo._odata[SE->_offset],permute_type);
else if (SE->_is_local)
ECheck._odata[i] = OFoo._odata[SE->_offset];
else
ECheck._odata[i] = Ocomm_buf[SE->_offset];
}
setCheckerboard(Check,ECheck);
setCheckerboard(Check,OCheck);
Real nrmC = norm2(Check);
Real nrmB = norm2(Bar);
Diff = Check-Bar;
Real nrm = norm2(Diff);
std::cout<<GridLogMessage<<"RB N2diff ="<<nrm<<" "<<nrmC<<" " <<nrmB<<std::endl;
std::vector<int> coor(4);
for(coor[3]=0;coor[3]<latt_size[3]/mpi_layout[3];coor[3]++){
@ -85,33 +211,22 @@ int main (int argc, char ** argv)
for(coor[1]=0;coor[1]<latt_size[1]/mpi_layout[1];coor[1]++){
for(coor[0]=0;coor[0]<latt_size[0]/mpi_layout[0];coor[0]++){
Complex diff;
ColourMatrix check,bar;
RealD diff;
sobj check,bar;
peekSite(check,Check,coor);
peekSite(bar,Bar,coor);
for(int r=0;r<3;r++){
for(int c=0;c<3;c++){
diff =check()()(r,c)-bar()()(r,c);
double nn=real(conjugate(diff)*diff);
if ( nn > 0){
printf("Coor (%d %d %d %d) \t rc %d%d \t %le (%le,%le) %le\n",
coor[0],coor[1],coor[2],coor[3],r,c,
nn,
real(check()()(r,c)),
imag(check()()(r,c)),
real(bar()()(r,c))
);
}
snrmC=snrmC+real(conjugate(check()()(r,c))*check()()(r,c));
snrmB=snrmB+real(conjugate(bar()()(r,c))*bar()()(r,c));
snrm=snrm+nn;
}}
sobj ddiff;
ddiff = check -bar;
diff =norm2(ddiff);
if ( diff > 0){
std::cout <<"Coor (" << coor[0]<<","<<coor[1]<<","<<coor[2]<<","<<coor[3] <<") "
<<"shift "<<disp<<" dir "<< dir
<< " stencil impl " <<check<<" vs cshift impl "<<bar<<std::endl;
}
}}}}
std::cout<<GridLogMessage<<"scalar N2diff = "<<snrm<<" " <<snrmC<<" "<<snrmB<<std::endl;
}
}

View File

@ -0,0 +1,123 @@
#include <fenv.h>
#include <Grid.h>
using namespace std;
using namespace Grid;
using namespace Grid::QCD;
static int
FEenableexcept (unsigned int excepts)
{
static fenv_t fenv;
unsigned int new_excepts = excepts & FE_ALL_EXCEPT,
old_excepts; // previous masks
if ( fegetenv (&fenv) ) return -1;
old_excepts = fenv.__control & FE_ALL_EXCEPT;
// unmask
fenv.__control &= ~new_excepts;
fenv.__mxcsr &= ~(new_excepts << 7);
return ( fesetenv (&fenv) ? -1 : old_excepts );
}
template<class Field> class DumbOperator : public LinearOperatorBase<Field> {
public:
LatticeComplex scale;
DumbOperator(GridBase *grid) : scale(grid)
{
GridParallelRNG pRNG(grid);
std::vector<int> seeds({5,6,7,8});
pRNG.SeedFixedIntegers(seeds);
random(pRNG,scale);
scale = exp(-real(scale)*6.0);
std::cout << " True matrix \n"<< scale <<std::endl;
}
// Support for coarsening to a multigrid
void OpDiag (const Field &in, Field &out) {};
void OpDir (const Field &in, Field &out,int dir,int disp){};
void Op (const Field &in, Field &out){
out = scale * in;
}
void AdjOp (const Field &in, Field &out){
out = scale * in;
}
void HermOp(const Field &in, Field &out){
double n1, n2;
HermOpAndNorm(in,out,n1,n2);
}
void HermOpAndNorm(const Field &in, Field &out,double &n1,double &n2){
ComplexD dot;
out = scale * in;
dot= innerProduct(in,out);
n1=real(dot);
dot = innerProduct(out,out);
n2=real(dot);
}
};
int main (int argc, char ** argv)
{
FEenableexcept(FE_ALL_EXCEPT & ~FE_INEXACT);
Grid_init(&argc,&argv);
GridCartesian *grid = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(),
GridDefaultSimd(Nd,vComplex::Nsimd()),
GridDefaultMpi());
GridParallelRNG RNG(grid);
std::vector<int> seeds({1,2,3,4});
RNG.SeedFixedIntegers(seeds);
RealD alpha = 1.0;
RealD beta = 0.03;
RealD mu = 0.0;
int order = 11;
ChebyshevLanczos<LatticeComplex> Cheby(alpha,beta,mu,order);
std::ofstream file("pooh.dat");
Cheby.csv(file);
HermOpOperatorFunction<LatticeComplex> X;
DumbOperator<LatticeComplex> HermOp(grid);
const int Nk = 40;
const int Nm = 80;
const int Nit= 10000;
int Nconv;
RealD eresid = 1.0e-8;
ImplicitlyRestartedLanczos<LatticeComplex> IRL(HermOp,X,Nk,Nm,eresid,Nit);
ImplicitlyRestartedLanczos<LatticeComplex> ChebyIRL(HermOp,Cheby,Nk,Nm,eresid,Nit);
LatticeComplex src(grid); gaussian(RNG,src);
{
std::vector<RealD> eval(Nm);
std::vector<LatticeComplex> evec(Nm,grid);
IRL.calc(eval,evec,src, Nconv);
}
{
std::vector<RealD> eval(Nm);
std::vector<LatticeComplex> evec(Nm,grid);
ChebyIRL.calc(eval,evec,src, Nconv);
}
Grid_finalize();
}

293
tests/Test_zmm.cc Normal file
View File

@ -0,0 +1,293 @@
#include <Grid.h>
#include <PerfCount.h>
#include <simd/Avx512Asm.h>
using namespace Grid;
using namespace Grid::QCD;
void WilsonDslashAvx512(void *ptr1,void *ptr2,void *ptr3);
void WilsonDslashAvx512F(void *ptr1,void *ptr2,void *ptr3);
void TimesIAvx512F(void *ptr1,void *ptr3);
void TimesIAvx512(void *ptr1,void *ptr3);
int main(int argc,char **argv)
{
Grid_init(&argc,&argv);
std::vector<int> latt4 = GridDefaultLatt();
const int Ls=16;
GridCartesian * UGrid = SpaceTimeGrid::makeFourDimGrid(latt4, GridDefaultSimd(Nd,vComplex::Nsimd()),GridDefaultMpi());
GridRedBlackCartesian * UrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid);
GridCartesian * FGrid = SpaceTimeGrid::makeFiveDimGrid(Ls,UGrid);
GridRedBlackCartesian * FrbGrid = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,UGrid);
std::vector<int> simd_layout = GridDefaultSimd(Nd,vComplex::Nsimd());
std::vector<int> mpi_layout = GridDefaultMpi();
int threads = GridThread::GetThreads();
std::vector<int> seeds4({1,2,3,4});
std::vector<int> seeds5({5,6,7,8});
GridSerialRNG sRNG; sRNG.SeedFixedIntegers(seeds4);
vColourMatrixD mat;
vHalfSpinColourVectorD vec;
vHalfSpinColourVectorD matvec;
vHalfSpinColourVectorD ref;
vComplexD err;
random(sRNG,mat);
random(sRNG,vec);
ref = mat*vec;
WilsonDslashAvx512((void *)&vec, (void *)&mat,(void *)&matvec);
ref = ref - matvec;
err = TensorRemove(innerProduct(ref,ref));
std::cout <<"Double SU3 x 2spin diff "<< Reduce(err)<<std::endl;
vColourMatrixF matF;
vHalfSpinColourVectorF vecF;
vHalfSpinColourVectorF matvecF;
vHalfSpinColourVectorF refF;
vComplexF errF;
random(sRNG,matF);
random(sRNG,vecF);
refF = matF*vecF;
WilsonDslashAvx512F((void *)&vecF, (void *)&matF,(void *)&matvecF);
refF = refF-matvecF;
errF = TensorRemove(innerProduct(refF,refF));
std::cout <<"Single SU3 x 2spin diff "<< Reduce(errF)<<std::endl;
TimesIAvx512F((void *)&vecF,(void *)&matvecF);
refF = timesI(vecF)-matvecF;
errF = TensorRemove(innerProduct(refF,refF));
std::cout <<" timesI single diff "<< Reduce(errF)<<std::endl;
TimesIAvx512((void *)&vec,(void *)&matvec);
ref = timesI(vec)-matvec;
err = TensorRemove(innerProduct(ref,ref));
std::cout <<" timesI double diff "<< Reduce(err)<<std::endl;
LatticeFermion src (FGrid);
LatticeFermion srce(FrbGrid);
LatticeFermion resulto(FrbGrid); resulto=zero;
LatticeFermion resulta(FrbGrid); resulta=zero;
LatticeFermion diff(FrbGrid);
LatticeGaugeField Umu(UGrid);
#if 1
GridParallelRNG RNG4(UGrid); RNG4.SeedFixedIntegers(seeds4);
GridParallelRNG RNG5(FGrid); RNG5.SeedFixedIntegers(seeds5);
random(RNG5,src);
random(RNG4,Umu);
#else
int mmu=3;
std::vector<LatticeColourMatrix> U(4,UGrid);
for(int mu=0;mu<Nd;mu++){
U[mu] = PeekIndex<LorentzIndex>(Umu,mu);
if ( mu!=mmu ) U[mu] = zero;
if ( mu==mmu ) U[mu] = 1.0;
PokeIndex<LorentzIndex>(Umu,U[mu],mu);
}
#endif
pickCheckerboard(Even,srce,src);
RealD mass=0.1;
RealD M5 =1.8;
DomainWallFermionR Dw(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5);
std::cout<<GridLogMessage << "Calling Dw"<<std::endl;
int ncall=50;
double t0=usecond();
for(int i=0;i<ncall;i++){
Dw.DhopOE(srce,resulto,0);
}
double t1=usecond();
double volume=Ls; for(int mu=0;mu<Nd;mu++) volume=volume*latt4[mu];
double flops=1344*volume/2;
std::cout<<GridLogMessage << "Called Dw"<<std::endl;
std::cout<<GridLogMessage << "norm result "<< norm2(resulto)<<std::endl;
std::cout<<GridLogMessage << "mflop/s = "<< flops*ncall/(t1-t0)<<std::endl;
QCD::WilsonFermion5DStatic::AsmOptDslash=1;
t0=usecond();
for(int i=0;i<ncall;i++){
Dw.DhopOE(srce,resulta,0);
}
t1=usecond();
for(int i=0;i< PerformanceCounter::NumTypes(); i++ ){
Dw.DhopOE(srce,resulta,0);
PerformanceCounter Counter(i);
Counter.Start();
Dw.DhopOE(srce,resulta,0);
Counter.Stop();
Counter.Report();
}
resulta = (-0.5) * resulta;
std::cout<<GridLogMessage << "Called Asm Dw"<<std::endl;
std::cout<<GridLogMessage << "norm result "<< norm2(resulta)<<std::endl;
std::cout<<GridLogMessage << "mflop/s = "<< flops*ncall/(t1-t0)<<std::endl;
diff = resulto-resulta;
std::cout<<GridLogMessage << "diff "<< norm2(diff)<<std::endl;
}
#undef VLOAD
#undef VSTORE
#undef VMUL
#undef VMADD
#undef ZEND1
#undef ZEND2
#undef ZLOAD
#undef ZMUL
#undef ZMADD
#define VZERO(A) VZEROd(A)
#define VTIMESI(A,B,C) VTIMESId(A,B,C)
#define VTIMESMINUSI(A,B,C) VTIMESMINUSId(A,B,C)
#define VLOAD(OFF,PTR,DEST) VLOADd(OFF,PTR,DEST)
#define VSTORE(OFF,PTR,SRC) VSTOREd(OFF,PTR,SRC)
#define VMUL(Uri,Uir,Chi,UChi,Z) VMULd(Uri,Uir,Chi,UChi,Z)
#define VMADD(Uri,Uir,Chi,UChi,Z) VMADDd(Uri,Uir,Chi,UChi,Z)
#define ZEND1(A,B,C) ZEND1d(A,B,C)
#define ZEND2(A,B,C) ZEND2d(A,B,C)
#define ZLOAD(A,B,C,D) ZLOADd(A,B,C,D)
#define ZMUL(A,B,C,D,E) ZMULd(A,B,C,D,E)
#define ZMADD(A,B,C,D,E) ZMADDd(A,B,C,D,E)
#define ZMULMEM2SP(O,P,tmp,B,C,Briir,Biirr,Criir,Ciirr) ZMULMEM2SPd(O,P,tmp,B,C,Briir,Biirr,Criir,Ciirr)
#define ZMADDMEM2SP(O,P,tmp,B,C,Briir,Biirr,Criir,Ciirr) ZMADDMEM2SPd(O,P,tmp,B,C,Briir,Biirr,Criir,Ciirr)
#define zz Z0
void TimesIAvx512(void *ptr1,void *ptr3)
{
__asm__ ("mov $0xAAAA, %%eax " : : :"%eax");
__asm__ ("kmov %%eax, %%k6 " : : :);
__asm__ ("knot %%k6, %%k7 " : : :);
MASK_REGS;
LOAD_CHI(ptr1);
__asm__ (
VZERO(zz)
VTIMESI(Chi_00,UChi_00,zz)
VTIMESI(Chi_01,UChi_01,zz)
VTIMESI(Chi_02,UChi_02,zz)
VTIMESI(Chi_10,UChi_10,zz)
VTIMESI(Chi_11,UChi_11,zz)
VTIMESI(Chi_12,UChi_12,zz)
);
SAVE_UCHI(ptr3);
}
void WilsonDslashAvx512(void *ptr1,void *ptr2,void *ptr3)
{
int return_address;
// prototype computed goto to eliminate ABI save restore on call/return in
// generated assembly.
static void * table[] = { &&save, &&mult };
MASK_REGS;
LOAD_CHI(ptr1);
return_address = 0;
goto mult;
save:
SAVE_UCHI(ptr3);
return;
mult:
MULT_2SPIN(ptr2);
goto *table[return_address];
}
#undef VLOAD
#undef VSTORE
#undef VMUL
#undef VMADD
#undef ZEND1
#undef ZEND2
#undef ZLOAD
#undef ZMUL
#undef ZMADD
#undef VZERO
#undef VTIMESI
#undef VTIMESI0
#undef VTIMESI1
#undef VTIMESI2
#undef VTIMESMINUSI
#undef ZMULMEM2SP
#undef ZMADDMEM2SP
#define VZERO(A) VZEROf(A)
#define VMOV(A,B) VMOVf(A,B)
#define VADD(A,B,C) VADDf(A,B,C)
#define VSUB(A,B,C) VSUBf(A,B,C)
#define VTIMESI(A,B,C) VTIMESIf(A,B,C)
#define VTIMESMINUSI(A,B,C) VTIMESMINUSIf(A,B,C)
#define VLOAD(OFF,PTR,DEST) VLOADf(OFF,PTR,DEST)
#define VSTORE(OFF,PTR,SRC) VSTOREf(OFF,PTR,SRC)
#define VMUL(Uri,Uir,Chi,UChi,Z) VMULf(Uri,Uir,Chi,UChi,Z)
#define VMADD(Uri,Uir,Chi,UChi,Z) VMADDf(Uri,Uir,Chi,UChi,Z)
#define ZEND1(A,B,C) ZEND1f(A,B,C)
#define ZEND2(A,B,C) ZEND2f(A,B,C)
#define ZLOAD(A,B,C,D) ZLOADf(A,B,C,D)
#define ZMUL(A,B,C,D,E) ZMULf(A,B,C,D,E)
#define ZMADD(A,B,C,D,E) ZMADDf(A,B,C,D,E)
#define ZMULMEM2SP(O,P,tmp,B,C,Briir,Biirr,Criir,Ciirr) ZMULMEM2SPf(O,P,tmp,B,C,Briir,Biirr,Criir,Ciirr)
#define ZMADDMEM2SP(O,P,tmp,B,C,Briir,Biirr,Criir,Ciirr) ZMADDMEM2SPf(O,P,tmp,B,C,Briir,Biirr,Criir,Ciirr)
void TimesIAvx512F(void *ptr1,void *ptr3)
{
MASK_REGS;
LOAD_CHI(ptr1);
__asm__ (
VZERO(zz)
VTIMESI(Chi_00,UChi_00,zz)
VTIMESI(Chi_01,UChi_01,zz)
VTIMESI(Chi_02,UChi_02,zz)
VTIMESI(Chi_10,UChi_10,zz)
VTIMESI(Chi_11,UChi_11,zz)
VTIMESI(Chi_12,UChi_12,zz)
);
SAVE_UCHI(ptr3);
}
void WilsonDslashAvx512F(void *ptr1,void *ptr2,void *ptr3)
{
MASK_REGS;
LOAD_CHI(ptr1);
MULT_2SPIN(ptr2);
SAVE_UCHI(ptr3);
return;
}