mirror of
https://github.com/paboyle/Grid.git
synced 2025-06-11 11:56:56 +01:00
Merge commit '899ca41cb8c8f47771bfd37cd895cbc2184e5560'
This commit is contained in:
@ -86,16 +86,16 @@ Test_dwf_hdcr_SOURCES=Test_dwf_hdcr.cc
|
||||
Test_dwf_hdcr_LDADD=-lGrid
|
||||
|
||||
|
||||
#Test_dwf_lanczos_SOURCES=Test_dwf_lanczos.cc
|
||||
#Test_dwf_lanczos_LDADD=-lGrid
|
||||
Test_dwf_lanczos_SOURCES=Test_dwf_lanczos.cc
|
||||
Test_dwf_lanczos_LDADD=-lGrid
|
||||
|
||||
|
||||
Test_gamma_SOURCES=Test_gamma.cc
|
||||
Test_gamma_LDADD=-lGrid
|
||||
|
||||
|
||||
#Test_gparity_SOURCES=Test_gparity.cc
|
||||
#Test_gparity_LDADD=-lGrid
|
||||
Test_gparity_SOURCES=Test_gparity.cc
|
||||
Test_gparity_LDADD=-lGrid
|
||||
|
||||
|
||||
#Test_gpwilson_even_odd_SOURCES=Test_gpwilson_even_odd.cc
|
||||
@ -190,6 +190,10 @@ Test_stencil_SOURCES=Test_stencil.cc
|
||||
Test_stencil_LDADD=-lGrid
|
||||
|
||||
|
||||
Test_synthetic_lanczos_SOURCES=Test_synthetic_lanczos.cc
|
||||
Test_synthetic_lanczos_LDADD=-lGrid
|
||||
|
||||
|
||||
Test_wilson_cg_prec_SOURCES=Test_wilson_cg_prec.cc
|
||||
Test_wilson_cg_prec_LDADD=-lGrid
|
||||
|
||||
|
@ -54,27 +54,27 @@ int main (int argc, char ** argv)
|
||||
|
||||
TComplex cm;
|
||||
for(int dir=0;dir<Nd;dir++){
|
||||
if ( dir!=1 ) continue;
|
||||
// if ( dir!=1 ) continue;
|
||||
for(int shift=0;shift<latt_size[dir];shift++){
|
||||
|
||||
std::cout<<GridLogMessage<<"Shifting by "<<shift<<" in direction"<<dir<<std::endl;
|
||||
|
||||
// std::cout<<GridLogMessage<<"Even grid"<<std::endl;
|
||||
std::cout<<GridLogMessage<<"Even grid"<<std::endl;
|
||||
ShiftUe = Cshift(Ue,dir,shift); // Shift everything cb by cb
|
||||
// std::cout<<GridLogMessage << "\tShiftUe " <<norm2(ShiftUe)<<std::endl;
|
||||
std::cout<<GridLogMessage << "\tShiftUe " <<norm2(ShiftUe)<<std::endl;
|
||||
|
||||
// std::cout<<GridLogMessage<<"Odd grid"<<std::endl;
|
||||
std::cout<<GridLogMessage<<"Odd grid"<<std::endl;
|
||||
ShiftUo = Cshift(Uo,dir,shift);
|
||||
// std::cout<<GridLogMessage << "\tShiftUo " <<norm2(ShiftUo)<<std::endl;
|
||||
std::cout<<GridLogMessage << "\tShiftUo " <<norm2(ShiftUo)<<std::endl;
|
||||
|
||||
// std::cout<<GridLogMessage<<"Recombined Even/Odd grids"<<std::endl;
|
||||
std::cout<<GridLogMessage<<"Recombined Even/Odd grids"<<std::endl;
|
||||
setCheckerboard(rbShiftU,ShiftUe);
|
||||
setCheckerboard(rbShiftU,ShiftUo);
|
||||
// std::cout<<GridLogMessage << "\trbShiftU " <<norm2(rbShiftU)<<std::endl;
|
||||
std::cout<<GridLogMessage << "\trbShiftU " <<norm2(rbShiftU)<<std::endl;
|
||||
|
||||
// std::cout<<GridLogMessage<<"Full grid shift"<<std::endl;
|
||||
std::cout<<GridLogMessage<<"Full grid shift"<<std::endl;
|
||||
ShiftU = Cshift(U,dir,shift); // Shift everything
|
||||
// std::cout<<GridLogMessage << "\tShiftU " <<norm2(rbShiftU)<<std::endl;
|
||||
std::cout<<GridLogMessage << "\tShiftU " <<norm2(rbShiftU)<<std::endl;
|
||||
|
||||
std::vector<int> coor(4);
|
||||
|
||||
@ -105,18 +105,18 @@ int main (int argc, char ** argv)
|
||||
Fine.CoorFromIndex(peer,index,latt_size);
|
||||
|
||||
if (nrm > 0){
|
||||
std::cerr<<"FAIL shift "<< shift<<" in dir "<< dir
|
||||
std::cout<<"FAIL shift "<< shift<<" in dir "<< dir
|
||||
<<" ["<<coor[0]<<","<<coor[1]<<","<<coor[2]<<","<<coor[3]<<"] = "
|
||||
<< cm()()()<<" expect "<<scm<<" "<<nrm<<std::endl;
|
||||
std::cerr<<"Got "<<index<<" " << peer[0]<<","<<peer[1]<<","<<peer[2]<<","<<peer[3]<<std::endl;
|
||||
std::cout<<"Got "<<index<<" " << peer[0]<<","<<peer[1]<<","<<peer[2]<<","<<peer[3]<<std::endl;
|
||||
index=real(scm);
|
||||
Fine.CoorFromIndex(peer,index,latt_size);
|
||||
std::cerr<<"Expect "<<index<<" " << peer[0]<<","<<peer[1]<<","<<peer[2]<<","<<peer[3]<<std::endl;
|
||||
std::cout<<"Expect "<<index<<" " << peer[0]<<","<<peer[1]<<","<<peer[2]<<","<<peer[3]<<std::endl;
|
||||
exit(-1);
|
||||
}
|
||||
}}}}
|
||||
|
||||
|
||||
int exx=0;
|
||||
std::cout<<GridLogMessage << "Checking the checkerboard shift"<<std::endl;
|
||||
for(coor[3]=0;coor[3]<latt_size[3];coor[3]++){
|
||||
for(coor[2]=0;coor[2]<latt_size[2];coor[2]++){
|
||||
@ -144,20 +144,21 @@ int main (int argc, char ** argv)
|
||||
Fine.CoorFromIndex(peer,index,latt_size);
|
||||
|
||||
if (nrm > 0){
|
||||
std::cerr<<"FAIL shift "<< shift<<" in dir "<< dir
|
||||
std::cout<<"FAIL shift "<< shift<<" in dir "<< dir
|
||||
<<" ["<<coor[0]<<","<<coor[1]<<","<<coor[2]<<","<<coor[3]<<"] = "
|
||||
<< cm()()()<<" expect "<<scm<<" "<<nrm<<std::endl;
|
||||
std::cerr<<"Got "<<index<<" " << peer[0]<<","<<peer[1]<<","<<peer[2]<<","<<peer[3]<<std::endl;
|
||||
std::cout<<"Got "<<index<<" " << peer[0]<<","<<peer[1]<<","<<peer[2]<<","<<peer[3]<<std::endl;
|
||||
index=real(scm);
|
||||
Fine.CoorFromIndex(peer,index,latt_size);
|
||||
std::cerr<<"Expect "<<index<<" " << peer[0]<<","<<peer[1]<<","<<peer[2]<<","<<peer[3]<<std::endl;
|
||||
exit(-1);
|
||||
} else if (0) {
|
||||
std::cout<<"Expect "<<index<<" " << peer[0]<<","<<peer[1]<<","<<peer[2]<<","<<peer[3]<<std::endl;
|
||||
exx=1;
|
||||
} else if (1) {
|
||||
std::cout<<GridLogMessage<<"PASS shift "<< shift<<" in dir "<< dir
|
||||
<<" ["<<coor[0]<<","<<coor[1]<<","<<coor[2]<<","<<coor[3]<<"] = "
|
||||
<< cm()()()<<" expect "<<scm<<" "<<nrm<<std::endl;
|
||||
}
|
||||
}}}}
|
||||
if (exx) exit(-1);
|
||||
|
||||
}
|
||||
}
|
||||
|
@ -35,21 +35,26 @@ int main (int argc, char ** argv)
|
||||
|
||||
MdagMLinearOperator<DomainWallFermionR,LatticeFermion> HermOp(Ddwf);
|
||||
|
||||
const int Nk = 10;
|
||||
const int Np = 1;
|
||||
RealD enorm = 1.0;
|
||||
RealD vthrs = 1;
|
||||
const int Nit= 1000;
|
||||
const int Nk = 30;
|
||||
const int Np = 10;
|
||||
const int Nm = Nk+Np;
|
||||
const int MaxIt= 10000;
|
||||
RealD resid = 1.0e-8;
|
||||
|
||||
ImplicitlyRestartedLanczos<LatticeFermion> IRL(HermOp,PolyX,
|
||||
Nk,Np,enorm,vthrs,Nit);
|
||||
std::vector<double> Coeffs(1,1.0);
|
||||
Polynomial<LatticeFermion> PolyX(Coeffs);
|
||||
ImplicitlyRestartedLanczos<LatticeFermion> IRL(HermOp,PolyX,Nk,Nm,resid,MaxIt);
|
||||
|
||||
|
||||
std::vector<RealD> eval(Nk);
|
||||
std::vector<LatticeFermion> evec(Nk,FGrid);
|
||||
std::vector<RealD> eval(Nm);
|
||||
std::vector<LatticeFermion> evec(Nm,FGrid);
|
||||
for(int i=0;i<Nm;i++){
|
||||
std::cout << i<<" / "<< Nm<< " grid pointer "<<evec[i]._grid<<std::endl;
|
||||
};
|
||||
|
||||
int Nconv;
|
||||
IRL.calc(eval,evec,
|
||||
src,
|
||||
Nsbt,
|
||||
Nconv);
|
||||
|
||||
|
||||
|
@ -298,7 +298,7 @@ int main (int argc, char ** argv)
|
||||
c = scm()(1,1)(1,2);
|
||||
scm()(1,1)(2,1) = c;
|
||||
|
||||
pokeIndex<ColourIndex> (c_m,c,0,0);
|
||||
// pokeIndex<ColourIndex> (c_m,c,0,0);
|
||||
}
|
||||
|
||||
FooBar = Bar;
|
||||
|
@ -8,6 +8,10 @@ int main (int argc, char ** argv)
|
||||
{
|
||||
Grid_init(&argc,&argv);
|
||||
|
||||
// typedef LatticeColourMatrix Field;
|
||||
typedef LatticeComplex Field;
|
||||
typedef typename Field::vector_object vobj;
|
||||
typedef typename vobj::scalar_object sobj;
|
||||
|
||||
std::vector<int> latt_size = GridDefaultLatt();
|
||||
std::vector<int> simd_layout = GridDefaultSimd(Nd,vComplex::Nsimd());
|
||||
@ -18,23 +22,40 @@ int main (int argc, char ** argv)
|
||||
GridCartesian Fine(latt_size,simd_layout,mpi_layout);
|
||||
GridRedBlackCartesian rbFine(latt_size,simd_layout,mpi_layout);
|
||||
GridParallelRNG fRNG(&Fine);
|
||||
|
||||
// fRNG.SeedRandomDevice();
|
||||
std::vector<int> seeds({1,2,3,4});
|
||||
fRNG.SeedFixedIntegers(seeds);
|
||||
|
||||
LatticeColourMatrix Foo(&Fine);
|
||||
LatticeColourMatrix Bar(&Fine);
|
||||
LatticeColourMatrix Check(&Fine);
|
||||
LatticeColourMatrix Diff(&Fine);
|
||||
|
||||
Field Foo(&Fine);
|
||||
Field Bar(&Fine);
|
||||
Field Check(&Fine);
|
||||
Field Diff(&Fine);
|
||||
LatticeComplex lex(&Fine);
|
||||
|
||||
lex = zero;
|
||||
random(fRNG,Foo);
|
||||
gaussian(fRNG,Bar);
|
||||
|
||||
/*
|
||||
Integer stride =1000;
|
||||
{
|
||||
double nrm;
|
||||
LatticeComplex coor(&Fine);
|
||||
|
||||
for(int d=0;d<Nd;d++){
|
||||
LatticeCoordinate(coor,d);
|
||||
lex = lex + coor*stride;
|
||||
stride=stride/10;
|
||||
}
|
||||
Foo=lex;
|
||||
}
|
||||
*/
|
||||
|
||||
for(int dir=0;dir<4;dir++){
|
||||
for(int disp=0;disp<Fine._fdimensions[dir];disp++){
|
||||
|
||||
std::cout<<GridLogMessage << "Using stencil to shift dim "<<dir<< " by "<<disp<<std::endl;
|
||||
std::cout<< std::fixed <<GridLogMessage << "Using stencil to shift dim "<<dir<< " by "<<disp<<std::endl;
|
||||
// start to test the Cartesian npoint stencil infrastructure
|
||||
int npoint=1;
|
||||
std::vector<int> directions(npoint,dir);
|
||||
@ -48,8 +69,8 @@ int main (int argc, char ** argv)
|
||||
ocoor[dir]=(ocoor[dir]+disp)%Fine._rdimensions[dir];
|
||||
}
|
||||
|
||||
std::vector<vColourMatrix,alignedAllocator<vColourMatrix> > comm_buf(myStencil._unified_buffer_size);
|
||||
SimpleCompressor<vColourMatrix> compress;
|
||||
std::vector<vobj,alignedAllocator<vobj> > comm_buf(myStencil._unified_buffer_size);
|
||||
SimpleCompressor<vobj> compress;
|
||||
myStencil.HaloExchange(Foo,comm_buf,compress);
|
||||
|
||||
Bar = Cshift(Foo,dir,disp);
|
||||
@ -75,9 +96,114 @@ int main (int argc, char ** argv)
|
||||
Real nrm = norm2(Diff);
|
||||
std::cout<<GridLogMessage<<"N2diff ="<<nrm<<" "<<nrmC<<" " <<nrmB<<std::endl;
|
||||
|
||||
Real snrmC =0;
|
||||
Real snrmB =0;
|
||||
Real snrm =0;
|
||||
std::vector<int> coor(4);
|
||||
for(coor[3]=0;coor[3]<latt_size[3]/mpi_layout[3];coor[3]++){
|
||||
for(coor[2]=0;coor[2]<latt_size[2]/mpi_layout[2];coor[2]++){
|
||||
for(coor[1]=0;coor[1]<latt_size[1]/mpi_layout[1];coor[1]++){
|
||||
for(coor[0]=0;coor[0]<latt_size[0]/mpi_layout[0];coor[0]++){
|
||||
|
||||
RealD diff;
|
||||
sobj check,bar;
|
||||
peekSite(check,Check,coor);
|
||||
peekSite(bar,Bar,coor);
|
||||
|
||||
sobj ddiff;
|
||||
ddiff = check -bar;
|
||||
diff =norm2(ddiff);
|
||||
if ( diff > 0){
|
||||
std::cout <<"Coor (" << coor[0]<<","<<coor[1]<<","<<coor[2]<<","<<coor[3]
|
||||
<<") " <<check<<" vs "<<bar<<std::endl;
|
||||
}
|
||||
|
||||
|
||||
}}}}
|
||||
|
||||
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
std::cout<<GridLogMessage<<"Testing RedBlack\n ";
|
||||
|
||||
|
||||
Field EFoo(&rbFine);
|
||||
Field OFoo(&rbFine);
|
||||
Field ECheck(&rbFine);
|
||||
Field OCheck(&rbFine);
|
||||
pickCheckerboard(Even,EFoo,Foo);
|
||||
pickCheckerboard(Odd ,OFoo,Foo);
|
||||
|
||||
for(int dir=0;dir<4;dir++){
|
||||
for(int disp=0;disp<rbFine._fdimensions[dir];disp++){
|
||||
|
||||
std::cout<<GridLogMessage << "Using stencil to shift rb dim "<<dir<< " by "<<disp<<std::endl;
|
||||
// start to test the Cartesian npoint stencil infrastructure
|
||||
int npoint=1;
|
||||
std::vector<int> directions(npoint,dir);
|
||||
std::vector<int> displacements(npoint,disp);
|
||||
|
||||
CartesianStencil EStencil(&rbFine,npoint,Even,directions,displacements);
|
||||
CartesianStencil OStencil(&rbFine,npoint,Odd,directions,displacements);
|
||||
|
||||
std::vector<int> ocoor(4);
|
||||
for(int o=0;o<Fine.oSites();o++){
|
||||
Fine.oCoorFromOindex(ocoor,o);
|
||||
ocoor[dir]=(ocoor[dir]+disp)%Fine._rdimensions[dir];
|
||||
}
|
||||
|
||||
std::vector<vobj,alignedAllocator<vobj> > Ecomm_buf(EStencil._unified_buffer_size);
|
||||
std::vector<vobj,alignedAllocator<vobj> > Ocomm_buf(OStencil._unified_buffer_size);
|
||||
|
||||
SimpleCompressor<vobj> compress;
|
||||
|
||||
EStencil.HaloExchange(EFoo,Ecomm_buf,compress);
|
||||
OStencil.HaloExchange(OFoo,Ocomm_buf,compress);
|
||||
|
||||
Bar = Cshift(Foo,dir,disp);
|
||||
|
||||
if ( disp & 0x1 ) {
|
||||
ECheck.checkerboard = Even;
|
||||
OCheck.checkerboard = Odd;
|
||||
} else {
|
||||
ECheck.checkerboard = Odd;
|
||||
OCheck.checkerboard = Even;
|
||||
}
|
||||
// Implement a stencil code that should agree with that darn cshift!
|
||||
for(int i=0;i<OCheck._grid->oSites();i++){
|
||||
int permute_type;
|
||||
StencilEntry *SE;
|
||||
SE = EStencil.GetEntry(permute_type,0,i);
|
||||
std::cout << "Even source "<< i<<" -> " <<SE->_offset << " "<< SE->_is_local<<std::endl;
|
||||
|
||||
if ( SE->_is_local && SE->_permute )
|
||||
permute(OCheck._odata[i],EFoo._odata[SE->_offset],permute_type);
|
||||
else if (SE->_is_local)
|
||||
OCheck._odata[i] = EFoo._odata[SE->_offset];
|
||||
else
|
||||
OCheck._odata[i] = Ecomm_buf[SE->_offset];
|
||||
}
|
||||
for(int i=0;i<ECheck._grid->oSites();i++){
|
||||
int permute_type;
|
||||
StencilEntry *SE;
|
||||
SE = OStencil.GetEntry(permute_type,0,i);
|
||||
std::cout << "ODD source "<< i<<" -> " <<SE->_offset << " "<< SE->_is_local<<std::endl;
|
||||
|
||||
if ( SE->_is_local && SE->_permute )
|
||||
permute(ECheck._odata[i],OFoo._odata[SE->_offset],permute_type);
|
||||
else if (SE->_is_local)
|
||||
ECheck._odata[i] = OFoo._odata[SE->_offset];
|
||||
else
|
||||
ECheck._odata[i] = Ocomm_buf[SE->_offset];
|
||||
}
|
||||
|
||||
setCheckerboard(Check,ECheck);
|
||||
setCheckerboard(Check,OCheck);
|
||||
|
||||
Real nrmC = norm2(Check);
|
||||
Real nrmB = norm2(Bar);
|
||||
Diff = Check-Bar;
|
||||
Real nrm = norm2(Diff);
|
||||
std::cout<<GridLogMessage<<"RB N2diff ="<<nrm<<" "<<nrmC<<" " <<nrmB<<std::endl;
|
||||
|
||||
std::vector<int> coor(4);
|
||||
for(coor[3]=0;coor[3]<latt_size[3]/mpi_layout[3];coor[3]++){
|
||||
@ -85,33 +211,22 @@ int main (int argc, char ** argv)
|
||||
for(coor[1]=0;coor[1]<latt_size[1]/mpi_layout[1];coor[1]++){
|
||||
for(coor[0]=0;coor[0]<latt_size[0]/mpi_layout[0];coor[0]++){
|
||||
|
||||
Complex diff;
|
||||
ColourMatrix check,bar;
|
||||
RealD diff;
|
||||
sobj check,bar;
|
||||
peekSite(check,Check,coor);
|
||||
peekSite(bar,Bar,coor);
|
||||
|
||||
for(int r=0;r<3;r++){
|
||||
for(int c=0;c<3;c++){
|
||||
diff =check()()(r,c)-bar()()(r,c);
|
||||
double nn=real(conjugate(diff)*diff);
|
||||
if ( nn > 0){
|
||||
printf("Coor (%d %d %d %d) \t rc %d%d \t %le (%le,%le) %le\n",
|
||||
coor[0],coor[1],coor[2],coor[3],r,c,
|
||||
nn,
|
||||
real(check()()(r,c)),
|
||||
imag(check()()(r,c)),
|
||||
real(bar()()(r,c))
|
||||
);
|
||||
}
|
||||
snrmC=snrmC+real(conjugate(check()()(r,c))*check()()(r,c));
|
||||
snrmB=snrmB+real(conjugate(bar()()(r,c))*bar()()(r,c));
|
||||
snrm=snrm+nn;
|
||||
}}
|
||||
sobj ddiff;
|
||||
ddiff = check -bar;
|
||||
diff =norm2(ddiff);
|
||||
if ( diff > 0){
|
||||
std::cout <<"Coor (" << coor[0]<<","<<coor[1]<<","<<coor[2]<<","<<coor[3] <<") "
|
||||
<<"shift "<<disp<<" dir "<< dir
|
||||
<< " stencil impl " <<check<<" vs cshift impl "<<bar<<std::endl;
|
||||
}
|
||||
|
||||
}}}}
|
||||
|
||||
std::cout<<GridLogMessage<<"scalar N2diff = "<<snrm<<" " <<snrmC<<" "<<snrmB<<std::endl;
|
||||
|
||||
|
||||
}
|
||||
}
|
||||
|
123
tests/Test_synthetic_lanczos.cc
Normal file
123
tests/Test_synthetic_lanczos.cc
Normal file
@ -0,0 +1,123 @@
|
||||
#include <fenv.h>
|
||||
#include <Grid.h>
|
||||
|
||||
using namespace std;
|
||||
using namespace Grid;
|
||||
using namespace Grid::QCD;
|
||||
|
||||
/// Unmask (enable trapping of) the floating-point exceptions selected by
/// @p excepts, so that the selected conditions raise a trap instead of being
/// recorded silently in the status flags.
///
/// @param excepts  Bitwise OR of FE_* flags; bits outside FE_ALL_EXCEPT are ignored.
/// @return -1 on failure, or when the platform offers no known mechanism.
///         With glibc: whatever feenableexcept() returns (the previously
///         enabled exceptions).
///         On x86 macOS: the FE_* bits of the previous x87 control word, i.e.
///         the previous *masks* -- unchanged from the original implementation.
static int
FEenableexcept (unsigned int excepts)
{
  const unsigned int new_excepts = excepts & FE_ALL_EXCEPT;

#if defined(__GLIBC__)
  // glibc's x86 fenv_t has no `__control` member, so the hand-rolled version
  // below does not compile there; glibc provides the operation directly.
  return feenableexcept (static_cast<int>(new_excepts));
#elif defined(__APPLE__) && (defined(__i386__) || defined(__x86_64__))
  fenv_t fenv;                       // automatic storage: no need for a shared static

  if ( fegetenv (&fenv) ) return -1;
  const unsigned int old_excepts = fenv.__control & FE_ALL_EXCEPT; // previous masks

  // unmask: clear the selected bits in the x87 control word, and the same
  // bits shifted left by 7 in MXCSR
  fenv.__control &= ~new_excepts;
  fenv.__mxcsr   &= ~(new_excepts << 7);

  return ( fesetenv (&fenv) ? -1 : static_cast<int>(old_excepts) );
#else
  // No known way to unmask exceptions on this platform.
  (void) new_excepts;
  return -1;
#endif
}
|
||||
|
||||
|
||||
/// Synthetic test operator: every application multiplies the input field by
/// the stored `scale` field (out = scale * in).
template<class Field> class DumbOperator : public LinearOperatorBase<Field> {
public:
  LatticeComplex scale;

  /// Fill `scale` from a parallel RNG seeded with {5,6,7,8}, replace it by
  /// exp(-6 * real(scale)), and print the resulting field.
  DumbOperator(GridBase *grid) : scale(grid)
  {
    std::vector<int> fixedSeeds({5,6,7,8});
    GridParallelRNG  diagRNG(grid);
    diagRNG.SeedFixedIntegers(fixedSeeds);

    random(diagRNG,scale);

    scale = exp(-real(scale)*6.0);
    std::cout << " True matrix \n"<< scale <<std::endl;
  }

  // Support for coarsening to a multigrid (intentionally empty here)
  void OpDiag (const Field &in, Field &out) {}
  void OpDir  (const Field &in, Field &out,int dir,int disp) {}

  /// out = scale * in
  void Op (const Field &in, Field &out) { out = scale * in; }

  /// Same product as Op().
  void AdjOp (const Field &in, Field &out) { out = scale * in; }

  /// Applies HermOpAndNorm() and discards both returned norms.
  void HermOp(const Field &in, Field &out){
    double unusedInOut, unusedOutOut;
    HermOpAndNorm(in,out,unusedInOut,unusedOutOut);
  }

  /// out = scale * in; n1 = Re <in|out>, n2 = Re <out|out>.
  void HermOpAndNorm(const Field &in, Field &out,double &n1,double &n2){
    out = scale * in;

    const ComplexD inDotOut  = innerProduct(in,out);
    const ComplexD outDotOut = innerProduct(out,out);

    n1 = real(inDotOut);
    n2 = real(outDotOut);
  }
};
|
||||
|
||||
|
||||
int main (int argc, char ** argv)
|
||||
{
|
||||
|
||||
FEenableexcept(FE_ALL_EXCEPT & ~FE_INEXACT);
|
||||
|
||||
Grid_init(&argc,&argv);
|
||||
|
||||
GridCartesian *grid = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(),
|
||||
GridDefaultSimd(Nd,vComplex::Nsimd()),
|
||||
GridDefaultMpi());
|
||||
|
||||
GridParallelRNG RNG(grid);
|
||||
std::vector<int> seeds({1,2,3,4});
|
||||
RNG.SeedFixedIntegers(seeds);
|
||||
|
||||
|
||||
RealD alpha = 1.0;
|
||||
RealD beta = 0.03;
|
||||
RealD mu = 0.0;
|
||||
int order = 11;
|
||||
ChebyshevLanczos<LatticeComplex> Cheby(alpha,beta,mu,order);
|
||||
|
||||
std::ofstream file("pooh.dat");
|
||||
Cheby.csv(file);
|
||||
|
||||
HermOpOperatorFunction<LatticeComplex> X;
|
||||
DumbOperator<LatticeComplex> HermOp(grid);
|
||||
|
||||
const int Nk = 40;
|
||||
const int Nm = 80;
|
||||
const int Nit= 10000;
|
||||
|
||||
int Nconv;
|
||||
RealD eresid = 1.0e-8;
|
||||
|
||||
ImplicitlyRestartedLanczos<LatticeComplex> IRL(HermOp,X,Nk,Nm,eresid,Nit);
|
||||
|
||||
ImplicitlyRestartedLanczos<LatticeComplex> ChebyIRL(HermOp,Cheby,Nk,Nm,eresid,Nit);
|
||||
|
||||
LatticeComplex src(grid); gaussian(RNG,src);
|
||||
{
|
||||
std::vector<RealD> eval(Nm);
|
||||
std::vector<LatticeComplex> evec(Nm,grid);
|
||||
IRL.calc(eval,evec,src, Nconv);
|
||||
}
|
||||
|
||||
{
|
||||
std::vector<RealD> eval(Nm);
|
||||
std::vector<LatticeComplex> evec(Nm,grid);
|
||||
ChebyIRL.calc(eval,evec,src, Nconv);
|
||||
}
|
||||
|
||||
Grid_finalize();
|
||||
}
|
293
tests/Test_zmm.cc
Normal file
293
tests/Test_zmm.cc
Normal file
@ -0,0 +1,293 @@
|
||||
#include <Grid.h>
|
||||
#include <PerfCount.h>
|
||||
#include <simd/Avx512Asm.h>
|
||||
|
||||
|
||||
using namespace Grid;
|
||||
using namespace Grid::QCD;
|
||||
// Forward declarations of the AVX-512 kernels defined after main() in this file.
// As called from main(): for WilsonDslashAvx512[F], ptr1 is the input half-spinor,
// ptr2 the colour matrix and ptr3 the output half-spinor; for TimesIAvx512[F],
// ptr1 is the input and ptr3 the output. The F suffix marks the kernels that
// main() calls with the single-precision (…F) vector types.
void WilsonDslashAvx512(void *ptr1,void *ptr2,void *ptr3);
|
||||
void WilsonDslashAvx512F(void *ptr1,void *ptr2,void *ptr3);
|
||||
void TimesIAvx512F(void *ptr1,void *ptr3);
|
||||
void TimesIAvx512(void *ptr1,void *ptr3);
|
||||
|
||||
|
||||
|
||||
int main(int argc,char **argv)
|
||||
{
|
||||
Grid_init(&argc,&argv);
|
||||
|
||||
|
||||
std::vector<int> latt4 = GridDefaultLatt();
|
||||
const int Ls=16;
|
||||
|
||||
GridCartesian * UGrid = SpaceTimeGrid::makeFourDimGrid(latt4, GridDefaultSimd(Nd,vComplex::Nsimd()),GridDefaultMpi());
|
||||
GridRedBlackCartesian * UrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid);
|
||||
GridCartesian * FGrid = SpaceTimeGrid::makeFiveDimGrid(Ls,UGrid);
|
||||
GridRedBlackCartesian * FrbGrid = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,UGrid);
|
||||
|
||||
std::vector<int> simd_layout = GridDefaultSimd(Nd,vComplex::Nsimd());
|
||||
std::vector<int> mpi_layout = GridDefaultMpi();
|
||||
int threads = GridThread::GetThreads();
|
||||
|
||||
std::vector<int> seeds4({1,2,3,4});
|
||||
std::vector<int> seeds5({5,6,7,8});
|
||||
|
||||
GridSerialRNG sRNG; sRNG.SeedFixedIntegers(seeds4);
|
||||
|
||||
vColourMatrixD mat;
|
||||
vHalfSpinColourVectorD vec;
|
||||
vHalfSpinColourVectorD matvec;
|
||||
vHalfSpinColourVectorD ref;
|
||||
vComplexD err;
|
||||
|
||||
random(sRNG,mat);
|
||||
random(sRNG,vec);
|
||||
|
||||
ref = mat*vec;
|
||||
|
||||
WilsonDslashAvx512((void *)&vec, (void *)&mat,(void *)&matvec);
|
||||
|
||||
ref = ref - matvec;
|
||||
err = TensorRemove(innerProduct(ref,ref));
|
||||
std::cout <<"Double SU3 x 2spin diff "<< Reduce(err)<<std::endl;
|
||||
|
||||
vColourMatrixF matF;
|
||||
vHalfSpinColourVectorF vecF;
|
||||
vHalfSpinColourVectorF matvecF;
|
||||
vHalfSpinColourVectorF refF;
|
||||
vComplexF errF;
|
||||
|
||||
random(sRNG,matF);
|
||||
random(sRNG,vecF);
|
||||
|
||||
refF = matF*vecF;
|
||||
|
||||
WilsonDslashAvx512F((void *)&vecF, (void *)&matF,(void *)&matvecF);
|
||||
|
||||
refF = refF-matvecF;
|
||||
errF = TensorRemove(innerProduct(refF,refF));
|
||||
std::cout <<"Single SU3 x 2spin diff "<< Reduce(errF)<<std::endl;
|
||||
|
||||
TimesIAvx512F((void *)&vecF,(void *)&matvecF);
|
||||
refF = timesI(vecF)-matvecF;
|
||||
errF = TensorRemove(innerProduct(refF,refF));
|
||||
std::cout <<" timesI single diff "<< Reduce(errF)<<std::endl;
|
||||
|
||||
TimesIAvx512((void *)&vec,(void *)&matvec);
|
||||
|
||||
ref = timesI(vec)-matvec;
|
||||
err = TensorRemove(innerProduct(ref,ref));
|
||||
std::cout <<" timesI double diff "<< Reduce(err)<<std::endl;
|
||||
|
||||
LatticeFermion src (FGrid);
|
||||
LatticeFermion srce(FrbGrid);
|
||||
|
||||
LatticeFermion resulto(FrbGrid); resulto=zero;
|
||||
LatticeFermion resulta(FrbGrid); resulta=zero;
|
||||
LatticeFermion diff(FrbGrid);
|
||||
LatticeGaugeField Umu(UGrid);
|
||||
|
||||
#if 1
|
||||
GridParallelRNG RNG4(UGrid); RNG4.SeedFixedIntegers(seeds4);
|
||||
GridParallelRNG RNG5(FGrid); RNG5.SeedFixedIntegers(seeds5);
|
||||
random(RNG5,src);
|
||||
random(RNG4,Umu);
|
||||
#else
|
||||
int mmu=3;
|
||||
std::vector<LatticeColourMatrix> U(4,UGrid);
|
||||
for(int mu=0;mu<Nd;mu++){
|
||||
U[mu] = PeekIndex<LorentzIndex>(Umu,mu);
|
||||
if ( mu!=mmu ) U[mu] = zero;
|
||||
if ( mu==mmu ) U[mu] = 1.0;
|
||||
PokeIndex<LorentzIndex>(Umu,U[mu],mu);
|
||||
}
|
||||
#endif
|
||||
pickCheckerboard(Even,srce,src);
|
||||
|
||||
RealD mass=0.1;
|
||||
RealD M5 =1.8;
|
||||
DomainWallFermionR Dw(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5);
|
||||
|
||||
std::cout<<GridLogMessage << "Calling Dw"<<std::endl;
|
||||
int ncall=50;
|
||||
double t0=usecond();
|
||||
for(int i=0;i<ncall;i++){
|
||||
Dw.DhopOE(srce,resulto,0);
|
||||
}
|
||||
double t1=usecond();
|
||||
|
||||
double volume=Ls; for(int mu=0;mu<Nd;mu++) volume=volume*latt4[mu];
|
||||
double flops=1344*volume/2;
|
||||
|
||||
std::cout<<GridLogMessage << "Called Dw"<<std::endl;
|
||||
std::cout<<GridLogMessage << "norm result "<< norm2(resulto)<<std::endl;
|
||||
std::cout<<GridLogMessage << "mflop/s = "<< flops*ncall/(t1-t0)<<std::endl;
|
||||
|
||||
QCD::WilsonFermion5DStatic::AsmOptDslash=1;
|
||||
t0=usecond();
|
||||
for(int i=0;i<ncall;i++){
|
||||
Dw.DhopOE(srce,resulta,0);
|
||||
}
|
||||
t1=usecond();
|
||||
|
||||
|
||||
for(int i=0;i< PerformanceCounter::NumTypes(); i++ ){
|
||||
Dw.DhopOE(srce,resulta,0);
|
||||
PerformanceCounter Counter(i);
|
||||
Counter.Start();
|
||||
Dw.DhopOE(srce,resulta,0);
|
||||
Counter.Stop();
|
||||
Counter.Report();
|
||||
}
|
||||
resulta = (-0.5) * resulta;
|
||||
|
||||
std::cout<<GridLogMessage << "Called Asm Dw"<<std::endl;
|
||||
std::cout<<GridLogMessage << "norm result "<< norm2(resulta)<<std::endl;
|
||||
std::cout<<GridLogMessage << "mflop/s = "<< flops*ncall/(t1-t0)<<std::endl;
|
||||
diff = resulto-resulta;
|
||||
std::cout<<GridLogMessage << "diff "<< norm2(diff)<<std::endl;
|
||||
|
||||
}
|
||||
|
||||
// ---- Double-precision bindings ----------------------------------------------
// Bind the precision-neutral macro names to their double-precision (…d)
// variants for the kernels defined directly below (TimesIAvx512,
// WilsonDslashAvx512).
//
// Every name defined here is #undef'd first, exactly as the single-precision
// section later in this file does, so that a previous definition (for example
// from an included header) can never clash with these. #undef of a name that is
// not currently defined is a no-op.
#undef VLOAD
#undef VSTORE
#undef VMUL
#undef VMADD
#undef ZEND1
#undef ZEND2
#undef ZLOAD
#undef ZMUL
#undef ZMADD
#undef VZERO
#undef VTIMESI
#undef VTIMESMINUSI
#undef ZMULMEM2SP
#undef ZMADDMEM2SP
#undef zz

#define VZERO(A)                  VZEROd(A)
#define VTIMESI(A,B,C)            VTIMESId(A,B,C)
#define VTIMESMINUSI(A,B,C)       VTIMESMINUSId(A,B,C)

#define VLOAD(OFF,PTR,DEST)       VLOADd(OFF,PTR,DEST)
#define VSTORE(OFF,PTR,SRC)       VSTOREd(OFF,PTR,SRC)
#define VMUL(Uri,Uir,Chi,UChi,Z)  VMULd(Uri,Uir,Chi,UChi,Z)
#define VMADD(Uri,Uir,Chi,UChi,Z) VMADDd(Uri,Uir,Chi,UChi,Z)
#define ZEND1(A,B,C)              ZEND1d(A,B,C)
#define ZEND2(A,B,C)              ZEND2d(A,B,C)
#define ZLOAD(A,B,C,D)            ZLOADd(A,B,C,D)
#define ZMUL(A,B,C,D,E)           ZMULd(A,B,C,D,E)
#define ZMADD(A,B,C,D,E)          ZMADDd(A,B,C,D,E)
#define ZMULMEM2SP(O,P,tmp,B,C,Briir,Biirr,Criir,Ciirr)  ZMULMEM2SPd(O,P,tmp,B,C,Briir,Biirr,Criir,Ciirr)
#define ZMADDMEM2SP(O,P,tmp,B,C,Briir,Biirr,Criir,Ciirr) ZMADDMEM2SPd(O,P,tmp,B,C,Briir,Biirr,Criir,Ciirr)

// Name passed as the "zero" operand to VZERO/VTIMESI in the kernels below.
#define zz Z0
|
||||
|
||||
// Double-precision "multiply by i" kernel: LOAD_CHI(ptr1), one VTIMESI per
// Chi_xx -> UChi_xx register pair (six in total), then SAVE_UCHI(ptr3).
// main() calls it on a vHalfSpinColourVectorD and compares against timesI(vec).
// At this point in the file VTIMESI/VZERO are bound to the …d variants and zz to Z0.
void TimesIAvx512(void *ptr1,void *ptr3)
|
||||
{
|
||||
// Load 0xAAAA (alternating bits) into eax, copy it to mask register k6,
// and put its complement in k7.
// NOTE(review): MASK_REGS below may repeat this setup (TimesIAvx512F uses
// MASK_REGS alone) -- confirm against simd/Avx512Asm.h.
__asm__ ("mov $0xAAAA, %%eax " : : :"%eax");
|
||||
__asm__ ("kmov %%eax, %%k6 " : : :);
|
||||
__asm__ ("knot %%k6, %%k7 " : : :);
|
||||
|
||||
|
||||
MASK_REGS;
|
||||
|
||||
LOAD_CHI(ptr1);
|
||||
|
||||
// The macro expansions below are adjacent string literals forming one asm block.
__asm__ (
|
||||
VZERO(zz)
|
||||
VTIMESI(Chi_00,UChi_00,zz)
|
||||
VTIMESI(Chi_01,UChi_01,zz)
|
||||
VTIMESI(Chi_02,UChi_02,zz)
|
||||
VTIMESI(Chi_10,UChi_10,zz)
|
||||
VTIMESI(Chi_11,UChi_11,zz)
|
||||
VTIMESI(Chi_12,UChi_12,zz)
|
||||
);
|
||||
|
||||
SAVE_UCHI(ptr3);
|
||||
}
|
||||
|
||||
// Double-precision kernel: LOAD_CHI(ptr1), MULT_2SPIN(ptr2), SAVE_UCHI(ptr3).
// main() calls it with (half-spinor in, colour matrix, half-spinor out) and
// compares the output against mat*vec.
// Control flow is a prototype "subroutine via computed goto": jump to `mult`,
// then jump back through table[return_address]. With return_address == 0 that
// is always `save`, so the executed order is LOAD_CHI -> MULT_2SPIN -> SAVE_UCHI.
// `&&label` and `goto *expr` are the GNU labels-as-values extension.
void WilsonDslashAvx512(void *ptr1,void *ptr2,void *ptr3)
|
||||
{
|
||||
// Index into `table` selecting where `mult` jumps back to.
int return_address;
|
||||
// prototype computed goto to eliminate ABI save restore on call/return in
|
||||
// generated assembly.
|
||||
static void * table[] = { &&save, &&mult };
|
||||
|
||||
MASK_REGS;
|
||||
|
||||
LOAD_CHI(ptr1);
|
||||
|
||||
return_address = 0;
|
||||
goto mult;
|
||||
|
||||
// "Return site" 0: store the result and leave.
save:
|
||||
SAVE_UCHI(ptr3);
|
||||
return;
|
||||
|
||||
// "Subroutine": do the multiply, then jump back to the selected return site.
mult:
|
||||
MULT_2SPIN(ptr2);
|
||||
goto *table[return_address];
|
||||
|
||||
}
|
||||
|
||||
// ---- Single-precision bindings ----------------------------------------------
// Drop the double-precision bindings made earlier in this file and rebind the
// precision-neutral names to their single-precision (…f) variants for the
// kernels defined directly below (TimesIAvx512F, WilsonDslashAvx512F).
//
// Every name defined here is #undef'd first; VMOV, VADD and VSUB are included
// so that all definitions in this section are protected in the same way.
// VTIMESI0/1/2 are undefined and deliberately not redefined here.
// #undef of a name that is not currently defined is a no-op.
#undef VLOAD
#undef VSTORE
#undef VMUL
#undef VMADD
#undef ZEND1
#undef ZEND2
#undef ZLOAD
#undef ZMUL
#undef ZMADD
#undef VZERO
#undef VMOV
#undef VADD
#undef VSUB
#undef VTIMESI
#undef VTIMESI0
#undef VTIMESI1
#undef VTIMESI2
#undef VTIMESMINUSI
#undef ZMULMEM2SP
#undef ZMADDMEM2SP

#define VZERO(A)                  VZEROf(A)
#define VMOV(A,B)                 VMOVf(A,B)
#define VADD(A,B,C)               VADDf(A,B,C)
#define VSUB(A,B,C)               VSUBf(A,B,C)
#define VTIMESI(A,B,C)            VTIMESIf(A,B,C)
#define VTIMESMINUSI(A,B,C)       VTIMESMINUSIf(A,B,C)

#define VLOAD(OFF,PTR,DEST)       VLOADf(OFF,PTR,DEST)
#define VSTORE(OFF,PTR,SRC)       VSTOREf(OFF,PTR,SRC)
#define VMUL(Uri,Uir,Chi,UChi,Z)  VMULf(Uri,Uir,Chi,UChi,Z)
#define VMADD(Uri,Uir,Chi,UChi,Z) VMADDf(Uri,Uir,Chi,UChi,Z)
#define ZEND1(A,B,C)              ZEND1f(A,B,C)
#define ZEND2(A,B,C)              ZEND2f(A,B,C)
#define ZLOAD(A,B,C,D)            ZLOADf(A,B,C,D)
#define ZMUL(A,B,C,D,E)           ZMULf(A,B,C,D,E)
#define ZMADD(A,B,C,D,E)          ZMADDf(A,B,C,D,E)
#define ZMULMEM2SP(O,P,tmp,B,C,Briir,Biirr,Criir,Ciirr)  ZMULMEM2SPf(O,P,tmp,B,C,Briir,Biirr,Criir,Ciirr)
#define ZMADDMEM2SP(O,P,tmp,B,C,Briir,Biirr,Criir,Ciirr) ZMADDMEM2SPf(O,P,tmp,B,C,Briir,Biirr,Criir,Ciirr)
|
||||
|
||||
// Single-precision "multiply by i" kernel: MASK_REGS, LOAD_CHI(ptr1), one
// VTIMESI per Chi_xx -> UChi_xx register pair (six in total), SAVE_UCHI(ptr3).
// main() calls it on a vHalfSpinColourVectorF and compares against timesI(vecF).
// At this point in the file VTIMESI/VZERO are bound to the …f variants.
void TimesIAvx512F(void *ptr1,void *ptr3)
|
||||
{
|
||||
MASK_REGS;
|
||||
|
||||
LOAD_CHI(ptr1);
|
||||
// The macro expansions below are adjacent string literals forming one asm block.
__asm__ (
|
||||
VZERO(zz)
|
||||
VTIMESI(Chi_00,UChi_00,zz)
|
||||
VTIMESI(Chi_01,UChi_01,zz)
|
||||
VTIMESI(Chi_02,UChi_02,zz)
|
||||
VTIMESI(Chi_10,UChi_10,zz)
|
||||
VTIMESI(Chi_11,UChi_11,zz)
|
||||
VTIMESI(Chi_12,UChi_12,zz)
|
||||
);
|
||||
SAVE_UCHI(ptr3);
|
||||
}
|
||||
|
||||
// Single-precision kernel, straight-line version of WilsonDslashAvx512:
// MASK_REGS, LOAD_CHI(ptr1), MULT_2SPIN(ptr2), SAVE_UCHI(ptr3).
// main() calls it with (half-spinor in, colour matrix, half-spinor out) and
// compares the output against matF*vecF.
// The macros are the same names as in the double kernel; they pick up the …f
// bindings defined just above this function.
void WilsonDslashAvx512F(void *ptr1,void *ptr2,void *ptr3)
|
||||
{
|
||||
MASK_REGS;
|
||||
|
||||
LOAD_CHI(ptr1);
|
||||
|
||||
MULT_2SPIN(ptr2);
|
||||
|
||||
SAVE_UCHI(ptr3);
|
||||
|
||||
return;
|
||||
}
|
||||
|
Reference in New Issue
Block a user