1
0
mirror of https://github.com/paboyle/Grid.git synced 2024-09-20 09:15:38 +01:00

Merge branch 'master' of github.com:paboyle/Grid

This commit is contained in:
Peter Boyle 2015-11-04 05:14:26 -06:00
commit dfc1de6f60
13 changed files with 260 additions and 99 deletions

View File

@ -24,7 +24,7 @@ int main (int argc, char ** argv)
std::cout<<GridLogMessage << "Grid is setup to use "<<threads<<" threads"<<std::endl;
std::vector<int> latt4 = GridDefaultLatt();
const int Ls=8;
const int Ls=16;
GridCartesian * UGrid = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(), GridDefaultSimd(Nd,vComplex::Nsimd()),GridDefaultMpi());
GridRedBlackCartesian * UrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid);
GridCartesian * FGrid = SpaceTimeGrid::makeFiveDimGrid(Ls,UGrid);
@ -82,22 +82,24 @@ int main (int argc, char ** argv)
DomainWallFermionR Dw(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5);
std::cout<<GridLogMessage << "Calling Dw"<<std::endl;
int ncall=10;
double t0=usecond();
for(int i=0;i<ncall;i++){
Dw.Dhop(src,result,0);
}
double t1=usecond();
int ncall=100;
{
double t0=usecond();
for(int i=0;i<ncall;i++){
Dw.Dhop(src,result,0);
}
double t1=usecond();
double volume=Ls; for(int mu=0;mu<Nd;mu++) volume=volume*latt4[mu];
double flops=1344*volume*ncall;
double volume=Ls; for(int mu=0;mu<Nd;mu++) volume=volume*latt4[mu];
double flops=1344*volume*ncall;
std::cout<<GridLogMessage << "Called Dw"<<std::endl;
std::cout<<GridLogMessage << "norm result "<< norm2(result)<<std::endl;
std::cout<<GridLogMessage << "norm ref "<< norm2(ref)<<std::endl;
std::cout<<GridLogMessage << "mflop/s = "<< flops/(t1-t0)<<std::endl;
err = ref-result;
std::cout<<GridLogMessage << "norm diff "<< norm2(err)<<std::endl;
std::cout<<GridLogMessage << "Called Dw"<<std::endl;
std::cout<<GridLogMessage << "norm result "<< norm2(result)<<std::endl;
std::cout<<GridLogMessage << "norm ref "<< norm2(ref)<<std::endl;
std::cout<<GridLogMessage << "mflop/s = "<< flops/(t1-t0)<<std::endl;
err = ref-result;
std::cout<<GridLogMessage << "norm diff "<< norm2(err)<<std::endl;
}
if (1)
@ -140,6 +142,18 @@ int main (int argc, char ** argv)
std::cout<<GridLogMessage << "src_e"<<norm2(src_e)<<std::endl;
std::cout<<GridLogMessage << "src_o"<<norm2(src_o)<<std::endl;
{
double t0=usecond();
for(int i=0;i<ncall;i++){
Dw.DhopEO(src_o,r_e,DaggerNo);
}
double t1=usecond();
double volume=Ls; for(int mu=0;mu<Nd;mu++) volume=volume*latt4[mu];
double flops=(1344.0*volume*ncall)/2;
std::cout<<GridLogMessage << "Deo mflop/s = "<< flops/(t1-t0)<<std::endl;
}
Dw.DhopEO(src_o,r_e,DaggerNo);
Dw.DhopOE(src_e,r_o,DaggerNo);

View File

@ -90,7 +90,7 @@ int main (int argc, char ** argv)
WilsonFermionR Dw(Umu,Grid,RBGrid,mass);
std::cout<<GridLogMessage << "Calling Dw"<<std::endl;
int ncall=10000;
int ncall=1000;
double t0=usecond();
for(int i=0;i<ncall;i++){
Dw.Dhop(src,result,0);

View File

@ -124,6 +124,7 @@ namespace Grid {
if ( comm_dim ) {
sshift[0] = _grid->CheckerBoardShiftForCB(_checkerboard,dimension,shift,Even);
sshift[1] = _grid->CheckerBoardShiftForCB(_checkerboard,dimension,shift,Odd);
// std::cout << "dim "<<dimension<<"cb "<<_checkerboard<<"shift "<<shift<<" sshift " << sshift[0]<<" "<<sshift[1]<<std::endl;
if ( sshift[0] == sshift[1] ) {
if (splice_dim) {
GatherStartCommsSimd(source,dimension,shift,0x3,u_comm_buf,u_comm_offset,compress);
@ -164,23 +165,23 @@ namespace Grid {
assert(comm_dim==1);
assert(shift>=0);
assert(shift<fd);
int buffer_size = _grid->_slice_nblock[dimension]*_grid->_slice_block[dimension];
std::vector<cobj,alignedAllocator<cobj> > send_buf(buffer_size); // hmm...
std::vector<cobj,alignedAllocator<cobj> > recv_buf(buffer_size);
int cb= (cbmask==0x2)? Odd : Even;
int sshift= _grid->CheckerBoardShiftForCB(rhs.checkerboard,dimension,shift,cb);
for(int x=0;x<rd;x++){
int sx = (x+sshift)%rd;
int comm_proc = ((x+sshift)/rd)%pd;
if (comm_proc) {
int words = send_buf.size();
int words = buffer_size;
if (cbmask != 0x3) words=words>>1;
int bytes = words * sizeof(cobj);
@ -201,10 +202,11 @@ namespace Grid {
recv_from_rank,
bytes);
for(int i=0;i<buffer_size;i++){
for(int i=0;i<words;i++){
u_comm_buf[u_comm_offset+i]=recv_buf[i];
// std::cout << " Halo["<<i<<"] snd "<<send_buf[i]<< " rcv "<<recv_buf[i]<<" mask 0x"<<cbmask<<std::endl;
}
u_comm_offset+=buffer_size;
u_comm_offset+=words;
}
}
}
@ -241,6 +243,7 @@ namespace Grid {
int buffer_size = _grid->_slice_nblock[dimension]*_grid->_slice_block[dimension];
int words = sizeof(cobj)/sizeof(vector_type);
assert(cbmask==0x3); // Fixme think there is a latent bug if not true
/*
* possibly slow to allocate
* Doesn't matter in this test, but may want to preallocate in the

View File

@ -29,14 +29,15 @@ Gather_plane_simple (const Lattice<vobj> &rhs,std::vector<cobj,alignedAllocator<
int e1=rhs._grid->_slice_nblock[dimension];
int e2=rhs._grid->_slice_block[dimension];
PARALLEL_NESTED_LOOP2
int bo=0;
//PARALLEL_NESTED_LOOP21
for(int n=0;n<e1;n++){
for(int b=0;b<e2;b++){
int o = n*rhs._grid->_slice_stride[dimension];
int bo = n*rhs._grid->_slice_block[dimension];
// int bo = n*rhs._grid->_slice_block[dimension];
int ocb=1<<rhs._grid->CheckerBoardFromOindex(o+b);// Could easily be a table lookup
if ( ocb &cbmask ) {
buffer[bo+b]=compress(rhs._odata[so+o+b],dimension,plane,so+o+b,rhs._grid);
buffer[bo++]=compress(rhs._odata[so+o+b],dimension,plane,so+o+b,rhs._grid);
}
}
}
@ -59,7 +60,7 @@ Gather_plane_extract(const Lattice<vobj> &rhs,std::vector<typename cobj::scalar_
int e1=rhs._grid->_slice_nblock[dimension];
int e2=rhs._grid->_slice_block[dimension];
PARALLEL_NESTED_LOOP2
//PARALLEL_NESTED_LOOP2
for(int n=0;n<e1;n++){
for(int b=0;b<e2;b++){
@ -109,14 +110,15 @@ template<class vobj> void Scatter_plane_simple (Lattice<vobj> &rhs,std::vector<v
int e1=rhs._grid->_slice_nblock[dimension];
int e2=rhs._grid->_slice_block[dimension];
PARALLEL_NESTED_LOOP2
int bo=0;
//PARALLEL_NESTED_LOOP2
for(int n=0;n<e1;n++){
for(int b=0;b<e2;b++){
int o =n*rhs._grid->_slice_stride[dimension];
int bo =n*rhs._grid->_slice_block[dimension];
// int bo =n*rhs._grid->_slice_block[dimension];
int ocb=1<<rhs._grid->CheckerBoardFromOindex(o+b);// Could easily be a table lookup
if ( ocb & cbmask ) {
rhs._odata[so+o+b]=buffer[bo+b];
rhs._odata[so+o+b]=buffer[bo++];
}
}
}

View File

@ -9,7 +9,7 @@ template<class vobj> Lattice<vobj> Cshift(const Lattice<vobj> &rhs,int dimension
typedef typename vobj::vector_type vector_type;
typedef typename vobj::scalar_type scalar_type;
Lattice<vobj> ret(rhs._grid);
Lattice<vobj> ret(rhs._grid);
int fd = rhs._grid->_fdimensions[dimension];
int rd = rhs._grid->_rdimensions[dimension];
@ -26,10 +26,13 @@ template<class vobj> Lattice<vobj> Cshift(const Lattice<vobj> &rhs,int dimension
if ( !comm_dim ) {
// std::cout << "Cshift_local" <<std::endl;
Cshift_local(ret,rhs,dimension,shift); // Handles checkerboarding
} else if ( splice_dim ) {
// std::cout << "Cshift_comms_simd" <<std::endl;
Cshift_comms_simd(ret,rhs,dimension,shift);
} else {
// std::cout << "Cshift_comms" <<std::endl;
Cshift_comms(ret,rhs,dimension,shift);
}
return ret;
@ -42,9 +45,13 @@ template<class vobj> void Cshift_comms(Lattice<vobj>& ret,const Lattice<vobj> &r
sshift[0] = rhs._grid->CheckerBoardShiftForCB(rhs.checkerboard,dimension,shift,Even);
sshift[1] = rhs._grid->CheckerBoardShiftForCB(rhs.checkerboard,dimension,shift,Odd);
// std::cout << "Cshift_comms dim "<<dimension<<"cb "<<rhs.checkerboard<<"shift "<<shift<<" sshift " << sshift[0]<<" "<<sshift[1]<<std::endl;
if ( sshift[0] == sshift[1] ) {
// std::cout << "Single pass Cshift_comms" <<std::endl;
Cshift_comms(ret,rhs,dimension,shift,0x3);
} else {
// std::cout << "Two pass Cshift_comms" <<std::endl;
Cshift_comms(ret,rhs,dimension,shift,0x1);// if checkerboard is unfavourable take two passes
Cshift_comms(ret,rhs,dimension,shift,0x2);// both with block stride loop iteration
}
@ -113,12 +120,16 @@ template<class vobj> void Cshift_comms(Lattice<vobj> &ret,const Lattice<vobj> &r
int xmit_to_rank;
grid->ShiftedRanks(dimension,comm_proc,xmit_to_rank,recv_from_rank);
grid->SendToRecvFrom((void *)&send_buf[0],
xmit_to_rank,
(void *)&recv_buf[0],
recv_from_rank,
bytes);
// for(int i=0;i<words;i++){
// std::cout << "SendRecv ["<<i<<"] snd "<<send_buf[i]<<" rcv " << recv_buf[i] << " 0x" << cbmask<<std::endl;
// }
Scatter_plane_simple (ret,recv_buf,dimension,x,cbmask);
}
}

View File

@ -82,7 +82,8 @@ namespace QCD {
template<class Impl>
void WilsonFermion<Impl>::Mooee(const FermionField &in, FermionField &out) {
out.checkerboard = in.checkerboard;
out = (4.0+mass)*in;
typename FermionField::scalar_type scal(4.0+mass);
out = scal*in;
}
template<class Impl>

View File

@ -78,7 +78,7 @@ void WilsonKernels<Impl>::DiracOptDhopSite(CartesianStencil &st,DoubledGaugeFiel
}
Impl::multLink(Uchi,U._odata[sU],chi,Xm,SE,st);
accumReconXm(result,Uchi);
// Ym
SE=st.GetEntry(ptype,Ym,sF);
if ( SE->_is_local && SE->_permute ) {

View File

@ -524,16 +524,22 @@ Note that in step D setting B ~ X - A and using B in place of A in step E will g
// reunitarise??
static void LieRandomize(GridParallelRNG &pRNG,LatticeMatrix &out,double scale=1.0){
GridBase *grid = out._grid;
LatticeComplex ca (grid);
LatticeMatrix lie(grid);
LatticeMatrix la (grid);
Complex ci(0.0,scale);
Complex cone(1.0,0.0);
Matrix ta;
lie=zero;
for(int a=0;a<generators();a++){
random(pRNG,ca); ca=real(ca)-0.5;
random(pRNG,ca);
ca = (ca+conjugate(ca))*0.5;
ca = ca - 0.5;
generator(a,ta);
la=ci*ca*ta;

View File

@ -9,10 +9,10 @@
#include <immintrin.h>
#ifndef KNC_ONLY_STORES
#define _mm512_storenrngo_ps _mm512_store_ps // not present in AVX512
#define _mm512_storenrngo_pd _mm512_store_pd // not present in AVX512
#endif
//#ifndef KNC_ONLY_STORES
//#define _mm512_storenrngo_ps _mm512_store_ps // not present in AVX512
//#define _mm512_storenrngo_pd _mm512_store_pd // not present in AVX512
//#endif
namespace Optimization {

View File

@ -8,7 +8,7 @@ namespace Grid {
int checkerboard,
const std::vector<int> &directions,
const std::vector<int> &distances)
: _entries(npoints), _permute_type(npoints)
: _entries(npoints), _permute_type(npoints), _comm_buf_size(npoints)
{
_npoints = npoints;
_grid = grid;
@ -61,11 +61,17 @@ namespace Grid {
sshift[1] = _grid->CheckerBoardShiftForCB(_checkerboard,dimension,shift,Odd);
if ( sshift[0] == sshift[1] ) {
Comms(point,dimension,shift,0x3);
// std::cout<<"Comms 0x3"<<std::endl;
} else {
Comms(point,dimension,shift,0x1);// if checkerboard is unfavourable take two passes
Comms(point,dimension,shift,0x2);// both with block stride loop iteration
// std::cout<<"Comms 0x1 ; 0x2"<<std::endl;
}
}
// for(int ss=0;ss<osites;ss++){
// std::cout << "point["<<i<<"] "<<ss<<"-> o"<<_entries[i][ss]._offset<<"; l"<<
// _entries[i][ss]._is_local<<"; p"<<_entries[i][ss]._permute<<std::endl;
// }
}
}
@ -139,13 +145,14 @@ namespace Grid {
int cb= (cbmask==0x2)? Odd : Even;
int sshift= _grid->CheckerBoardShiftForCB(_checkerboard,dimension,shift,cb);
for(int x=0;x<rd;x++){
int offnode = (((x+sshift)%fd) >= rd );
// int comm_proc = ((x+sshift)/ld)%pd;
// int offnode = (comm_proc!=0);
int sx = (x+sshift)%rd;
for(int x=0;x<rd;x++){
int sx = (x+sshift)%rd;
int comm_proc = ((x+sshift)/rd)%pd;
int offnode = (comm_proc!= 0);
// std::cout << "Stencil shift "<<shift<<" sshift "<<sshift<<" fd "<<fd<<" rd " <<rd<<" offnode "<<offnode<<" sx "<<sx<<std::endl;
int wraparound=0;
if ( (shiftpm==-1) && (sx>x) && (grid->_processor_coor[dimension]==0) ) {
wraparound = 1;
@ -249,7 +256,7 @@ namespace Grid {
int so = plane*_grid->_ostride[dimension]; // base offset for start of plane
int o = 0; // relative offset to base within plane
int bo = 0; // offset in buffer
for(int n=0;n<_grid->_slice_nblock[dimension];n++){
for(int b=0;b<_grid->_slice_block[dimension];b++){

View File

@ -54,27 +54,27 @@ int main (int argc, char ** argv)
TComplex cm;
for(int dir=0;dir<Nd;dir++){
if ( dir!=1 ) continue;
// if ( dir!=1 ) continue;
for(int shift=0;shift<latt_size[dir];shift++){
std::cout<<GridLogMessage<<"Shifting by "<<shift<<" in direction"<<dir<<std::endl;
// std::cout<<GridLogMessage<<"Even grid"<<std::endl;
std::cout<<GridLogMessage<<"Even grid"<<std::endl;
ShiftUe = Cshift(Ue,dir,shift); // Shift everything cb by cb
// std::cout<<GridLogMessage << "\tShiftUe " <<norm2(ShiftUe)<<std::endl;
std::cout<<GridLogMessage << "\tShiftUe " <<norm2(ShiftUe)<<std::endl;
// std::cout<<GridLogMessage<<"Odd grid"<<std::endl;
std::cout<<GridLogMessage<<"Odd grid"<<std::endl;
ShiftUo = Cshift(Uo,dir,shift);
// std::cout<<GridLogMessage << "\tShiftUo " <<norm2(ShiftUo)<<std::endl;
std::cout<<GridLogMessage << "\tShiftUo " <<norm2(ShiftUo)<<std::endl;
// std::cout<<GridLogMessage<<"Recombined Even/Odd grids"<<std::endl;
std::cout<<GridLogMessage<<"Recombined Even/Odd grids"<<std::endl;
setCheckerboard(rbShiftU,ShiftUe);
setCheckerboard(rbShiftU,ShiftUo);
// std::cout<<GridLogMessage << "\trbShiftU " <<norm2(rbShiftU)<<std::endl;
std::cout<<GridLogMessage << "\trbShiftU " <<norm2(rbShiftU)<<std::endl;
// std::cout<<GridLogMessage<<"Full grid shift"<<std::endl;
std::cout<<GridLogMessage<<"Full grid shift"<<std::endl;
ShiftU = Cshift(U,dir,shift); // Shift everything
// std::cout<<GridLogMessage << "\tShiftU " <<norm2(rbShiftU)<<std::endl;
std::cout<<GridLogMessage << "\tShiftU " <<norm2(rbShiftU)<<std::endl;
std::vector<int> coor(4);
@ -105,18 +105,18 @@ int main (int argc, char ** argv)
Fine.CoorFromIndex(peer,index,latt_size);
if (nrm > 0){
std::cerr<<"FAIL shift "<< shift<<" in dir "<< dir
std::cout<<"FAIL shift "<< shift<<" in dir "<< dir
<<" ["<<coor[0]<<","<<coor[1]<<","<<coor[2]<<","<<coor[3]<<"] = "
<< cm()()()<<" expect "<<scm<<" "<<nrm<<std::endl;
std::cerr<<"Got "<<index<<" " << peer[0]<<","<<peer[1]<<","<<peer[2]<<","<<peer[3]<<std::endl;
std::cout<<"Got "<<index<<" " << peer[0]<<","<<peer[1]<<","<<peer[2]<<","<<peer[3]<<std::endl;
index=real(scm);
Fine.CoorFromIndex(peer,index,latt_size);
std::cerr<<"Expect "<<index<<" " << peer[0]<<","<<peer[1]<<","<<peer[2]<<","<<peer[3]<<std::endl;
std::cout<<"Expect "<<index<<" " << peer[0]<<","<<peer[1]<<","<<peer[2]<<","<<peer[3]<<std::endl;
exit(-1);
}
}}}}
int exx=0;
std::cout<<GridLogMessage << "Checking the checkerboard shift"<<std::endl;
for(coor[3]=0;coor[3]<latt_size[3];coor[3]++){
for(coor[2]=0;coor[2]<latt_size[2];coor[2]++){
@ -144,20 +144,21 @@ int main (int argc, char ** argv)
Fine.CoorFromIndex(peer,index,latt_size);
if (nrm > 0){
std::cerr<<"FAIL shift "<< shift<<" in dir "<< dir
std::cout<<"FAIL shift "<< shift<<" in dir "<< dir
<<" ["<<coor[0]<<","<<coor[1]<<","<<coor[2]<<","<<coor[3]<<"] = "
<< cm()()()<<" expect "<<scm<<" "<<nrm<<std::endl;
std::cerr<<"Got "<<index<<" " << peer[0]<<","<<peer[1]<<","<<peer[2]<<","<<peer[3]<<std::endl;
std::cout<<"Got "<<index<<" " << peer[0]<<","<<peer[1]<<","<<peer[2]<<","<<peer[3]<<std::endl;
index=real(scm);
Fine.CoorFromIndex(peer,index,latt_size);
std::cerr<<"Expect "<<index<<" " << peer[0]<<","<<peer[1]<<","<<peer[2]<<","<<peer[3]<<std::endl;
exit(-1);
} else if (0) {
std::cout<<"Expect "<<index<<" " << peer[0]<<","<<peer[1]<<","<<peer[2]<<","<<peer[3]<<std::endl;
exx=1;
} else if (1) {
std::cout<<GridLogMessage<<"PASS shift "<< shift<<" in dir "<< dir
<<" ["<<coor[0]<<","<<coor[1]<<","<<coor[2]<<","<<coor[3]<<"] = "
<< cm()()()<<" expect "<<scm<<" "<<nrm<<std::endl;
}
}}}}
if (exx) exit(-1);
}
}

View File

@ -17,7 +17,8 @@ public:
pRNG.SeedFixedIntegers(seeds);
random(pRNG,sqrtscale);
sqrtscale = real(sqrtscale)*3.0+0.5;// force real pos def
sqrtscale = 0.5*(sqrtscale + conjugate(sqrtscale));
sqrtscale = sqrtscale*3.0+0.5;// force real pos def
scale = sqrtscale *sqrtscale; //scale should be bounded by 12.25
//

View File

@ -8,6 +8,10 @@ int main (int argc, char ** argv)
{
Grid_init(&argc,&argv);
// typedef LatticeColourMatrix Field;
typedef LatticeComplex Field;
typedef typename Field::vector_object vobj;
typedef typename vobj::scalar_object sobj;
std::vector<int> latt_size = GridDefaultLatt();
std::vector<int> simd_layout = GridDefaultSimd(Nd,vComplex::Nsimd());
@ -18,23 +22,40 @@ int main (int argc, char ** argv)
GridCartesian Fine(latt_size,simd_layout,mpi_layout);
GridRedBlackCartesian rbFine(latt_size,simd_layout,mpi_layout);
GridParallelRNG fRNG(&Fine);
// fRNG.SeedRandomDevice();
std::vector<int> seeds({1,2,3,4});
fRNG.SeedFixedIntegers(seeds);
LatticeColourMatrix Foo(&Fine);
LatticeColourMatrix Bar(&Fine);
LatticeColourMatrix Check(&Fine);
LatticeColourMatrix Diff(&Fine);
Field Foo(&Fine);
Field Bar(&Fine);
Field Check(&Fine);
Field Diff(&Fine);
LatticeComplex lex(&Fine);
lex = zero;
random(fRNG,Foo);
gaussian(fRNG,Bar);
/*
Integer stride =1000;
{
double nrm;
LatticeComplex coor(&Fine);
for(int d=0;d<Nd;d++){
LatticeCoordinate(coor,d);
lex = lex + coor*stride;
stride=stride/10;
}
Foo=lex;
}
*/
for(int dir=0;dir<4;dir++){
for(int disp=0;disp<Fine._fdimensions[dir];disp++){
std::cout<<GridLogMessage << "Using stencil to shift dim "<<dir<< " by "<<disp<<std::endl;
std::cout<< std::fixed <<GridLogMessage << "Using stencil to shift dim "<<dir<< " by "<<disp<<std::endl;
// start to test the Cartesian npoint stencil infrastructure
int npoint=1;
std::vector<int> directions(npoint,dir);
@ -48,8 +69,8 @@ int main (int argc, char ** argv)
ocoor[dir]=(ocoor[dir]+disp)%Fine._rdimensions[dir];
}
std::vector<vColourMatrix,alignedAllocator<vColourMatrix> > comm_buf(myStencil._unified_buffer_size);
SimpleCompressor<vColourMatrix> compress;
std::vector<vobj,alignedAllocator<vobj> > comm_buf(myStencil._unified_buffer_size);
SimpleCompressor<vobj> compress;
myStencil.HaloExchange(Foo,comm_buf,compress);
Bar = Cshift(Foo,dir,disp);
@ -75,9 +96,114 @@ int main (int argc, char ** argv)
Real nrm = norm2(Diff);
std::cout<<GridLogMessage<<"N2diff ="<<nrm<<" "<<nrmC<<" " <<nrmB<<std::endl;
Real snrmC =0;
Real snrmB =0;
Real snrm =0;
std::vector<int> coor(4);
for(coor[3]=0;coor[3]<latt_size[3]/mpi_layout[3];coor[3]++){
for(coor[2]=0;coor[2]<latt_size[2]/mpi_layout[2];coor[2]++){
for(coor[1]=0;coor[1]<latt_size[1]/mpi_layout[1];coor[1]++){
for(coor[0]=0;coor[0]<latt_size[0]/mpi_layout[0];coor[0]++){
RealD diff;
sobj check,bar;
peekSite(check,Check,coor);
peekSite(bar,Bar,coor);
sobj ddiff;
ddiff = check -bar;
diff =norm2(ddiff);
if ( diff > 0){
std::cout <<"Coor (" << coor[0]<<","<<coor[1]<<","<<coor[2]<<","<<coor[3]
<<") " <<check<<" vs "<<bar<<std::endl;
}
}}}}
}
}
std::cout<<GridLogMessage<<"Testing RedBlack\n ";
Field EFoo(&rbFine);
Field OFoo(&rbFine);
Field ECheck(&rbFine);
Field OCheck(&rbFine);
pickCheckerboard(Even,EFoo,Foo);
pickCheckerboard(Odd ,OFoo,Foo);
for(int dir=0;dir<4;dir++){
for(int disp=0;disp<rbFine._fdimensions[dir];disp++){
std::cout<<GridLogMessage << "Using stencil to shift rb dim "<<dir<< " by "<<disp<<std::endl;
// start to test the Cartesian npoint stencil infrastructure
int npoint=1;
std::vector<int> directions(npoint,dir);
std::vector<int> displacements(npoint,disp);
CartesianStencil EStencil(&rbFine,npoint,Even,directions,displacements);
CartesianStencil OStencil(&rbFine,npoint,Odd,directions,displacements);
std::vector<int> ocoor(4);
for(int o=0;o<Fine.oSites();o++){
Fine.oCoorFromOindex(ocoor,o);
ocoor[dir]=(ocoor[dir]+disp)%Fine._rdimensions[dir];
}
std::vector<vobj,alignedAllocator<vobj> > Ecomm_buf(EStencil._unified_buffer_size);
std::vector<vobj,alignedAllocator<vobj> > Ocomm_buf(OStencil._unified_buffer_size);
SimpleCompressor<vobj> compress;
EStencil.HaloExchange(EFoo,Ecomm_buf,compress);
OStencil.HaloExchange(OFoo,Ocomm_buf,compress);
Bar = Cshift(Foo,dir,disp);
if ( disp & 0x1 ) {
ECheck.checkerboard = Even;
OCheck.checkerboard = Odd;
} else {
ECheck.checkerboard = Odd;
OCheck.checkerboard = Even;
}
// Implement a stencil code that should agree with that darn cshift!
for(int i=0;i<OCheck._grid->oSites();i++){
int permute_type;
StencilEntry *SE;
SE = EStencil.GetEntry(permute_type,0,i);
std::cout << "Even source "<< i<<" -> " <<SE->_offset << " "<< SE->_is_local<<std::endl;
if ( SE->_is_local && SE->_permute )
permute(OCheck._odata[i],EFoo._odata[SE->_offset],permute_type);
else if (SE->_is_local)
OCheck._odata[i] = EFoo._odata[SE->_offset];
else
OCheck._odata[i] = Ecomm_buf[SE->_offset];
}
for(int i=0;i<ECheck._grid->oSites();i++){
int permute_type;
StencilEntry *SE;
SE = OStencil.GetEntry(permute_type,0,i);
std::cout << "ODD source "<< i<<" -> " <<SE->_offset << " "<< SE->_is_local<<std::endl;
if ( SE->_is_local && SE->_permute )
permute(ECheck._odata[i],OFoo._odata[SE->_offset],permute_type);
else if (SE->_is_local)
ECheck._odata[i] = OFoo._odata[SE->_offset];
else
ECheck._odata[i] = Ocomm_buf[SE->_offset];
}
setCheckerboard(Check,ECheck);
setCheckerboard(Check,OCheck);
Real nrmC = norm2(Check);
Real nrmB = norm2(Bar);
Diff = Check-Bar;
Real nrm = norm2(Diff);
std::cout<<GridLogMessage<<"RB N2diff ="<<nrm<<" "<<nrmC<<" " <<nrmB<<std::endl;
std::vector<int> coor(4);
for(coor[3]=0;coor[3]<latt_size[3]/mpi_layout[3];coor[3]++){
@ -85,33 +211,22 @@ int main (int argc, char ** argv)
for(coor[1]=0;coor[1]<latt_size[1]/mpi_layout[1];coor[1]++){
for(coor[0]=0;coor[0]<latt_size[0]/mpi_layout[0];coor[0]++){
Complex diff;
ColourMatrix check,bar;
RealD diff;
sobj check,bar;
peekSite(check,Check,coor);
peekSite(bar,Bar,coor);
for(int r=0;r<3;r++){
for(int c=0;c<3;c++){
diff =check()()(r,c)-bar()()(r,c);
double nn=real(conjugate(diff)*diff);
if ( nn > 0){
printf("Coor (%d %d %d %d) \t rc %d%d \t %le (%le,%le) %le\n",
coor[0],coor[1],coor[2],coor[3],r,c,
nn,
real(check()()(r,c)),
imag(check()()(r,c)),
real(bar()()(r,c))
);
}
snrmC=snrmC+real(conjugate(check()()(r,c))*check()()(r,c));
snrmB=snrmB+real(conjugate(bar()()(r,c))*bar()()(r,c));
snrm=snrm+nn;
}}
sobj ddiff;
ddiff = check -bar;
diff =norm2(ddiff);
if ( diff > 0){
std::cout <<"Coor (" << coor[0]<<","<<coor[1]<<","<<coor[2]<<","<<coor[3] <<") "
<<"shift "<<disp<<" dir "<< dir
<< " stencil impl " <<check<<" vs cshift impl "<<bar<<std::endl;
}
}}}}
std::cout<<GridLogMessage<<"scalar N2diff = "<<snrm<<" " <<snrmC<<" "<<snrmB<<std::endl;
}
}