mirror of
https://github.com/paboyle/Grid.git
synced 2024-11-09 23:45:36 +00:00
Reworking CSHIFT and Stencil. Implementing Wilson and discovered rework is required
This commit is contained in:
parent
94f728bee4
commit
f159495a9d
4
TODO
4
TODO
@ -2,6 +2,10 @@
|
||||
- use protocol buffers? replace xmlReader/Writer ec..
|
||||
- Binary use htonll, htonl
|
||||
|
||||
* Stencil -- do the permute for locally permuted in halo exchange.
|
||||
- BUG cshift mpi; the "s" indexing is weird in the Cshift_comms_simd
|
||||
as simd_layout or not is confusing
|
||||
|
||||
* Reduce implemention is poor
|
||||
* Bug in SeedFixedIntegers gives same output on each site.
|
||||
* Bug in RNG with complex numbers ; only filling real values; need helper function -- DONE
|
||||
|
@ -2,7 +2,7 @@
|
||||
/* lib/Grid_config.h.in. Generated from configure.ac by autoheader. */
|
||||
|
||||
/* AVX */
|
||||
/* #undef AVX1 */
|
||||
#define AVX1 1
|
||||
|
||||
/* AVX2 */
|
||||
/* #undef AVX2 */
|
||||
@ -77,7 +77,7 @@
|
||||
#define PACKAGE_VERSION "1.0"
|
||||
|
||||
/* SSE4 */
|
||||
#define SSE4 1
|
||||
/* #undef SSE4 */
|
||||
|
||||
/* Define to 1 if you have the ANSI C header files. */
|
||||
#define STDC_HEADERS 1
|
||||
|
@ -52,7 +52,7 @@ namespace Grid {
|
||||
// Gather for when there is no need to SIMD split with compression
|
||||
///////////////////////////////////////////////////////////////////
|
||||
template<class vobj,class cobj,class compressor> void
|
||||
Gather_plane_simple (Lattice<vobj> &rhs,std::vector<cobj,alignedAllocator<cobj> > &buffer,int dimension,int plane,int cbmask,compressor &compress)
|
||||
Gather_plane_simple (const Lattice<vobj> &rhs,std::vector<cobj,alignedAllocator<cobj> > &buffer,int dimension,int plane,int cbmask,compressor &compress)
|
||||
{
|
||||
int rd = rhs._grid->_rdimensions[dimension];
|
||||
|
||||
@ -97,7 +97,7 @@ Gather_plane_simple (Lattice<vobj> &rhs,std::vector<cobj,alignedAllocator<cobj>
|
||||
// Gather for when there *is* need to SIMD split with compression
|
||||
///////////////////////////////////////////////////////////////////
|
||||
template<class cobj,class vobj,class compressor> void
|
||||
Gather_plane_extract(Lattice<vobj> &rhs,std::vector<typename cobj::scalar_type *> pointers,int dimension,int plane,int cbmask,compressor &compress)
|
||||
Gather_plane_extract(const Lattice<vobj> &rhs,std::vector<typename cobj::scalar_type *> pointers,int dimension,int plane,int cbmask,compressor &compress)
|
||||
{
|
||||
int rd = rhs._grid->_rdimensions[dimension];
|
||||
|
||||
@ -182,7 +182,7 @@ template<class cobj,class vobj,class compressor> void
|
||||
// Could allow a functional munging of the halo to another type during the comms.
|
||||
// this could implement the 16bit/32bit/64bit compression.
|
||||
template<class vobj,class cobj, class compressor> void
|
||||
HaloExchange(Lattice<vobj> &source,std::vector<cobj,alignedAllocator<cobj> > &u_comm_buf,compressor &compress)
|
||||
HaloExchange(const Lattice<vobj> &source,std::vector<cobj,alignedAllocator<cobj> > &u_comm_buf,compressor &compress)
|
||||
{
|
||||
// conformable(source._grid,_grid);
|
||||
assert(source._grid==_grid);
|
||||
@ -237,7 +237,7 @@ template<class cobj,class vobj,class compressor> void
|
||||
}
|
||||
|
||||
template<class vobj,class cobj, class compressor>
|
||||
void GatherStartComms(Lattice<vobj> &rhs,int dimension,int shift,int cbmask,
|
||||
void GatherStartComms(const Lattice<vobj> &rhs,int dimension,int shift,int cbmask,
|
||||
std::vector<cobj,alignedAllocator<cobj> > &u_comm_buf,
|
||||
int &u_comm_offset,compressor & compress)
|
||||
{
|
||||
@ -250,6 +250,7 @@ template<class cobj,class vobj,class compressor> void
|
||||
|
||||
int fd = _grid->_fdimensions[dimension];
|
||||
int rd = _grid->_rdimensions[dimension];
|
||||
int pd = _grid->_processors[dimension];
|
||||
int simd_layout = _grid->_simd_layout[dimension];
|
||||
int comm_dim = _grid->_processors[dimension] >1 ;
|
||||
assert(simd_layout==1);
|
||||
@ -267,11 +268,10 @@ template<class cobj,class vobj,class compressor> void
|
||||
|
||||
for(int x=0;x<rd;x++){
|
||||
|
||||
int offnode = ( x+sshift >= rd );
|
||||
int sx = (x+sshift)%rd;
|
||||
int comm_proc = (x+sshift)/rd;
|
||||
|
||||
if (offnode) {
|
||||
int comm_proc = ((x+sshift)/rd)%pd;
|
||||
|
||||
if (comm_proc) {
|
||||
|
||||
int words = send_buf.size();
|
||||
if (cbmask != 0x3) words=words>>1;
|
||||
@ -284,7 +284,8 @@ template<class cobj,class vobj,class compressor> void
|
||||
int recv_from_rank;
|
||||
int xmit_to_rank;
|
||||
_grid->ShiftedRanks(dimension,comm_proc,xmit_to_rank,recv_from_rank);
|
||||
|
||||
assert (xmit_to_rank != _grid->ThisRank());
|
||||
assert (recv_from_rank != _grid->ThisRank());
|
||||
// FIXME Implement asynchronous send & also avoid buffer copy
|
||||
_grid->SendToRecvFrom((void *)&send_buf[0],
|
||||
xmit_to_rank,
|
||||
@ -293,7 +294,11 @@ template<class cobj,class vobj,class compressor> void
|
||||
bytes);
|
||||
printf("GatherStartComms communicated offnode x %d\n",x);fflush(stdout);
|
||||
|
||||
printf("GatherStartComms inserting %d buf size %d\n",u_comm_offset,buffer_size);fflush(stdout);
|
||||
printf("GatherStartComms inserting %le to u_comm_offset %d buf size %d for dim %d shift %d\n",
|
||||
*( (RealF *) &recv_buf[0]),
|
||||
u_comm_offset,buffer_size,
|
||||
dimension,shift
|
||||
); fflush(stdout);
|
||||
for(int i=0;i<buffer_size;i++){
|
||||
u_comm_buf[u_comm_offset+i]=recv_buf[i];
|
||||
}
|
||||
@ -305,17 +310,19 @@ template<class cobj,class vobj,class compressor> void
|
||||
|
||||
|
||||
template<class vobj,class cobj, class compressor>
|
||||
void GatherStartCommsSimd(Lattice<vobj> &rhs,int dimension,int shift,int cbmask,
|
||||
void GatherStartCommsSimd(const Lattice<vobj> &rhs,int dimension,int shift,int cbmask,
|
||||
std::vector<cobj,alignedAllocator<cobj> > &u_comm_buf,
|
||||
int &u_comm_offset,compressor &compress)
|
||||
{
|
||||
const int Nsimd = _grid->Nsimd();
|
||||
|
||||
typedef typename cobj::vector_type vector_type;
|
||||
typedef typename cobj::scalar_type scalar_type;
|
||||
|
||||
int fd = _grid->_fdimensions[dimension];
|
||||
int rd = _grid->_rdimensions[dimension];
|
||||
int ld = _grid->_ldimensions[dimension];
|
||||
int pd = _grid->_processors[dimension];
|
||||
int simd_layout = _grid->_simd_layout[dimension];
|
||||
int comm_dim = _grid->_processors[dimension] >1 ;
|
||||
|
||||
@ -346,89 +353,62 @@ template<class cobj,class vobj,class compressor> void
|
||||
|
||||
int cb = (cbmask==0x2)? 1 : 0;
|
||||
int sshift= _grid->CheckerBoardShift(rhs.checkerboard,dimension,shift,cb);
|
||||
|
||||
std::vector<int> comm_offnode(simd_layout);
|
||||
std::vector<int> comm_proc (simd_layout); //relative processor coord in dim=dimension
|
||||
std::vector<int> icoor(_grid->Nd());
|
||||
|
||||
for(int x=0;x<rd;x++){
|
||||
|
||||
// loop over outer coord planes orthog to dim
|
||||
for(int x=0;x<rd;x++){
|
||||
|
||||
int comm_any = 0;
|
||||
for(int s=0;s<simd_layout;s++) {
|
||||
int shifted_x = x+s*rd+sshift;
|
||||
comm_offnode[s] = shifted_x >= ld;
|
||||
comm_any = comm_any | comm_offnode[s];
|
||||
comm_proc[s] = shifted_x/ld;
|
||||
// FIXME call local permute copy if none are offnode.
|
||||
for(int i=0;i<Nsimd;i++){
|
||||
pointers[i] = (scalar_type *)&send_buf_extract[i][0];
|
||||
}
|
||||
|
||||
int o = 0;
|
||||
int bo = x*_grid->_ostride[dimension];
|
||||
int sx = (x+sshift)%rd;
|
||||
|
||||
if ( comm_any ) {
|
||||
|
||||
for(int i=0;i<Nsimd;i++){
|
||||
pointers[Nsimd-1-i] = (scalar_type *)&send_buf_extract[i][0];
|
||||
std::cout<< "Gathering "<< x <<std::endl;
|
||||
Gather_plane_extract<cobj>(rhs,pointers,dimension,sx,cbmask,compress);
|
||||
std::cout<< "Gathered "<<std::endl;
|
||||
for(int i=0;i<Nsimd;i++){
|
||||
|
||||
int inner_bit = (Nsimd>>(permute_type+1));
|
||||
int ic= (i&inner_bit)? 1:0;
|
||||
|
||||
int my_coor = rd*ic + x;
|
||||
int nbr_coor = my_coor+sshift;
|
||||
int nbr_proc = ((nbr_coor)/ld) % pd;// relative shift in processors
|
||||
|
||||
int nbr_ic = (nbr_coor%ld)/rd; // inner coord of peer
|
||||
int nbr_ox = (nbr_coor%rd); // outer coord of peer
|
||||
int nbr_lane = (i&(~inner_bit));
|
||||
|
||||
int recv_from_rank;
|
||||
int xmit_to_rank;
|
||||
|
||||
if (nbr_ic) nbr_lane|=inner_bit;
|
||||
assert (sx == nbr_ox);
|
||||
|
||||
if(nbr_proc){
|
||||
|
||||
std::cout<< "MPI sending "<<std::endl;
|
||||
_grid->ShiftedRanks(dimension,nbr_proc,xmit_to_rank,recv_from_rank);
|
||||
|
||||
_grid->SendToRecvFrom((void *)&send_buf_extract[nbr_lane][0],
|
||||
xmit_to_rank,
|
||||
(void *)&recv_buf_extract[i][0],
|
||||
recv_from_rank,
|
||||
bytes);
|
||||
std::cout<< "MPI complete "<<std::endl;
|
||||
|
||||
rpointers[i] = (scalar_type *)&recv_buf_extract[i][0];
|
||||
} else {
|
||||
rpointers[i] = (scalar_type *)&send_buf_extract[nbr_lane][0];
|
||||
}
|
||||
Gather_plane_extract<cobj>(rhs,pointers,dimension,sx,cbmask,compress);
|
||||
|
||||
for(int i=0;i<Nsimd;i++){
|
||||
|
||||
int s;
|
||||
_grid->iCoorFromIindex(icoor,i);
|
||||
s = icoor[dimension];
|
||||
|
||||
if(comm_offnode[s]){
|
||||
|
||||
int rank = _grid->_processor;
|
||||
int recv_from_rank;
|
||||
int xmit_to_rank;
|
||||
|
||||
_grid->ShiftedRanks(dimension,comm_proc[s],xmit_to_rank,recv_from_rank);
|
||||
|
||||
|
||||
_grid->SendToRecvFrom((void *)&send_buf_extract[i][0],
|
||||
xmit_to_rank,
|
||||
(void *)&recv_buf_extract[i][0],
|
||||
recv_from_rank,
|
||||
bytes);
|
||||
|
||||
rpointers[i] = (scalar_type *)&recv_buf_extract[i][0];
|
||||
|
||||
} else {
|
||||
|
||||
rpointers[i] = (scalar_type *)&send_buf_extract[i][0];
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
// Permute by swizzling pointers in merge
|
||||
int permute_slice=0;
|
||||
int lshift=sshift%ld;
|
||||
int wrap =lshift/rd;
|
||||
int num =lshift%rd;
|
||||
|
||||
if ( x< rd-num ) permute_slice=wrap;
|
||||
else permute_slice = 1-wrap;
|
||||
|
||||
int toggle_bit = (Nsimd>>(permute_type+1));
|
||||
int PermuteMap;
|
||||
for(int i=0;i<Nsimd;i++){
|
||||
if ( permute_slice ) {
|
||||
PermuteMap=i^toggle_bit;
|
||||
pointers[Nsimd-1-i] = rpointers[PermuteMap];
|
||||
} else {
|
||||
pointers[Nsimd-1-i] = rpointers[i];
|
||||
}
|
||||
}
|
||||
|
||||
// Here we don't want to scatter, just place into a buffer.
|
||||
for(int i=0;i<buffer_size;i++){
|
||||
merge(u_comm_buf[u_comm_offset+i],pointers);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
// Here we don't want to scatter, just place into a buffer.
|
||||
std::cout<< "merging "<<std::endl;
|
||||
for(int i=0;i<buffer_size;i++){
|
||||
merge(u_comm_buf[u_comm_offset+i],rpointers);
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
};
|
||||
|
@ -98,6 +98,9 @@ public:
|
||||
index = index / dims[d];
|
||||
}
|
||||
}
|
||||
inline void oCoorFromOindex (std::vector<int>& coor,int Oindex){
|
||||
CoorFromIndex(coor,Oindex,_rdimensions);
|
||||
}
|
||||
static inline void IndexFromCoor (std::vector<int>& coor,int &index,std::vector<int> &dims){
|
||||
int nd=dims.size();
|
||||
int stride=1;
|
||||
@ -107,9 +110,6 @@ public:
|
||||
stride=stride*dims[d];
|
||||
}
|
||||
}
|
||||
inline void oCoorFromOindex (std::vector<int>& coor,int Oindex){
|
||||
CoorFromIndex(coor,Oindex,_rdimensions);
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////////
|
||||
// SIMD lane addressing
|
||||
|
@ -5,7 +5,7 @@ namespace Grid {
|
||||
//////////////////////////////////////////////////////
|
||||
// Gather for when there is no need to SIMD split
|
||||
//////////////////////////////////////////////////////
|
||||
template<class vobj> void Gather_plane_simple (Lattice<vobj> &rhs,std::vector<vobj,alignedAllocator<vobj> > &buffer, int dimension,int plane,int cbmask)
|
||||
template<class vobj> void Gather_plane_simple (const Lattice<vobj> &rhs,std::vector<vobj,alignedAllocator<vobj> > &buffer, int dimension,int plane,int cbmask)
|
||||
{
|
||||
int rd = rhs._grid->_rdimensions[dimension];
|
||||
|
||||
@ -49,7 +49,7 @@ template<class vobj> void Gather_plane_simple (Lattice<vobj> &rhs,std::vector<vo
|
||||
//////////////////////////////////////////////////////
|
||||
// Gather for when there *is* need to SIMD split
|
||||
//////////////////////////////////////////////////////
|
||||
template<class vobj,class scalar_type> void Gather_plane_extract(Lattice<vobj> &rhs,std::vector<scalar_type *> pointers,int dimension,int plane,int cbmask)
|
||||
template<class vobj,class scalar_type> void Gather_plane_extract(const Lattice<vobj> &rhs,std::vector<scalar_type *> pointers,int dimension,int plane,int cbmask)
|
||||
{
|
||||
int rd = rhs._grid->_rdimensions[dimension];
|
||||
|
||||
|
@ -79,6 +79,7 @@ template<class vobj> void Cshift_comms(Lattice<vobj> &ret,Lattice<vobj> &rhs,int
|
||||
|
||||
int fd = rhs._grid->_fdimensions[dimension];
|
||||
int rd = rhs._grid->_rdimensions[dimension];
|
||||
int pd = rhs._grid->_processors[dimension];
|
||||
int simd_layout = rhs._grid->_simd_layout[dimension];
|
||||
int comm_dim = rhs._grid->_processors[dimension] >1 ;
|
||||
assert(simd_layout==1);
|
||||
@ -95,11 +96,10 @@ template<class vobj> void Cshift_comms(Lattice<vobj> &ret,Lattice<vobj> &rhs,int
|
||||
|
||||
for(int x=0;x<rd;x++){
|
||||
|
||||
int offnode = ( x+sshift >= rd );
|
||||
int sx = (x+sshift)%rd;
|
||||
int comm_proc = (x+sshift)/rd;
|
||||
int sx = (x+sshift)%rd;
|
||||
int comm_proc = ((x+sshift)/rd)%pd;
|
||||
|
||||
if (!offnode) {
|
||||
if (comm_proc==0) {
|
||||
|
||||
Copy_plane(ret,rhs,dimension,x,sx,cbmask);
|
||||
|
||||
@ -138,6 +138,7 @@ template<class vobj> void Cshift_comms_simd(Lattice<vobj> &ret,Lattice<vobj> &r
|
||||
int fd = grid->_fdimensions[dimension];
|
||||
int rd = grid->_rdimensions[dimension];
|
||||
int ld = grid->_ldimensions[dimension];
|
||||
int pd = grid->_processors[dimension];
|
||||
int simd_layout = grid->_simd_layout[dimension];
|
||||
int comm_dim = grid->_processors[dimension] >1 ;
|
||||
|
||||
@ -164,101 +165,58 @@ template<class vobj> void Cshift_comms_simd(Lattice<vobj> &ret,Lattice<vobj> &r
|
||||
///////////////////////////////////////////
|
||||
// Work out what to send where
|
||||
///////////////////////////////////////////
|
||||
|
||||
int cb = (cbmask==0x2)? 1 : 0;
|
||||
int sshift= grid->CheckerBoardShift(rhs.checkerboard,dimension,shift,cb);
|
||||
|
||||
std::vector<int> comm_offnode(simd_layout);
|
||||
std::vector<int> comm_proc (simd_layout); //relative processor coord in dim=dimension
|
||||
std::vector<int> icoor(grid->Nd());
|
||||
|
||||
// loop over outer coord planes orthog to dim
|
||||
for(int x=0;x<rd;x++){
|
||||
|
||||
int comm_any = 0;
|
||||
for(int s=0;s<simd_layout;s++) {
|
||||
int shifted_x = x+s*rd+sshift;
|
||||
comm_offnode[s] = shifted_x >= ld;
|
||||
comm_any = comm_any | comm_offnode[s];
|
||||
comm_proc[s] = shifted_x/ld;
|
||||
// FIXME call local permute copy if none are offnode.
|
||||
|
||||
for(int i=0;i<Nsimd;i++){
|
||||
pointers[i] = (scalar_type *)&send_buf_extract[i][0];
|
||||
}
|
||||
|
||||
int o = 0;
|
||||
int bo = x*grid->_ostride[dimension];
|
||||
int sx = (x+sshift)%rd;
|
||||
Gather_plane_extract(rhs,pointers,dimension,sx,cbmask);
|
||||
|
||||
if ( comm_any ) {
|
||||
for(int i=0;i<Nsimd;i++){
|
||||
|
||||
int inner_bit = (Nsimd>>(permute_type+1));
|
||||
int ic= (i&inner_bit)? 1:0;
|
||||
|
||||
for(int i=0;i<Nsimd;i++){ // there is a reversal in extract merge
|
||||
pointers[i] = (scalar_type *)&send_buf_extract[Nsimd-1-i][0];
|
||||
int my_coor = rd*ic + x;
|
||||
int nbr_coor = my_coor+sshift;
|
||||
int nbr_proc = ((nbr_coor)/ld) % pd;// relative shift in processors
|
||||
|
||||
int nbr_ic = (nbr_coor%ld)/rd; // inner coord of peer
|
||||
int nbr_ox = (nbr_coor%rd); // outer coord of peer
|
||||
int nbr_lane = (i&(~inner_bit));
|
||||
|
||||
int recv_from_rank;
|
||||
int xmit_to_rank;
|
||||
|
||||
if (nbr_ic) nbr_lane|=inner_bit;
|
||||
|
||||
assert (sx == nbr_ox);
|
||||
|
||||
if(nbr_proc){
|
||||
grid->ShiftedRanks(dimension,nbr_proc,xmit_to_rank,recv_from_rank);
|
||||
|
||||
grid->SendToRecvFrom((void *)&send_buf_extract[nbr_lane][0],
|
||||
xmit_to_rank,
|
||||
(void *)&recv_buf_extract[i][0],
|
||||
recv_from_rank,
|
||||
bytes);
|
||||
|
||||
rpointers[i] = (scalar_type *)&recv_buf_extract[i][0];
|
||||
} else {
|
||||
rpointers[i] = (scalar_type *)&send_buf_extract[nbr_lane][0];
|
||||
}
|
||||
Gather_plane_extract(rhs,pointers,dimension,sx,cbmask);
|
||||
|
||||
for(int i=0;i<Nsimd;i++){
|
||||
|
||||
|
||||
int s;
|
||||
grid->iCoorFromIindex(icoor,i);
|
||||
s = icoor[dimension];
|
||||
|
||||
if(comm_offnode[s]){
|
||||
|
||||
int rank = grid->_processor;
|
||||
int recv_from_rank;
|
||||
int xmit_to_rank;
|
||||
grid->ShiftedRanks(dimension,comm_proc[s],xmit_to_rank,recv_from_rank);
|
||||
|
||||
|
||||
grid->SendToRecvFrom((void *)&send_buf_extract[i][0],
|
||||
xmit_to_rank,
|
||||
(void *)&recv_buf_extract[i][0],
|
||||
recv_from_rank,
|
||||
bytes);
|
||||
|
||||
rpointers[i] = (scalar_type *)&recv_buf_extract[i][0];
|
||||
|
||||
} else {
|
||||
|
||||
rpointers[i] = (scalar_type *)&send_buf_extract[i][0];
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
// Permute by swizzling pointers in merge
|
||||
int permute_slice=0;
|
||||
int lshift=sshift%ld;
|
||||
int wrap =lshift/rd;
|
||||
int num =lshift%rd;
|
||||
|
||||
if ( x< rd-num ) permute_slice=wrap;
|
||||
else permute_slice = 1-wrap;
|
||||
|
||||
int toggle_bit = (Nsimd>>(permute_type+1));
|
||||
int PermuteMap;
|
||||
for(int i=0;i<Nsimd;i++){
|
||||
if ( permute_slice ) {
|
||||
PermuteMap=i^toggle_bit;
|
||||
pointers[Nsimd-1-i] = rpointers[PermuteMap];
|
||||
} else {
|
||||
pointers[Nsimd-1-i] = rpointers[i];
|
||||
}
|
||||
}
|
||||
|
||||
Scatter_plane_merge(ret,pointers,dimension,x,cbmask);
|
||||
|
||||
} else {
|
||||
|
||||
int permute_slice=0;
|
||||
int wrap = sshift/rd;
|
||||
int num = sshift%rd;
|
||||
if ( x< rd-num ) permute_slice=wrap;
|
||||
else permute_slice = 1-wrap;
|
||||
|
||||
if ( permute_slice ) Copy_plane_permute(ret,rhs,dimension,x,sx,cbmask,permute_type);
|
||||
else Copy_plane(ret,rhs,dimension,x,sx,cbmask);
|
||||
|
||||
}
|
||||
Scatter_plane_merge(ret,rpointers,dimension,x,cbmask);
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
@ -22,8 +22,12 @@ const int WilsonMatrix::Tm = 7;
|
||||
public:
|
||||
int mu;
|
||||
|
||||
void Point(int p) { mu=p;};
|
||||
vHalfSpinColourVector operator () (vSpinColourVector &in)
|
||||
void Point(int p) {
|
||||
mu=p;
|
||||
std::cout << "WilsonCompressor.Point " << mu<<std::endl;
|
||||
};
|
||||
|
||||
vHalfSpinColourVector operator () (const vSpinColourVector &in)
|
||||
{
|
||||
vHalfSpinColourVector ret;
|
||||
switch(mu) {
|
||||
@ -90,7 +94,8 @@ void WilsonMatrix::multiply(const LatticeFermion &in, LatticeFermion &out)
|
||||
|
||||
void WilsonMatrix::Dhop(const LatticeFermion &in, LatticeFermion &out)
|
||||
{
|
||||
// Stencil.HaloExchange(in,comm_buf);
|
||||
WilsonCompressor compressor;
|
||||
Stencil.HaloExchange<vSpinColourVector,vHalfSpinColourVector,WilsonCompressor>(in,comm_buf,compressor);
|
||||
|
||||
for(int ss=0;ss<grid->oSites();ss++){
|
||||
|
||||
@ -100,10 +105,13 @@ void WilsonMatrix::Dhop(const LatticeFermion &in, LatticeFermion &out)
|
||||
vHalfSpinColourVector chi;
|
||||
vHalfSpinColourVector Uchi;
|
||||
vHalfSpinColourVector *chi_p;
|
||||
|
||||
result=zero;
|
||||
|
||||
#if 0
|
||||
// Xp
|
||||
offset = Stencil._offsets [Xp][ss];
|
||||
local = Stencil._is_local[Xp][ss];
|
||||
|
||||
chi_p = &comm_buf[offset];
|
||||
if ( local ) {
|
||||
spProjXp(chi,in._odata[offset]);
|
||||
@ -112,7 +120,6 @@ void WilsonMatrix::Dhop(const LatticeFermion &in, LatticeFermion &out)
|
||||
mult(&(Uchi()),&(Umu._odata[ss](Xp)),&(*chi_p)());
|
||||
spReconXp(result,Uchi);
|
||||
|
||||
#if 0
|
||||
// Yp
|
||||
offset = Stencil._offsets [Yp][ss];
|
||||
local = Stencil._is_local[Yp][ss];
|
||||
@ -145,6 +152,7 @@ void WilsonMatrix::Dhop(const LatticeFermion &in, LatticeFermion &out)
|
||||
}
|
||||
mult(&(Uchi()),&(Umu._odata[ss](Tp)),&(*chi_p)());
|
||||
accumReconTp(result,Uchi);
|
||||
#endif
|
||||
|
||||
// Xm
|
||||
offset = Stencil._offsets [Xm][ss];
|
||||
@ -156,6 +164,7 @@ void WilsonMatrix::Dhop(const LatticeFermion &in, LatticeFermion &out)
|
||||
}
|
||||
mult(&(Uchi()),&(Umu._odata[ss](Xm)),&(*chi_p)());
|
||||
accumReconXm(result,Uchi);
|
||||
#if 0
|
||||
|
||||
// Ym
|
||||
offset = Stencil._offsets [Ym][ss];
|
||||
@ -190,6 +199,7 @@ void WilsonMatrix::Dhop(const LatticeFermion &in, LatticeFermion &out)
|
||||
mult(&(Uchi()),&(Umu._odata[ss](Tm)),&(*chi_p)());
|
||||
accumReconTm(result,Uchi);
|
||||
#endif
|
||||
|
||||
out._odata[ss] = result;
|
||||
}
|
||||
|
||||
|
@ -1,7 +1,6 @@
|
||||
#include <Grid.h>
|
||||
#include <parallelIO/GridNerscIO.h>
|
||||
|
||||
using namespace std;
|
||||
using namespace Grid;
|
||||
using namespace Grid::QCD;
|
||||
|
||||
@ -10,7 +9,7 @@ int main (int argc, char ** argv)
|
||||
Grid_init(&argc,&argv);
|
||||
|
||||
std::vector<int> simd_layout({1,1,2,2});
|
||||
std::vector<int> mpi_layout ({2,2,2,2});
|
||||
std::vector<int> mpi_layout ({2,2,1,4});
|
||||
std::vector<int> latt_size ({8,8,8,16});
|
||||
|
||||
GridCartesian Fine(latt_size,simd_layout,mpi_layout);
|
||||
@ -71,11 +70,11 @@ int main (int argc, char ** argv)
|
||||
Fine.CoorFromIndex(peer,index,latt_size);
|
||||
|
||||
if (nrm > 0){
|
||||
cout<<"FAIL shift "<< shift<<" in dir "<< dir<<" ["<<coor[0]<<","<<coor[1]<<","<<coor[2]<<","<<coor[3]<<"] = "<< cm()()()<<" expect "<<scm<<" "<<nrm<<endl;
|
||||
cout<<"Got "<<index<<" " << peer[0]<<","<<peer[1]<<","<<peer[2]<<","<<peer[3]<<std::endl;
|
||||
std::cerr<<"FAIL shift "<< shift<<" in dir "<< dir<<" ["<<coor[0]<<","<<coor[1]<<","<<coor[2]<<","<<coor[3]<<"] = "<< cm()()()<<" expect "<<scm<<" "<<nrm<<std::endl;
|
||||
std::cerr<<"Got "<<index<<" " << peer[0]<<","<<peer[1]<<","<<peer[2]<<","<<peer[3]<<std::endl;
|
||||
index=real(scm);
|
||||
Fine.CoorFromIndex(peer,index,latt_size);
|
||||
cout<<"Expect "<<index<<" " << peer[0]<<","<<peer[1]<<","<<peer[2]<<","<<peer[3]<<std::endl;
|
||||
std::cerr<<"Expect "<<index<<" " << peer[0]<<","<<peer[1]<<","<<peer[2]<<","<<peer[3]<<std::endl;
|
||||
}
|
||||
}}}}
|
||||
}
|
||||
|
@ -116,8 +116,6 @@ int main (int argc, char ** argv)
|
||||
// Insist that operations on random scalars gives
|
||||
// identical results to on vectors.
|
||||
|
||||
Tester<RealD,vRealD>(funcPlus());
|
||||
|
||||
std::cout << "==================================="<< std::endl;
|
||||
std::cout << "Testing vComplexF "<<std::endl;
|
||||
std::cout << "==================================="<< std::endl;
|
||||
|
@ -9,7 +9,7 @@ class SimpleCompressor {
|
||||
public:
|
||||
void Point(int) {};
|
||||
|
||||
vobj operator() (vobj &arg) {
|
||||
vobj operator() (const vobj &arg) {
|
||||
return arg;
|
||||
}
|
||||
};
|
||||
|
@ -1,5 +1,4 @@
|
||||
#include <Grid.h>
|
||||
#include <parallelIO/GridNerscIO.h>
|
||||
|
||||
using namespace std;
|
||||
using namespace Grid;
|
||||
@ -10,13 +9,19 @@ struct scal {
|
||||
d internal;
|
||||
};
|
||||
|
||||
Gamma::GammaMatrix Gmu [] = {
|
||||
Gamma::GammaX,
|
||||
Gamma::GammaY,
|
||||
Gamma::GammaZ,
|
||||
Gamma::GammaT
|
||||
};
|
||||
|
||||
int main (int argc, char ** argv)
|
||||
{
|
||||
Grid_init(&argc,&argv);
|
||||
|
||||
std::vector<int> simd_layout({1,1,2,2});
|
||||
std::vector<int> mpi_layout ({1,1,1,1});
|
||||
std::vector<int> mpi_layout ({2,1,1,2});
|
||||
std::vector<int> latt_size ({8,8,8,8});
|
||||
|
||||
GridCartesian Grid(latt_size,simd_layout,mpi_layout);
|
||||
@ -29,19 +34,37 @@ int main (int argc, char ** argv)
|
||||
LatticeFermion src(&Grid); random(pRNG,src);
|
||||
LatticeFermion result(&Grid); result=zero;
|
||||
LatticeFermion ref(&Grid); ref=zero;
|
||||
LatticeFermion tmp(&Grid); tmp=zero;
|
||||
LatticeGaugeField Umu(&Grid); random(pRNG,Umu);
|
||||
std::vector<LatticeColourMatrix> U(4,&Grid);
|
||||
|
||||
for(int mu=0;mu<Nd;mu++){
|
||||
U[mu] = 1.0;
|
||||
pokeIndex<3>(Umu,U[mu],mu);
|
||||
// U[mu] = 1.0;
|
||||
// pokeIndex<3>(Umu,U[mu],mu);
|
||||
U[mu] = peekIndex<3>(Umu,mu);
|
||||
}
|
||||
|
||||
{
|
||||
int mu=0;
|
||||
// ref = src + Gamma(Gamma::GammaX)* src ; // 1-gamma_x
|
||||
ref = src;
|
||||
ref = U[0]*Cshift(ref,0,1);
|
||||
std::vector<int> mask({0,0,0,0,1,0,0,0});
|
||||
{ // Naive wilson implementation
|
||||
ref = zero;
|
||||
for(int mu=0;mu<Nd;mu++){
|
||||
|
||||
// ref = src + Gamma(Gamma::GammaX)* src ; // 1-gamma_x
|
||||
if( mask[mu] ) {
|
||||
tmp = U[mu]*Cshift(src,mu,1);
|
||||
for(int i=0;i<ref._odata.size();i++){
|
||||
ref._odata[i]+= tmp._odata[i] + Gamma(Gmu[mu])*tmp._odata[i]; ;
|
||||
}
|
||||
}
|
||||
|
||||
if( mask[mu+4] ){
|
||||
tmp =adj(U[mu])*src;
|
||||
tmp =Cshift(tmp,mu,-1);
|
||||
for(int i=0;i<ref._odata.size();i++){
|
||||
ref._odata[i]+= tmp._odata[i] - Gamma(Gmu[mu])*tmp._odata[i]; ;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
RealD mass=0.1;
|
||||
|
Loading…
Reference in New Issue
Block a user