mirror of
https://github.com/paboyle/Grid.git
synced 2025-07-26 09:17:08 +01:00
Merge branch 'master' of github.com:paboyle/Grid
This commit is contained in:
@@ -124,6 +124,7 @@ namespace Grid {
|
||||
if ( comm_dim ) {
|
||||
sshift[0] = _grid->CheckerBoardShiftForCB(_checkerboard,dimension,shift,Even);
|
||||
sshift[1] = _grid->CheckerBoardShiftForCB(_checkerboard,dimension,shift,Odd);
|
||||
// std::cout << "dim "<<dimension<<"cb "<<_checkerboard<<"shift "<<shift<<" sshift " << sshift[0]<<" "<<sshift[1]<<std::endl;
|
||||
if ( sshift[0] == sshift[1] ) {
|
||||
if (splice_dim) {
|
||||
GatherStartCommsSimd(source,dimension,shift,0x3,u_comm_buf,u_comm_offset,compress);
|
||||
@@ -164,23 +165,23 @@ namespace Grid {
|
||||
assert(comm_dim==1);
|
||||
assert(shift>=0);
|
||||
assert(shift<fd);
|
||||
|
||||
|
||||
int buffer_size = _grid->_slice_nblock[dimension]*_grid->_slice_block[dimension];
|
||||
|
||||
|
||||
std::vector<cobj,alignedAllocator<cobj> > send_buf(buffer_size); // hmm...
|
||||
std::vector<cobj,alignedAllocator<cobj> > recv_buf(buffer_size);
|
||||
|
||||
|
||||
int cb= (cbmask==0x2)? Odd : Even;
|
||||
int sshift= _grid->CheckerBoardShiftForCB(rhs.checkerboard,dimension,shift,cb);
|
||||
|
||||
|
||||
for(int x=0;x<rd;x++){
|
||||
|
||||
|
||||
int sx = (x+sshift)%rd;
|
||||
int comm_proc = ((x+sshift)/rd)%pd;
|
||||
|
||||
if (comm_proc) {
|
||||
|
||||
int words = send_buf.size();
|
||||
int words = buffer_size;
|
||||
if (cbmask != 0x3) words=words>>1;
|
||||
|
||||
int bytes = words * sizeof(cobj);
|
||||
@@ -201,10 +202,11 @@ namespace Grid {
|
||||
recv_from_rank,
|
||||
bytes);
|
||||
|
||||
for(int i=0;i<buffer_size;i++){
|
||||
for(int i=0;i<words;i++){
|
||||
u_comm_buf[u_comm_offset+i]=recv_buf[i];
|
||||
// std::cout << " Halo["<<i<<"] snd "<<send_buf[i]<< " rcv "<<recv_buf[i]<<" mask 0x"<<cbmask<<std::endl;
|
||||
}
|
||||
u_comm_offset+=buffer_size;
|
||||
u_comm_offset+=words;
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -241,6 +243,7 @@ namespace Grid {
|
||||
int buffer_size = _grid->_slice_nblock[dimension]*_grid->_slice_block[dimension];
|
||||
int words = sizeof(cobj)/sizeof(vector_type);
|
||||
|
||||
assert(cbmask==0x3); // Fixme think there is a latent bug if not true
|
||||
/*
|
||||
* possibly slow to allocate
|
||||
* Doesn't matter in this test, but may want to preallocate in the
|
||||
|
@@ -29,14 +29,15 @@ Gather_plane_simple (const Lattice<vobj> &rhs,std::vector<cobj,alignedAllocator<
|
||||
|
||||
int e1=rhs._grid->_slice_nblock[dimension];
|
||||
int e2=rhs._grid->_slice_block[dimension];
|
||||
PARALLEL_NESTED_LOOP2
|
||||
int bo=0;
|
||||
//PARALLEL_NESTED_LOOP21
|
||||
for(int n=0;n<e1;n++){
|
||||
for(int b=0;b<e2;b++){
|
||||
int o = n*rhs._grid->_slice_stride[dimension];
|
||||
int bo = n*rhs._grid->_slice_block[dimension];
|
||||
// int bo = n*rhs._grid->_slice_block[dimension];
|
||||
int ocb=1<<rhs._grid->CheckerBoardFromOindex(o+b);// Could easily be a table lookup
|
||||
if ( ocb &cbmask ) {
|
||||
buffer[bo+b]=compress(rhs._odata[so+o+b],dimension,plane,so+o+b,rhs._grid);
|
||||
buffer[bo++]=compress(rhs._odata[so+o+b],dimension,plane,so+o+b,rhs._grid);
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -59,7 +60,7 @@ Gather_plane_extract(const Lattice<vobj> &rhs,std::vector<typename cobj::scalar_
|
||||
|
||||
int e1=rhs._grid->_slice_nblock[dimension];
|
||||
int e2=rhs._grid->_slice_block[dimension];
|
||||
PARALLEL_NESTED_LOOP2
|
||||
//PARALLEL_NESTED_LOOP2
|
||||
for(int n=0;n<e1;n++){
|
||||
for(int b=0;b<e2;b++){
|
||||
|
||||
@@ -109,14 +110,15 @@ template<class vobj> void Scatter_plane_simple (Lattice<vobj> &rhs,std::vector<v
|
||||
|
||||
int e1=rhs._grid->_slice_nblock[dimension];
|
||||
int e2=rhs._grid->_slice_block[dimension];
|
||||
PARALLEL_NESTED_LOOP2
|
||||
int bo=0;
|
||||
//PARALLEL_NESTED_LOOP2
|
||||
for(int n=0;n<e1;n++){
|
||||
for(int b=0;b<e2;b++){
|
||||
int o =n*rhs._grid->_slice_stride[dimension];
|
||||
int bo =n*rhs._grid->_slice_block[dimension];
|
||||
// int bo =n*rhs._grid->_slice_block[dimension];
|
||||
int ocb=1<<rhs._grid->CheckerBoardFromOindex(o+b);// Could easily be a table lookup
|
||||
if ( ocb & cbmask ) {
|
||||
rhs._odata[so+o+b]=buffer[bo+b];
|
||||
rhs._odata[so+o+b]=buffer[bo++];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@@ -9,7 +9,7 @@ template<class vobj> Lattice<vobj> Cshift(const Lattice<vobj> &rhs,int dimension
|
||||
typedef typename vobj::vector_type vector_type;
|
||||
typedef typename vobj::scalar_type scalar_type;
|
||||
|
||||
Lattice<vobj> ret(rhs._grid);
|
||||
Lattice<vobj> ret(rhs._grid);
|
||||
|
||||
int fd = rhs._grid->_fdimensions[dimension];
|
||||
int rd = rhs._grid->_rdimensions[dimension];
|
||||
@@ -26,10 +26,13 @@ template<class vobj> Lattice<vobj> Cshift(const Lattice<vobj> &rhs,int dimension
|
||||
|
||||
|
||||
if ( !comm_dim ) {
|
||||
// std::cout << "Cshift_local" <<std::endl;
|
||||
Cshift_local(ret,rhs,dimension,shift); // Handles checkerboarding
|
||||
} else if ( splice_dim ) {
|
||||
// std::cout << "Cshift_comms_simd" <<std::endl;
|
||||
Cshift_comms_simd(ret,rhs,dimension,shift);
|
||||
} else {
|
||||
// std::cout << "Cshift_comms" <<std::endl;
|
||||
Cshift_comms(ret,rhs,dimension,shift);
|
||||
}
|
||||
return ret;
|
||||
@@ -42,9 +45,13 @@ template<class vobj> void Cshift_comms(Lattice<vobj>& ret,const Lattice<vobj> &r
|
||||
sshift[0] = rhs._grid->CheckerBoardShiftForCB(rhs.checkerboard,dimension,shift,Even);
|
||||
sshift[1] = rhs._grid->CheckerBoardShiftForCB(rhs.checkerboard,dimension,shift,Odd);
|
||||
|
||||
// std::cout << "Cshift_comms dim "<<dimension<<"cb "<<rhs.checkerboard<<"shift "<<shift<<" sshift " << sshift[0]<<" "<<sshift[1]<<std::endl;
|
||||
|
||||
if ( sshift[0] == sshift[1] ) {
|
||||
// std::cout << "Single pass Cshift_comms" <<std::endl;
|
||||
Cshift_comms(ret,rhs,dimension,shift,0x3);
|
||||
} else {
|
||||
// std::cout << "Two pass Cshift_comms" <<std::endl;
|
||||
Cshift_comms(ret,rhs,dimension,shift,0x1);// if checkerboard is unfavourable take two passes
|
||||
Cshift_comms(ret,rhs,dimension,shift,0x2);// both with block stride loop iteration
|
||||
}
|
||||
@@ -113,12 +120,16 @@ template<class vobj> void Cshift_comms(Lattice<vobj> &ret,const Lattice<vobj> &r
|
||||
int xmit_to_rank;
|
||||
grid->ShiftedRanks(dimension,comm_proc,xmit_to_rank,recv_from_rank);
|
||||
|
||||
|
||||
grid->SendToRecvFrom((void *)&send_buf[0],
|
||||
xmit_to_rank,
|
||||
(void *)&recv_buf[0],
|
||||
recv_from_rank,
|
||||
bytes);
|
||||
|
||||
// for(int i=0;i<words;i++){
|
||||
// std::cout << "SendRecv ["<<i<<"] snd "<<send_buf[i]<<" rcv " << recv_buf[i] << " 0x" << cbmask<<std::endl;
|
||||
// }
|
||||
Scatter_plane_simple (ret,recv_buf,dimension,x,cbmask);
|
||||
}
|
||||
}
|
||||
|
@@ -82,7 +82,8 @@ namespace QCD {
|
||||
template<class Impl>
|
||||
void WilsonFermion<Impl>::Mooee(const FermionField &in, FermionField &out) {
|
||||
out.checkerboard = in.checkerboard;
|
||||
out = (4.0+mass)*in;
|
||||
typename FermionField::scalar_type scal(4.0+mass);
|
||||
out = scal*in;
|
||||
}
|
||||
|
||||
template<class Impl>
|
||||
|
@@ -78,7 +78,7 @@ void WilsonKernels<Impl>::DiracOptDhopSite(CartesianStencil &st,DoubledGaugeFiel
|
||||
}
|
||||
Impl::multLink(Uchi,U._odata[sU],chi,Xm,SE,st);
|
||||
accumReconXm(result,Uchi);
|
||||
|
||||
|
||||
// Ym
|
||||
SE=st.GetEntry(ptype,Ym,sF);
|
||||
if ( SE->_is_local && SE->_permute ) {
|
||||
|
@@ -524,16 +524,22 @@ Note that in step D setting B ~ X - A and using B in place of A in step E will g
|
||||
// reunitarise??
|
||||
static void LieRandomize(GridParallelRNG &pRNG,LatticeMatrix &out,double scale=1.0){
|
||||
GridBase *grid = out._grid;
|
||||
|
||||
LatticeComplex ca (grid);
|
||||
LatticeMatrix lie(grid);
|
||||
LatticeMatrix la (grid);
|
||||
Complex ci(0.0,scale);
|
||||
Complex cone(1.0,0.0);
|
||||
Matrix ta;
|
||||
|
||||
lie=zero;
|
||||
for(int a=0;a<generators();a++){
|
||||
|
||||
random(pRNG,ca); ca=real(ca)-0.5;
|
||||
random(pRNG,ca);
|
||||
|
||||
ca = (ca+conjugate(ca))*0.5;
|
||||
ca = ca - 0.5;
|
||||
|
||||
generator(a,ta);
|
||||
|
||||
la=ci*ca*ta;
|
||||
|
@@ -9,10 +9,10 @@
|
||||
|
||||
#include <immintrin.h>
|
||||
|
||||
#ifndef KNC_ONLY_STORES
|
||||
#define _mm512_storenrngo_ps _mm512_store_ps // not present in AVX512
|
||||
#define _mm512_storenrngo_pd _mm512_store_pd // not present in AVX512
|
||||
#endif
|
||||
//#ifndef KNC_ONLY_STORES
|
||||
//#define _mm512_storenrngo_ps _mm512_store_ps // not present in AVX512
|
||||
//#define _mm512_storenrngo_pd _mm512_store_pd // not present in AVX512
|
||||
//#endif
|
||||
|
||||
|
||||
namespace Optimization {
|
||||
|
@@ -8,7 +8,7 @@ namespace Grid {
|
||||
int checkerboard,
|
||||
const std::vector<int> &directions,
|
||||
const std::vector<int> &distances)
|
||||
: _entries(npoints), _permute_type(npoints)
|
||||
: _entries(npoints), _permute_type(npoints), _comm_buf_size(npoints)
|
||||
{
|
||||
_npoints = npoints;
|
||||
_grid = grid;
|
||||
@@ -61,11 +61,17 @@ namespace Grid {
|
||||
sshift[1] = _grid->CheckerBoardShiftForCB(_checkerboard,dimension,shift,Odd);
|
||||
if ( sshift[0] == sshift[1] ) {
|
||||
Comms(point,dimension,shift,0x3);
|
||||
// std::cout<<"Comms 0x3"<<std::endl;
|
||||
} else {
|
||||
Comms(point,dimension,shift,0x1);// if checkerboard is unfavourable take two passes
|
||||
Comms(point,dimension,shift,0x2);// both with block stride loop iteration
|
||||
// std::cout<<"Comms 0x1 ; 0x2"<<std::endl;
|
||||
}
|
||||
}
|
||||
// for(int ss=0;ss<osites;ss++){
|
||||
// std::cout << "point["<<i<<"] "<<ss<<"-> o"<<_entries[i][ss]._offset<<"; l"<<
|
||||
// _entries[i][ss]._is_local<<"; p"<<_entries[i][ss]._permute<<std::endl;
|
||||
// }
|
||||
}
|
||||
}
|
||||
|
||||
@@ -139,13 +145,14 @@ namespace Grid {
|
||||
int cb= (cbmask==0x2)? Odd : Even;
|
||||
int sshift= _grid->CheckerBoardShiftForCB(_checkerboard,dimension,shift,cb);
|
||||
|
||||
for(int x=0;x<rd;x++){
|
||||
|
||||
int offnode = (((x+sshift)%fd) >= rd );
|
||||
// int comm_proc = ((x+sshift)/ld)%pd;
|
||||
// int offnode = (comm_proc!=0);
|
||||
int sx = (x+sshift)%rd;
|
||||
|
||||
for(int x=0;x<rd;x++){
|
||||
|
||||
int sx = (x+sshift)%rd;
|
||||
int comm_proc = ((x+sshift)/rd)%pd;
|
||||
int offnode = (comm_proc!= 0);
|
||||
|
||||
// std::cout << "Stencil shift "<<shift<<" sshift "<<sshift<<" fd "<<fd<<" rd " <<rd<<" offnode "<<offnode<<" sx "<<sx<<std::endl;
|
||||
int wraparound=0;
|
||||
if ( (shiftpm==-1) && (sx>x) && (grid->_processor_coor[dimension]==0) ) {
|
||||
wraparound = 1;
|
||||
@@ -249,7 +256,7 @@ namespace Grid {
|
||||
int so = plane*_grid->_ostride[dimension]; // base offset for start of plane
|
||||
int o = 0; // relative offset to base within plane
|
||||
int bo = 0; // offset in buffer
|
||||
|
||||
|
||||
for(int n=0;n<_grid->_slice_nblock[dimension];n++){
|
||||
for(int b=0;b<_grid->_slice_block[dimension];b++){
|
||||
|
||||
|
Reference in New Issue
Block a user