1
0
mirror of https://github.com/paboyle/Grid.git synced 2025-07-26 09:17:08 +01:00

Merge branch 'master' of github.com:paboyle/Grid

This commit is contained in:
Peter Boyle
2015-11-04 05:14:26 -06:00
13 changed files with 260 additions and 99 deletions

View File

@@ -124,6 +124,7 @@ namespace Grid {
if ( comm_dim ) {
sshift[0] = _grid->CheckerBoardShiftForCB(_checkerboard,dimension,shift,Even);
sshift[1] = _grid->CheckerBoardShiftForCB(_checkerboard,dimension,shift,Odd);
// std::cout << "dim "<<dimension<<"cb "<<_checkerboard<<"shift "<<shift<<" sshift " << sshift[0]<<" "<<sshift[1]<<std::endl;
if ( sshift[0] == sshift[1] ) {
if (splice_dim) {
GatherStartCommsSimd(source,dimension,shift,0x3,u_comm_buf,u_comm_offset,compress);
@@ -164,23 +165,23 @@ namespace Grid {
assert(comm_dim==1);
assert(shift>=0);
assert(shift<fd);
int buffer_size = _grid->_slice_nblock[dimension]*_grid->_slice_block[dimension];
std::vector<cobj,alignedAllocator<cobj> > send_buf(buffer_size); // hmm...
std::vector<cobj,alignedAllocator<cobj> > recv_buf(buffer_size);
int cb= (cbmask==0x2)? Odd : Even;
int sshift= _grid->CheckerBoardShiftForCB(rhs.checkerboard,dimension,shift,cb);
for(int x=0;x<rd;x++){
int sx = (x+sshift)%rd;
int comm_proc = ((x+sshift)/rd)%pd;
if (comm_proc) {
int words = send_buf.size();
int words = buffer_size;
if (cbmask != 0x3) words=words>>1;
int bytes = words * sizeof(cobj);
@@ -201,10 +202,11 @@ namespace Grid {
recv_from_rank,
bytes);
for(int i=0;i<buffer_size;i++){
for(int i=0;i<words;i++){
u_comm_buf[u_comm_offset+i]=recv_buf[i];
// std::cout << " Halo["<<i<<"] snd "<<send_buf[i]<< " rcv "<<recv_buf[i]<<" mask 0x"<<cbmask<<std::endl;
}
u_comm_offset+=buffer_size;
u_comm_offset+=words;
}
}
}
@@ -241,6 +243,7 @@ namespace Grid {
int buffer_size = _grid->_slice_nblock[dimension]*_grid->_slice_block[dimension];
int words = sizeof(cobj)/sizeof(vector_type);
assert(cbmask==0x3); // Fixme think there is a latent bug if not true
/*
* possibly slow to allocate
* Doesn't matter in this test, but may want to preallocate in the

View File

@@ -29,14 +29,15 @@ Gather_plane_simple (const Lattice<vobj> &rhs,std::vector<cobj,alignedAllocator<
int e1=rhs._grid->_slice_nblock[dimension];
int e2=rhs._grid->_slice_block[dimension];
PARALLEL_NESTED_LOOP2
int bo=0;
//PARALLEL_NESTED_LOOP21
for(int n=0;n<e1;n++){
for(int b=0;b<e2;b++){
int o = n*rhs._grid->_slice_stride[dimension];
int bo = n*rhs._grid->_slice_block[dimension];
// int bo = n*rhs._grid->_slice_block[dimension];
int ocb=1<<rhs._grid->CheckerBoardFromOindex(o+b);// Could easily be a table lookup
if ( ocb &cbmask ) {
buffer[bo+b]=compress(rhs._odata[so+o+b],dimension,plane,so+o+b,rhs._grid);
buffer[bo++]=compress(rhs._odata[so+o+b],dimension,plane,so+o+b,rhs._grid);
}
}
}
@@ -59,7 +60,7 @@ Gather_plane_extract(const Lattice<vobj> &rhs,std::vector<typename cobj::scalar_
int e1=rhs._grid->_slice_nblock[dimension];
int e2=rhs._grid->_slice_block[dimension];
PARALLEL_NESTED_LOOP2
//PARALLEL_NESTED_LOOP2
for(int n=0;n<e1;n++){
for(int b=0;b<e2;b++){
@@ -109,14 +110,15 @@ template<class vobj> void Scatter_plane_simple (Lattice<vobj> &rhs,std::vector<v
int e1=rhs._grid->_slice_nblock[dimension];
int e2=rhs._grid->_slice_block[dimension];
PARALLEL_NESTED_LOOP2
int bo=0;
//PARALLEL_NESTED_LOOP2
for(int n=0;n<e1;n++){
for(int b=0;b<e2;b++){
int o =n*rhs._grid->_slice_stride[dimension];
int bo =n*rhs._grid->_slice_block[dimension];
// int bo =n*rhs._grid->_slice_block[dimension];
int ocb=1<<rhs._grid->CheckerBoardFromOindex(o+b);// Could easily be a table lookup
if ( ocb & cbmask ) {
rhs._odata[so+o+b]=buffer[bo+b];
rhs._odata[so+o+b]=buffer[bo++];
}
}
}

View File

@@ -9,7 +9,7 @@ template<class vobj> Lattice<vobj> Cshift(const Lattice<vobj> &rhs,int dimension
typedef typename vobj::vector_type vector_type;
typedef typename vobj::scalar_type scalar_type;
Lattice<vobj> ret(rhs._grid);
Lattice<vobj> ret(rhs._grid);
int fd = rhs._grid->_fdimensions[dimension];
int rd = rhs._grid->_rdimensions[dimension];
@@ -26,10 +26,13 @@ template<class vobj> Lattice<vobj> Cshift(const Lattice<vobj> &rhs,int dimension
if ( !comm_dim ) {
// std::cout << "Cshift_local" <<std::endl;
Cshift_local(ret,rhs,dimension,shift); // Handles checkerboarding
} else if ( splice_dim ) {
// std::cout << "Cshift_comms_simd" <<std::endl;
Cshift_comms_simd(ret,rhs,dimension,shift);
} else {
// std::cout << "Cshift_comms" <<std::endl;
Cshift_comms(ret,rhs,dimension,shift);
}
return ret;
@@ -42,9 +45,13 @@ template<class vobj> void Cshift_comms(Lattice<vobj>& ret,const Lattice<vobj> &r
sshift[0] = rhs._grid->CheckerBoardShiftForCB(rhs.checkerboard,dimension,shift,Even);
sshift[1] = rhs._grid->CheckerBoardShiftForCB(rhs.checkerboard,dimension,shift,Odd);
// std::cout << "Cshift_comms dim "<<dimension<<"cb "<<rhs.checkerboard<<"shift "<<shift<<" sshift " << sshift[0]<<" "<<sshift[1]<<std::endl;
if ( sshift[0] == sshift[1] ) {
// std::cout << "Single pass Cshift_comms" <<std::endl;
Cshift_comms(ret,rhs,dimension,shift,0x3);
} else {
// std::cout << "Two pass Cshift_comms" <<std::endl;
Cshift_comms(ret,rhs,dimension,shift,0x1);// if checkerboard is unfavourable take two passes
Cshift_comms(ret,rhs,dimension,shift,0x2);// both with block stride loop iteration
}
@@ -113,12 +120,16 @@ template<class vobj> void Cshift_comms(Lattice<vobj> &ret,const Lattice<vobj> &r
int xmit_to_rank;
grid->ShiftedRanks(dimension,comm_proc,xmit_to_rank,recv_from_rank);
grid->SendToRecvFrom((void *)&send_buf[0],
xmit_to_rank,
(void *)&recv_buf[0],
recv_from_rank,
bytes);
// for(int i=0;i<words;i++){
// std::cout << "SendRecv ["<<i<<"] snd "<<send_buf[i]<<" rcv " << recv_buf[i] << " 0x" << cbmask<<std::endl;
// }
Scatter_plane_simple (ret,recv_buf,dimension,x,cbmask);
}
}

View File

@@ -82,7 +82,8 @@ namespace QCD {
// Sets `out` to (4.0 + mass) * `in` and copies the input's checkerboard tag
// onto `out`.
//
// The factor is first converted to FermionField::scalar_type, and the product
// is formed and assigned exactly once.
template<class Impl>
void WilsonFermion<Impl>::Mooee(const FermionField &in, FermionField &out) {
  out.checkerboard = in.checkerboard;
  typename FermionField::scalar_type scal(4.0+mass);
  out = scal*in;
}
template<class Impl>

View File

@@ -78,7 +78,7 @@ void WilsonKernels<Impl>::DiracOptDhopSite(CartesianStencil &st,DoubledGaugeFiel
}
Impl::multLink(Uchi,U._odata[sU],chi,Xm,SE,st);
accumReconXm(result,Uchi);
// Ym
SE=st.GetEntry(ptype,Ym,sF);
if ( SE->_is_local && SE->_permute ) {

View File

@@ -524,16 +524,22 @@ Note that in step D setting B ~ X - A and using B in place of A in step E will g
// reunitarise??
static void LieRandomize(GridParallelRNG &pRNG,LatticeMatrix &out,double scale=1.0){
GridBase *grid = out._grid;
LatticeComplex ca (grid);
LatticeMatrix lie(grid);
LatticeMatrix la (grid);
Complex ci(0.0,scale);
Complex cone(1.0,0.0);
Matrix ta;
lie=zero;
for(int a=0;a<generators();a++){
random(pRNG,ca); ca=real(ca)-0.5;
random(pRNG,ca);
ca = (ca+conjugate(ca))*0.5;
ca = ca - 0.5;
generator(a,ta);
la=ci*ca*ta;

View File

@@ -9,10 +9,10 @@
#include <immintrin.h>
#ifndef KNC_ONLY_STORES
#define _mm512_storenrngo_ps _mm512_store_ps // not present in AVX512
#define _mm512_storenrngo_pd _mm512_store_pd // not present in AVX512
#endif
//#ifndef KNC_ONLY_STORES
//#define _mm512_storenrngo_ps _mm512_store_ps // not present in AVX512
//#define _mm512_storenrngo_pd _mm512_store_pd // not present in AVX512
//#endif
namespace Optimization {

View File

@@ -8,7 +8,7 @@ namespace Grid {
int checkerboard,
const std::vector<int> &directions,
const std::vector<int> &distances)
: _entries(npoints), _permute_type(npoints)
: _entries(npoints), _permute_type(npoints), _comm_buf_size(npoints)
{
_npoints = npoints;
_grid = grid;
@@ -61,11 +61,17 @@ namespace Grid {
sshift[1] = _grid->CheckerBoardShiftForCB(_checkerboard,dimension,shift,Odd);
if ( sshift[0] == sshift[1] ) {
Comms(point,dimension,shift,0x3);
// std::cout<<"Comms 0x3"<<std::endl;
} else {
Comms(point,dimension,shift,0x1);// if checkerboard is unfavourable take two passes
Comms(point,dimension,shift,0x2);// both with block stride loop iteration
// std::cout<<"Comms 0x1 ; 0x2"<<std::endl;
}
}
// for(int ss=0;ss<osites;ss++){
// std::cout << "point["<<i<<"] "<<ss<<"-> o"<<_entries[i][ss]._offset<<"; l"<<
// _entries[i][ss]._is_local<<"; p"<<_entries[i][ss]._permute<<std::endl;
// }
}
}
@@ -139,13 +145,14 @@ namespace Grid {
int cb= (cbmask==0x2)? Odd : Even;
int sshift= _grid->CheckerBoardShiftForCB(_checkerboard,dimension,shift,cb);
for(int x=0;x<rd;x++){
int offnode = (((x+sshift)%fd) >= rd );
// int comm_proc = ((x+sshift)/ld)%pd;
// int offnode = (comm_proc!=0);
int sx = (x+sshift)%rd;
for(int x=0;x<rd;x++){
int sx = (x+sshift)%rd;
int comm_proc = ((x+sshift)/rd)%pd;
int offnode = (comm_proc!= 0);
// std::cout << "Stencil shift "<<shift<<" sshift "<<sshift<<" fd "<<fd<<" rd " <<rd<<" offnode "<<offnode<<" sx "<<sx<<std::endl;
int wraparound=0;
if ( (shiftpm==-1) && (sx>x) && (grid->_processor_coor[dimension]==0) ) {
wraparound = 1;
@@ -249,7 +256,7 @@ namespace Grid {
int so = plane*_grid->_ostride[dimension]; // base offset for start of plane
int o = 0; // relative offset to base within plane
int bo = 0; // offset in buffer
for(int n=0;n<_grid->_slice_nblock[dimension];n++){
for(int b=0;b<_grid->_slice_block[dimension];b++){