mirror of
https://github.com/paboyle/Grid.git
synced 2025-06-12 20:27:06 +01:00
COmms improvements
This commit is contained in:
@ -484,24 +484,26 @@ public:
|
||||
|
||||
int dag = compress.dag;
|
||||
int face_idx=0;
|
||||
#define vet_same_node(a,b) \
|
||||
{ auto tmp = b; }
|
||||
if ( dag ) {
|
||||
assert(this->same_node[Xp]==this->HaloGatherDir(source,XpCompress,Xp,face_idx));
|
||||
assert(this->same_node[Yp]==this->HaloGatherDir(source,YpCompress,Yp,face_idx));
|
||||
assert(this->same_node[Zp]==this->HaloGatherDir(source,ZpCompress,Zp,face_idx));
|
||||
assert(this->same_node[Tp]==this->HaloGatherDir(source,TpCompress,Tp,face_idx));
|
||||
assert(this->same_node[Xm]==this->HaloGatherDir(source,XmCompress,Xm,face_idx));
|
||||
assert(this->same_node[Ym]==this->HaloGatherDir(source,YmCompress,Ym,face_idx));
|
||||
assert(this->same_node[Zm]==this->HaloGatherDir(source,ZmCompress,Zm,face_idx));
|
||||
assert(this->same_node[Tm]==this->HaloGatherDir(source,TmCompress,Tm,face_idx));
|
||||
vet_same_node(this->same_node[Xp],this->HaloGatherDir(source,XpCompress,Xp,face_idx));
|
||||
vet_same_node(this->same_node[Yp],this->HaloGatherDir(source,YpCompress,Yp,face_idx));
|
||||
vet_same_node(this->same_node[Zp],this->HaloGatherDir(source,ZpCompress,Zp,face_idx));
|
||||
vet_same_node(this->same_node[Tp],this->HaloGatherDir(source,TpCompress,Tp,face_idx));
|
||||
vet_same_node(this->same_node[Xm],this->HaloGatherDir(source,XmCompress,Xm,face_idx));
|
||||
vet_same_node(this->same_node[Ym],this->HaloGatherDir(source,YmCompress,Ym,face_idx));
|
||||
vet_same_node(this->same_node[Zm],this->HaloGatherDir(source,ZmCompress,Zm,face_idx));
|
||||
vet_same_node(this->same_node[Tm],this->HaloGatherDir(source,TmCompress,Tm,face_idx));
|
||||
} else {
|
||||
assert(this->same_node[Xp]==this->HaloGatherDir(source,XmCompress,Xp,face_idx));
|
||||
assert(this->same_node[Yp]==this->HaloGatherDir(source,YmCompress,Yp,face_idx));
|
||||
assert(this->same_node[Zp]==this->HaloGatherDir(source,ZmCompress,Zp,face_idx));
|
||||
assert(this->same_node[Tp]==this->HaloGatherDir(source,TmCompress,Tp,face_idx));
|
||||
assert(this->same_node[Xm]==this->HaloGatherDir(source,XpCompress,Xm,face_idx));
|
||||
assert(this->same_node[Ym]==this->HaloGatherDir(source,YpCompress,Ym,face_idx));
|
||||
assert(this->same_node[Zm]==this->HaloGatherDir(source,ZpCompress,Zm,face_idx));
|
||||
assert(this->same_node[Tm]==this->HaloGatherDir(source,TpCompress,Tm,face_idx));
|
||||
vet_same_node(this->same_node[Xp],this->HaloGatherDir(source,XmCompress,Xp,face_idx));
|
||||
vet_same_node(this->same_node[Yp],this->HaloGatherDir(source,YmCompress,Yp,face_idx));
|
||||
vet_same_node(this->same_node[Zp],this->HaloGatherDir(source,ZmCompress,Zp,face_idx));
|
||||
vet_same_node(this->same_node[Tp],this->HaloGatherDir(source,TmCompress,Tp,face_idx));
|
||||
vet_same_node(this->same_node[Xm],this->HaloGatherDir(source,XpCompress,Xm,face_idx));
|
||||
vet_same_node(this->same_node[Ym],this->HaloGatherDir(source,YpCompress,Ym,face_idx));
|
||||
vet_same_node(this->same_node[Zm],this->HaloGatherDir(source,ZpCompress,Zm,face_idx));
|
||||
vet_same_node(this->same_node[Tm],this->HaloGatherDir(source,TpCompress,Tm,face_idx));
|
||||
}
|
||||
this->face_table_computed=1;
|
||||
assert(this->u_comm_offset==this->_unified_buffer_size);
|
||||
|
@ -439,6 +439,17 @@ void WilsonKernels<Impl>::DhopDirKernel( StencilImpl &st, DoubledGaugeField &U,S
|
||||
int sF = ss*Ls; \
|
||||
WilsonKernels<Impl>::A(st_v,U_v,buf,sF,sU,Ls,1,in_v,out_v); \
|
||||
});
|
||||
#define ASM_CALL_SLICE(A) \
|
||||
auto grid = in.Grid() ; \
|
||||
int nt = grid->LocalDimensions()[4]; \
|
||||
int nxyz = Nsite/nt ; \
|
||||
for(int t=0;t<nt;t++){ \
|
||||
thread_for( sss, nxyz, { \
|
||||
int ss = t*nxyz+sss; \
|
||||
int sU = ss; \
|
||||
int sF = ss*Ls; \
|
||||
WilsonKernels<Impl>::A(st_v,U_v,buf,sF,sU,Ls,1,in_v,out_v); \
|
||||
});}
|
||||
|
||||
template <class Impl>
|
||||
void WilsonKernels<Impl>::DhopKernel(int Opt,StencilImpl &st, DoubledGaugeField &U, SiteHalfSpinor * buf,
|
||||
|
@ -358,7 +358,7 @@ public:
|
||||
return 0;
|
||||
}
|
||||
#else
|
||||
//
|
||||
// fancy calculation for shm code
|
||||
inline int SameNode(int point) {
|
||||
|
||||
int dimension = this->_directions[point];
|
||||
@ -378,7 +378,7 @@ public:
|
||||
|
||||
int nbr_proc;
|
||||
if (displacement>0) nbr_proc = 1;
|
||||
else nbr_proc = pd-1;
|
||||
else nbr_proc = pd-1;
|
||||
|
||||
// FIXME this logic needs to be sorted for three link term
|
||||
// assert( (displacement==1) || (displacement==-1));
|
||||
|
@ -305,14 +305,14 @@ accelerator_inline int acceleratorSIMTlane(int Nsimd) {
|
||||
}); \
|
||||
});
|
||||
|
||||
#define accelerator_barrier(dummy) { printf(" theGridAccelerator::wait()\n"); theGridAccelerator->wait(); }
|
||||
#define accelerator_barrier(dummy) { theGridAccelerator->wait(); }
|
||||
|
||||
inline void *acceleratorAllocShared(size_t bytes){ return malloc_shared(bytes,*theGridAccelerator);};
|
||||
inline void *acceleratorAllocDevice(size_t bytes){ return malloc_device(bytes,*theGridAccelerator);};
|
||||
inline void acceleratorFreeShared(void *ptr){free(ptr,*theGridAccelerator);};
|
||||
inline void acceleratorFreeDevice(void *ptr){free(ptr,*theGridAccelerator);};
|
||||
|
||||
inline void acceleratorCopySynchronise(void) { printf(" theCopyAccelerator::wait()\n"); theCopyAccelerator->wait(); }
|
||||
inline void acceleratorCopySynchronise(void) { theCopyAccelerator->wait(); }
|
||||
inline void acceleratorCopyDeviceToDeviceAsynch(void *from,void *to,size_t bytes) { theCopyAccelerator->memcpy(to,from,bytes);}
|
||||
inline void acceleratorCopyToDevice(void *from,void *to,size_t bytes) { theCopyAccelerator->memcpy(to,from,bytes); theCopyAccelerator->wait();}
|
||||
inline void acceleratorCopyFromDevice(void *from,void *to,size_t bytes){ theCopyAccelerator->memcpy(to,from,bytes); theCopyAccelerator->wait();}
|
||||
|
Reference in New Issue
Block a user