1
0
mirror of https://github.com/paboyle/Grid.git synced 2024-09-20 09:15:38 +01:00

Shmem related fixes for shmem compile

This commit is contained in:
Peter Boyle 2016-02-11 07:37:39 -06:00
parent e2f73e3ead
commit 7f927a541c
14 changed files with 3411 additions and 4061 deletions

7309
configure vendored

File diff suppressed because it is too large Load Diff

View File

@ -40,6 +40,9 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk>
#ifdef HAVE_MM_MALLOC_H
#include <mm_malloc.h>
#endif
#ifdef GRID_COMMS_SHMEM
#include <mpp/shmem.h>
#endif
namespace Grid {
@ -74,19 +77,37 @@ public:
// Allocate raw storage for __n objects of type _Tp, requesting 128-byte alignment.
// The second (hint) argument is unnamed and ignored. No objects are constructed.
//
// Back end chosen at compile time:
//   GRID_COMMS_SHMEM  -> shmem_align, followed by a cross-PE consistency check
//   HAVE_MM_MALLOC_H  -> _mm_malloc
//   otherwise         -> memalign
pointer allocate(size_type __n, const void* = 0)
{
#ifdef GRID_COMMS_SHMEM
  // NOTE(review): presumably static so that both are symmetric data objects, as
  // OpenSHMEM collectives require of their buffers -- confirm.
  // NOTE(review): psync is sized with the REDUCE constant and is never set to
  // _SHMEM_SYNC_VALUE before use -- confirm against the OpenSHMEM specification.
  static void * bcast;
  static long psync[_SHMEM_REDUCE_SYNC_SIZE];

  shmem_barrier_all();
  _Tp *ptr = (_Tp *) shmem_align(128,__n*sizeof(_Tp));
  shmem_barrier_all();

  // Publish PE 0's pointer value to every PE (as 32-bit words) and verify that
  // this PE's allocation landed at the same address.
  // The value compared must be the allocated pointer; the previous code named
  // the type _Tp here, which is not an expression and does not compile.
  bcast = (void *) ptr;
  shmem_broadcast32((void *)&bcast,(void *)&bcast,sizeof(void *)/4,0,0,0,shmem_n_pes(),psync);
  assert( bcast == (void *) ptr);
#else
#ifdef HAVE_MM_MALLOC_H
  _Tp * ptr = (_Tp *) _mm_malloc(__n*sizeof(_Tp),128);
#else
  _Tp * ptr = (_Tp *) memalign(128,__n*sizeof(_Tp));
#endif
#endif
  return ptr;
}
// Hand the storage at __p back to whichever back end allocate() is compiled to use.
// The element count argument is unnamed and unused.
void deallocate(pointer __p, size_type) {
  void *raw = (void *)__p;
#if defined(GRID_COMMS_SHMEM)
  shmem_free(raw);
#elif defined(HAVE_MM_MALLOC_H)
  _mm_free(raw);
#else
  free(raw);
#endif
}
// Allocator construct hook with an empty body: nothing is written to __p and
// __val is ignored.
// NOTE(review): presumably deliberate, to avoid initialising freshly allocated
// storage -- confirm with the allocator's users.
void construct(pointer __p, const _Tp& __val) { };

View File

@ -15,7 +15,7 @@
/* EMPTY_SIMD only for DEBUGGING */
#undef EMPTY_SIMD
/* GRID_COMMS_MPI */
/* GRID_COMMS_SHMEM */
#undef GRID_COMMS_MPI
/* GRID_COMMS_NONE */
@ -27,15 +27,6 @@
/* GRID_DEFAULT_PRECISION is SINGLE */
#undef GRID_DEFAULT_PRECISION_SINGLE
/* Support Altivec instructions */
#undef HAVE_ALTIVEC
/* Support AVX (Advanced Vector Extensions) instructions */
#undef HAVE_AVX
/* Support AVX2 (Advanced Vector Extensions 2) instructions */
#undef HAVE_AVX2
/* Define to 1 if you have the declaration of `be64toh', and to 0 if you
don't. */
#undef HAVE_DECL_BE64TOH
@ -50,9 +41,6 @@
/* Define to 1 if you have the <execinfo.h> header file. */
#undef HAVE_EXECINFO_H
/* Support FMA3 (Fused Multiply-Add) instructions */
#undef HAVE_FMA
/* Define to 1 if you have the `gettimeofday' function. */
#undef HAVE_GETTIMEOFDAY
@ -71,30 +59,9 @@
/* Define to 1 if you have the <memory.h> header file. */
#undef HAVE_MEMORY_H
/* Support mmx instructions */
#undef HAVE_MMX
/* Define to 1 if you have the <mm_malloc.h> header file. */
#undef HAVE_MM_MALLOC_H
/* Support SSE (Streaming SIMD Extensions) instructions */
#undef HAVE_SSE
/* Support SSE2 (Streaming SIMD Extensions 2) instructions */
#undef HAVE_SSE2
/* Support SSE3 (Streaming SIMD Extensions 3) instructions */
#undef HAVE_SSE3
/* Support SSSE4.1 (Streaming SIMD Extensions 4.1) instructions */
#undef HAVE_SSE4_1
/* Support SSSE4.2 (Streaming SIMD Extensions 4.2) instructions */
#undef HAVE_SSE4_2
/* Support SSSE3 (Supplemental Streaming SIMD Extensions 3) instructions */
#undef HAVE_SSSE3
/* Define to 1 if you have the <stdint.h> header file. */
#undef HAVE_STDINT_H
@ -137,9 +104,6 @@
/* Define to the one symbol short name of this package. */
#undef PACKAGE_TARNAME
/* Define to the home page for this package. */
#undef PACKAGE_URL
/* Define to the version of this package. */
#undef PACKAGE_VERSION

View File

@ -66,6 +66,7 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
#include <AlignedAllocator.h>
#include <Simd.h>
#include <Threads.h>
#include <Lexicographic.h>
#include <Communicator.h>
#include <Cartesian.h>
#include <Tensors.h>

View File

@ -6,6 +6,10 @@ if BUILD_COMMS_MPI
extra_sources+=communicator/Communicator_mpi.cc
endif
if BUILD_COMMS_SHMEM
extra_sources+=communicator/Communicator_shmem.cc
endif
if BUILD_COMMS_NONE
extra_sources+=communicator/Communicator_none.cc
endif

View File

@ -151,7 +151,6 @@ namespace Grid {
//PARALLEL_NESTED_LOOP2
for(int i=0;i<Mergers.size();i++){
spintime-=usecond();
int packet_id = Mergers[i].packet_id;
while(! Packets[packet_id].done ); // spin for completion

View File

@ -115,27 +115,11 @@ public:
for(int d=0;d<_ndimension;d++) idx+=_ostride[d]*ocoor[d];
return idx;
}
// Decompose a linear index into per-dimension coordinates, with dimension 0
// varying fastest.
//   coor  : output, resized to dims.size(); coor[d] receives the d-th digit
//   index : linear index to decompose (taken by value)
//   dims  : extent of each dimension
static inline void CoorFromIndex (std::vector<int>& coor,int index,std::vector<int> &dims){
  const int ndim = dims.size();
  coor.resize(ndim);
  int remaining = index;
  for(int mu=0; mu!=ndim; ++mu){
    // Peel off the current dimension's digit, then shift to the next one.
    coor[mu]   = remaining % dims[mu];
    remaining /= dims[mu];
  }
}
inline void oCoorFromOindex (std::vector<int>& coor,int Oindex){
CoorFromIndex(coor,Oindex,_rdimensions);
}
static inline void IndexFromCoor (std::vector<int>& coor,int &index,std::vector<int> &dims){
int nd=dims.size();
int stride=1;
index=0;
for(int d=0;d<nd;d++){
index = index+stride*coor[d];
stride=stride*dims[d];
}
Lexicographic::CoorFromIndex(coor,Oindex,_rdimensions);
}
//////////////////////////////////////////////////////////
// SIMD lane addressing
//////////////////////////////////////////////////////////
@ -147,7 +131,7 @@ public:
}
inline void iCoorFromIindex(std::vector<int> &coor,int lane)
{
CoorFromIndex(coor,lane,_simd_layout);
Lexicographic::CoorFromIndex(coor,lane,_simd_layout);
}
inline int PermuteDim(int dimension){
return _simd_layout[dimension]>1;
@ -179,7 +163,7 @@ public:
// Global addressing
////////////////////////////////////////////////////////////////
void GlobalIndexToGlobalCoor(int gidx,std::vector<int> &gcoor){
CoorFromIndex(gcoor,gidx,_gdimensions);
Lexicographic::CoorFromIndex(gcoor,gidx,_gdimensions);
}
void GlobalCoorToGlobalIndex(const std::vector<int> & gcoor,int & gidx){
gidx=0;

View File

@ -44,12 +44,14 @@ CartesianCommunicator::CartesianCommunicator(const std::vector<int> &processors)
shmem_init_thread(SHMEM_THREAD_FUNNELED);
_processor = shmem_my_pe();
CoorFromIndex(_processor_coor,_processor,_processors);
Lexicographic::CoorFromIndex(_processor_coor,_processor,_processors);
for(int i=0;i<_ndimension;i++){
_Nprocessors*=_processors[i];
}
if ( _processor == 0 ) {
printf("I'm running SHMEM communications %d \n",_processor);
}
int Size = shmem_n_pes();
assert(Size==_Nprocessors);
}
@ -67,13 +69,13 @@ void CartesianCommunicator::GlobalSum(uint32_t &u){
u = dest;
}
void CartesianCommunicator::GlobalSum(float &f){
static float source = u;
static float source = f;
static float dest = 0 ;
static float llwrk[_SHMEM_REDUCE_MIN_WRKDATA_SIZE];
static long psync[_SHMEM_REDUCE_SYNC_SIZE];
shmem_float_sum_to_all(&dest,&source,1,0,0,_Nprocessors,llwrk,psync);
u = dest;
f = dest;
}
void CartesianCommunicator::GlobalSumVector(float *f,int N)
{
@ -83,20 +85,20 @@ void CartesianCommunicator::GlobalSumVector(float *f,int N)
static long psync[_SHMEM_REDUCE_SYNC_SIZE];
for(int i=0;i<N;i++){
source = u[i];
source = f[i];
shmem_float_sum_to_all(&dest,&source,1,0,0,_Nprocessors,llwrk,psync);
u[i] = dest;
f[i] = dest;
}
}
void CartesianCommunicator::GlobalSum(double &d)
{
static double source = u;
static double source = d;
static double dest = 0 ;
static double llwrk[_SHMEM_REDUCE_MIN_WRKDATA_SIZE];
static long psync[_SHMEM_REDUCE_SYNC_SIZE];
shmem_double_sum_to_all(&dest,&source,1,0,0,_Nprocessors,llwrk,psync);
u = dest;
d = dest;
}
void CartesianCommunicator::GlobalSumVector(double *d,int N)
{
@ -106,9 +108,9 @@ void CartesianCommunicator::GlobalSumVector(double *d,int N)
static long psync[_SHMEM_REDUCE_SYNC_SIZE];
for(int i=0;i<N;i++){
source = u[i];
source = d[i];
shmem_double_sum_to_all(&dest,&source,1,0,0,_Nprocessors,llwrk,psync);
u[i] = dest;
d[i] = dest;
}
}
void CartesianCommunicator::ShiftedRanks(int dim,int shift,int &source,int &dest)
@ -117,21 +119,21 @@ void CartesianCommunicator::ShiftedRanks(int dim,int shift,int &source,int &dest
assert(std::abs(shift) <_processors[dim]);
coor[dim] = (coor[dim] + shift + _processors[dim])%_processors[dim];
IndexFromCoor(coor,source,_processors);
Lexicographic::IndexFromCoor(coor,source,_processors);
coor[dim] = (coor[dim] - shift + _processors[dim])%_processors[dim];
IndexFromCoor(coor,dest,_processors);
Lexicographic::IndexFromCoor(coor,dest,_processors);
}
int CartesianCommunicator::RankFromProcessorCoor(std::vector<int> &coor)
{
int rank;
IndexFromCoor(coor,rank,_processors);
Lexicographic::IndexFromCoor(coor,rank,_processors);
return rank;
}
void CartesianCommunicator::ProcessorCoorFromRank(int rank, std::vector<int> &coor)
{
CoorFromIndex(coor,rank,_processors);
Lexicographic::CoorFromIndex(coor,rank,_processors);
}
// Basic Halo comms primitive
@ -187,7 +189,9 @@ void CartesianCommunicator::Broadcast(int root,void* data, int bytes)
}
void CartesianCommunicator::BroadcastWorld(int root,void* data, int bytes)
{
Broadcast(root,data,bytes);
static long psync[_SHMEM_REDUCE_SYNC_SIZE];
assert( (bytes % 4)==0);
shmem_broadcast32(data,data,bytes/4,root,0,0,shmem_n_pes(),psync);
}
}

View File

@ -178,7 +178,7 @@ template<class vobj> inline void sliceSum(const Lattice<vobj> &Data,std::vector<
// sum over reduced dimension planes, breaking out orthog dir
for(int ss=0;ss<grid->oSites();ss++){
GridBase::CoorFromIndex(coor,ss,grid->_rdimensions);
Lexicographic::CoorFromIndex(coor,ss,grid->_rdimensions);
int r = coor[orthogdim];
lvSum[r]=lvSum[r]+Data._odata[ss];
}

View File

@ -115,9 +115,9 @@ inline void blockProject(Lattice<iVector<CComplex,nbasis > > &coarseData,
int sc;
std::vector<int> coor_c(_ndimension);
std::vector<int> coor_f(_ndimension);
GridBase::CoorFromIndex(coor_f,sf,fine->_rdimensions);
Lexicographic::CoorFromIndex(coor_f,sf,fine->_rdimensions);
for(int d=0;d<_ndimension;d++) coor_c[d]=coor_f[d]/block_r[d];
GridBase::IndexFromCoor(coor_c,sc,coarse->_rdimensions);
Lexicographic::IndexFromCoor(coor_c,sc,coarse->_rdimensions);
for(int i=0;i<nbasis;i++) {
@ -160,9 +160,9 @@ PARALLEL_FOR_LOOP
std::vector<int> coor_c(_ndimension);
std::vector<int> coor_f(_ndimension);
GridBase::CoorFromIndex(coor_f,sf,fine->_rdimensions);
Lexicographic::CoorFromIndex(coor_f,sf,fine->_rdimensions);
for(int d=0;d<_ndimension;d++) coor_c[d]=coor_f[d]/block_r[d];
GridBase::IndexFromCoor(coor_c,sc,coarse->_rdimensions);
Lexicographic::IndexFromCoor(coor_c,sc,coarse->_rdimensions);
// z = A x + y
fineZ._odata[sf]=coarseA._odata[sc]*fineX._odata[sf]+fineY._odata[sf];
@ -225,9 +225,9 @@ inline void blockSum(Lattice<vobj> &coarseData,const Lattice<vobj> &fineData)
std::vector<int> coor_c(_ndimension);
std::vector<int> coor_f(_ndimension);
GridBase::CoorFromIndex(coor_f,sf,fine->_rdimensions);
Lexicographic::CoorFromIndex(coor_f,sf,fine->_rdimensions);
for(int d=0;d<_ndimension;d++) coor_c[d]=coor_f[d]/block_r[d];
GridBase::IndexFromCoor(coor_c,sc,coarse->_rdimensions);
Lexicographic::IndexFromCoor(coor_c,sc,coarse->_rdimensions);
coarseData._odata[sc]=coarseData._odata[sc]+fineData._odata[sf];
@ -311,9 +311,9 @@ inline void blockPromote(const Lattice<iVector<CComplex,nbasis > > &coarseData,
std::vector<int> coor_c(_ndimension);
std::vector<int> coor_f(_ndimension);
GridBase::CoorFromIndex(coor_f,sf,fine->_rdimensions);
Lexicographic::CoorFromIndex(coor_f,sf,fine->_rdimensions);
for(int d=0;d<_ndimension;d++) coor_c[d]=coor_f[d]/block_r[d];
GridBase::IndexFromCoor(coor_c,sc,coarse->_rdimensions);
Lexicographic::IndexFromCoor(coor_c,sc,coarse->_rdimensions);
for(int i=0;i<nbasis;i++) {
if(i==0) fineData._odata[sf]=coarseData._odata[sc](i) * Basis[i]._odata[sf];

View File

@ -146,7 +146,7 @@ class BinaryIO {
csum = 0;
std::vector<int> lcoor;
for(int l=0;l<grid->lSites();l++){
grid->CoorFromIndex(lcoor,l,grid->_ldimensions);
Lexicographic::CoorFromIndex(lcoor,l,grid->_ldimensions);
peekLocalSite(siteObj,lat,lcoor);
munge(siteObj,fileObj,csum);
}
@ -451,7 +451,7 @@ class BinaryIO {
std::vector<int> lsite(nd);
std::vector<int> iosite(nd);
grid->CoorFromIndex(tsite,tlex,range);
Lexicographic::CoorFromIndex(tsite,tlex,range);
for(int d=0;d<nd;d++){
lsite[d] = tsite[d]%grid->_ldimensions[d]; // local site
@ -588,7 +588,7 @@ class BinaryIO {
std::vector<int> lsite(nd);
std::vector<int> iosite(nd);
grid->CoorFromIndex(tsite,tlex,range);
Lexicographic::CoorFromIndex(tsite,tlex,range);
for(int d=0;d<nd;d++){
lsite[d] = tsite[d]%grid->_ldimensions[d]; // local site

View File

@ -105,7 +105,7 @@ void LebesgueOrder::IterateI(int ND,
x[d]=xi[d]+xo[d];
}
IndexInteger index;
grid->IndexFromCoor(x,index,grid->_rdimensions);
Lexicographic::IndexFromCoor(x,index,grid->_rdimensions);
_LebesgueReorder.push_back(index);
}
}

View File

@ -96,13 +96,13 @@ int main (int argc, char ** argv)
std::vector<int> peer(4);
Complex tmp =cm;
Integer index=real(tmp);
Fine.CoorFromIndex(peer,index,latt_size);
Lexicographic::CoorFromIndex(peer,index,latt_size);
if (nrm > 0){
std::cerr<<"FAIL shift "<< shift<<" in dir "<< dir<<" ["<<coor[0]<<","<<coor[1]<<","<<coor[2]<<","<<coor[3]<<"] = "<< cm()()()<<" expect "<<scm<<" "<<nrm<<std::endl;
std::cerr<<"Got "<<index<<" " << peer[0]<<","<<peer[1]<<","<<peer[2]<<","<<peer[3]<<std::endl;
index=real(scm);
Fine.CoorFromIndex(peer,index,latt_size);
Lexicographic::CoorFromIndex(peer,index,latt_size);
std::cerr<<"Expect "<<index<<" " << peer[0]<<","<<peer[1]<<","<<peer[2]<<","<<peer[3]<<std::endl;
}
}}}}

View File

@ -132,7 +132,7 @@ int main (int argc, char ** argv)
std::vector<int> peer(4);
Complex ctmp = cm;
Integer index=real(ctmp);
Fine.CoorFromIndex(peer,index,latt_size);
Lexicographic::CoorFromIndex(peer,index,latt_size);
if (nrm > 0){
std::cout<<"FAIL shift "<< shift<<" in dir "<< dir
@ -140,7 +140,7 @@ int main (int argc, char ** argv)
<< cm()()()<<" expect "<<scm<<" "<<nrm<<std::endl;
std::cout<<"Got "<<index<<" " << peer[0]<<","<<peer[1]<<","<<peer[2]<<","<<peer[3]<<std::endl;
index=real(scm);
Fine.CoorFromIndex(peer,index,latt_size);
Lexicographic::CoorFromIndex(peer,index,latt_size);
std::cout<<"Expect "<<index<<" " << peer[0]<<","<<peer[1]<<","<<peer[2]<<","<<peer[3]<<std::endl;
exit(-1);
}
@ -180,7 +180,7 @@ int main (int argc, char ** argv)
std::vector<int> peer(4);
Complex ctmp=cmeo;
Integer index=real(ctmp);
Fine.CoorFromIndex(peer,index,latt_size);
Lexicographic::CoorFromIndex(peer,index,latt_size);
double nrm = abs(cmeo()()()-scm);
if (nrm != 0) {
@ -189,7 +189,7 @@ int main (int argc, char ** argv)
<< cmeo()()()<<" expect "<<scm<<" "<<nrm<<std::endl;
std::cout<<"Got "<<index<<" " << peer[0]<<","<<peer[1]<<","<<peer[2]<<","<<peer[3]<<std::endl;
index=real(scm);
Fine.CoorFromIndex(peer,index,latt_size);
Lexicographic::CoorFromIndex(peer,index,latt_size);
std::cout<<"Expect "<<index<<" " << peer[0]<<","<<peer[1]<<","<<peer[2]<<","<<peer[3]<<std::endl;
exx=1;
@ -205,7 +205,7 @@ int main (int argc, char ** argv)
<< cm()()()<<" expect "<<scm<<" "<<nrm<<std::endl;
std::cout<<"Got "<<index<<" " << peer[0]<<","<<peer[1]<<","<<peer[2]<<","<<peer[3]<<std::endl;
index=real(scm);
Fine.CoorFromIndex(peer,index,latt_size);
Lexicographic::CoorFromIndex(peer,index,latt_size);
std::cout<<"Expect "<<index<<" " << peer[0]<<","<<peer[1]<<","<<peer[2]<<","<<peer[3]<<std::endl;
exx=1;
} else if (1) {