From e2f73e3ead5cbc29387afba8d37426b0cd02c47d Mon Sep 17 00:00:00 2001
From: paboyle
Date: Wed, 10 Feb 2016 16:50:32 -0800
Subject: [PATCH] Updates for shmem

---
 configure.ac                              |   5 +
 lib/communicator/Communicator_base.h      |   3 +
 lib/communicator/Communicator_shmem.cc    | 216 ++++++++++++++++++++++
 lib/qcd/action/fermion/WilsonFermion5D.cc |   2 -
 4 files changed, 224 insertions(+), 2 deletions(-)
 create mode 100644 lib/communicator/Communicator_shmem.cc

diff --git a/configure.ac b/configure.ac
index d25d1674..b911e296 100644
--- a/configure.ac
+++ b/configure.ac
@@ -178,11 +178,16 @@ case ${ac_COMMS} in
      echo Configuring for MPI communications
      AC_DEFINE([GRID_COMMS_MPI],[1],[GRID_COMMS_MPI] )
      ;;
+     shmem)
+     echo Configuring for SHMEM communications
+     AC_DEFINE([GRID_COMMS_SHMEM],[1],[GRID_COMMS_SHMEM] )
+     ;;
      *)
      AC_MSG_ERROR([${ac_COMMS} unsupported --enable-comms option]);
      ;;
 esac
 
+AM_CONDITIONAL(BUILD_COMMS_SHMEM,[ test "X${ac_COMMS}X" == "XshmemX" ])
 AM_CONDITIONAL(BUILD_COMMS_MPI,[ test "X${ac_COMMS}X" == "XmpiX" ])
 AM_CONDITIONAL(BUILD_COMMS_NONE,[ test "X${ac_COMMS}X" == "XnoneX" ])
 
diff --git a/lib/communicator/Communicator_base.h b/lib/communicator/Communicator_base.h
index 3e3eaf9c..0eaa2daa 100644
--- a/lib/communicator/Communicator_base.h
+++ b/lib/communicator/Communicator_base.h
@@ -34,6 +34,9 @@ Author: Peter Boyle
 #ifdef GRID_COMMS_MPI
 #include <mpi.h>
 #endif
+#ifdef GRID_COMMS_SHMEM
+#include <mpp/shmem.h>
+#endif
 namespace Grid {
 class CartesianCommunicator {
   public:
diff --git a/lib/communicator/Communicator_shmem.cc b/lib/communicator/Communicator_shmem.cc
new file mode 100644
index 00000000..0edbb4cb
--- /dev/null
+++ b/lib/communicator/Communicator_shmem.cc
@@ -0,0 +1,216 @@
+    /*************************************************************************************
+
+    Grid physics library, www.github.com/paboyle/Grid
+
+    Source file: ./lib/communicator/Communicator_shmem.cc
+
+    Copyright (C) 2015
+
+Author: Peter Boyle
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License along
+    with this program; if not, write to the Free Software Foundation, Inc.,
+    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+
+    See the full license in the file "LICENSE" in the top level distribution directory
+    *************************************************************************************/
+    /*  END LEGAL */
+#include "Grid.h"
+#include <mpp/shmem.h>
+
+// NOTE(review): text inside angle brackets (include targets, template arguments) and the
+// code between the GlobalSumVector(float*) loop header and the body of ShiftedRanks were
+// missing from the copy this was reviewed from and are reconstructed; confirm upstream.
+// NOTE(review): the static psync arrays below rely on zero-initialisation; OpenSHMEM asks
+// for _SHMEM_SYNC_VALUE in every element before first use - confirm they agree on target.
+
+namespace Grid {
+
+  // Should error check all SHMEM calls.
+
+// Start SHMEM, record this PE's rank and derive its coordinate in the processor grid.
+CartesianCommunicator::CartesianCommunicator(const std::vector<int> &processors)
+{
+  _ndimension = processors.size();
+
+  _Nprocessors=1;
+  _processors = processors;
+  _processor_coor.resize(_ndimension);
+
+  shmem_init_thread(SHMEM_THREAD_FUNNELED);
+  _processor = shmem_my_pe();
+
+  CoorFromIndex(_processor_coor,_processor,_processors);
+
+  for(int i=0;i<_ndimension;i++){
+    _Nprocessors*=_processors[i];
+  }
+
+  // The launched PE count must match the requested processor grid.
+  int Size = shmem_n_pes();
+  assert(Size==_Nprocessors);
+}
+
+// Global sums. Operands and work arrays are static so that they are symmetric
+// (remotely accessible) objects, and are assigned on every call: a static
+// initialiser would only capture the argument of the first call.
+void CartesianCommunicator::GlobalSum(uint32_t &u){
+  static long long source;
+  static long long dest;
+  static long long llwrk[_SHMEM_REDUCE_MIN_WRKDATA_SIZE];
+  static long      psync[_SHMEM_REDUCE_SYNC_SIZE];
+
+  //  int nreduce=1;
+  //  int pestart=0;
+  //  int logStride=0;
+  source = u;
+  dest   = 0;
+  shmem_longlong_sum_to_all(&dest,&source,1,0,0,_Nprocessors,llwrk,psync);
+  u = dest;
+}
+void CartesianCommunicator::GlobalSum(float &f){
+  static float source;
+  static float dest;
+  static float llwrk[_SHMEM_REDUCE_MIN_WRKDATA_SIZE];
+  static long  psync[_SHMEM_REDUCE_SYNC_SIZE];
+
+  source = f;
+  dest   = 0;
+  shmem_float_sum_to_all(&dest,&source,1,0,0,_Nprocessors,llwrk,psync);
+  f = dest;
+}
+void CartesianCommunicator::GlobalSumVector(float *f,int N)
+{
+  static float source ;
+  static float dest = 0 ;
+  static float llwrk[_SHMEM_REDUCE_MIN_WRKDATA_SIZE];
+  static long  psync[_SHMEM_REDUCE_SYNC_SIZE];
+
+  // One single-element reduction per entry of f.
+  for(int i=0;i<N;i++){
+    source = f[i];
+    shmem_float_sum_to_all(&dest,&source,1,0,0,_Nprocessors,llwrk,psync);
+    f[i] = dest;
+  }
+}
+void CartesianCommunicator::GlobalSum(double &d)
+{
+  static double source;
+  static double dest;
+  static double llwrk[_SHMEM_REDUCE_MIN_WRKDATA_SIZE];
+  static long   psync[_SHMEM_REDUCE_SYNC_SIZE];
+
+  source = d;
+  dest   = 0;
+  shmem_double_sum_to_all(&dest,&source,1,0,0,_Nprocessors,llwrk,psync);
+  d = dest;
+}
+void CartesianCommunicator::GlobalSumVector(double *d,int N)
+{
+  static double source ;
+  static double dest = 0 ;
+  static double llwrk[_SHMEM_REDUCE_MIN_WRKDATA_SIZE];
+  static long   psync[_SHMEM_REDUCE_SYNC_SIZE];
+
+  for(int i=0;i<N;i++){
+    source = d[i];
+    shmem_double_sum_to_all(&dest,&source,1,0,0,_Nprocessors,llwrk,psync);
+    d[i] = dest;
+  }
+}
+
+// Ranks of the neighbours at +shift (source) and -shift (dest) along dim, with wrap-around.
+void CartesianCommunicator::ShiftedRanks(int dim,int shift,int &source,int &dest)
+{
+  std::vector<int> coor = _processor_coor;
+  assert(std::abs(shift) <_processors[dim]);
+
+  coor[dim] = (_processor_coor[dim] + shift + _processors[dim])%_processors[dim];
+  IndexFromCoor(coor,source,_processors);
+
+  // Offset from this PE's own coordinate, not from the already shifted one
+  // (which would cancel the shift and give back this PE's rank).
+  coor[dim] = (_processor_coor[dim] - shift + _processors[dim])%_processors[dim];
+  IndexFromCoor(coor,dest,_processors);
+}
+int CartesianCommunicator::RankFromProcessorCoor(std::vector<int> &coor)
+{
+  int rank;
+  IndexFromCoor(coor,rank,_processors);
+  return rank;
+}
+void  CartesianCommunicator::ProcessorCoorFromRank(int rank, std::vector<int> &coor)
+{
+  CoorFromIndex(coor,rank,_processors);
+}
+
+// Basic Halo comms primitive
+void CartesianCommunicator::SendToRecvFrom(void *xmit,
+                                           int dest,
+                                           void *recv,
+                                           int from,
+                                           int bytes)
+{
+  std::vector<CommsRequest_t> reqs(0);
+  SendToRecvFromBegin(reqs,xmit,dest,recv,from,bytes);
+  SendToRecvFromComplete(reqs);
+}
+void CartesianCommunicator::RecvFrom(void *recv,
+                                     int from,
+                                     int bytes)
+{
+  // Need to change interface to know send buffer; change to a get/put interface.
+  assert(0);
+}
+void CartesianCommunicator::SendTo(void *xmit,
+                                   int dest,
+                                   int bytes)
+{
+  // Need to change interface to know destination buffer... likely needed for I/O
+  assert(0);
+}
+
+// Basic Halo comms primitive
+void CartesianCommunicator::SendToRecvFromBegin(std::vector<CommsRequest_t> &list,
+                                                void *xmit,
+                                                int dest,
+                                                void *recv,
+                                                int from,
+                                                int bytes)
+{
+  // One-sided put of xmit into recv on PE dest; list and from are unused here.
+  shmem_putmem_nb(recv,xmit,bytes,dest,NULL);
+}
+void CartesianCommunicator::SendToRecvFromComplete(std::vector<CommsRequest_t> &list)
+{
+  shmem_quiet();      // I'm done
+  shmem_barrier_all();// He's done too
+}
+void CartesianCommunicator::Barrier(void)
+{
+  shmem_barrier_all();
+}
+void CartesianCommunicator::Broadcast(int root,void* data, int bytes)
+{
+  // Broadcasts take a pSync of the broadcast size, not the reduction size.
+  static long  psync[_SHMEM_BCAST_SYNC_SIZE];
+  assert( (bytes % 4)==0);
+  shmem_broadcast32(data,data,bytes/4,root,0,0,_Nprocessors,psync);
+}
+void CartesianCommunicator::BroadcastWorld(int root,void* data, int bytes)
+{
+  Broadcast(root,data,bytes);
+}
+
+}
+
diff --git a/lib/qcd/action/fermion/WilsonFermion5D.cc b/lib/qcd/action/fermion/WilsonFermion5D.cc
index c9982e4a..57e51d67 100644
--- a/lib/qcd/action/fermion/WilsonFermion5D.cc
+++ b/lib/qcd/action/fermion/WilsonFermion5D.cc
@@ -427,8 +427,6 @@ void WilsonFermion5D<Impl>::DhopInternalCommsOverlapCompute(StencilImpl & st, Le
   //  assert((dag==DaggerNo) ||(dag==DaggerYes));
   alltime-=usecond();
 
-  int calls;
-  int updates;
   Compressor compressor(dag);
 
   // Assume balanced KMP_AFFINITY; this is forced in GridThread.h