1
0
mirror of https://github.com/paboyle/Grid.git synced 2024-11-13 01:05:36 +00:00

Merge branch 'bgq-asm-shmemfixes' into feature/bgq-asm

This commit is contained in:
paboyle 2017-03-12 23:30:09 +00:00
commit 8b99d80d8c
4 changed files with 32 additions and 14 deletions

View File

@ -26,6 +26,8 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk>
*************************************************************************************/ *************************************************************************************/
/* END LEGAL */ /* END LEGAL */
#include <Grid/GridCore.h> #include <Grid/GridCore.h>
#include <Grid/GridQCDcore.h>
#include <Grid/qcd/action/ActionCore.h>
#include <mpi.h> #include <mpi.h>
namespace Grid { namespace Grid {

View File

@ -27,6 +27,7 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk>
/* END LEGAL */ /* END LEGAL */
#include <Grid/Grid.h> #include <Grid/Grid.h>
#include <mpp/shmem.h> #include <mpp/shmem.h>
#include <array>
namespace Grid { namespace Grid {
@ -51,7 +52,7 @@ typedef struct HandShake_t {
} HandShake; } HandShake;
std::array<long,_SHMEM_REDUCE_SYNC_SIZE> make_psync_init(void) { std::array<long,_SHMEM_REDUCE_SYNC_SIZE> make_psync_init(void) {
array<long,_SHMEM_REDUCE_SYNC_SIZE> ret; std::array<long,_SHMEM_REDUCE_SYNC_SIZE> ret;
ret.fill(SHMEM_SYNC_VALUE); ret.fill(SHMEM_SYNC_VALUE);
return ret; return ret;
} }
@ -109,7 +110,7 @@ void CartesianCommunicator::GlobalSum(uint32_t &u){
source = u; source = u;
dest = 0; dest = 0;
shmem_longlong_sum_to_all(&dest,&source,1,0,0,_Nprocessors,llwrk,psync); shmem_longlong_sum_to_all(&dest,&source,1,0,0,_Nprocessors,llwrk,psync.data());
shmem_barrier_all(); // necessary? shmem_barrier_all(); // necessary?
u = dest; u = dest;
} }
@ -125,7 +126,7 @@ void CartesianCommunicator::GlobalSum(uint64_t &u){
source = u; source = u;
dest = 0; dest = 0;
shmem_longlong_sum_to_all(&dest,&source,1,0,0,_Nprocessors,llwrk,psync); shmem_longlong_sum_to_all(&dest,&source,1,0,0,_Nprocessors,llwrk,psync.data());
shmem_barrier_all(); // necessary? shmem_barrier_all(); // necessary?
u = dest; u = dest;
} }
@ -137,7 +138,8 @@ void CartesianCommunicator::GlobalSum(float &f){
source = f; source = f;
dest =0.0; dest =0.0;
shmem_float_sum_to_all(&dest,&source,1,0,0,_Nprocessors,llwrk,psync); shmem_float_sum_to_all(&dest,&source,1,0,0,_Nprocessors,llwrk,psync.data());
shmem_barrier_all();
f = dest; f = dest;
} }
void CartesianCommunicator::GlobalSumVector(float *f,int N) void CartesianCommunicator::GlobalSumVector(float *f,int N)
@ -148,14 +150,16 @@ void CartesianCommunicator::GlobalSumVector(float *f,int N)
static std::array<long,_SHMEM_REDUCE_SYNC_SIZE> psync = psync_init; static std::array<long,_SHMEM_REDUCE_SYNC_SIZE> psync = psync_init;
if ( shmem_addr_accessible(f,_processor) ){ if ( shmem_addr_accessible(f,_processor) ){
shmem_float_sum_to_all(f,f,N,0,0,_Nprocessors,llwrk,psync); shmem_float_sum_to_all(f,f,N,0,0,_Nprocessors,llwrk,psync.data());
shmem_barrier_all();
return; return;
} }
for(int i=0;i<N;i++){ for(int i=0;i<N;i++){
dest =0.0; dest =0.0;
source = f[i]; source = f[i];
shmem_float_sum_to_all(&dest,&source,1,0,0,_Nprocessors,llwrk,psync); shmem_float_sum_to_all(&dest,&source,1,0,0,_Nprocessors,llwrk,psync.data());
shmem_barrier_all();
f[i] = dest; f[i] = dest;
} }
} }
@ -168,7 +172,8 @@ void CartesianCommunicator::GlobalSum(double &d)
source = d; source = d;
dest = 0; dest = 0;
shmem_double_sum_to_all(&dest,&source,1,0,0,_Nprocessors,llwrk,psync); shmem_double_sum_to_all(&dest,&source,1,0,0,_Nprocessors,llwrk,psync.data());
shmem_barrier_all();
d = dest; d = dest;
} }
void CartesianCommunicator::GlobalSumVector(double *d,int N) void CartesianCommunicator::GlobalSumVector(double *d,int N)
@ -180,14 +185,16 @@ void CartesianCommunicator::GlobalSumVector(double *d,int N)
if ( shmem_addr_accessible(d,_processor) ){ if ( shmem_addr_accessible(d,_processor) ){
shmem_double_sum_to_all(d,d,N,0,0,_Nprocessors,llwrk,psync); shmem_double_sum_to_all(d,d,N,0,0,_Nprocessors,llwrk,psync.data());
shmem_barrier_all();
return; return;
} }
for(int i=0;i<N;i++){ for(int i=0;i<N;i++){
source = d[i]; source = d[i];
dest =0.0; dest =0.0;
shmem_double_sum_to_all(&dest,&source,1,0,0,_Nprocessors,llwrk,psync); shmem_double_sum_to_all(&dest,&source,1,0,0,_Nprocessors,llwrk,psync.data());
shmem_barrier_all();
d[i] = dest; d[i] = dest;
} }
} }
@ -282,11 +289,13 @@ void CartesianCommunicator::SendToRecvFromBegin(std::vector<CommsRequest_t> &lis
SHMEM_VET(recv); SHMEM_VET(recv);
// shmem_putmem_nb(recv,xmit,bytes,dest,NULL); // shmem_putmem_nb(recv,xmit,bytes,dest,NULL);
shmem_putmem(recv,xmit,bytes,dest); shmem_putmem(recv,xmit,bytes,dest);
if ( CommunicatorPolicy == CommunicatorPolicySequential ) shmem_barrier_all();
} }
void CartesianCommunicator::SendToRecvFromComplete(std::vector<CommsRequest_t> &list) void CartesianCommunicator::SendToRecvFromComplete(std::vector<CommsRequest_t> &list)
{ {
// shmem_quiet(); // I'm done // shmem_quiet(); // I'm done
shmem_barrier_all();// He's done too if( CommunicatorPolicy == CommunicatorPolicyConcurrent ) shmem_barrier_all();// He's done too
} }
void CartesianCommunicator::Barrier(void) void CartesianCommunicator::Barrier(void)
{ {
@ -301,13 +310,13 @@ void CartesianCommunicator::Broadcast(int root,void* data, int bytes)
int words = bytes/4; int words = bytes/4;
if ( shmem_addr_accessible(data,_processor) ){ if ( shmem_addr_accessible(data,_processor) ){
shmem_broadcast32(data,data,words,root,0,0,shmem_n_pes(),psync); shmem_broadcast32(data,data,words,root,0,0,shmem_n_pes(),psync.data());
return; return;
} }
for(int w=0;w<words;w++){ for(int w=0;w<words;w++){
word = array[w]; word = array[w];
shmem_broadcast32((void *)&word,(void *)&word,1,root,0,0,shmem_n_pes(),psync); shmem_broadcast32((void *)&word,(void *)&word,1,root,0,0,shmem_n_pes(),psync.data());
if ( shmem_my_pe() != root ) { if ( shmem_my_pe() != root ) {
array[w] = word; array[w] = word;
} }
@ -325,13 +334,17 @@ void CartesianCommunicator::BroadcastWorld(int root,void* data, int bytes)
for(int w=0;w<words;w++){ for(int w=0;w<words;w++){
word = array[w]; word = array[w];
shmem_broadcast32((void *)&word,(void *)&word,1,root,0,0,shmem_n_pes(),psync); shmem_broadcast32((void *)&word,(void *)&word,1,root,0,0,shmem_n_pes(),psync.data());
if ( shmem_my_pe() != root ) { if ( shmem_my_pe() != root ) {
array[w]= word; array[w]= word;
} }
shmem_barrier_all(); shmem_barrier_all();
} }
} }
// Returns the value of shmem_my_pe() as this process's world rank.
// In OpenSHMEM that is the calling PE's number; no remapping is applied here.
int CartesianCommunicator::RankWorld(void){
return shmem_my_pe();
}
} }

View File

@ -229,7 +229,7 @@ namespace QCD {
// } // }
int face_idx=0; int face_idx=0;
if ( dag ) { if ( dag ) {
std::cout << " Optimised Dagger compress " <<std::endl; // std::cout << " Optimised Dagger compress " <<std::endl;
this->HaloGatherDir(source,XpCompress,Xp,face_idx); this->HaloGatherDir(source,XpCompress,Xp,face_idx);
this->HaloGatherDir(source,YpCompress,Yp,face_idx); this->HaloGatherDir(source,YpCompress,Yp,face_idx);
this->HaloGatherDir(source,ZpCompress,Zp,face_idx); this->HaloGatherDir(source,ZpCompress,Zp,face_idx);

View File

@ -390,6 +390,9 @@ void Grid_finalize(void)
MPI_Finalize(); MPI_Finalize();
Grid_unquiesce_nodes(); Grid_unquiesce_nodes();
#endif #endif
#if defined (GRID_COMMS_SHMEM)
shmem_finalize();
#endif
} }
void * Grid_backtrace_buffer[_NBACKTRACE]; void * Grid_backtrace_buffer[_NBACKTRACE];