mirror of
https://github.com/paboyle/Grid.git
synced 2024-11-14 01:35:36 +00:00
Merge branch 'bgq-asm-shmemfixes' into feature/bgq-asm
This commit is contained in:
commit
8b99d80d8c
@ -26,6 +26,8 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
|||||||
*************************************************************************************/
|
*************************************************************************************/
|
||||||
/* END LEGAL */
|
/* END LEGAL */
|
||||||
#include <Grid/GridCore.h>
|
#include <Grid/GridCore.h>
|
||||||
|
#include <Grid/GridQCDcore.h>
|
||||||
|
#include <Grid/qcd/action/ActionCore.h>
|
||||||
#include <mpi.h>
|
#include <mpi.h>
|
||||||
|
|
||||||
namespace Grid {
|
namespace Grid {
|
||||||
|
@ -27,6 +27,7 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
|||||||
/* END LEGAL */
|
/* END LEGAL */
|
||||||
#include <Grid/Grid.h>
|
#include <Grid/Grid.h>
|
||||||
#include <mpp/shmem.h>
|
#include <mpp/shmem.h>
|
||||||
|
#include <array>
|
||||||
|
|
||||||
namespace Grid {
|
namespace Grid {
|
||||||
|
|
||||||
@ -51,7 +52,7 @@ typedef struct HandShake_t {
|
|||||||
} HandShake;
|
} HandShake;
|
||||||
|
|
||||||
std::array<long,_SHMEM_REDUCE_SYNC_SIZE> make_psync_init(void) {
|
std::array<long,_SHMEM_REDUCE_SYNC_SIZE> make_psync_init(void) {
|
||||||
array<long,_SHMEM_REDUCE_SYNC_SIZE> ret;
|
std::array<long,_SHMEM_REDUCE_SYNC_SIZE> ret;
|
||||||
ret.fill(SHMEM_SYNC_VALUE);
|
ret.fill(SHMEM_SYNC_VALUE);
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
@ -109,7 +110,7 @@ void CartesianCommunicator::GlobalSum(uint32_t &u){
|
|||||||
|
|
||||||
source = u;
|
source = u;
|
||||||
dest = 0;
|
dest = 0;
|
||||||
shmem_longlong_sum_to_all(&dest,&source,1,0,0,_Nprocessors,llwrk,psync);
|
shmem_longlong_sum_to_all(&dest,&source,1,0,0,_Nprocessors,llwrk,psync.data());
|
||||||
shmem_barrier_all(); // necessary?
|
shmem_barrier_all(); // necessary?
|
||||||
u = dest;
|
u = dest;
|
||||||
}
|
}
|
||||||
@ -125,7 +126,7 @@ void CartesianCommunicator::GlobalSum(uint64_t &u){
|
|||||||
|
|
||||||
source = u;
|
source = u;
|
||||||
dest = 0;
|
dest = 0;
|
||||||
shmem_longlong_sum_to_all(&dest,&source,1,0,0,_Nprocessors,llwrk,psync);
|
shmem_longlong_sum_to_all(&dest,&source,1,0,0,_Nprocessors,llwrk,psync.data());
|
||||||
shmem_barrier_all(); // necessary?
|
shmem_barrier_all(); // necessary?
|
||||||
u = dest;
|
u = dest;
|
||||||
}
|
}
|
||||||
@ -137,7 +138,8 @@ void CartesianCommunicator::GlobalSum(float &f){
|
|||||||
|
|
||||||
source = f;
|
source = f;
|
||||||
dest =0.0;
|
dest =0.0;
|
||||||
shmem_float_sum_to_all(&dest,&source,1,0,0,_Nprocessors,llwrk,psync);
|
shmem_float_sum_to_all(&dest,&source,1,0,0,_Nprocessors,llwrk,psync.data());
|
||||||
|
shmem_barrier_all();
|
||||||
f = dest;
|
f = dest;
|
||||||
}
|
}
|
||||||
void CartesianCommunicator::GlobalSumVector(float *f,int N)
|
void CartesianCommunicator::GlobalSumVector(float *f,int N)
|
||||||
@ -148,14 +150,16 @@ void CartesianCommunicator::GlobalSumVector(float *f,int N)
|
|||||||
static std::array<long,_SHMEM_REDUCE_SYNC_SIZE> psync = psync_init;
|
static std::array<long,_SHMEM_REDUCE_SYNC_SIZE> psync = psync_init;
|
||||||
|
|
||||||
if ( shmem_addr_accessible(f,_processor) ){
|
if ( shmem_addr_accessible(f,_processor) ){
|
||||||
shmem_float_sum_to_all(f,f,N,0,0,_Nprocessors,llwrk,psync);
|
shmem_float_sum_to_all(f,f,N,0,0,_Nprocessors,llwrk,psync.data());
|
||||||
|
shmem_barrier_all();
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
for(int i=0;i<N;i++){
|
for(int i=0;i<N;i++){
|
||||||
dest =0.0;
|
dest =0.0;
|
||||||
source = f[i];
|
source = f[i];
|
||||||
shmem_float_sum_to_all(&dest,&source,1,0,0,_Nprocessors,llwrk,psync);
|
shmem_float_sum_to_all(&dest,&source,1,0,0,_Nprocessors,llwrk,psync.data());
|
||||||
|
shmem_barrier_all();
|
||||||
f[i] = dest;
|
f[i] = dest;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -168,7 +172,8 @@ void CartesianCommunicator::GlobalSum(double &d)
|
|||||||
|
|
||||||
source = d;
|
source = d;
|
||||||
dest = 0;
|
dest = 0;
|
||||||
shmem_double_sum_to_all(&dest,&source,1,0,0,_Nprocessors,llwrk,psync);
|
shmem_double_sum_to_all(&dest,&source,1,0,0,_Nprocessors,llwrk,psync.data());
|
||||||
|
shmem_barrier_all();
|
||||||
d = dest;
|
d = dest;
|
||||||
}
|
}
|
||||||
void CartesianCommunicator::GlobalSumVector(double *d,int N)
|
void CartesianCommunicator::GlobalSumVector(double *d,int N)
|
||||||
@ -180,14 +185,16 @@ void CartesianCommunicator::GlobalSumVector(double *d,int N)
|
|||||||
|
|
||||||
|
|
||||||
if ( shmem_addr_accessible(d,_processor) ){
|
if ( shmem_addr_accessible(d,_processor) ){
|
||||||
shmem_double_sum_to_all(d,d,N,0,0,_Nprocessors,llwrk,psync);
|
shmem_double_sum_to_all(d,d,N,0,0,_Nprocessors,llwrk,psync.data());
|
||||||
|
shmem_barrier_all();
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
for(int i=0;i<N;i++){
|
for(int i=0;i<N;i++){
|
||||||
source = d[i];
|
source = d[i];
|
||||||
dest =0.0;
|
dest =0.0;
|
||||||
shmem_double_sum_to_all(&dest,&source,1,0,0,_Nprocessors,llwrk,psync);
|
shmem_double_sum_to_all(&dest,&source,1,0,0,_Nprocessors,llwrk,psync.data());
|
||||||
|
shmem_barrier_all();
|
||||||
d[i] = dest;
|
d[i] = dest;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -282,11 +289,13 @@ void CartesianCommunicator::SendToRecvFromBegin(std::vector<CommsRequest_t> &lis
|
|||||||
SHMEM_VET(recv);
|
SHMEM_VET(recv);
|
||||||
// shmem_putmem_nb(recv,xmit,bytes,dest,NULL);
|
// shmem_putmem_nb(recv,xmit,bytes,dest,NULL);
|
||||||
shmem_putmem(recv,xmit,bytes,dest);
|
shmem_putmem(recv,xmit,bytes,dest);
|
||||||
|
|
||||||
|
if ( CommunicatorPolicy == CommunicatorPolicySequential ) shmem_barrier_all();
|
||||||
}
|
}
|
||||||
void CartesianCommunicator::SendToRecvFromComplete(std::vector<CommsRequest_t> &list)
|
void CartesianCommunicator::SendToRecvFromComplete(std::vector<CommsRequest_t> &list)
|
||||||
{
|
{
|
||||||
// shmem_quiet(); // I'm done
|
// shmem_quiet(); // I'm done
|
||||||
shmem_barrier_all();// He's done too
|
if( CommunicatorPolicy == CommunicatorPolicyConcurrent ) shmem_barrier_all();// He's done too
|
||||||
}
|
}
|
||||||
void CartesianCommunicator::Barrier(void)
|
void CartesianCommunicator::Barrier(void)
|
||||||
{
|
{
|
||||||
@ -301,13 +310,13 @@ void CartesianCommunicator::Broadcast(int root,void* data, int bytes)
|
|||||||
int words = bytes/4;
|
int words = bytes/4;
|
||||||
|
|
||||||
if ( shmem_addr_accessible(data,_processor) ){
|
if ( shmem_addr_accessible(data,_processor) ){
|
||||||
shmem_broadcast32(data,data,words,root,0,0,shmem_n_pes(),psync);
|
shmem_broadcast32(data,data,words,root,0,0,shmem_n_pes(),psync.data());
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
for(int w=0;w<words;w++){
|
for(int w=0;w<words;w++){
|
||||||
word = array[w];
|
word = array[w];
|
||||||
shmem_broadcast32((void *)&word,(void *)&word,1,root,0,0,shmem_n_pes(),psync);
|
shmem_broadcast32((void *)&word,(void *)&word,1,root,0,0,shmem_n_pes(),psync.data());
|
||||||
if ( shmem_my_pe() != root ) {
|
if ( shmem_my_pe() != root ) {
|
||||||
array[w] = word;
|
array[w] = word;
|
||||||
}
|
}
|
||||||
@ -325,7 +334,7 @@ void CartesianCommunicator::BroadcastWorld(int root,void* data, int bytes)
|
|||||||
|
|
||||||
for(int w=0;w<words;w++){
|
for(int w=0;w<words;w++){
|
||||||
word = array[w];
|
word = array[w];
|
||||||
shmem_broadcast32((void *)&word,(void *)&word,1,root,0,0,shmem_n_pes(),psync);
|
shmem_broadcast32((void *)&word,(void *)&word,1,root,0,0,shmem_n_pes(),psync.data());
|
||||||
if ( shmem_my_pe() != root ) {
|
if ( shmem_my_pe() != root ) {
|
||||||
array[w]= word;
|
array[w]= word;
|
||||||
}
|
}
|
||||||
@ -333,5 +342,9 @@ void CartesianCommunicator::BroadcastWorld(int root,void* data, int bytes)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
int CartesianCommunicator::RankWorld(void){
|
||||||
|
return shmem_my_pe();
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -229,7 +229,7 @@ namespace QCD {
|
|||||||
// }
|
// }
|
||||||
int face_idx=0;
|
int face_idx=0;
|
||||||
if ( dag ) {
|
if ( dag ) {
|
||||||
std::cout << " Optimised Dagger compress " <<std::endl;
|
// std::cout << " Optimised Dagger compress " <<std::endl;
|
||||||
this->HaloGatherDir(source,XpCompress,Xp,face_idx);
|
this->HaloGatherDir(source,XpCompress,Xp,face_idx);
|
||||||
this->HaloGatherDir(source,YpCompress,Yp,face_idx);
|
this->HaloGatherDir(source,YpCompress,Yp,face_idx);
|
||||||
this->HaloGatherDir(source,ZpCompress,Zp,face_idx);
|
this->HaloGatherDir(source,ZpCompress,Zp,face_idx);
|
||||||
|
@ -390,6 +390,9 @@ void Grid_finalize(void)
|
|||||||
MPI_Finalize();
|
MPI_Finalize();
|
||||||
Grid_unquiesce_nodes();
|
Grid_unquiesce_nodes();
|
||||||
#endif
|
#endif
|
||||||
|
#if defined (GRID_COMMS_SHMEM)
|
||||||
|
shmem_finalize();
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
void * Grid_backtrace_buffer[_NBACKTRACE];
|
void * Grid_backtrace_buffer[_NBACKTRACE];
|
||||||
|
Loading…
Reference in New Issue
Block a user