mirror of
https://github.com/paboyle/Grid.git
synced 2024-11-09 23:45:36 +00:00
Partial dirichlet BCs
This commit is contained in:
parent
0db4f1803f
commit
e2e269e03b
@ -59,6 +59,7 @@ NAMESPACE_BEGIN(Grid);
|
||||
void Gather_plane_table_compute (GridBase *grid,int dimension,int plane,int cbmask,
|
||||
int off,std::vector<std::pair<int,int> > & table);
|
||||
|
||||
/*
|
||||
template<class vobj,class cobj,class compressor>
|
||||
void Gather_plane_simple_table (commVector<std::pair<int,int> >& table,const Lattice<vobj> &rhs,cobj *buffer,compressor &compress, int off,int so) __attribute__((noinline));
|
||||
|
||||
@ -103,6 +104,7 @@ void Gather_plane_exchange_table(commVector<std::pair<int,int> >& table,const La
|
||||
});
|
||||
rhs_v.ViewClose();
|
||||
}
|
||||
*/
|
||||
|
||||
struct StencilEntry {
|
||||
#ifdef GRID_CUDA
|
||||
@ -133,8 +135,18 @@ class CartesianStencilAccelerator {
|
||||
int _osites;
|
||||
StencilVector _directions;
|
||||
StencilVector _distances;
|
||||
StencilVector _comms_send;
|
||||
StencilVector _comms_recv;
|
||||
///////////////////////////////////////////////////
|
||||
// If true, this is FULLY communicated per face
|
||||
// Otherwise the face is either full Dirichlet or partial Dirichlet
|
||||
///////////////////////////////////////////////////
|
||||
StencilVector _comms_send;
|
||||
StencilVector _comms_recv; // this is FULLY communicated per face
|
||||
///////////////////////////////////////////////////
|
||||
// If true, this is partially communicated per face
|
||||
///////////////////////////////////////////////////
|
||||
StencilVector _comms_partial_send;
|
||||
StencilVector _comms_partial_recv;
|
||||
//
|
||||
StencilVector _comm_buf_size;
|
||||
StencilVector _permute_type;
|
||||
StencilVector same_node;
|
||||
@ -229,7 +241,8 @@ public:
|
||||
Integer from_rank;
|
||||
Integer do_send;
|
||||
Integer do_recv;
|
||||
Integer bytes;
|
||||
Integer xbytes;
|
||||
Integer rbytes;
|
||||
};
|
||||
struct Merge {
|
||||
cobj * mpointer;
|
||||
@ -237,11 +250,15 @@ public:
|
||||
Vector<cobj *> vpointers;
|
||||
Integer buffer_size;
|
||||
Integer type;
|
||||
Integer partial; // partial dirichlet BCs
|
||||
Coordinate dims;
|
||||
};
|
||||
struct Decompress {
|
||||
cobj * kernel_p;
|
||||
cobj * mpi_p;
|
||||
Integer buffer_size;
|
||||
Integer partial; // partial dirichlet BCs
|
||||
Coordinate dims;
|
||||
};
|
||||
struct CopyReceiveBuffer {
|
||||
void * from_p;
|
||||
@ -252,7 +269,8 @@ public:
|
||||
Integer direction;
|
||||
Integer OrthogPlane;
|
||||
Integer DestProc;
|
||||
Integer bytes;
|
||||
Integer xbytes;
|
||||
Integer rbytes;
|
||||
Integer lane;
|
||||
Integer cb;
|
||||
void *recv_buf;
|
||||
@ -277,6 +295,7 @@ public:
|
||||
}
|
||||
|
||||
int face_table_computed;
|
||||
int partialDirichlet;
|
||||
std::vector<commVector<std::pair<int,int> > > face_table ;
|
||||
Vector<int> surface_list;
|
||||
|
||||
@ -365,7 +384,7 @@ public:
|
||||
Packets[i].to_rank,Packets[i].do_send,
|
||||
Packets[i].recv_buf,
|
||||
Packets[i].from_rank,Packets[i].do_recv,
|
||||
Packets[i].bytes,i);
|
||||
Packets[i].xbytes,Packets[i].rbytes,i);
|
||||
}
|
||||
}
|
||||
|
||||
@ -501,7 +520,9 @@ public:
|
||||
}
|
||||
}
|
||||
|
||||
Integer CheckForDuplicate(Integer direction, Integer OrthogPlane, Integer DestProc, void *recv_buf,Integer lane,Integer bytes,Integer cb)
|
||||
Integer CheckForDuplicate(Integer direction, Integer OrthogPlane, Integer DestProc, void *recv_buf,Integer lane,
|
||||
Integer xbytes,Integer rbytes,
|
||||
Integer cb)
|
||||
{
|
||||
CachedTransfer obj;
|
||||
obj.direction = direction;
|
||||
@ -509,19 +530,22 @@ public:
|
||||
obj.DestProc = DestProc;
|
||||
obj.recv_buf = recv_buf;
|
||||
obj.lane = lane;
|
||||
obj.bytes = bytes;
|
||||
obj.xbytes = xbytes;
|
||||
obj.rbytes = rbytes;
|
||||
obj.cb = cb;
|
||||
|
||||
for(int i=0;i<CachedTransfers.size();i++){
|
||||
if ( (CachedTransfers[i].direction ==direction)
|
||||
&&(CachedTransfers[i].OrthogPlane==OrthogPlane)
|
||||
&&(CachedTransfers[i].DestProc ==DestProc)
|
||||
&&(CachedTransfers[i].bytes ==bytes)
|
||||
&&(CachedTransfers[i].xbytes ==xbytes)
|
||||
&&(CachedTransfers[i].rbytes ==rbytes)
|
||||
&&(CachedTransfers[i].lane ==lane)
|
||||
&&(CachedTransfers[i].cb ==cb)
|
||||
){
|
||||
|
||||
AddCopy(CachedTransfers[i].recv_buf,recv_buf,bytes);
|
||||
// FIXME worry about duplicate with partial compression
|
||||
// Won't happen as DWF has no duplicates, but...
|
||||
AddCopy(CachedTransfers[i].recv_buf,recv_buf,rbytes);
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
@ -532,7 +556,7 @@ public:
|
||||
void AddPacket(void *xmit,void * rcv,
|
||||
Integer to, Integer do_send,
|
||||
Integer from, Integer do_recv,
|
||||
Integer bytes){
|
||||
Integer xbytes,Integer rbytes){
|
||||
Packet p;
|
||||
p.send_buf = xmit;
|
||||
p.recv_buf = rcv;
|
||||
@ -540,11 +564,16 @@ public:
|
||||
p.from_rank= from;
|
||||
p.do_send = do_send;
|
||||
p.do_recv = do_recv;
|
||||
p.bytes = bytes;
|
||||
p.xbytes = xbytes;
|
||||
p.rbytes = rbytes;
|
||||
// if (do_send) std::cout << GridLogMessage << " MPI packet to "<<to<< " of size "<<xbytes<<std::endl;
|
||||
// if (do_recv) std::cout << GridLogMessage << " MPI packet from "<<from<< " of size "<<rbytes<<std::endl;
|
||||
Packets.push_back(p);
|
||||
}
|
||||
void AddDecompress(cobj *k_p,cobj *m_p,Integer buffer_size,std::vector<Decompress> &dv) {
|
||||
Decompress d;
|
||||
d.partial = this->partialDirichlet;
|
||||
d.dims = _grid->_fdimensions;
|
||||
d.kernel_p = k_p;
|
||||
d.mpi_p = m_p;
|
||||
d.buffer_size = buffer_size;
|
||||
@ -552,6 +581,8 @@ public:
|
||||
}
|
||||
void AddMerge(cobj *merge_p,Vector<cobj *> &rpointers,Integer buffer_size,Integer type,std::vector<Merge> &mv) {
|
||||
Merge m;
|
||||
m.partial = this->partialDirichlet;
|
||||
m.dims = _grid->_fdimensions;
|
||||
m.type = type;
|
||||
m.mpointer = merge_p;
|
||||
m.vpointers= rpointers;
|
||||
@ -571,21 +602,10 @@ public:
|
||||
void CommsMerge(decompressor decompress,std::vector<Merge> &mm,std::vector<Decompress> &dd)
|
||||
{
|
||||
for(int i=0;i<mm.size();i++){
|
||||
auto mp = &mm[i].mpointer[0];
|
||||
auto vp0= &mm[i].vpointers[0][0];
|
||||
auto vp1= &mm[i].vpointers[1][0];
|
||||
auto type= mm[i].type;
|
||||
accelerator_forNB(o,mm[i].buffer_size/2,vobj::Nsimd(),{
|
||||
decompress.Exchange(mp,vp0,vp1,type,o);
|
||||
});
|
||||
decompressor::MergeFace(decompress,mm[i]);
|
||||
}
|
||||
|
||||
for(int i=0;i<dd.size();i++){
|
||||
auto kp = dd[i].kernel_p;
|
||||
auto mp = dd[i].mpi_p;
|
||||
accelerator_forNB(o,dd[i].buffer_size,1,{
|
||||
decompress.Decompress(kp,mp,o);
|
||||
});
|
||||
decompressor::DecompressFace(decompress,dd[i]);
|
||||
}
|
||||
}
|
||||
////////////////////////////////////////
|
||||
@ -664,6 +684,10 @@ public:
|
||||
if ( ( (ld*(pc+1) ) % block ) == 0 ) this->_comms_send[ii] = 0;
|
||||
if ( ( (ld*pc ) % block ) == 0 ) this->_comms_recv[ii] = 0;
|
||||
}
|
||||
if ( partialDirichlet ) {
|
||||
this->_comms_partial_send[ii] = !this->_comms_send[ii];
|
||||
this->_comms_partial_recv[ii] = !this->_comms_recv[ii];
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -691,7 +715,7 @@ public:
|
||||
this->same_node.resize(npoints);
|
||||
|
||||
if ( p.dirichlet.size() ==0 ) p.dirichlet.resize(grid->Nd(),0);
|
||||
|
||||
partialDirichlet = p.partialDirichlet;
|
||||
DirichletBlock(p.dirichlet); // comms send/recv set up
|
||||
|
||||
_unified_buffer_size=0;
|
||||
@ -827,7 +851,7 @@ public:
|
||||
GridBase *grid=_grid;
|
||||
const int Nsimd = grid->Nsimd();
|
||||
|
||||
int comms_recv = this->_comms_recv[point];
|
||||
int comms_recv = this->_comms_recv[point] || this->_comms_partial_recv[point] ;
|
||||
int fd = _grid->_fdimensions[dimension];
|
||||
int ld = _grid->_ldimensions[dimension];
|
||||
int rd = _grid->_rdimensions[dimension];
|
||||
@ -1014,8 +1038,10 @@ public:
|
||||
{
|
||||
typedef typename cobj::vector_type vector_type;
|
||||
|
||||
int comms_send = this->_comms_send[point] ;
|
||||
int comms_recv = this->_comms_recv[point] ;
|
||||
int comms_send = this->_comms_send[point];
|
||||
int comms_recv = this->_comms_recv[point];
|
||||
int comms_partial_send = this->_comms_partial_send[point] ;
|
||||
int comms_partial_recv = this->_comms_partial_recv[point] ;
|
||||
|
||||
assert(rhs.Grid()==_grid);
|
||||
// conformable(_grid,rhs.Grid());
|
||||
@ -1046,7 +1072,17 @@ public:
|
||||
if (cbmask != 0x3) words=words>>1;
|
||||
|
||||
int bytes = words * compress.CommDatumSize();
|
||||
int xbytes;
|
||||
int rbytes;
|
||||
|
||||
if ( comms_send ) xbytes = bytes; // Full send
|
||||
else if ( comms_partial_send ) xbytes = bytes/compressor::PartialCompressionFactor(_grid);
|
||||
else xbytes = 0; // full dirichlet
|
||||
|
||||
if ( comms_recv ) rbytes = bytes;
|
||||
else if ( comms_partial_recv ) rbytes = bytes/compressor::PartialCompressionFactor(_grid);
|
||||
else rbytes = 0;
|
||||
|
||||
int so = sx*rhs.Grid()->_ostride[dimension]; // base offset for start of plane
|
||||
int comm_off = u_comm_offset;
|
||||
|
||||
@ -1059,49 +1095,47 @@ public:
|
||||
assert (xmit_to_rank != _grid->ThisRank());
|
||||
assert (recv_from_rank != _grid->ThisRank());
|
||||
|
||||
if( comms_send ) {
|
||||
|
||||
if ( !face_table_computed ) {
|
||||
face_table.resize(face_idx+1);
|
||||
std::vector<std::pair<int,int> > face_table_host ;
|
||||
Gather_plane_table_compute ((GridBase *)_grid,dimension,sx,cbmask,comm_off,face_table_host);
|
||||
if ( !face_table_computed ) {
|
||||
face_table.resize(face_idx+1);
|
||||
std::vector<std::pair<int,int> > face_table_host ;
|
||||
Gather_plane_table_compute ((GridBase *)_grid,dimension,sx,cbmask,comm_off,face_table_host);
|
||||
// std::cout << "bytes expect "<< bytes << " " << face_table_host.size()* compress.CommDatumSize()<<std::endl;
|
||||
face_table[face_idx].resize(face_table_host.size());
|
||||
acceleratorCopyToDevice(&face_table_host[0],
|
||||
&face_table[face_idx][0],
|
||||
face_table[face_idx].size()*sizeof(face_table_host[0]));
|
||||
}
|
||||
|
||||
|
||||
if ( compress.DecompressionStep() ) {
|
||||
recv_buf=u_simd_recv_buf[0];
|
||||
} else {
|
||||
recv_buf=this->u_recv_buf_p;
|
||||
}
|
||||
|
||||
send_buf = this->u_send_buf_p; // Gather locally, must send
|
||||
|
||||
////////////////////////////////////////////////////////
|
||||
// Gather locally
|
||||
////////////////////////////////////////////////////////
|
||||
assert(send_buf!=NULL);
|
||||
Gather_plane_simple_table(face_table[face_idx],rhs,send_buf,compress,comm_off,so);
|
||||
}
|
||||
|
||||
int duplicate = CheckForDuplicate(dimension,sx,comm_proc,(void *)&recv_buf[comm_off],0,bytes,cbmask);
|
||||
if ( (!duplicate) ) { // Force comms for now
|
||||
|
||||
if ( (compress.DecompressionStep()&&comms_recv) || comms_partial_recv ) {
|
||||
recv_buf=u_simd_recv_buf[0];
|
||||
} else {
|
||||
recv_buf=this->u_recv_buf_p;
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////
|
||||
// Gather locally
|
||||
////////////////////////////////////////////////////////
|
||||
send_buf = this->u_send_buf_p; // Gather locally, must send
|
||||
assert(send_buf!=NULL);
|
||||
|
||||
compressor::Gather_plane_simple(face_table[face_idx],rhs,send_buf,compress,comm_off,so,comms_partial_send);
|
||||
|
||||
int duplicate = CheckForDuplicate(dimension,sx,comm_proc,(void *)&recv_buf[comm_off],0,xbytes,rbytes,cbmask);
|
||||
if ( !duplicate ) { // Force comms for now
|
||||
|
||||
///////////////////////////////////////////////////////////
|
||||
// Build a list of things to do after we synchronise GPUs
|
||||
// Start comms now???
|
||||
///////////////////////////////////////////////////////////
|
||||
AddPacket((void *)&send_buf[comm_off],
|
||||
(void *)&recv_buf[comm_off],
|
||||
xmit_to_rank, comms_send,
|
||||
recv_from_rank, comms_recv,
|
||||
bytes);
|
||||
xmit_to_rank, comms_send|comms_partial_send,
|
||||
recv_from_rank, comms_recv|comms_partial_recv,
|
||||
xbytes,rbytes);
|
||||
}
|
||||
|
||||
if ( compress.DecompressionStep() && comms_recv ) {
|
||||
|
||||
if ( (compress.DecompressionStep() && comms_recv) || comms_partial_recv ) {
|
||||
AddDecompress(&this->u_recv_buf_p[comm_off],
|
||||
&recv_buf[comm_off],
|
||||
words,Decompressions);
|
||||
@ -1109,7 +1143,6 @@ public:
|
||||
|
||||
u_comm_offset+=words;
|
||||
face_idx++;
|
||||
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
@ -1122,8 +1155,10 @@ public:
|
||||
|
||||
const int maxl =2;// max layout in a direction
|
||||
|
||||
int comms_send = this->_comms_send[point] ;
|
||||
int comms_recv = this->_comms_recv[point] ;
|
||||
int comms_send = this->_comms_send[point];
|
||||
int comms_recv = this->_comms_recv[point];
|
||||
int comms_partial_send = this->_comms_partial_send[point] ;
|
||||
int comms_partial_recv = this->_comms_partial_recv[point] ;
|
||||
|
||||
int fd = _grid->_fdimensions[dimension];
|
||||
int rd = _grid->_rdimensions[dimension];
|
||||
@ -1153,6 +1188,11 @@ public:
|
||||
|
||||
int datum_bytes = compress.CommDatumSize();
|
||||
int bytes = (reduced_buffer_size*datum_bytes)/simd_layout;
|
||||
|
||||
// Number of bytes actually put on the wire: partial Dirichlet or full Dirichlet BCs may make these smaller than bytes
|
||||
int xbytes;
|
||||
int rbytes;
|
||||
|
||||
assert(bytes*simd_layout == reduced_buffer_size*datum_bytes);
|
||||
|
||||
Vector<cobj *> rpointers(maxl);
|
||||
@ -1182,22 +1222,37 @@ public:
|
||||
if ( !face_table_computed ) {
|
||||
face_table.resize(face_idx+1);
|
||||
std::vector<std::pair<int,int> > face_table_host ;
|
||||
|
||||
|
||||
Gather_plane_table_compute ((GridBase *)_grid,dimension,sx,cbmask,comm_off,face_table_host);
|
||||
face_table[face_idx].resize(face_table_host.size());
|
||||
acceleratorCopyToDevice(&face_table_host[0],
|
||||
&face_table[face_idx][0],
|
||||
face_table[face_idx].size()*sizeof(face_table_host[0]));
|
||||
|
||||
}
|
||||
|
||||
if ( comms_send || comms_recv ) {
|
||||
Gather_plane_exchange_table(face_table[face_idx],rhs,spointers,dimension,sx,cbmask,compress,permute_type);
|
||||
|
||||
if ( comms_send ) xbytes = bytes;
|
||||
else if ( comms_partial_send ) xbytes = bytes/compressor::PartialCompressionFactor(_grid);
|
||||
else xbytes = 0;
|
||||
|
||||
if ( comms_recv ) rbytes = bytes;
|
||||
else if ( comms_partial_recv ) rbytes = bytes/compressor::PartialCompressionFactor(_grid);
|
||||
else rbytes = 0;
|
||||
|
||||
// Gathers SIMD lanes for send and merge
|
||||
// Different faces can be full comms or partial comms with multiple ranks per node
|
||||
if ( comms_send || comms_recv||comms_partial_send||comms_partial_recv ) {
|
||||
|
||||
int partial = partialDirichlet;
|
||||
compressor::Gather_plane_exchange(face_table[face_idx],rhs,
|
||||
spointers,dimension,sx,cbmask,
|
||||
compress,permute_type,partial );
|
||||
}
|
||||
face_idx++;
|
||||
|
||||
//spointers[0] -- low
|
||||
//spointers[1] -- high
|
||||
|
||||
//spointers[0] -- low simd coor
|
||||
//spointers[1] -- high simd coor
|
||||
for(int i=0;i<maxl;i++){
|
||||
|
||||
int my_coor = rd*i + x; // self explanatory
|
||||
@ -1222,13 +1277,16 @@ public:
|
||||
_grid->ShiftedRanks(dimension,nbr_proc,xmit_to_rank,recv_from_rank);
|
||||
|
||||
rpointers[i] = rp;
|
||||
|
||||
int duplicate = CheckForDuplicate(dimension,sx,nbr_proc,(void *)rp,i,bytes,cbmask);
|
||||
|
||||
int duplicate = CheckForDuplicate(dimension,sx,nbr_proc,(void *)rp,i,xbytes,rbytes,cbmask);
|
||||
if ( !duplicate ) {
|
||||
if ( (bytes != rbytes) && (rbytes!=0) ){
|
||||
acceleratorMemSet(rp,0,bytes); // Zero prefill comms buffer to zero
|
||||
}
|
||||
AddPacket((void *)sp,(void *)rp,
|
||||
xmit_to_rank,comms_send,
|
||||
recv_from_rank,comms_recv,
|
||||
bytes);
|
||||
xmit_to_rank,comms_send|comms_partial_send,
|
||||
recv_from_rank,comms_recv|comms_partial_recv,
|
||||
xbytes,rbytes);
|
||||
}
|
||||
|
||||
} else {
|
||||
@ -1238,7 +1296,7 @@ public:
|
||||
}
|
||||
}
|
||||
|
||||
if ( comms_recv ) {
|
||||
if ( comms_recv|comms_partial_recv ) {
|
||||
AddMerge(&this->u_recv_buf_p[comm_off],rpointers,reduced_buffer_size,permute_type,Mergers);
|
||||
}
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user