mirror of
https://github.com/paboyle/Grid.git
synced 2025-04-09 21:50:45 +01:00
Partial dirichlet BCs
This commit is contained in:
parent
0db4f1803f
commit
e2e269e03b
@ -59,6 +59,7 @@ NAMESPACE_BEGIN(Grid);
|
|||||||
void Gather_plane_table_compute (GridBase *grid,int dimension,int plane,int cbmask,
|
void Gather_plane_table_compute (GridBase *grid,int dimension,int plane,int cbmask,
|
||||||
int off,std::vector<std::pair<int,int> > & table);
|
int off,std::vector<std::pair<int,int> > & table);
|
||||||
|
|
||||||
|
/*
|
||||||
template<class vobj,class cobj,class compressor>
|
template<class vobj,class cobj,class compressor>
|
||||||
void Gather_plane_simple_table (commVector<std::pair<int,int> >& table,const Lattice<vobj> &rhs,cobj *buffer,compressor &compress, int off,int so) __attribute__((noinline));
|
void Gather_plane_simple_table (commVector<std::pair<int,int> >& table,const Lattice<vobj> &rhs,cobj *buffer,compressor &compress, int off,int so) __attribute__((noinline));
|
||||||
|
|
||||||
@ -103,6 +104,7 @@ void Gather_plane_exchange_table(commVector<std::pair<int,int> >& table,const La
|
|||||||
});
|
});
|
||||||
rhs_v.ViewClose();
|
rhs_v.ViewClose();
|
||||||
}
|
}
|
||||||
|
*/
|
||||||
|
|
||||||
struct StencilEntry {
|
struct StencilEntry {
|
||||||
#ifdef GRID_CUDA
|
#ifdef GRID_CUDA
|
||||||
@ -133,8 +135,18 @@ class CartesianStencilAccelerator {
|
|||||||
int _osites;
|
int _osites;
|
||||||
StencilVector _directions;
|
StencilVector _directions;
|
||||||
StencilVector _distances;
|
StencilVector _distances;
|
||||||
StencilVector _comms_send;
|
///////////////////////////////////////////////////
|
||||||
StencilVector _comms_recv;
|
// If true, this is FULLY communicated per face
|
||||||
|
// Otherwise will either be full or partial dirichlet
|
||||||
|
///////////////////////////////////////////////////
|
||||||
|
StencilVector _comms_send;
|
||||||
|
StencilVector _comms_recv; // this is FULLY communicated per face
|
||||||
|
///////////////////////////////////////////////////
|
||||||
|
// If true, this is partially communicated per face
|
||||||
|
///////////////////////////////////////////////////
|
||||||
|
StencilVector _comms_partial_send;
|
||||||
|
StencilVector _comms_partial_recv;
|
||||||
|
//
|
||||||
StencilVector _comm_buf_size;
|
StencilVector _comm_buf_size;
|
||||||
StencilVector _permute_type;
|
StencilVector _permute_type;
|
||||||
StencilVector same_node;
|
StencilVector same_node;
|
||||||
@ -229,7 +241,8 @@ public:
|
|||||||
Integer from_rank;
|
Integer from_rank;
|
||||||
Integer do_send;
|
Integer do_send;
|
||||||
Integer do_recv;
|
Integer do_recv;
|
||||||
Integer bytes;
|
Integer xbytes;
|
||||||
|
Integer rbytes;
|
||||||
};
|
};
|
||||||
struct Merge {
|
struct Merge {
|
||||||
cobj * mpointer;
|
cobj * mpointer;
|
||||||
@ -237,11 +250,15 @@ public:
|
|||||||
Vector<cobj *> vpointers;
|
Vector<cobj *> vpointers;
|
||||||
Integer buffer_size;
|
Integer buffer_size;
|
||||||
Integer type;
|
Integer type;
|
||||||
|
Integer partial; // partial dirichlet BCs
|
||||||
|
Coordinate dims;
|
||||||
};
|
};
|
||||||
struct Decompress {
|
struct Decompress {
|
||||||
cobj * kernel_p;
|
cobj * kernel_p;
|
||||||
cobj * mpi_p;
|
cobj * mpi_p;
|
||||||
Integer buffer_size;
|
Integer buffer_size;
|
||||||
|
Integer partial; // partial dirichlet BCs
|
||||||
|
Coordinate dims;
|
||||||
};
|
};
|
||||||
struct CopyReceiveBuffer {
|
struct CopyReceiveBuffer {
|
||||||
void * from_p;
|
void * from_p;
|
||||||
@ -252,7 +269,8 @@ public:
|
|||||||
Integer direction;
|
Integer direction;
|
||||||
Integer OrthogPlane;
|
Integer OrthogPlane;
|
||||||
Integer DestProc;
|
Integer DestProc;
|
||||||
Integer bytes;
|
Integer xbytes;
|
||||||
|
Integer rbytes;
|
||||||
Integer lane;
|
Integer lane;
|
||||||
Integer cb;
|
Integer cb;
|
||||||
void *recv_buf;
|
void *recv_buf;
|
||||||
@ -277,6 +295,7 @@ public:
|
|||||||
}
|
}
|
||||||
|
|
||||||
int face_table_computed;
|
int face_table_computed;
|
||||||
|
int partialDirichlet;
|
||||||
std::vector<commVector<std::pair<int,int> > > face_table ;
|
std::vector<commVector<std::pair<int,int> > > face_table ;
|
||||||
Vector<int> surface_list;
|
Vector<int> surface_list;
|
||||||
|
|
||||||
@ -365,7 +384,7 @@ public:
|
|||||||
Packets[i].to_rank,Packets[i].do_send,
|
Packets[i].to_rank,Packets[i].do_send,
|
||||||
Packets[i].recv_buf,
|
Packets[i].recv_buf,
|
||||||
Packets[i].from_rank,Packets[i].do_recv,
|
Packets[i].from_rank,Packets[i].do_recv,
|
||||||
Packets[i].bytes,i);
|
Packets[i].xbytes,Packets[i].rbytes,i);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -501,7 +520,9 @@ public:
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
Integer CheckForDuplicate(Integer direction, Integer OrthogPlane, Integer DestProc, void *recv_buf,Integer lane,Integer bytes,Integer cb)
|
Integer CheckForDuplicate(Integer direction, Integer OrthogPlane, Integer DestProc, void *recv_buf,Integer lane,
|
||||||
|
Integer xbytes,Integer rbytes,
|
||||||
|
Integer cb)
|
||||||
{
|
{
|
||||||
CachedTransfer obj;
|
CachedTransfer obj;
|
||||||
obj.direction = direction;
|
obj.direction = direction;
|
||||||
@ -509,19 +530,22 @@ public:
|
|||||||
obj.DestProc = DestProc;
|
obj.DestProc = DestProc;
|
||||||
obj.recv_buf = recv_buf;
|
obj.recv_buf = recv_buf;
|
||||||
obj.lane = lane;
|
obj.lane = lane;
|
||||||
obj.bytes = bytes;
|
obj.xbytes = xbytes;
|
||||||
|
obj.rbytes = rbytes;
|
||||||
obj.cb = cb;
|
obj.cb = cb;
|
||||||
|
|
||||||
for(int i=0;i<CachedTransfers.size();i++){
|
for(int i=0;i<CachedTransfers.size();i++){
|
||||||
if ( (CachedTransfers[i].direction ==direction)
|
if ( (CachedTransfers[i].direction ==direction)
|
||||||
&&(CachedTransfers[i].OrthogPlane==OrthogPlane)
|
&&(CachedTransfers[i].OrthogPlane==OrthogPlane)
|
||||||
&&(CachedTransfers[i].DestProc ==DestProc)
|
&&(CachedTransfers[i].DestProc ==DestProc)
|
||||||
&&(CachedTransfers[i].bytes ==bytes)
|
&&(CachedTransfers[i].xbytes ==xbytes)
|
||||||
|
&&(CachedTransfers[i].rbytes ==rbytes)
|
||||||
&&(CachedTransfers[i].lane ==lane)
|
&&(CachedTransfers[i].lane ==lane)
|
||||||
&&(CachedTransfers[i].cb ==cb)
|
&&(CachedTransfers[i].cb ==cb)
|
||||||
){
|
){
|
||||||
|
// FIXME worry about duplicate with partial compression
|
||||||
AddCopy(CachedTransfers[i].recv_buf,recv_buf,bytes);
|
// Wont happen as DWF has no duplicates, but...
|
||||||
|
AddCopy(CachedTransfers[i].recv_buf,recv_buf,rbytes);
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -532,7 +556,7 @@ public:
|
|||||||
void AddPacket(void *xmit,void * rcv,
|
void AddPacket(void *xmit,void * rcv,
|
||||||
Integer to, Integer do_send,
|
Integer to, Integer do_send,
|
||||||
Integer from, Integer do_recv,
|
Integer from, Integer do_recv,
|
||||||
Integer bytes){
|
Integer xbytes,Integer rbytes){
|
||||||
Packet p;
|
Packet p;
|
||||||
p.send_buf = xmit;
|
p.send_buf = xmit;
|
||||||
p.recv_buf = rcv;
|
p.recv_buf = rcv;
|
||||||
@ -540,11 +564,16 @@ public:
|
|||||||
p.from_rank= from;
|
p.from_rank= from;
|
||||||
p.do_send = do_send;
|
p.do_send = do_send;
|
||||||
p.do_recv = do_recv;
|
p.do_recv = do_recv;
|
||||||
p.bytes = bytes;
|
p.xbytes = xbytes;
|
||||||
|
p.rbytes = rbytes;
|
||||||
|
// if (do_send) std::cout << GridLogMessage << " MPI packet to "<<to<< " of size "<<xbytes<<std::endl;
|
||||||
|
// if (do_recv) std::cout << GridLogMessage << " MPI packet from "<<from<< " of size "<<xbytes<<std::endl;
|
||||||
Packets.push_back(p);
|
Packets.push_back(p);
|
||||||
}
|
}
|
||||||
void AddDecompress(cobj *k_p,cobj *m_p,Integer buffer_size,std::vector<Decompress> &dv) {
|
void AddDecompress(cobj *k_p,cobj *m_p,Integer buffer_size,std::vector<Decompress> &dv) {
|
||||||
Decompress d;
|
Decompress d;
|
||||||
|
d.partial = this->partialDirichlet;
|
||||||
|
d.dims = _grid->_fdimensions;
|
||||||
d.kernel_p = k_p;
|
d.kernel_p = k_p;
|
||||||
d.mpi_p = m_p;
|
d.mpi_p = m_p;
|
||||||
d.buffer_size = buffer_size;
|
d.buffer_size = buffer_size;
|
||||||
@ -552,6 +581,8 @@ public:
|
|||||||
}
|
}
|
||||||
void AddMerge(cobj *merge_p,Vector<cobj *> &rpointers,Integer buffer_size,Integer type,std::vector<Merge> &mv) {
|
void AddMerge(cobj *merge_p,Vector<cobj *> &rpointers,Integer buffer_size,Integer type,std::vector<Merge> &mv) {
|
||||||
Merge m;
|
Merge m;
|
||||||
|
m.partial = this->partialDirichlet;
|
||||||
|
m.dims = _grid->_fdimensions;
|
||||||
m.type = type;
|
m.type = type;
|
||||||
m.mpointer = merge_p;
|
m.mpointer = merge_p;
|
||||||
m.vpointers= rpointers;
|
m.vpointers= rpointers;
|
||||||
@ -571,21 +602,10 @@ public:
|
|||||||
void CommsMerge(decompressor decompress,std::vector<Merge> &mm,std::vector<Decompress> &dd)
|
void CommsMerge(decompressor decompress,std::vector<Merge> &mm,std::vector<Decompress> &dd)
|
||||||
{
|
{
|
||||||
for(int i=0;i<mm.size();i++){
|
for(int i=0;i<mm.size();i++){
|
||||||
auto mp = &mm[i].mpointer[0];
|
decompressor::MergeFace(decompress,mm[i]);
|
||||||
auto vp0= &mm[i].vpointers[0][0];
|
|
||||||
auto vp1= &mm[i].vpointers[1][0];
|
|
||||||
auto type= mm[i].type;
|
|
||||||
accelerator_forNB(o,mm[i].buffer_size/2,vobj::Nsimd(),{
|
|
||||||
decompress.Exchange(mp,vp0,vp1,type,o);
|
|
||||||
});
|
|
||||||
}
|
}
|
||||||
|
|
||||||
for(int i=0;i<dd.size();i++){
|
for(int i=0;i<dd.size();i++){
|
||||||
auto kp = dd[i].kernel_p;
|
decompressor::DecompressFace(decompress,dd[i]);
|
||||||
auto mp = dd[i].mpi_p;
|
|
||||||
accelerator_forNB(o,dd[i].buffer_size,1,{
|
|
||||||
decompress.Decompress(kp,mp,o);
|
|
||||||
});
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
////////////////////////////////////////
|
////////////////////////////////////////
|
||||||
@ -664,6 +684,10 @@ public:
|
|||||||
if ( ( (ld*(pc+1) ) % block ) == 0 ) this->_comms_send[ii] = 0;
|
if ( ( (ld*(pc+1) ) % block ) == 0 ) this->_comms_send[ii] = 0;
|
||||||
if ( ( (ld*pc ) % block ) == 0 ) this->_comms_recv[ii] = 0;
|
if ( ( (ld*pc ) % block ) == 0 ) this->_comms_recv[ii] = 0;
|
||||||
}
|
}
|
||||||
|
if ( partialDirichlet ) {
|
||||||
|
this->_comms_partial_send[ii] = !this->_comms_send[ii];
|
||||||
|
this->_comms_partial_recv[ii] = !this->_comms_recv[ii];
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -691,7 +715,7 @@ public:
|
|||||||
this->same_node.resize(npoints);
|
this->same_node.resize(npoints);
|
||||||
|
|
||||||
if ( p.dirichlet.size() ==0 ) p.dirichlet.resize(grid->Nd(),0);
|
if ( p.dirichlet.size() ==0 ) p.dirichlet.resize(grid->Nd(),0);
|
||||||
|
partialDirichlet = p.partialDirichlet;
|
||||||
DirichletBlock(p.dirichlet); // comms send/recv set up
|
DirichletBlock(p.dirichlet); // comms send/recv set up
|
||||||
|
|
||||||
_unified_buffer_size=0;
|
_unified_buffer_size=0;
|
||||||
@ -827,7 +851,7 @@ public:
|
|||||||
GridBase *grid=_grid;
|
GridBase *grid=_grid;
|
||||||
const int Nsimd = grid->Nsimd();
|
const int Nsimd = grid->Nsimd();
|
||||||
|
|
||||||
int comms_recv = this->_comms_recv[point];
|
int comms_recv = this->_comms_recv[point] || this->_comms_partial_recv[point] ;
|
||||||
int fd = _grid->_fdimensions[dimension];
|
int fd = _grid->_fdimensions[dimension];
|
||||||
int ld = _grid->_ldimensions[dimension];
|
int ld = _grid->_ldimensions[dimension];
|
||||||
int rd = _grid->_rdimensions[dimension];
|
int rd = _grid->_rdimensions[dimension];
|
||||||
@ -1014,8 +1038,10 @@ public:
|
|||||||
{
|
{
|
||||||
typedef typename cobj::vector_type vector_type;
|
typedef typename cobj::vector_type vector_type;
|
||||||
|
|
||||||
int comms_send = this->_comms_send[point] ;
|
int comms_send = this->_comms_send[point];
|
||||||
int comms_recv = this->_comms_recv[point] ;
|
int comms_recv = this->_comms_recv[point];
|
||||||
|
int comms_partial_send = this->_comms_partial_send[point] ;
|
||||||
|
int comms_partial_recv = this->_comms_partial_recv[point] ;
|
||||||
|
|
||||||
assert(rhs.Grid()==_grid);
|
assert(rhs.Grid()==_grid);
|
||||||
// conformable(_grid,rhs.Grid());
|
// conformable(_grid,rhs.Grid());
|
||||||
@ -1046,7 +1072,17 @@ public:
|
|||||||
if (cbmask != 0x3) words=words>>1;
|
if (cbmask != 0x3) words=words>>1;
|
||||||
|
|
||||||
int bytes = words * compress.CommDatumSize();
|
int bytes = words * compress.CommDatumSize();
|
||||||
|
int xbytes;
|
||||||
|
int rbytes;
|
||||||
|
|
||||||
|
if ( comms_send ) xbytes = bytes; // Full send
|
||||||
|
else if ( comms_partial_send ) xbytes = bytes/compressor::PartialCompressionFactor(_grid);
|
||||||
|
else xbytes = 0; // full dirichlet
|
||||||
|
|
||||||
|
if ( comms_recv ) rbytes = bytes;
|
||||||
|
else if ( comms_partial_recv ) rbytes = bytes/compressor::PartialCompressionFactor(_grid);
|
||||||
|
else rbytes = 0;
|
||||||
|
|
||||||
int so = sx*rhs.Grid()->_ostride[dimension]; // base offset for start of plane
|
int so = sx*rhs.Grid()->_ostride[dimension]; // base offset for start of plane
|
||||||
int comm_off = u_comm_offset;
|
int comm_off = u_comm_offset;
|
||||||
|
|
||||||
@ -1059,49 +1095,47 @@ public:
|
|||||||
assert (xmit_to_rank != _grid->ThisRank());
|
assert (xmit_to_rank != _grid->ThisRank());
|
||||||
assert (recv_from_rank != _grid->ThisRank());
|
assert (recv_from_rank != _grid->ThisRank());
|
||||||
|
|
||||||
if( comms_send ) {
|
if ( !face_table_computed ) {
|
||||||
|
face_table.resize(face_idx+1);
|
||||||
if ( !face_table_computed ) {
|
std::vector<std::pair<int,int> > face_table_host ;
|
||||||
face_table.resize(face_idx+1);
|
Gather_plane_table_compute ((GridBase *)_grid,dimension,sx,cbmask,comm_off,face_table_host);
|
||||||
std::vector<std::pair<int,int> > face_table_host ;
|
// std::cout << "bytes expect "<< bytes << " " << face_table_host.size()* compress.CommDatumSize()<<std::endl;
|
||||||
Gather_plane_table_compute ((GridBase *)_grid,dimension,sx,cbmask,comm_off,face_table_host);
|
|
||||||
face_table[face_idx].resize(face_table_host.size());
|
face_table[face_idx].resize(face_table_host.size());
|
||||||
acceleratorCopyToDevice(&face_table_host[0],
|
acceleratorCopyToDevice(&face_table_host[0],
|
||||||
&face_table[face_idx][0],
|
&face_table[face_idx][0],
|
||||||
face_table[face_idx].size()*sizeof(face_table_host[0]));
|
face_table[face_idx].size()*sizeof(face_table_host[0]));
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
if ( compress.DecompressionStep() ) {
|
|
||||||
recv_buf=u_simd_recv_buf[0];
|
|
||||||
} else {
|
|
||||||
recv_buf=this->u_recv_buf_p;
|
|
||||||
}
|
|
||||||
|
|
||||||
send_buf = this->u_send_buf_p; // Gather locally, must send
|
|
||||||
|
|
||||||
////////////////////////////////////////////////////////
|
|
||||||
// Gather locally
|
|
||||||
////////////////////////////////////////////////////////
|
|
||||||
assert(send_buf!=NULL);
|
|
||||||
Gather_plane_simple_table(face_table[face_idx],rhs,send_buf,compress,comm_off,so);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
int duplicate = CheckForDuplicate(dimension,sx,comm_proc,(void *)&recv_buf[comm_off],0,bytes,cbmask);
|
|
||||||
if ( (!duplicate) ) { // Force comms for now
|
|
||||||
|
|
||||||
|
if ( (compress.DecompressionStep()&&comms_recv) || comms_partial_recv ) {
|
||||||
|
recv_buf=u_simd_recv_buf[0];
|
||||||
|
} else {
|
||||||
|
recv_buf=this->u_recv_buf_p;
|
||||||
|
}
|
||||||
|
|
||||||
|
////////////////////////////////////////////////////////
|
||||||
|
// Gather locally
|
||||||
|
////////////////////////////////////////////////////////
|
||||||
|
send_buf = this->u_send_buf_p; // Gather locally, must send
|
||||||
|
assert(send_buf!=NULL);
|
||||||
|
|
||||||
|
compressor::Gather_plane_simple(face_table[face_idx],rhs,send_buf,compress,comm_off,so,comms_partial_send);
|
||||||
|
|
||||||
|
int duplicate = CheckForDuplicate(dimension,sx,comm_proc,(void *)&recv_buf[comm_off],0,xbytes,rbytes,cbmask);
|
||||||
|
if ( !duplicate ) { // Force comms for now
|
||||||
|
|
||||||
///////////////////////////////////////////////////////////
|
///////////////////////////////////////////////////////////
|
||||||
// Build a list of things to do after we synchronise GPUs
|
// Build a list of things to do after we synchronise GPUs
|
||||||
// Start comms now???
|
// Start comms now???
|
||||||
///////////////////////////////////////////////////////////
|
///////////////////////////////////////////////////////////
|
||||||
AddPacket((void *)&send_buf[comm_off],
|
AddPacket((void *)&send_buf[comm_off],
|
||||||
(void *)&recv_buf[comm_off],
|
(void *)&recv_buf[comm_off],
|
||||||
xmit_to_rank, comms_send,
|
xmit_to_rank, comms_send|comms_partial_send,
|
||||||
recv_from_rank, comms_recv,
|
recv_from_rank, comms_recv|comms_partial_recv,
|
||||||
bytes);
|
xbytes,rbytes);
|
||||||
}
|
}
|
||||||
|
|
||||||
if ( compress.DecompressionStep() && comms_recv ) {
|
if ( (compress.DecompressionStep() && comms_recv) || comms_partial_recv ) {
|
||||||
AddDecompress(&this->u_recv_buf_p[comm_off],
|
AddDecompress(&this->u_recv_buf_p[comm_off],
|
||||||
&recv_buf[comm_off],
|
&recv_buf[comm_off],
|
||||||
words,Decompressions);
|
words,Decompressions);
|
||||||
@ -1109,7 +1143,6 @@ public:
|
|||||||
|
|
||||||
u_comm_offset+=words;
|
u_comm_offset+=words;
|
||||||
face_idx++;
|
face_idx++;
|
||||||
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return 0;
|
return 0;
|
||||||
@ -1122,8 +1155,10 @@ public:
|
|||||||
|
|
||||||
const int maxl =2;// max layout in a direction
|
const int maxl =2;// max layout in a direction
|
||||||
|
|
||||||
int comms_send = this->_comms_send[point] ;
|
int comms_send = this->_comms_send[point];
|
||||||
int comms_recv = this->_comms_recv[point] ;
|
int comms_recv = this->_comms_recv[point];
|
||||||
|
int comms_partial_send = this->_comms_partial_send[point] ;
|
||||||
|
int comms_partial_recv = this->_comms_partial_recv[point] ;
|
||||||
|
|
||||||
int fd = _grid->_fdimensions[dimension];
|
int fd = _grid->_fdimensions[dimension];
|
||||||
int rd = _grid->_rdimensions[dimension];
|
int rd = _grid->_rdimensions[dimension];
|
||||||
@ -1153,6 +1188,11 @@ public:
|
|||||||
|
|
||||||
int datum_bytes = compress.CommDatumSize();
|
int datum_bytes = compress.CommDatumSize();
|
||||||
int bytes = (reduced_buffer_size*datum_bytes)/simd_layout;
|
int bytes = (reduced_buffer_size*datum_bytes)/simd_layout;
|
||||||
|
|
||||||
|
// how many bytes on wire : partial dirichlet or dirichlet may set to < bytes
|
||||||
|
int xbytes;
|
||||||
|
int rbytes;
|
||||||
|
|
||||||
assert(bytes*simd_layout == reduced_buffer_size*datum_bytes);
|
assert(bytes*simd_layout == reduced_buffer_size*datum_bytes);
|
||||||
|
|
||||||
Vector<cobj *> rpointers(maxl);
|
Vector<cobj *> rpointers(maxl);
|
||||||
@ -1182,22 +1222,37 @@ public:
|
|||||||
if ( !face_table_computed ) {
|
if ( !face_table_computed ) {
|
||||||
face_table.resize(face_idx+1);
|
face_table.resize(face_idx+1);
|
||||||
std::vector<std::pair<int,int> > face_table_host ;
|
std::vector<std::pair<int,int> > face_table_host ;
|
||||||
|
|
||||||
Gather_plane_table_compute ((GridBase *)_grid,dimension,sx,cbmask,comm_off,face_table_host);
|
Gather_plane_table_compute ((GridBase *)_grid,dimension,sx,cbmask,comm_off,face_table_host);
|
||||||
face_table[face_idx].resize(face_table_host.size());
|
face_table[face_idx].resize(face_table_host.size());
|
||||||
acceleratorCopyToDevice(&face_table_host[0],
|
acceleratorCopyToDevice(&face_table_host[0],
|
||||||
&face_table[face_idx][0],
|
&face_table[face_idx][0],
|
||||||
face_table[face_idx].size()*sizeof(face_table_host[0]));
|
face_table[face_idx].size()*sizeof(face_table_host[0]));
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if ( comms_send || comms_recv ) {
|
|
||||||
Gather_plane_exchange_table(face_table[face_idx],rhs,spointers,dimension,sx,cbmask,compress,permute_type);
|
if ( comms_send ) xbytes = bytes;
|
||||||
|
else if ( comms_partial_send ) xbytes = bytes/compressor::PartialCompressionFactor(_grid);
|
||||||
|
else xbytes = 0;
|
||||||
|
|
||||||
|
if ( comms_recv ) rbytes = bytes;
|
||||||
|
else if ( comms_partial_recv ) rbytes = bytes/compressor::PartialCompressionFactor(_grid);
|
||||||
|
else rbytes = 0;
|
||||||
|
|
||||||
|
// Gathers SIMD lanes for send and merge
|
||||||
|
// Different faces can be full comms or partial comms with multiple ranks per node
|
||||||
|
if ( comms_send || comms_recv||comms_partial_send||comms_partial_recv ) {
|
||||||
|
|
||||||
|
int partial = partialDirichlet;
|
||||||
|
compressor::Gather_plane_exchange(face_table[face_idx],rhs,
|
||||||
|
spointers,dimension,sx,cbmask,
|
||||||
|
compress,permute_type,partial );
|
||||||
}
|
}
|
||||||
face_idx++;
|
face_idx++;
|
||||||
|
|
||||||
//spointers[0] -- low
|
//spointers[0] -- low simd coor
|
||||||
//spointers[1] -- high
|
//spointers[1] -- high simd coor
|
||||||
|
|
||||||
for(int i=0;i<maxl;i++){
|
for(int i=0;i<maxl;i++){
|
||||||
|
|
||||||
int my_coor = rd*i + x; // self explanatory
|
int my_coor = rd*i + x; // self explanatory
|
||||||
@ -1222,13 +1277,16 @@ public:
|
|||||||
_grid->ShiftedRanks(dimension,nbr_proc,xmit_to_rank,recv_from_rank);
|
_grid->ShiftedRanks(dimension,nbr_proc,xmit_to_rank,recv_from_rank);
|
||||||
|
|
||||||
rpointers[i] = rp;
|
rpointers[i] = rp;
|
||||||
|
|
||||||
int duplicate = CheckForDuplicate(dimension,sx,nbr_proc,(void *)rp,i,bytes,cbmask);
|
int duplicate = CheckForDuplicate(dimension,sx,nbr_proc,(void *)rp,i,xbytes,rbytes,cbmask);
|
||||||
if ( !duplicate ) {
|
if ( !duplicate ) {
|
||||||
|
if ( (bytes != rbytes) && (rbytes!=0) ){
|
||||||
|
acceleratorMemSet(rp,0,bytes); // Zero prefill comms buffer to zero
|
||||||
|
}
|
||||||
AddPacket((void *)sp,(void *)rp,
|
AddPacket((void *)sp,(void *)rp,
|
||||||
xmit_to_rank,comms_send,
|
xmit_to_rank,comms_send|comms_partial_send,
|
||||||
recv_from_rank,comms_recv,
|
recv_from_rank,comms_recv|comms_partial_recv,
|
||||||
bytes);
|
xbytes,rbytes);
|
||||||
}
|
}
|
||||||
|
|
||||||
} else {
|
} else {
|
||||||
@ -1238,7 +1296,7 @@ public:
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if ( comms_recv ) {
|
if ( comms_recv|comms_partial_recv ) {
|
||||||
AddMerge(&this->u_recv_buf_p[comm_off],rpointers,reduced_buffer_size,permute_type,Mergers);
|
AddMerge(&this->u_recv_buf_p[comm_off],rpointers,reduced_buffer_size,permute_type,Mergers);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user