mirror of
https://github.com/paboyle/Grid.git
synced 2024-11-14 09:45:36 +00:00
dc814f30da
The number of I/O MPI tasks can be varied by selecting which dimensions use parallel I/O and which dimensions use serial send-to-boss I/O. Thus one can neck down from, say, 1024 nodes = 4x4x8x8 to {1,8,32,64,128,256,1024} nodes doing the I/O. This interpolates nicely between ALL nodes writing their data, a single boss per time-plane in processor space [the old UKQCD Fortran code did this], and a single node doing all I/O. I am not sure I have the transfer sizes big enough, and I am not overly convinced fstream is guaranteed not to give buffer inconsistencies unless I set the streambuf size to zero. Practically, it has worked on 8 tasks, 2x1x2x2, writing/cloning NERSC configurations on my MacOS + OpenMPI and Clang environment. It is VERY easy to switch to pwrite at a later date, and also easy to send x-strips around from each node in order to gather bigger chunks at the syscall level. That would push us up to the circa 8x 18*4*8 == 4KB size write chunk, and by taking, say, x/y non-parallel we get to 16MB contiguous chunks written in multiple 4KB transactions per I/O node in 64^3 lattices for configuration I/O. I suspect this is fine for system performance.
90 lines
1.9 KiB
C++
90 lines
1.9 KiB
C++
#include "Grid.h"
|
|
namespace Grid {
|
|
|
|
// Single-node ("fake") communicator constructor.
// Stores the requested processor grid, sizes the coordinate vector to match,
// and places this process at rank 0 / coordinate 0 in every dimension.
// Each extent in `processors` must be 1; this is checked with assert().
CartesianCommunicator::CartesianCommunicator(const std::vector<int> &processors)
{
  // Remember the grid and how many dimensions it has.
  _processors = processors;
  _ndimension = processors.size();
  _processor_coor.resize(_ndimension);

  // Exactly one processor exists, and it is number 0.
  _Nprocessors = 1;
  _processor   = 0;

  // Require 1^N processor grid for fake
  int d = 0;
  while ( d < _ndimension ) {
    assert(_processors[d]==1);
    _processor_coor[d] = 0;
    ++d;
  }
}
|
|
|
|
// float reduction stub for the single-node build: the body is empty, so the
// referenced value is left exactly as the caller passed it.
void CartesianCommunicator::GlobalSum(float &)
{
}
|
|
// float-array reduction stub for the single-node build: the body is empty,
// so neither the array nor the count N is touched.
void CartesianCommunicator::GlobalSumVector(float *, int N)
{
}
|
|
// double reduction stub for the single-node build: the body is empty, so the
// referenced value is left exactly as the caller passed it.
void CartesianCommunicator::GlobalSum(double &)
{
}
|
|
// uint32_t reduction stub for the single-node build: the body is empty, so
// the referenced value is left exactly as the caller passed it.
void CartesianCommunicator::GlobalSum(uint32_t &)
{
}
|
|
// double-array reduction stub for the single-node build: the body is empty,
// so neither the array nor the count N is touched.
void CartesianCommunicator::GlobalSumVector(double *, int N)
{
}
|
|
|
|
// Point-to-point receive stub for the single-node build.
// Unconditionally trips assert(0); none of the arguments are read and the
// `recv` buffer is never written.
void CartesianCommunicator::RecvFrom(void *recv, int recv_from_rank, int bytes)
{
  assert(0);
}
|
|
// Point-to-point send stub for the single-node build.
// Unconditionally trips assert(0); none of the arguments are read.
void CartesianCommunicator::SendTo(void *xmit, int xmit_to_rank, int bytes)
{
  assert(0);
}
|
|
|
|
|
|
// Basic halo comms primitive -- should never be called on a single node
|
|
// Combined send/receive stub for the single-node build.
// Unconditionally trips assert(0); no argument is read and neither buffer
// is accessed.
void CartesianCommunicator::SendToRecvFrom(void *xmit, int dest,
                                           void *recv, int from,
                                           int bytes)
{
  assert(0);
}
|
|
// "Begin" half of the split send/receive, stubbed for the single-node build.
// Unconditionally trips assert(0); `list` is not modified and neither buffer
// is accessed.
void CartesianCommunicator::SendToRecvFromBegin(std::vector<CommsRequest_t> &list,
                                                void *xmit, int dest,
                                                void *recv, int from,
                                                int bytes)
{
  assert(0);
}
|
|
// "Complete" half of the split send/receive, stubbed for the single-node
// build. Unconditionally trips assert(0); `list` is not inspected.
void CartesianCommunicator::SendToRecvFromComplete(std::vector<CommsRequest_t> &list)
{
  assert(0);
}
|
|
|
|
// Barrier stub for the single-node build: empty body, returns immediately.
void CartesianCommunicator::Barrier(void) {}
|
|
|
|
// Broadcast stub for the single-node build: empty body, so `data` is left
// exactly as the caller supplied it and `root` / `bytes` are ignored.
void CartesianCommunicator::Broadcast(int root, void *data, int bytes) {}
|
|
// World-broadcast stub for the single-node build: empty body, so `data` is
// left exactly as the caller supplied it and `root` / `bytes` are ignored.
void CartesianCommunicator::BroadcastWorld(int root, void *data, int bytes) {}
|
|
|
|
|
|
// Reports the source and destination ranks for a shift.
// In this single-node build both outputs are always set to 0; `dim` and
// `shift` are not consulted.
void CartesianCommunicator::ShiftedRanks(int dim, int shift, int &source, int &dest)
{
  source = dest = 0;
}
|
|
// Maps a processor coordinate to a rank.
// In this single-node build the answer is always 0; `coor` is not read.
int CartesianCommunicator::RankFromProcessorCoor(std::vector<int> &coor)
{
  const int sole_rank = 0;
  return sole_rank;
}
|
|
void CartesianCommunicator::ProcessorCoorFromRank(int rank, std::vector<int> &coor)
|
|
{
|
|
}
|
|
|
|
|
|
}
|
|
|