mirror of
https://github.com/paboyle/Grid.git
synced 2024-11-10 07:55:35 +00:00
Merge branch 'develop' into feature/hadrons
This commit is contained in:
commit
41d6cab033
@ -226,6 +226,48 @@ void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags)
|
|||||||
};
|
};
|
||||||
#endif // MMAP
|
#endif // MMAP
|
||||||
|
|
||||||
|
#ifdef GRID_MPI3_SHM_NONE
|
||||||
|
void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags)
|
||||||
|
{
|
||||||
|
std::cout << "SharedMemoryAllocate "<< bytes<< " MMAP anonymous implementation "<<std::endl;
|
||||||
|
assert(_ShmSetup==1);
|
||||||
|
assert(_ShmAlloc==0);
|
||||||
|
//////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||||
|
// allocate the shared windows for our group
|
||||||
|
//////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||||
|
MPI_Barrier(WorldShmComm);
|
||||||
|
WorldShmCommBufs.resize(WorldShmSize);
|
||||||
|
|
||||||
|
////////////////////////////////////////////////////////////////////////////////////////////
|
||||||
|
// Hugetlbf and others map filesystems as mappable huge pages
|
||||||
|
////////////////////////////////////////////////////////////////////////////////////////////
|
||||||
|
char shm_name [NAME_MAX];
|
||||||
|
assert(WorldShmSize == 1);
|
||||||
|
for(int r=0;r<WorldShmSize;r++){
|
||||||
|
|
||||||
|
int fd=-1;
|
||||||
|
int mmap_flag = MAP_SHARED |MAP_ANONYMOUS ;
|
||||||
|
#ifdef MAP_POPULATE
|
||||||
|
mmap_flag|=MAP_POPULATE;
|
||||||
|
#endif
|
||||||
|
#ifdef MAP_HUGETLB
|
||||||
|
if ( flags ) mmap_flag |= MAP_HUGETLB;
|
||||||
|
#endif
|
||||||
|
void *ptr = (void *) mmap(NULL, bytes, PROT_READ | PROT_WRITE, mmap_flag,fd, 0);
|
||||||
|
if ( ptr == (void *)MAP_FAILED ) {
|
||||||
|
printf("mmap %s failed\n",shm_name);
|
||||||
|
perror("failed mmap"); assert(0);
|
||||||
|
}
|
||||||
|
assert(((uint64_t)ptr&0x3F)==0);
|
||||||
|
close(fd);
|
||||||
|
WorldShmCommBufs[r] =ptr;
|
||||||
|
std::cout << "Set WorldShmCommBufs["<<r<<"]="<<ptr<< "("<< bytes<< "bytes)"<<std::endl;
|
||||||
|
}
|
||||||
|
_ShmAlloc=1;
|
||||||
|
_ShmAllocBytes = bytes;
|
||||||
|
};
|
||||||
|
#endif // MMAP
|
||||||
|
|
||||||
#ifdef GRID_MPI3_SHMOPEN
|
#ifdef GRID_MPI3_SHMOPEN
|
||||||
////////////////////////////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////////////////////////////
|
||||||
// POSIX SHMOPEN ; as far as I know Linux does not allow EXPLICIT HugePages with this case
|
// POSIX SHMOPEN ; as far as I know Linux does not allow EXPLICIT HugePages with this case
|
||||||
@ -246,7 +288,7 @@ void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags)
|
|||||||
|
|
||||||
size_t size = bytes;
|
size_t size = bytes;
|
||||||
|
|
||||||
sprintf(shm_name,"/Grid_mpi3_shm_%d_%d",WorldNode,r);
|
sprintf(shm_name,"/myGrid_mpi3_shm_%d_%d",WorldNode,r);
|
||||||
|
|
||||||
shm_unlink(shm_name);
|
shm_unlink(shm_name);
|
||||||
int fd=shm_open(shm_name,O_RDWR|O_CREAT,0666);
|
int fd=shm_open(shm_name,O_RDWR|O_CREAT,0666);
|
||||||
|
@ -91,7 +91,7 @@ class BinaryIO {
|
|||||||
typedef typename vobj::scalar_object sobj;
|
typedef typename vobj::scalar_object sobj;
|
||||||
|
|
||||||
GridBase *grid = lat._grid;
|
GridBase *grid = lat._grid;
|
||||||
int lsites = grid->lSites();
|
uint64_t lsites = grid->lSites();
|
||||||
|
|
||||||
std::vector<sobj> scalardata(lsites);
|
std::vector<sobj> scalardata(lsites);
|
||||||
unvectorizeToLexOrdArray(scalardata,lat);
|
unvectorizeToLexOrdArray(scalardata,lat);
|
||||||
@ -160,7 +160,9 @@ class BinaryIO {
|
|||||||
|
|
||||||
/*
|
/*
|
||||||
* Scidac csum is rather more heavyweight
|
* Scidac csum is rather more heavyweight
|
||||||
|
* FIXME -- 128^3 x 256 x 16 will overflow.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
int global_site;
|
int global_site;
|
||||||
|
|
||||||
Lexicographic::CoorFromIndex(coor,local_site,local_vol);
|
Lexicographic::CoorFromIndex(coor,local_site,local_vol);
|
||||||
@ -261,7 +263,7 @@ class BinaryIO {
|
|||||||
GridBase *grid,
|
GridBase *grid,
|
||||||
std::vector<fobj> &iodata,
|
std::vector<fobj> &iodata,
|
||||||
std::string file,
|
std::string file,
|
||||||
Integer offset,
|
uint64_t offset,
|
||||||
const std::string &format, int control,
|
const std::string &format, int control,
|
||||||
uint32_t &nersc_csum,
|
uint32_t &nersc_csum,
|
||||||
uint32_t &scidac_csuma,
|
uint32_t &scidac_csuma,
|
||||||
@ -523,7 +525,7 @@ class BinaryIO {
|
|||||||
static inline void readLatticeObject(Lattice<vobj> &Umu,
|
static inline void readLatticeObject(Lattice<vobj> &Umu,
|
||||||
std::string file,
|
std::string file,
|
||||||
munger munge,
|
munger munge,
|
||||||
Integer offset,
|
uint64_t offset,
|
||||||
const std::string &format,
|
const std::string &format,
|
||||||
uint32_t &nersc_csum,
|
uint32_t &nersc_csum,
|
||||||
uint32_t &scidac_csuma,
|
uint32_t &scidac_csuma,
|
||||||
@ -533,7 +535,7 @@ class BinaryIO {
|
|||||||
typedef typename vobj::Realified::scalar_type word; word w=0;
|
typedef typename vobj::Realified::scalar_type word; word w=0;
|
||||||
|
|
||||||
GridBase *grid = Umu._grid;
|
GridBase *grid = Umu._grid;
|
||||||
int lsites = grid->lSites();
|
uint64_t lsites = grid->lSites();
|
||||||
|
|
||||||
std::vector<sobj> scalardata(lsites);
|
std::vector<sobj> scalardata(lsites);
|
||||||
std::vector<fobj> iodata(lsites); // Munge, checksum, byte order in here
|
std::vector<fobj> iodata(lsites); // Munge, checksum, byte order in here
|
||||||
@ -544,7 +546,7 @@ class BinaryIO {
|
|||||||
GridStopWatch timer;
|
GridStopWatch timer;
|
||||||
timer.Start();
|
timer.Start();
|
||||||
|
|
||||||
parallel_for(int x=0;x<lsites;x++) munge(iodata[x], scalardata[x]);
|
parallel_for(uint64_t x=0;x<lsites;x++) munge(iodata[x], scalardata[x]);
|
||||||
|
|
||||||
vectorizeFromLexOrdArray(scalardata,Umu);
|
vectorizeFromLexOrdArray(scalardata,Umu);
|
||||||
grid->Barrier();
|
grid->Barrier();
|
||||||
@ -560,7 +562,7 @@ class BinaryIO {
|
|||||||
static inline void writeLatticeObject(Lattice<vobj> &Umu,
|
static inline void writeLatticeObject(Lattice<vobj> &Umu,
|
||||||
std::string file,
|
std::string file,
|
||||||
munger munge,
|
munger munge,
|
||||||
Integer offset,
|
uint64_t offset,
|
||||||
const std::string &format,
|
const std::string &format,
|
||||||
uint32_t &nersc_csum,
|
uint32_t &nersc_csum,
|
||||||
uint32_t &scidac_csuma,
|
uint32_t &scidac_csuma,
|
||||||
@ -569,7 +571,7 @@ class BinaryIO {
|
|||||||
typedef typename vobj::scalar_object sobj;
|
typedef typename vobj::scalar_object sobj;
|
||||||
typedef typename vobj::Realified::scalar_type word; word w=0;
|
typedef typename vobj::Realified::scalar_type word; word w=0;
|
||||||
GridBase *grid = Umu._grid;
|
GridBase *grid = Umu._grid;
|
||||||
int lsites = grid->lSites();
|
uint64_t lsites = grid->lSites();
|
||||||
|
|
||||||
std::vector<sobj> scalardata(lsites);
|
std::vector<sobj> scalardata(lsites);
|
||||||
std::vector<fobj> iodata(lsites); // Munge, checksum, byte order in here
|
std::vector<fobj> iodata(lsites); // Munge, checksum, byte order in here
|
||||||
@ -580,7 +582,7 @@ class BinaryIO {
|
|||||||
GridStopWatch timer; timer.Start();
|
GridStopWatch timer; timer.Start();
|
||||||
unvectorizeToLexOrdArray(scalardata,Umu);
|
unvectorizeToLexOrdArray(scalardata,Umu);
|
||||||
|
|
||||||
parallel_for(int x=0;x<lsites;x++) munge(scalardata[x],iodata[x]);
|
parallel_for(uint64_t x=0;x<lsites;x++) munge(scalardata[x],iodata[x]);
|
||||||
|
|
||||||
grid->Barrier();
|
grid->Barrier();
|
||||||
timer.Stop();
|
timer.Stop();
|
||||||
@ -597,7 +599,7 @@ class BinaryIO {
|
|||||||
static inline void readRNG(GridSerialRNG &serial,
|
static inline void readRNG(GridSerialRNG &serial,
|
||||||
GridParallelRNG &parallel,
|
GridParallelRNG &parallel,
|
||||||
std::string file,
|
std::string file,
|
||||||
Integer offset,
|
uint64_t offset,
|
||||||
uint32_t &nersc_csum,
|
uint32_t &nersc_csum,
|
||||||
uint32_t &scidac_csuma,
|
uint32_t &scidac_csuma,
|
||||||
uint32_t &scidac_csumb)
|
uint32_t &scidac_csumb)
|
||||||
@ -610,8 +612,8 @@ class BinaryIO {
|
|||||||
std::string format = "IEEE32BIG";
|
std::string format = "IEEE32BIG";
|
||||||
|
|
||||||
GridBase *grid = parallel._grid;
|
GridBase *grid = parallel._grid;
|
||||||
int gsites = grid->gSites();
|
uint64_t gsites = grid->gSites();
|
||||||
int lsites = grid->lSites();
|
uint64_t lsites = grid->lSites();
|
||||||
|
|
||||||
uint32_t nersc_csum_tmp = 0;
|
uint32_t nersc_csum_tmp = 0;
|
||||||
uint32_t scidac_csuma_tmp = 0;
|
uint32_t scidac_csuma_tmp = 0;
|
||||||
@ -626,7 +628,7 @@ class BinaryIO {
|
|||||||
nersc_csum,scidac_csuma,scidac_csumb);
|
nersc_csum,scidac_csuma,scidac_csumb);
|
||||||
|
|
||||||
timer.Start();
|
timer.Start();
|
||||||
parallel_for(int lidx=0;lidx<lsites;lidx++){
|
parallel_for(uint64_t lidx=0;lidx<lsites;lidx++){
|
||||||
std::vector<RngStateType> tmp(RngStateCount);
|
std::vector<RngStateType> tmp(RngStateCount);
|
||||||
std::copy(iodata[lidx].begin(),iodata[lidx].end(),tmp.begin());
|
std::copy(iodata[lidx].begin(),iodata[lidx].end(),tmp.begin());
|
||||||
parallel.SetState(tmp,lidx);
|
parallel.SetState(tmp,lidx);
|
||||||
@ -659,7 +661,7 @@ class BinaryIO {
|
|||||||
static inline void writeRNG(GridSerialRNG &serial,
|
static inline void writeRNG(GridSerialRNG &serial,
|
||||||
GridParallelRNG &parallel,
|
GridParallelRNG &parallel,
|
||||||
std::string file,
|
std::string file,
|
||||||
Integer offset,
|
uint64_t offset,
|
||||||
uint32_t &nersc_csum,
|
uint32_t &nersc_csum,
|
||||||
uint32_t &scidac_csuma,
|
uint32_t &scidac_csuma,
|
||||||
uint32_t &scidac_csumb)
|
uint32_t &scidac_csumb)
|
||||||
@ -670,8 +672,8 @@ class BinaryIO {
|
|||||||
typedef std::array<RngStateType,RngStateCount> RNGstate;
|
typedef std::array<RngStateType,RngStateCount> RNGstate;
|
||||||
|
|
||||||
GridBase *grid = parallel._grid;
|
GridBase *grid = parallel._grid;
|
||||||
int gsites = grid->gSites();
|
uint64_t gsites = grid->gSites();
|
||||||
int lsites = grid->lSites();
|
uint64_t lsites = grid->lSites();
|
||||||
|
|
||||||
uint32_t nersc_csum_tmp;
|
uint32_t nersc_csum_tmp;
|
||||||
uint32_t scidac_csuma_tmp;
|
uint32_t scidac_csuma_tmp;
|
||||||
@ -684,7 +686,7 @@ class BinaryIO {
|
|||||||
|
|
||||||
timer.Start();
|
timer.Start();
|
||||||
std::vector<RNGstate> iodata(lsites);
|
std::vector<RNGstate> iodata(lsites);
|
||||||
parallel_for(int lidx=0;lidx<lsites;lidx++){
|
parallel_for(uint64_t lidx=0;lidx<lsites;lidx++){
|
||||||
std::vector<RngStateType> tmp(RngStateCount);
|
std::vector<RngStateType> tmp(RngStateCount);
|
||||||
parallel.GetState(tmp,lidx);
|
parallel.GetState(tmp,lidx);
|
||||||
std::copy(tmp.begin(),tmp.end(),iodata[lidx].begin());
|
std::copy(tmp.begin(),tmp.end(),iodata[lidx].begin());
|
||||||
|
@ -337,6 +337,20 @@ class GridLimeWriter : public BinaryIO {
|
|||||||
template<class vobj>
|
template<class vobj>
|
||||||
void writeLimeLatticeBinaryObject(Lattice<vobj> &field,std::string record_name)
|
void writeLimeLatticeBinaryObject(Lattice<vobj> &field,std::string record_name)
|
||||||
{
|
{
|
||||||
|
////////////////////////////////////////////////////////////////////
|
||||||
|
// NB: FILE and iostream are jointly writing disjoint sequences in the
|
||||||
|
// same file through different file handles (integer units).
|
||||||
|
//
|
||||||
|
// These are both buffered, so why I think this code is right is as follows.
|
||||||
|
//
|
||||||
|
// i) write record header to FILE *File, telegraphing the size; flush
|
||||||
|
// ii) ftello reads the offset from FILE *File .
|
||||||
|
// iii) iostream / MPI Open independently seek this offset. Write sequence direct to disk.
|
||||||
|
// Closes iostream and flushes.
|
||||||
|
// iv) fseek on FILE * to end of this disjoint section.
|
||||||
|
// v) Continue writing scidac record.
|
||||||
|
////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
////////////////////////////////////////////
|
////////////////////////////////////////////
|
||||||
// Create record header
|
// Create record header
|
||||||
////////////////////////////////////////////
|
////////////////////////////////////////////
|
||||||
@ -350,25 +364,24 @@ class GridLimeWriter : public BinaryIO {
|
|||||||
// std::cout << "W Gsites " <<field._grid->_gsites<<std::endl;
|
// std::cout << "W Gsites " <<field._grid->_gsites<<std::endl;
|
||||||
// std::cout << "W Payload expected " <<PayloadSize<<std::endl;
|
// std::cout << "W Payload expected " <<PayloadSize<<std::endl;
|
||||||
|
|
||||||
////////////////////////////////////////////////////////////////////
|
fflush(File);
|
||||||
// NB: FILE and iostream are jointly writing disjoint sequences in the
|
|
||||||
// same file through different file handles (integer units).
|
///////////////////////////////////////////
|
||||||
//
|
// Write by other means into the binary record
|
||||||
// These are both buffered, so why I think this code is right is as follows.
|
///////////////////////////////////////////
|
||||||
//
|
uint64_t offset1 = ftello(File); // std::cout << " Writing to offset "<<offset1 << std::endl;
|
||||||
// i) write record header to FILE *File, telegraphing the size.
|
|
||||||
// ii) ftello reads the offset from FILE *File .
|
|
||||||
// iii) iostream / MPI Open independently seek this offset. Write sequence direct to disk.
|
|
||||||
// Closes iostream and flushes.
|
|
||||||
// iv) fseek on FILE * to end of this disjoint section.
|
|
||||||
// v) Continue writing scidac record.
|
|
||||||
////////////////////////////////////////////////////////////////////
|
|
||||||
uint64_t offset = ftello(File);
|
|
||||||
// std::cout << " Writing to offset "<<offset << std::endl;
|
|
||||||
std::string format = getFormatString<vobj>();
|
std::string format = getFormatString<vobj>();
|
||||||
BinarySimpleMunger<sobj,sobj> munge;
|
BinarySimpleMunger<sobj,sobj> munge;
|
||||||
BinaryIO::writeLatticeObject<vobj,sobj>(field, filename, munge, offset, format,nersc_csum,scidac_csuma,scidac_csumb);
|
BinaryIO::writeLatticeObject<vobj,sobj>(field, filename, munge, offset1, format,nersc_csum,scidac_csuma,scidac_csumb);
|
||||||
// fseek(File,0,SEEK_END); offset = ftello(File);std::cout << " offset now "<<offset << std::endl;
|
|
||||||
|
///////////////////////////////////////////
|
||||||
|
// Wind forward and close the record
|
||||||
|
///////////////////////////////////////////
|
||||||
|
fseek(File,0,SEEK_END);
|
||||||
|
uint64_t offset2 = ftello(File); // std::cout << " now at offset "<<offset2 << std::endl;
|
||||||
|
|
||||||
|
assert((offset2-offset1) == PayloadSize);
|
||||||
|
|
||||||
err=limeWriterCloseRecord(LimeW); assert(err>=0);
|
err=limeWriterCloseRecord(LimeW); assert(err>=0);
|
||||||
|
|
||||||
////////////////////////////////////////
|
////////////////////////////////////////
|
||||||
|
@ -57,7 +57,7 @@ namespace Grid {
|
|||||||
// for the header-reader
|
// for the header-reader
|
||||||
static inline int readHeader(std::string file,GridBase *grid, FieldMetaData &field)
|
static inline int readHeader(std::string file,GridBase *grid, FieldMetaData &field)
|
||||||
{
|
{
|
||||||
int offset=0;
|
uint64_t offset=0;
|
||||||
std::map<std::string,std::string> header;
|
std::map<std::string,std::string> header;
|
||||||
std::string line;
|
std::string line;
|
||||||
|
|
||||||
@ -139,7 +139,7 @@ namespace Grid {
|
|||||||
typedef Lattice<iLorentzColourMatrix<vsimd> > GaugeField;
|
typedef Lattice<iLorentzColourMatrix<vsimd> > GaugeField;
|
||||||
|
|
||||||
GridBase *grid = Umu._grid;
|
GridBase *grid = Umu._grid;
|
||||||
int offset = readHeader(file,Umu._grid,header);
|
uint64_t offset = readHeader(file,Umu._grid,header);
|
||||||
|
|
||||||
FieldMetaData clone(header);
|
FieldMetaData clone(header);
|
||||||
|
|
||||||
@ -236,7 +236,7 @@ namespace Grid {
|
|||||||
GaugeStatistics(Umu,header);
|
GaugeStatistics(Umu,header);
|
||||||
MachineCharacteristics(header);
|
MachineCharacteristics(header);
|
||||||
|
|
||||||
int offset;
|
uint64_t offset;
|
||||||
|
|
||||||
truncate(file);
|
truncate(file);
|
||||||
|
|
||||||
@ -278,7 +278,7 @@ namespace Grid {
|
|||||||
header.plaquette=0.0;
|
header.plaquette=0.0;
|
||||||
MachineCharacteristics(header);
|
MachineCharacteristics(header);
|
||||||
|
|
||||||
int offset;
|
uint64_t offset;
|
||||||
|
|
||||||
#ifdef RNG_RANLUX
|
#ifdef RNG_RANLUX
|
||||||
header.floating_point = std::string("UINT64");
|
header.floating_point = std::string("UINT64");
|
||||||
@ -313,7 +313,7 @@ namespace Grid {
|
|||||||
|
|
||||||
GridBase *grid = parallel._grid;
|
GridBase *grid = parallel._grid;
|
||||||
|
|
||||||
int offset = readHeader(file,grid,header);
|
uint64_t offset = readHeader(file,grid,header);
|
||||||
|
|
||||||
FieldMetaData clone(header);
|
FieldMetaData clone(header);
|
||||||
|
|
||||||
|
@ -180,7 +180,6 @@ int main (int argc, char ** argv) {
|
|||||||
GridCartesian * CoarseGrid4 = SpaceTimeGrid::makeFourDimGrid(coarseLatt, GridDefaultSimd(Nd,vComplex::Nsimd()),GridDefaultMpi());
|
GridCartesian * CoarseGrid4 = SpaceTimeGrid::makeFourDimGrid(coarseLatt, GridDefaultSimd(Nd,vComplex::Nsimd()),GridDefaultMpi());
|
||||||
GridRedBlackCartesian * CoarseGrid4rb = SpaceTimeGrid::makeFourDimRedBlackGrid(CoarseGrid4);
|
GridRedBlackCartesian * CoarseGrid4rb = SpaceTimeGrid::makeFourDimRedBlackGrid(CoarseGrid4);
|
||||||
GridCartesian * CoarseGrid5 = SpaceTimeGrid::makeFiveDimGrid(cLs,CoarseGrid4);
|
GridCartesian * CoarseGrid5 = SpaceTimeGrid::makeFiveDimGrid(cLs,CoarseGrid4);
|
||||||
GridRedBlackCartesian * CoarseGrid5rb = SpaceTimeGrid::makeFourDimRedBlackGrid(CoarseGrid5);
|
|
||||||
|
|
||||||
// Gauge field
|
// Gauge field
|
||||||
LatticeGaugeField Umu(UGrid);
|
LatticeGaugeField Umu(UGrid);
|
||||||
@ -206,7 +205,7 @@ int main (int argc, char ** argv) {
|
|||||||
|
|
||||||
const int nbasis= 60;
|
const int nbasis= 60;
|
||||||
assert(nbasis==Ns1);
|
assert(nbasis==Ns1);
|
||||||
LocalCoherenceLanczosScidac<vSpinColourVector,vTComplex,nbasis> _LocalCoherenceLanczos(FrbGrid,CoarseGrid5rb,HermOp,Odd);
|
LocalCoherenceLanczosScidac<vSpinColourVector,vTComplex,nbasis> _LocalCoherenceLanczos(FrbGrid,CoarseGrid5,HermOp,Odd);
|
||||||
std::cout << GridLogMessage << "Constructed LocalCoherenceLanczos" << std::endl;
|
std::cout << GridLogMessage << "Constructed LocalCoherenceLanczos" << std::endl;
|
||||||
|
|
||||||
assert( (Params.doFine)||(Params.doFineRead));
|
assert( (Params.doFine)||(Params.doFineRead));
|
||||||
|
Loading…
Reference in New Issue
Block a user