mirror of
				https://github.com/paboyle/Grid.git
				synced 2025-11-03 21:44:33 +00:00 
			
		
		
		
	Binary IO file for generic Grid array parallel I/O.
Number of IO MPI tasks can be varied by selecting which
dimensions use parallel IO and which dimensions use Serial send to boss
I/O.
Thus can neck down from, say 1024 nodes = 4x4x8x8 to {1,8,32,64,128,256,1024} nodes
doing the I/O.
Interpolates nicely between ALL nodes write their data, a single boss per time-plane
in processor space [old UKQCD fortran code did this], and a single node doing all I/O.
Not sure I have the transfer sizes big enough and am not overly convinced fstream
is guaranteed to not give buffer inconsistencies unless I set streambuf size to zero.
Practically it has worked on 8 tasks, 2x1x2x2 writing /cloning NERSC configurations
on my MacOS + OpenMPI and Clang environment.
It is VERY easy to switch to pwrite at a later date, and also easy to send x-strips around from
each node in order to gather bigger chunks at the syscall level.
That would push us up to the circa 8x 18*4*8 == 4KB size write chunk, and by taking, say, x/y non
parallel we get to 16MB contiguous chunks written in multi 4KB transactions
per IOnode in 64^3 lattices for configuration I/O.
I suspect this is fine for system performance.
			
			
This commit is contained in:
		@@ -45,6 +45,7 @@
 | 
			
		||||
#include <Stencil.h>      
 | 
			
		||||
#include <Algorithms.h>   
 | 
			
		||||
#include <qcd/QCD.h>
 | 
			
		||||
#include <parallelIO/BinaryIO.h>
 | 
			
		||||
#include <parallelIO/NerscIO.h>
 | 
			
		||||
 | 
			
		||||
#include <Init.h>
 | 
			
		||||
 
 | 
			
		||||
@@ -177,7 +177,7 @@ void Grid_init(int *argc,char ***argv)
 | 
			
		||||
    Grid_quiesce_nodes();
 | 
			
		||||
  }
 | 
			
		||||
  if( GridCmdOptionExists(*argv,*argv+*argc,"--dslash-opt") ){
 | 
			
		||||
    WilsonFermionStatic::HandOptDslash=1;
 | 
			
		||||
    QCD::WilsonFermionStatic::HandOptDslash=1;
 | 
			
		||||
  }
 | 
			
		||||
  if( GridCmdOptionExists(*argv,*argv+*argc,"--lebesgue") ){
 | 
			
		||||
    LebesgueOrder::UseLebesgueOrder=1;
 | 
			
		||||
 
 | 
			
		||||
@@ -87,6 +87,14 @@ class CartesianCommunicator {
 | 
			
		||||
			void *recv,
 | 
			
		||||
			int recv_from_rank,
 | 
			
		||||
			int bytes);
 | 
			
		||||
 | 
			
		||||
    void RecvFrom(void *recv,
 | 
			
		||||
		  int recv_from_rank,
 | 
			
		||||
		  int bytes);
 | 
			
		||||
    void SendTo(void *xmit,
 | 
			
		||||
		int xmit_to_rank,
 | 
			
		||||
		int bytes);
 | 
			
		||||
 | 
			
		||||
    void SendToRecvFromBegin(std::vector<CommsRequest_t> &list,
 | 
			
		||||
			 void *xmit,
 | 
			
		||||
			 int xmit_to_rank,
 | 
			
		||||
 
 | 
			
		||||
@@ -81,13 +81,30 @@ void CartesianCommunicator::SendToRecvFrom(void *xmit,
 | 
			
		||||
  SendToRecvFromBegin(reqs,xmit,dest,recv,from,bytes);
 | 
			
		||||
  SendToRecvFromComplete(reqs);
 | 
			
		||||
}
 | 
			
		||||
void CartesianCommunicator::RecvFrom(void *recv,
 | 
			
		||||
				     int from,
 | 
			
		||||
				     int bytes) 
 | 
			
		||||
{
 | 
			
		||||
  MPI_Status stat;
 | 
			
		||||
  int ierr=MPI_Recv(recv, bytes, MPI_CHAR,from,from,communicator,&stat);
 | 
			
		||||
  assert(ierr==0);
 | 
			
		||||
}
 | 
			
		||||
void CartesianCommunicator::SendTo(void *xmit,
 | 
			
		||||
				   int dest,
 | 
			
		||||
				   int bytes)
 | 
			
		||||
{
 | 
			
		||||
  int rank = _processor; // used for tag; must know who it comes from
 | 
			
		||||
  int ierr = MPI_Send(xmit, bytes, MPI_CHAR,dest,_processor,communicator);
 | 
			
		||||
  assert(ierr==0);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
// Basic Halo comms primitive
 | 
			
		||||
  void CartesianCommunicator::SendToRecvFromBegin(std::vector<CommsRequest_t> &list,
 | 
			
		||||
						  void *xmit,
 | 
			
		||||
						  int dest,
 | 
			
		||||
						  void *recv,
 | 
			
		||||
						  int from,
 | 
			
		||||
						  int bytes)
 | 
			
		||||
void CartesianCommunicator::SendToRecvFromBegin(std::vector<CommsRequest_t> &list,
 | 
			
		||||
						void *xmit,
 | 
			
		||||
						int dest,
 | 
			
		||||
						void *recv,
 | 
			
		||||
						int from,
 | 
			
		||||
						int bytes)
 | 
			
		||||
{
 | 
			
		||||
  MPI_Request xrq;
 | 
			
		||||
  MPI_Request rrq;
 | 
			
		||||
@@ -100,7 +117,6 @@ void CartesianCommunicator::SendToRecvFrom(void *xmit,
 | 
			
		||||
 | 
			
		||||
  list.push_back(xrq);
 | 
			
		||||
  list.push_back(rrq);
 | 
			
		||||
 | 
			
		||||
}
 | 
			
		||||
void CartesianCommunicator::SendToRecvFromComplete(std::vector<CommsRequest_t> &list)
 | 
			
		||||
{
 | 
			
		||||
 
 | 
			
		||||
@@ -22,6 +22,20 @@ void CartesianCommunicator::GlobalSum(double &){}
 | 
			
		||||
void CartesianCommunicator::GlobalSum(uint32_t &){}
 | 
			
		||||
void CartesianCommunicator::GlobalSumVector(double *,int N){}
 | 
			
		||||
 | 
			
		||||
void CartesianCommunicator::RecvFrom(void *recv,
 | 
			
		||||
				     int recv_from_rank,
 | 
			
		||||
				     int bytes) 
 | 
			
		||||
{
 | 
			
		||||
  assert(0);
 | 
			
		||||
}
 | 
			
		||||
void CartesianCommunicator::SendTo(void *xmit,
 | 
			
		||||
				   int xmit_to_rank,
 | 
			
		||||
				   int bytes)
 | 
			
		||||
{
 | 
			
		||||
  assert(0);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
// Basic Halo comms primitive -- should never call in single node
 | 
			
		||||
void CartesianCommunicator::SendToRecvFrom(void *xmit,
 | 
			
		||||
					   int dest,
 | 
			
		||||
 
 | 
			
		||||
							
								
								
									
										513
									
								
								lib/parallelIO/BinaryIO.h
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										513
									
								
								lib/parallelIO/BinaryIO.h
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,513 @@
 | 
			
		||||
#ifndef GRID_BINARY_IO_H
 | 
			
		||||
#define GRID_BINARY_IO_H
 | 
			
		||||
 | 
			
		||||
#ifdef HAVE_ENDIAN_H
 | 
			
		||||
#include <endian.h>
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
#include <arpa/inet.h>
 | 
			
		||||
 | 
			
		||||
// 64bit endian swap is a portability pain
 | 
			
		||||
#ifndef __has_builtin         // Optional of course.
 | 
			
		||||
#define __has_builtin(x) 0  // Compatibility with non-clang compilers.
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
#if HAVE_DECL_BE64TOH 
 | 
			
		||||
#undef Grid_ntohll
 | 
			
		||||
#define Grid_ntohll be64toh
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
#if HAVE_DECL_NTOHLL
 | 
			
		||||
#undef  Grid_ntohll
 | 
			
		||||
#define Grid_ntohll ntohll
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
#ifndef Grid_ntohll
 | 
			
		||||
 | 
			
		||||
#if BYTE_ORDER == BIG_ENDIAN 
 | 
			
		||||
 | 
			
		||||
#define Grid_ntohll(A) (A)
 | 
			
		||||
 | 
			
		||||
#else 
 | 
			
		||||
 | 
			
		||||
#if __has_builtin(__builtin_bswap64)
 | 
			
		||||
#define Grid_ntohll(A) __builtin_bswap64(A)
 | 
			
		||||
#else
 | 
			
		||||
#error
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
namespace Grid { 
 | 
			
		||||
 | 
			
		||||
  // A little helper
 | 
			
		||||
  inline void removeWhitespace(std::string &key)
 | 
			
		||||
  {
 | 
			
		||||
    key.erase(std::remove_if(key.begin(), key.end(), ::isspace),key.end());
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
class BinaryIO {
 | 
			
		||||
 | 
			
		||||
 public:
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
  // Network is big endian
 | 
			
		||||
  static inline void htobe32_v(void *file_object,uint32_t bytes){ be32toh_v(file_object,bytes);} 
 | 
			
		||||
  static inline void htobe64_v(void *file_object,uint32_t bytes){ be64toh_v(file_object,bytes);} 
 | 
			
		||||
  static inline void htole32_v(void *file_object,uint32_t bytes){ le32toh_v(file_object,bytes);} 
 | 
			
		||||
  static inline void htole64_v(void *file_object,uint32_t bytes){ le64toh_v(file_object,bytes);} 
 | 
			
		||||
 | 
			
		||||
  static inline void be32toh_v(void *file_object,uint32_t bytes)
 | 
			
		||||
  {
 | 
			
		||||
    uint32_t * f = (uint32_t *)file_object;
 | 
			
		||||
    for(int i=0;i*sizeof(uint32_t)<bytes;i++){  
 | 
			
		||||
      f[i] = ntohl(f[i]);
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  // LE must Swap and switch to host
 | 
			
		||||
  static inline void le32toh_v(void *file_object,uint32_t bytes)
 | 
			
		||||
  {
 | 
			
		||||
    uint32_t *fp = (uint32_t *)file_object;
 | 
			
		||||
    uint32_t f;
 | 
			
		||||
 | 
			
		||||
    for(int i=0;i*sizeof(uint32_t)<bytes;i++){  
 | 
			
		||||
      f = fp[i];
 | 
			
		||||
      // got network order and the network to host
 | 
			
		||||
      f = ((f&0xFF)<<24) | ((f&0xFF00)<<8) | ((f&0xFF0000)>>8) | ((f&0xFF000000UL)>>24) ; 
 | 
			
		||||
      fp[i] = ntohl(f);
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  // BE is same as network
 | 
			
		||||
  static inline void be64toh_v(void *file_object,uint32_t bytes)
 | 
			
		||||
  {
 | 
			
		||||
    uint64_t * f = (uint64_t *)file_object;
 | 
			
		||||
    for(int i=0;i*sizeof(uint64_t)<bytes;i++){  
 | 
			
		||||
      f[i] = Grid_ntohll(f[i]);
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
  
 | 
			
		||||
  // LE must swap and switch;
 | 
			
		||||
  static inline void le64toh_v(void *file_object,uint32_t bytes)
 | 
			
		||||
  {
 | 
			
		||||
    uint64_t *fp = (uint64_t *)file_object;
 | 
			
		||||
    uint64_t f,g;
 | 
			
		||||
    
 | 
			
		||||
    for(int i=0;i*sizeof(uint64_t)<bytes;i++){  
 | 
			
		||||
      f = fp[i];
 | 
			
		||||
      // got network order and the network to host
 | 
			
		||||
      g = ((f&0xFF)<<24) | ((f&0xFF00)<<8) | ((f&0xFF0000)>>8) | ((f&0xFF000000UL)>>24) ; 
 | 
			
		||||
      g = g << 32;
 | 
			
		||||
      f = f >> 32;
 | 
			
		||||
      g|= ((f&0xFF)<<24) | ((f&0xFF00)<<8) | ((f&0xFF0000)>>8) | ((f&0xFF000000UL)>>24) ; 
 | 
			
		||||
      fp[i] = Grid_ntohll(g);
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  template<class vobj,class fobj,class munger> static inline void Uint32Checksum(Lattice<vobj> &lat,munger munge,uint32_t &csum)
 | 
			
		||||
  {
 | 
			
		||||
    typedef typename vobj::scalar_object sobj;
 | 
			
		||||
    GridBase *grid = lat._grid ;
 | 
			
		||||
 | 
			
		||||
    sobj siteObj;
 | 
			
		||||
    fobj fileObj;
 | 
			
		||||
 | 
			
		||||
    csum = 0;
 | 
			
		||||
    std::vector<int> lcoor;
 | 
			
		||||
    for(int l=0;l<grid->lSites();l++){
 | 
			
		||||
      grid->CoorFromIndex(lcoor,l,grid->_ldimensions);
 | 
			
		||||
      peekLocalSite(siteObj,lat,lcoor);
 | 
			
		||||
      munge(siteObj,fileObj,csum);
 | 
			
		||||
    }
 | 
			
		||||
    grid->GlobalSum(csum);
 | 
			
		||||
  }
 | 
			
		||||
    
 | 
			
		||||
  static inline void Uint32Checksum(uint32_t *buf,uint32_t buf_size_bytes,uint32_t &csum)
 | 
			
		||||
  {
 | 
			
		||||
    for(int i=0;i*sizeof(uint32_t)<buf_size_bytes;i++){
 | 
			
		||||
      csum=csum+buf[i];
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
  
 | 
			
		||||
  template<class vobj,class fobj,class munger>
 | 
			
		||||
  static inline uint32_t readObjectSerial(Lattice<vobj> &Umu,std::string file,munger munge,int offset,const std::string &format)
 | 
			
		||||
  {
 | 
			
		||||
    typedef typename vobj::scalar_object sobj;
 | 
			
		||||
 | 
			
		||||
    GridBase *grid = Umu._grid;
 | 
			
		||||
 | 
			
		||||
    std::cout<< GridLogMessage<< "Serial read I/O "<< file<< std::endl;
 | 
			
		||||
 | 
			
		||||
    int ieee32big = (format == std::string("IEEE32BIG"));
 | 
			
		||||
    int ieee32    = (format == std::string("IEEE32"));
 | 
			
		||||
    int ieee64big = (format == std::string("IEEE64BIG"));
 | 
			
		||||
    int ieee64    = (format == std::string("IEEE64"));
 | 
			
		||||
 | 
			
		||||
    // Find the location of each site and send to primary node
 | 
			
		||||
    // Take loop order from Chroma; defines loop order now that NERSC doc no longer
 | 
			
		||||
    // available (how short sighted is that?)
 | 
			
		||||
    std::ifstream fin(file,std::ios::binary|std::ios::in);
 | 
			
		||||
    fin.seekg(offset);
 | 
			
		||||
 | 
			
		||||
    Umu = zero;
 | 
			
		||||
    uint32_t csum=0;
 | 
			
		||||
    fobj file_object;
 | 
			
		||||
    sobj munged;
 | 
			
		||||
    
 | 
			
		||||
    for(int t=0;t<grid->_fdimensions[3];t++){
 | 
			
		||||
    for(int z=0;z<grid->_fdimensions[2];z++){
 | 
			
		||||
    for(int y=0;y<grid->_fdimensions[1];y++){
 | 
			
		||||
    for(int x=0;x<grid->_fdimensions[0];x++){
 | 
			
		||||
 | 
			
		||||
      std::vector<int> site({x,y,z,t});
 | 
			
		||||
 | 
			
		||||
      if ( grid->IsBoss() ) {
 | 
			
		||||
	fin.read((char *)&file_object,sizeof(file_object));
 | 
			
		||||
	
 | 
			
		||||
	if(ieee32big) be32toh_v((void *)&file_object,sizeof(file_object));
 | 
			
		||||
	if(ieee32)    le32toh_v((void *)&file_object,sizeof(file_object));
 | 
			
		||||
	if(ieee64big) be64toh_v((void *)&file_object,sizeof(file_object));
 | 
			
		||||
	if(ieee64)    le64toh_v((void *)&file_object,sizeof(file_object));
 | 
			
		||||
 | 
			
		||||
	munge(file_object,munged,csum);
 | 
			
		||||
      }
 | 
			
		||||
      // The boss who read the file has their value poked
 | 
			
		||||
      pokeSite(munged,Umu,site);
 | 
			
		||||
    }}}}
 | 
			
		||||
    return csum;
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  template<class vobj,class fobj,class munger> 
 | 
			
		||||
  static inline uint32_t writeObjectSerial(Lattice<vobj> &Umu,std::string file,munger munge,int offset,const std::string & format)
 | 
			
		||||
  {
 | 
			
		||||
    typedef typename vobj::scalar_object sobj;
 | 
			
		||||
 | 
			
		||||
    GridBase *grid = Umu._grid;
 | 
			
		||||
 | 
			
		||||
    int ieee32big = (format == std::string("IEEE32BIG"));
 | 
			
		||||
    int ieee32    = (format == std::string("IEEE32"));
 | 
			
		||||
    int ieee64big = (format == std::string("IEEE64BIG"));
 | 
			
		||||
    int ieee64    = (format == std::string("IEEE64"));
 | 
			
		||||
 | 
			
		||||
    //////////////////////////////////////////////////
 | 
			
		||||
    // Serialise through node zero
 | 
			
		||||
    //////////////////////////////////////////////////
 | 
			
		||||
    std::cout<< GridLogMessage<< "Serial write I/O "<< file<<std::endl;
 | 
			
		||||
 | 
			
		||||
    std::ofstream fout;
 | 
			
		||||
    if ( grid->IsBoss() ) {
 | 
			
		||||
      fout.open(file,std::ios::binary|std::ios::out|std::ios::in);
 | 
			
		||||
      fout.seekp(offset);
 | 
			
		||||
    }
 | 
			
		||||
    
 | 
			
		||||
    uint32_t csum=0;
 | 
			
		||||
    fobj file_object;
 | 
			
		||||
    sobj unmunged;
 | 
			
		||||
    for(int t=0;t<grid->_fdimensions[3];t++){
 | 
			
		||||
    for(int z=0;z<grid->_fdimensions[2];z++){
 | 
			
		||||
    for(int y=0;y<grid->_fdimensions[1];y++){
 | 
			
		||||
    for(int x=0;x<grid->_fdimensions[0];x++){
 | 
			
		||||
 | 
			
		||||
      std::vector<int> site({x,y,z,t});
 | 
			
		||||
      // peek & write
 | 
			
		||||
      peekSite(unmunged,Umu,site);
 | 
			
		||||
 | 
			
		||||
      munge(unmunged,file_object,csum);
 | 
			
		||||
 | 
			
		||||
      
 | 
			
		||||
      if ( grid->IsBoss() ) {
 | 
			
		||||
	
 | 
			
		||||
	if(ieee32big) htobe32_v((void *)&file_object,sizeof(file_object));
 | 
			
		||||
	if(ieee32)    htole32_v((void *)&file_object,sizeof(file_object));
 | 
			
		||||
	if(ieee64big) htobe64_v((void *)&file_object,sizeof(file_object));
 | 
			
		||||
	if(ieee64)    htole64_v((void *)&file_object,sizeof(file_object));
 | 
			
		||||
	
 | 
			
		||||
	fout.write((char *)&file_object,sizeof(file_object));
 | 
			
		||||
      }
 | 
			
		||||
    }}}}
 | 
			
		||||
 | 
			
		||||
    return csum;
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  template<class vobj,class fobj,class munger>
 | 
			
		||||
  static inline uint32_t readObjectParallel(Lattice<vobj> &Umu,std::string file,munger munge,int offset,const std::string &format)
 | 
			
		||||
  {
 | 
			
		||||
    typedef typename vobj::scalar_object sobj;
 | 
			
		||||
 | 
			
		||||
    GridBase *grid = Umu._grid;
 | 
			
		||||
 | 
			
		||||
    int ieee32big = (format == std::string("IEEE32BIG"));
 | 
			
		||||
    int ieee32    = (format == std::string("IEEE32"));
 | 
			
		||||
    int ieee64big = (format == std::string("IEEE64BIG"));
 | 
			
		||||
    int ieee64    = (format == std::string("IEEE64"));
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
    // Take into account block size of parallel file systems want about
 | 
			
		||||
    // 4-16MB chunks.
 | 
			
		||||
    // Ideally one reader/writer per xy plane and read these contiguously
 | 
			
		||||
    // with comms from nominated I/O nodes.
 | 
			
		||||
    std::ifstream fin;
 | 
			
		||||
 | 
			
		||||
    int nd = grid->_ndimension;
 | 
			
		||||
    std::vector<int> parallel(nd,1);
 | 
			
		||||
    std::vector<int> ioproc  (nd);
 | 
			
		||||
    std::vector<int> start(nd);
 | 
			
		||||
    std::vector<int> range(nd);
 | 
			
		||||
 | 
			
		||||
    for(int d=0;d<nd;d++){
 | 
			
		||||
      assert(grid->CheckerBoarded(d) == 0);
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    uint64_t slice_vol = 1;
 | 
			
		||||
 | 
			
		||||
    int IOnode = 1;
 | 
			
		||||
    for(int d=0;d<grid->_ndimension;d++) {
 | 
			
		||||
 | 
			
		||||
      if ( d==0 ) parallel[d] = 0;
 | 
			
		||||
      if (parallel[d]) {
 | 
			
		||||
	range[d] = grid->_ldimensions[d];
 | 
			
		||||
	start[d] = grid->_processor_coor[d]*range[d];
 | 
			
		||||
	ioproc[d]= grid->_processor_coor[d];
 | 
			
		||||
      } else {
 | 
			
		||||
	range[d] = grid->_gdimensions[d];
 | 
			
		||||
	start[d] = 0;
 | 
			
		||||
	ioproc[d]= 0;
 | 
			
		||||
 | 
			
		||||
	if ( grid->_processor_coor[d] != 0 ) IOnode = 0;
 | 
			
		||||
      }
 | 
			
		||||
      slice_vol = slice_vol * range[d];
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    {
 | 
			
		||||
      uint32_t tmp = IOnode;
 | 
			
		||||
      grid->GlobalSum(tmp);
 | 
			
		||||
      std::cout<< GridLogMessage<< "Parallel read I/O to "<< file << " with " <<tmp<< " IOnodes for subslice ";
 | 
			
		||||
      for(int d=0;d<grid->_ndimension;d++){
 | 
			
		||||
	std::cout<< range[d];
 | 
			
		||||
	if( d< grid->_ndimension-1 ) 
 | 
			
		||||
	  std::cout<< " x ";
 | 
			
		||||
      }
 | 
			
		||||
      std::cout << std::endl;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    int myrank = grid->ThisRank();
 | 
			
		||||
    int iorank = grid->RankFromProcessorCoor(ioproc);
 | 
			
		||||
 | 
			
		||||
    if ( IOnode ) { 
 | 
			
		||||
      fin.open(file,std::ios::binary|std::ios::in);
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    //////////////////////////////////////////////////////////
 | 
			
		||||
    // Find the location of each site and send to primary node
 | 
			
		||||
    // Take loop order from Chroma; defines loop order now that NERSC doc no longer
 | 
			
		||||
    // available (how short sighted is that?)
 | 
			
		||||
    //////////////////////////////////////////////////////////
 | 
			
		||||
    Umu = zero;
 | 
			
		||||
    uint32_t csum=0;
 | 
			
		||||
    fobj fileObj;
 | 
			
		||||
    sobj siteObj;
 | 
			
		||||
 | 
			
		||||
      // need to implement these loops in Nd independent way with a lexico conversion
 | 
			
		||||
    for(int tlex=0;tlex<slice_vol;tlex++){
 | 
			
		||||
	
 | 
			
		||||
      std::vector<int> tsite(nd); // temporary mixed up site
 | 
			
		||||
      std::vector<int> gsite(nd);
 | 
			
		||||
      std::vector<int> lsite(nd);
 | 
			
		||||
      std::vector<int> iosite(nd);
 | 
			
		||||
 | 
			
		||||
      grid->CoorFromIndex(tsite,tlex,range);
 | 
			
		||||
 | 
			
		||||
      for(int d=0;d<nd;d++){
 | 
			
		||||
	lsite[d] = tsite[d]%grid->_ldimensions[d];  // local site
 | 
			
		||||
	gsite[d] = tsite[d]+start[d];               // global site
 | 
			
		||||
      }
 | 
			
		||||
 | 
			
		||||
      /////////////////////////
 | 
			
		||||
      // Get the rank of owner of data
 | 
			
		||||
      /////////////////////////
 | 
			
		||||
	int rank, o_idx,i_idx, g_idx;
 | 
			
		||||
      grid->GlobalCoorToRankIndex(rank,o_idx,i_idx,gsite);
 | 
			
		||||
      grid->GlobalCoorToGlobalIndex(gsite,g_idx);
 | 
			
		||||
      
 | 
			
		||||
      ////////////////////////////////
 | 
			
		||||
      // iorank reads from the seek
 | 
			
		||||
      ////////////////////////////////
 | 
			
		||||
      if (myrank == iorank) {
 | 
			
		||||
	
 | 
			
		||||
	fin.seekg(offset+g_idx*sizeof(fileObj));
 | 
			
		||||
	fin.read((char *)&fileObj,sizeof(fileObj));
 | 
			
		||||
	
 | 
			
		||||
	if(ieee32big) be32toh_v((void *)&fileObj,sizeof(fileObj));
 | 
			
		||||
	if(ieee32)    le32toh_v((void *)&fileObj,sizeof(fileObj));
 | 
			
		||||
	if(ieee64big) be64toh_v((void *)&fileObj,sizeof(fileObj));
 | 
			
		||||
	if(ieee64)    le64toh_v((void *)&fileObj,sizeof(fileObj));
 | 
			
		||||
	
 | 
			
		||||
	munge(fileObj,siteObj,csum);
 | 
			
		||||
	
 | 
			
		||||
	if ( rank != myrank ) {
 | 
			
		||||
	  grid->SendTo((void *)&siteObj,rank,sizeof(siteObj));
 | 
			
		||||
	} else { 
 | 
			
		||||
	  pokeLocalSite(siteObj,Umu,lsite);
 | 
			
		||||
	}
 | 
			
		||||
	 
 | 
			
		||||
      } else { 
 | 
			
		||||
	if ( myrank == rank ) {
 | 
			
		||||
	  grid->RecvFrom((void *)&siteObj,iorank,sizeof(siteObj));
 | 
			
		||||
	  pokeLocalSite(siteObj,Umu,lsite);
 | 
			
		||||
	} 
 | 
			
		||||
      }
 | 
			
		||||
      grid->Barrier(); // necessary?
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    grid->GlobalSum(csum);
 | 
			
		||||
    
 | 
			
		||||
    return csum;
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  //////////////////////////////////////////////////////////
 | 
			
		||||
  // Parallel writer
 | 
			
		||||
  //////////////////////////////////////////////////////////
 | 
			
		||||
  template<class vobj,class fobj,class munger>
 | 
			
		||||
  static inline uint32_t writeObjectParallel(Lattice<vobj> &Umu,std::string file,munger munge,int offset,const std::string & format)
 | 
			
		||||
  {
 | 
			
		||||
    typedef typename vobj::scalar_object sobj;
 | 
			
		||||
    GridBase *grid = Umu._grid;
 | 
			
		||||
 | 
			
		||||
    int ieee32big = (format == std::string("IEEE32BIG"));
 | 
			
		||||
    int ieee32    = (format == std::string("IEEE32"));
 | 
			
		||||
    int ieee64big = (format == std::string("IEEE64BIG"));
 | 
			
		||||
    int ieee64    = (format == std::string("IEEE64"));
 | 
			
		||||
 | 
			
		||||
    int nd = grid->_ndimension;
 | 
			
		||||
    for(int d=0;d<nd;d++){
 | 
			
		||||
      assert(grid->CheckerBoarded(d) == 0);
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    std::vector<int> parallel(nd,1);
 | 
			
		||||
    std::vector<int> ioproc  (nd);
 | 
			
		||||
    std::vector<int> start(nd);
 | 
			
		||||
    std::vector<int> range(nd);
 | 
			
		||||
 | 
			
		||||
    uint64_t slice_vol = 1;
 | 
			
		||||
 | 
			
		||||
    int IOnode = 1;
 | 
			
		||||
 | 
			
		||||
    for(int d=0;d<grid->_ndimension;d++) {
 | 
			
		||||
 | 
			
		||||
      if ( d==0 ) parallel[d] = 0;
 | 
			
		||||
 | 
			
		||||
      if (parallel[d]) {
 | 
			
		||||
	range[d] = grid->_ldimensions[d];
 | 
			
		||||
	start[d] = grid->_processor_coor[d]*range[d];
 | 
			
		||||
	ioproc[d]= grid->_processor_coor[d];
 | 
			
		||||
      } else {
 | 
			
		||||
	range[d] = grid->_gdimensions[d];
 | 
			
		||||
	start[d] = 0;
 | 
			
		||||
	ioproc[d]= 0;
 | 
			
		||||
 | 
			
		||||
	if ( grid->_processor_coor[d] != 0 ) IOnode = 0;
 | 
			
		||||
      }
 | 
			
		||||
 | 
			
		||||
      slice_vol = slice_vol * range[d];
 | 
			
		||||
    }
 | 
			
		||||
    
 | 
			
		||||
    {
 | 
			
		||||
      uint32_t tmp = IOnode;
 | 
			
		||||
      grid->GlobalSum(tmp);
 | 
			
		||||
      std::cout<< GridLogMessage<< "Parallel write I/O from "<< file << " with " <<tmp<< " IOnodes for subslice ";
 | 
			
		||||
      for(int d=0;d<grid->_ndimension;d++){
 | 
			
		||||
	std::cout<< range[d];
 | 
			
		||||
	if( d< grid->_ndimension-1 ) 
 | 
			
		||||
	  std::cout<< " x ";
 | 
			
		||||
      }
 | 
			
		||||
      std::cout << std::endl;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    int myrank = grid->ThisRank();
 | 
			
		||||
    int iorank = grid->RankFromProcessorCoor(ioproc);
 | 
			
		||||
 | 
			
		||||
    // Take into account block size of parallel file systems want about
 | 
			
		||||
    // 4-16MB chunks.
 | 
			
		||||
    // Ideally one reader/writer per xy plane and read these contiguously
 | 
			
		||||
    // with comms from nominated I/O nodes.
 | 
			
		||||
    std::ofstream fout;
 | 
			
		||||
    if ( IOnode ) fout.open(file,std::ios::binary|std::ios::in|std::ios::out);
 | 
			
		||||
 | 
			
		||||
    //////////////////////////////////////////////////////////
 | 
			
		||||
    // Find the location of each site and send to primary node
 | 
			
		||||
    // Take loop order from Chroma; defines loop order now that NERSC doc no longer
 | 
			
		||||
    // available (how short sighted is that?)
 | 
			
		||||
    //////////////////////////////////////////////////////////
 | 
			
		||||
 | 
			
		||||
    uint32_t csum=0;
 | 
			
		||||
    fobj fileObj;
 | 
			
		||||
    sobj siteObj;
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
      // need to implement these loops in Nd independent way with a lexico conversion
 | 
			
		||||
    for(int tlex=0;tlex<slice_vol;tlex++){
 | 
			
		||||
	
 | 
			
		||||
      std::vector<int> tsite(nd); // temporary mixed up site
 | 
			
		||||
      std::vector<int> gsite(nd);
 | 
			
		||||
      std::vector<int> lsite(nd);
 | 
			
		||||
      std::vector<int> iosite(nd);
 | 
			
		||||
 | 
			
		||||
      grid->CoorFromIndex(tsite,tlex,range);
 | 
			
		||||
 | 
			
		||||
      for(int d=0;d<nd;d++){
 | 
			
		||||
	lsite[d] = tsite[d]%grid->_ldimensions[d];  // local site
 | 
			
		||||
	gsite[d] = tsite[d]+start[d];               // global site
 | 
			
		||||
      }
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
      /////////////////////////
 | 
			
		||||
      // Get the rank of owner of data
 | 
			
		||||
      /////////////////////////
 | 
			
		||||
      int rank, o_idx,i_idx, g_idx;
 | 
			
		||||
      grid->GlobalCoorToRankIndex(rank,o_idx,i_idx,gsite);
 | 
			
		||||
      grid->GlobalCoorToGlobalIndex(gsite,g_idx);
 | 
			
		||||
 | 
			
		||||
      ////////////////////////////////
 | 
			
		||||
      // iorank writes from the seek
 | 
			
		||||
      ////////////////////////////////
 | 
			
		||||
      if (myrank == iorank) {
 | 
			
		||||
 | 
			
		||||
	if ( rank != myrank ) {
 | 
			
		||||
	  grid->RecvFrom((void *)&siteObj,rank,sizeof(siteObj));
 | 
			
		||||
	} else { 
 | 
			
		||||
	  peekLocalSite(siteObj,Umu,lsite);
 | 
			
		||||
	}
 | 
			
		||||
	
 | 
			
		||||
	munge(siteObj,fileObj,csum);
 | 
			
		||||
 | 
			
		||||
	if(ieee32big) htobe32_v((void *)&fileObj,sizeof(fileObj));
 | 
			
		||||
	if(ieee32)    htole32_v((void *)&fileObj,sizeof(fileObj));
 | 
			
		||||
	if(ieee64big) htobe64_v((void *)&fileObj,sizeof(fileObj));
 | 
			
		||||
	if(ieee64)    htole64_v((void *)&fileObj,sizeof(fileObj));
 | 
			
		||||
	
 | 
			
		||||
	fout.seekp(offset+g_idx*sizeof(fileObj));
 | 
			
		||||
	fout.write((char *)&fileObj,sizeof(fileObj));
 | 
			
		||||
 | 
			
		||||
      } else { 
 | 
			
		||||
	if ( myrank == rank ) {
 | 
			
		||||
	  peekLocalSite(siteObj,Umu,lsite);
 | 
			
		||||
	  grid->SendTo((void *)&siteObj,iorank,sizeof(siteObj));
 | 
			
		||||
	} 
 | 
			
		||||
      }
 | 
			
		||||
      grid->Barrier(); // necessary// or every 16 packets to rate throttle??
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    grid->GlobalSum(csum);
 | 
			
		||||
 | 
			
		||||
    return csum;
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
#endif
 | 
			
		||||
@@ -7,57 +7,23 @@
 | 
			
		||||
#include <fstream>
 | 
			
		||||
#include <map>
 | 
			
		||||
 | 
			
		||||
#ifdef HAVE_ENDIAN_H
 | 
			
		||||
#include <endian.h>
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
#include <arpa/inet.h>
 | 
			
		||||
 | 
			
		||||
// 64bit endian swap is a portability pain
 | 
			
		||||
#ifndef __has_builtin         // Optional of course.
 | 
			
		||||
#define __has_builtin(x) 0  // Compatibility with non-clang compilers.
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
#if HAVE_DECL_BE64TOH 
 | 
			
		||||
#undef Grid_ntohll
 | 
			
		||||
#define Grid_ntohll be64toh
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
#if HAVE_DECL_NTOHLL
 | 
			
		||||
#undef  Grid_ntohll
 | 
			
		||||
#define Grid_ntohll ntohll
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
#ifndef Grid_ntohll
 | 
			
		||||
 | 
			
		||||
#if BYTE_ORDER == BIG_ENDIAN 
 | 
			
		||||
 | 
			
		||||
#define Grid_ntohll(A) (A)
 | 
			
		||||
 | 
			
		||||
#else 
 | 
			
		||||
 | 
			
		||||
#if __has_builtin(__builtin_bswap64)
 | 
			
		||||
#define Grid_ntohll(A) __builtin_bswap64(A)
 | 
			
		||||
#else
 | 
			
		||||
#error
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
#endif
 | 
			
		||||
#include <unistd.h>
 | 
			
		||||
#include <sys/utsname.h>
 | 
			
		||||
#include <pwd.h>
 | 
			
		||||
 | 
			
		||||
namespace Grid {
 | 
			
		||||
namespace QCD {
 | 
			
		||||
 | 
			
		||||
  using namespace QCD;
 | 
			
		||||
using namespace Grid;
 | 
			
		||||
 | 
			
		||||
////////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
// Some data types for intermediate storage
 | 
			
		||||
////////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
  template<typename vtype> using iLorentzColour2x3 = iVector<iVector<iVector<vtype, Nc>, 2>, 4 >;
 | 
			
		||||
typedef iLorentzColour2x3<Complex>  LorentzColour2x3;
 | 
			
		||||
typedef iLorentzColour2x3<ComplexF> LorentzColour2x3F;
 | 
			
		||||
typedef iLorentzColour2x3<ComplexD> LorentzColour2x3D;
 | 
			
		||||
 | 
			
		||||
  typedef iLorentzColour2x3<Complex>  LorentzColour2x3;
 | 
			
		||||
  typedef iLorentzColour2x3<ComplexF> LorentzColour2x3F;
 | 
			
		||||
  typedef iLorentzColour2x3<ComplexD> LorentzColour2x3D;
 | 
			
		||||
 | 
			
		||||
////////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
// header specification/interpretation
 | 
			
		||||
@@ -86,50 +52,173 @@ class NerscField {
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
//////////////////////////////////////////////////////////////////////
 | 
			
		||||
// Bit and Physical Checksumming and QA of data
 | 
			
		||||
//////////////////////////////////////////////////////////////////////
 | 
			
		||||
 | 
			
		||||
inline void NerscGrid(GridBase *grid,NerscField &header)
 | 
			
		||||
{
 | 
			
		||||
  assert(grid->_ndimension==4);
 | 
			
		||||
  for(int d=0;d<4;d++) {
 | 
			
		||||
    header.dimension[d] = grid->_fdimensions[d];
 | 
			
		||||
  }
 | 
			
		||||
  for(int d=0;d<4;d++) {
 | 
			
		||||
    header.boundary[d] = std::string("PERIODIC");
 | 
			
		||||
  }
 | 
			
		||||
}
 | 
			
		||||
template<class GaugeMatrix,class GaugeField>
 | 
			
		||||
inline void NerscStatistics(GaugeField & data,NerscField &header)
 | 
			
		||||
{
 | 
			
		||||
  header.link_trace=Grid::QCD::WilsonLoops<GaugeMatrix,GaugeField>::linkTrace(data);
 | 
			
		||||
  header.plaquette =Grid::QCD::WilsonLoops<GaugeMatrix,GaugeField>::avgPlaquette(data);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
inline void NerscMachineCharacteristics(NerscField &header)
 | 
			
		||||
{
 | 
			
		||||
  // Who
 | 
			
		||||
  struct passwd *pw = getpwuid (getuid());
 | 
			
		||||
  if (pw) header.creator = std::string(pw->pw_name); 
 | 
			
		||||
 | 
			
		||||
  // When
 | 
			
		||||
  std::time_t t = std::time(nullptr);
 | 
			
		||||
  std::tm tm = *std::localtime(&t);
 | 
			
		||||
  std::ostringstream oss; 
 | 
			
		||||
  oss << std::put_time(&tm, "%c %Z");
 | 
			
		||||
  header.creation_date = oss.str();
 | 
			
		||||
  header.archive_date  = header.creation_date;
 | 
			
		||||
 | 
			
		||||
  // What
 | 
			
		||||
  struct utsname name;  uname(&name);
 | 
			
		||||
  header.creator_hardware = std::string(name.nodename)+"-";
 | 
			
		||||
  header.creator_hardware+= std::string(name.machine)+"-";
 | 
			
		||||
  header.creator_hardware+= std::string(name.sysname)+"-";
 | 
			
		||||
  header.creator_hardware+= std::string(name.release);
 | 
			
		||||
 | 
			
		||||
}
 | 
			
		||||
//////////////////////////////////////////////////////////////////////
 | 
			
		||||
// Utilities ; these are QCD aware
 | 
			
		||||
//////////////////////////////////////////////////////////////////////
 | 
			
		||||
    inline void NerscChecksum(uint32_t *buf,uint32_t buf_size_bytes,uint32_t &csum)
 | 
			
		||||
    {
 | 
			
		||||
      BinaryIO::Uint32Checksum(buf,buf_size_bytes,csum);
 | 
			
		||||
    }
 | 
			
		||||
    inline void reconstruct3(LorentzColourMatrix & cm)
 | 
			
		||||
    {
 | 
			
		||||
      const int x=0;
 | 
			
		||||
      const int y=1;
 | 
			
		||||
      const int z=2;
 | 
			
		||||
      for(int mu=0;mu<4;mu++){
 | 
			
		||||
	cm(mu)()(2,x) = adj(cm(mu)()(0,y)*cm(mu)()(1,z)-cm(mu)()(0,z)*cm(mu)()(1,y)); //x= yz-zy
 | 
			
		||||
	cm(mu)()(2,y) = adj(cm(mu)()(0,z)*cm(mu)()(1,x)-cm(mu)()(0,x)*cm(mu)()(1,z)); //y= zx-xz
 | 
			
		||||
	cm(mu)()(2,z) = adj(cm(mu)()(0,x)*cm(mu)()(1,y)-cm(mu)()(0,y)*cm(mu)()(1,x)); //z= xy-yx
 | 
			
		||||
      }
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    template<class fobj,class sobj>
 | 
			
		||||
    struct NerscSimpleMunger{
 | 
			
		||||
 | 
			
		||||
      void operator() (fobj &in,sobj &out,uint32_t &csum){
 | 
			
		||||
 | 
			
		||||
      for(int mu=0;mu<4;mu++){
 | 
			
		||||
      for(int i=0;i<3;i++){
 | 
			
		||||
      for(int j=0;j<3;j++){
 | 
			
		||||
	out(mu)()(i,j) = in(mu)()(i,j);
 | 
			
		||||
      }}}
 | 
			
		||||
      NerscChecksum((uint32_t *)&in,sizeof(in),csum); 
 | 
			
		||||
      };
 | 
			
		||||
    };
 | 
			
		||||
 | 
			
		||||
    template<class fobj,class sobj>
 | 
			
		||||
    struct NerscSimpleUnmunger{
 | 
			
		||||
      void operator() (sobj &in,fobj &out,uint32_t &csum){
 | 
			
		||||
	for(int mu=0;mu<Nd;mu++){
 | 
			
		||||
	for(int i=0;i<Nc;i++){
 | 
			
		||||
	for(int j=0;j<Nc;j++){
 | 
			
		||||
	  out(mu)()(i,j) = in(mu)()(i,j);
 | 
			
		||||
	}}}
 | 
			
		||||
	NerscChecksum((uint32_t *)&out,sizeof(out),csum); 
 | 
			
		||||
      };
 | 
			
		||||
    };
 | 
			
		||||
 
 | 
			
		||||
    template<class fobj,class sobj>
 | 
			
		||||
    struct Nersc3x2munger{
 | 
			
		||||
      void operator() (fobj &in,sobj &out,uint32_t &csum){
 | 
			
		||||
     
 | 
			
		||||
	NerscChecksum((uint32_t *)&in,sizeof(in),csum); 
 | 
			
		||||
 | 
			
		||||
	for(int mu=0;mu<4;mu++){
 | 
			
		||||
	  for(int i=0;i<2;i++){
 | 
			
		||||
	    for(int j=0;j<3;j++){
 | 
			
		||||
	      out(mu)()(i,j) = in(mu)(i)(j);
 | 
			
		||||
	    }}
 | 
			
		||||
	}
 | 
			
		||||
	reconstruct3(out);
 | 
			
		||||
      }
 | 
			
		||||
    };
 | 
			
		||||
 | 
			
		||||
    template<class fobj,class sobj>
 | 
			
		||||
    struct Nersc3x2unmunger{
 | 
			
		||||
 | 
			
		||||
      void operator() (sobj &in,fobj &out,uint32_t &csum){
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
	for(int mu=0;mu<4;mu++){
 | 
			
		||||
	  for(int i=0;i<2;i++){
 | 
			
		||||
	    for(int j=0;j<3;j++){
 | 
			
		||||
	      out(mu)(i)(j) = in(mu)()(i,j);
 | 
			
		||||
	    }}
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	NerscChecksum((uint32_t *)&out,sizeof(out),csum); 
 | 
			
		||||
 | 
			
		||||
      }
 | 
			
		||||
    };
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
////////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
// Write and read from fstream; comput header offset for payload
 | 
			
		||||
////////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
inline unsigned int writeNerscHeader(NerscField &field,std::string file)
 | 
			
		||||
{
 | 
			
		||||
  std::ofstream fout(file,std::ios::out);
 | 
			
		||||
class NerscIO : public BinaryIO { 
 | 
			
		||||
 public:
 | 
			
		||||
 | 
			
		||||
  static inline unsigned int writeHeader(NerscField &field,std::string file)
 | 
			
		||||
  {
 | 
			
		||||
    std::ofstream fout(file,std::ios::out);
 | 
			
		||||
  
 | 
			
		||||
  fout.seekp(0,std::ios::beg);
 | 
			
		||||
  fout << "BEGIN_HEADER"      << std::endl;
 | 
			
		||||
  fout << "HDR_VERSION = "    << field.hdr_version    << std::endl;
 | 
			
		||||
  fout << "DATATYPE = "       << field.data_type      << std::endl;
 | 
			
		||||
  fout << "STORAGE_FORMAT = " << field.storage_format << std::endl;
 | 
			
		||||
    fout.seekp(0,std::ios::beg);
 | 
			
		||||
    fout << "BEGIN_HEADER"      << std::endl;
 | 
			
		||||
    fout << "HDR_VERSION = "    << field.hdr_version    << std::endl;
 | 
			
		||||
    fout << "DATATYPE = "       << field.data_type      << std::endl;
 | 
			
		||||
    fout << "STORAGE_FORMAT = " << field.storage_format << std::endl;
 | 
			
		||||
 | 
			
		||||
  for(int i=0;i<4;i++){
 | 
			
		||||
    fout << "DIMENSION_" << i+1 << " = " << field.dimension[i] << std::endl ;
 | 
			
		||||
  }
 | 
			
		||||
  // just to keep the space and write it later
 | 
			
		||||
  fout << "LINK_TRACE = " << std::setprecision(10) << field.link_trace << std::endl;
 | 
			
		||||
  fout << "PLAQUETTE  = " << std::setprecision(10) << field.plaquette  << std::endl;
 | 
			
		||||
  for(int i=0;i<4;i++){
 | 
			
		||||
    fout << "BOUNDARY_"<<i+1<<" = " << field.boundary[i] << std::endl;
 | 
			
		||||
  }
 | 
			
		||||
  fout << "CHECKSUM = "<< std::hex << std::setw(16) << 0 << field.checksum << std::endl;
 | 
			
		||||
    for(int i=0;i<4;i++){
 | 
			
		||||
      fout << "DIMENSION_" << i+1 << " = " << field.dimension[i] << std::endl ;
 | 
			
		||||
    }
 | 
			
		||||
    // just to keep the space and write it later
 | 
			
		||||
    fout << "LINK_TRACE = " << std::setprecision(10) << field.link_trace << std::endl;
 | 
			
		||||
    fout << "PLAQUETTE  = " << std::setprecision(10) << field.plaquette  << std::endl;
 | 
			
		||||
    for(int i=0;i<4;i++){
 | 
			
		||||
      fout << "BOUNDARY_"<<i+1<<" = " << field.boundary[i] << std::endl;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
  fout << "ENSEMBLE_ID = "     << field.ensemble_id      << std::endl;
 | 
			
		||||
  fout << "ENSEMBLE_LABEL = "  << field.ensemble_label   << std::endl;
 | 
			
		||||
  fout << "SEQUENCE_NUMBER = " << field.sequence_number  << std::endl;
 | 
			
		||||
  fout << "CREATOR = "         << field.creator          << std::endl;
 | 
			
		||||
  fout << "CREATOR_HARDWARE = "<< field.creator_hardware << std::endl;
 | 
			
		||||
  fout << "CREATION_DATE = "   << field.creation_date    << std::endl;
 | 
			
		||||
  fout << "ARCHIVE_DATE = "    << field.archive_date     << std::endl;
 | 
			
		||||
  fout << "FLOATING_POINT = "  << field.floating_point   << std::endl;
 | 
			
		||||
  fout << "END_HEADER"         << std::endl;
 | 
			
		||||
  field.data_start = fout.tellp();
 | 
			
		||||
  return field.data_start;
 | 
			
		||||
    fout << "CHECKSUM = "<< std::hex << std::setw(10) << field.checksum << std::endl;
 | 
			
		||||
    fout << std::dec;
 | 
			
		||||
 | 
			
		||||
    fout << "ENSEMBLE_ID = "     << field.ensemble_id      << std::endl;
 | 
			
		||||
    fout << "ENSEMBLE_LABEL = "  << field.ensemble_label   << std::endl;
 | 
			
		||||
    fout << "SEQUENCE_NUMBER = " << field.sequence_number  << std::endl;
 | 
			
		||||
    fout << "CREATOR = "         << field.creator          << std::endl;
 | 
			
		||||
    fout << "CREATOR_HARDWARE = "<< field.creator_hardware << std::endl;
 | 
			
		||||
    fout << "CREATION_DATE = "   << field.creation_date    << std::endl;
 | 
			
		||||
    fout << "ARCHIVE_DATE = "    << field.archive_date     << std::endl;
 | 
			
		||||
    fout << "FLOATING_POINT = "  << field.floating_point   << std::endl;
 | 
			
		||||
    fout << "END_HEADER"         << std::endl;
 | 
			
		||||
    field.data_start = fout.tellp();
 | 
			
		||||
    return field.data_start;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
// A little helper
 | 
			
		||||
inline void removeWhitespace(std::string &key)
 | 
			
		||||
{
 | 
			
		||||
  key.erase(std::remove_if(key.begin(), key.end(), ::isspace),key.end());
 | 
			
		||||
}
 | 
			
		||||
// for the header-reader
 | 
			
		||||
inline int readNerscHeader(std::string file,GridBase *grid,  NerscField &field)
 | 
			
		||||
static inline int readHeader(std::string file,GridBase *grid,  NerscField &field)
 | 
			
		||||
{
 | 
			
		||||
  int offset=0;
 | 
			
		||||
  std::map<std::string,std::string> header;
 | 
			
		||||
@@ -163,7 +252,6 @@ inline int readNerscHeader(std::string file,GridBase *grid,  NerscField &field)
 | 
			
		||||
  //////////////////////////////////////////////////
 | 
			
		||||
  // chomp the values
 | 
			
		||||
  //////////////////////////////////////////////////
 | 
			
		||||
 | 
			
		||||
  field.hdr_version    = header["HDR_VERSION"];
 | 
			
		||||
  field.data_type      = header["DATATYPE"];
 | 
			
		||||
  field.storage_format = header["STORAGE_FORMAT"];
 | 
			
		||||
@@ -200,314 +288,22 @@ inline int readNerscHeader(std::string file,GridBase *grid,  NerscField &field)
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
//////////////////////////////////////////////////////////////////////
 | 
			
		||||
// Utilities
 | 
			
		||||
//////////////////////////////////////////////////////////////////////
 | 
			
		||||
inline void reconstruct3(LorentzColourMatrix & cm)
 | 
			
		||||
{
 | 
			
		||||
  const int x=0;
 | 
			
		||||
  const int y=1;
 | 
			
		||||
  const int z=2;
 | 
			
		||||
  for(int mu=0;mu<4;mu++){
 | 
			
		||||
    cm(mu)()(2,x) = adj(cm(mu)()(0,y)*cm(mu)()(1,z)-cm(mu)()(0,z)*cm(mu)()(1,y)); //x= yz-zy
 | 
			
		||||
    cm(mu)()(2,y) = adj(cm(mu)()(0,z)*cm(mu)()(1,x)-cm(mu)()(0,x)*cm(mu)()(1,z)); //y= zx-xz
 | 
			
		||||
    cm(mu)()(2,z) = adj(cm(mu)()(0,x)*cm(mu)()(1,y)-cm(mu)()(0,y)*cm(mu)()(1,x)); //z= xy-yx
 | 
			
		||||
  }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
 void inline be32toh_v(void *file_object,uint32_t bytes)
 | 
			
		||||
 {
 | 
			
		||||
   uint32_t * f = (uint32_t *)file_object;
 | 
			
		||||
   for(int i=0;i*sizeof(uint32_t)<bytes;i++){  
 | 
			
		||||
     f[i] = ntohl(f[i]);
 | 
			
		||||
   }
 | 
			
		||||
 }
 | 
			
		||||
 void inline le32toh_v(void *file_object,uint32_t bytes)
 | 
			
		||||
 {
 | 
			
		||||
   uint32_t *fp = (uint32_t *)file_object;
 | 
			
		||||
 | 
			
		||||
   uint32_t f;
 | 
			
		||||
 | 
			
		||||
   for(int i=0;i*sizeof(uint32_t)<bytes;i++){  
 | 
			
		||||
     f = fp[i];
 | 
			
		||||
     // got network order and the network to host
 | 
			
		||||
     f = ((f&0xFF)<<24) | ((f&0xFF00)<<8) | ((f&0xFF0000)>>8) | ((f&0xFF000000UL)>>24) ; 
 | 
			
		||||
     fp[i] = ntohl(f);
 | 
			
		||||
   }
 | 
			
		||||
 }
 | 
			
		||||
 void inline be64toh_v(void *file_object,uint32_t bytes)
 | 
			
		||||
 {
 | 
			
		||||
   uint64_t * f = (uint64_t *)file_object;
 | 
			
		||||
   for(int i=0;i*sizeof(uint64_t)<bytes;i++){  
 | 
			
		||||
     f[i] = Grid_ntohll(f[i]);
 | 
			
		||||
   }
 | 
			
		||||
 }
 | 
			
		||||
 void inline le64toh_v(void *file_object,uint32_t bytes)
 | 
			
		||||
 {
 | 
			
		||||
   uint64_t *fp = (uint64_t *)file_object;
 | 
			
		||||
   uint64_t f,g;
 | 
			
		||||
 | 
			
		||||
   for(int i=0;i*sizeof(uint64_t)<bytes;i++){  
 | 
			
		||||
     f = fp[i];
 | 
			
		||||
     // got network order and the network to host
 | 
			
		||||
     g = ((f&0xFF)<<24) | ((f&0xFF00)<<8) | ((f&0xFF0000)>>8) | ((f&0xFF000000UL)>>24) ; 
 | 
			
		||||
     g = g << 32;
 | 
			
		||||
     f = f >> 32;
 | 
			
		||||
     g|= ((f&0xFF)<<24) | ((f&0xFF00)<<8) | ((f&0xFF0000)>>8) | ((f&0xFF000000UL)>>24) ; 
 | 
			
		||||
     fp[i] = ntohl(g);
 | 
			
		||||
   }
 | 
			
		||||
 }
 | 
			
		||||
 | 
			
		||||
inline void NerscChecksum(uint32_t *buf,uint32_t buf_size,uint32_t &csum)
 | 
			
		||||
{
 | 
			
		||||
  for(int i=0;i*sizeof(uint32_t)<buf_size;i++){
 | 
			
		||||
    csum=csum+buf[i];
 | 
			
		||||
  }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
  template<class fobj,class sobj>
 | 
			
		||||
  struct NerscSimpleMunger{
 | 
			
		||||
    void operator() (fobj &in,sobj &out,uint32_t &csum){
 | 
			
		||||
 | 
			
		||||
      for(int mu=0;mu<4;mu++){
 | 
			
		||||
      for(int i=0;i<3;i++){
 | 
			
		||||
      for(int j=0;j<3;j++){
 | 
			
		||||
	out(mu)()(i,j) = in(mu)()(i,j);
 | 
			
		||||
      }}}
 | 
			
		||||
 | 
			
		||||
      NerscChecksum((uint32_t *)&in,sizeof(in),csum); 
 | 
			
		||||
    };
 | 
			
		||||
  };
 | 
			
		||||
  template<class fobj,class sobj>
 | 
			
		||||
  struct NerscSimpleUnmunger{
 | 
			
		||||
    void operator() (sobj &in,fobj &out,uint32_t &csum){
 | 
			
		||||
      for(int mu=0;mu<4;mu++){
 | 
			
		||||
      for(int i=0;i<3;i++){
 | 
			
		||||
      for(int j=0;j<3;j++){
 | 
			
		||||
	out(mu)()(i,j) = in(mu)()(i,j);
 | 
			
		||||
      }}}
 | 
			
		||||
      NerscChecksum((uint32_t *)&out,sizeof(out),csum); 
 | 
			
		||||
    };
 | 
			
		||||
  };
 | 
			
		||||
 
 | 
			
		||||
 template<class fobj,class sobj>
 | 
			
		||||
 struct Nersc3x2munger{
 | 
			
		||||
   void operator() (fobj &in,sobj &out,uint32_t &csum){
 | 
			
		||||
     
 | 
			
		||||
     NerscChecksum((uint32_t *)&in,sizeof(in),csum); 
 | 
			
		||||
 | 
			
		||||
     for(int mu=0;mu<4;mu++){
 | 
			
		||||
       for(int i=0;i<2;i++){
 | 
			
		||||
       for(int j=0;j<3;j++){
 | 
			
		||||
	   out(mu)()(i,j) = in(mu)(i)(j);
 | 
			
		||||
       }}
 | 
			
		||||
     }
 | 
			
		||||
     reconstruct3(out);
 | 
			
		||||
   }
 | 
			
		||||
 };
 | 
			
		||||
 | 
			
		||||
 template<class fobj,class sobj>
 | 
			
		||||
 struct Nersc3x2unmunger{
 | 
			
		||||
 | 
			
		||||
  void operator() (sobj &in,fobj &out,uint32_t &csum){
 | 
			
		||||
 | 
			
		||||
    NerscChecksum((uint32_t *)&out,sizeof(out),csum); 
 | 
			
		||||
 | 
			
		||||
    for(int mu=0;mu<4;mu++){
 | 
			
		||||
      for(int i=0;i<2;i++){
 | 
			
		||||
      for(int j=0;j<3;j++){
 | 
			
		||||
	out(mu)(i)(j) = in(mu)()(i,j);
 | 
			
		||||
      }}
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
// Template wizardry to map types to strings for NERSC in an extensible way
 | 
			
		||||
////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
 template<class vobj> struct NerscDataType {
 | 
			
		||||
   static void DataType     (std::string &str) { str = std::string("4D_BINARY_UNKNOWN"); };
 | 
			
		||||
   static void FloatingPoint(std::string &str) { str = std::string("IEEE64BIG"); };
 | 
			
		||||
 };
 | 
			
		||||
 | 
			
		||||
 template<> struct NerscDataType<iColourMatrix<ComplexD> > {
 | 
			
		||||
   static void DataType     (std::string &str) { str = std::string("4D_SU3_GAUGE_3X3"); };
 | 
			
		||||
   static void FloatingPoint(std::string &str) { str = std::string("IEEE64BIG");};
 | 
			
		||||
 };
 | 
			
		||||
 | 
			
		||||
 template<> struct NerscDataType<iColourMatrix<ComplexF> > {
 | 
			
		||||
   static void DataType     (std::string &str) { str = std::string("4D_SU3_GAUGE_3X3"); };
 | 
			
		||||
   static void FloatingPoint(std::string &str) { str = std::string("IEEE32BIG");};
 | 
			
		||||
 };
 | 
			
		||||
 | 
			
		||||
//////////////////////////////////////////////////////////////////////
 | 
			
		||||
// Bit and Physical Checksumming and QA of data
 | 
			
		||||
//////////////////////////////////////////////////////////////////////
 | 
			
		||||
/*
 | 
			
		||||
template<class vobj> inline uint32_t NerscChecksum(Lattice<vobj> & data)
 | 
			
		||||
{
 | 
			
		||||
  uint32_t sum;
 | 
			
		||||
  for(int ss=0;ss<data._grid->Osites();ss++){
 | 
			
		||||
    uint32_t *iptr = (uint32_t *)& data._odata[0] ;
 | 
			
		||||
    for(int i=0;i<sizeof(vobj);i+=sizeof(uint32_t)){
 | 
			
		||||
      sum=sum+iptr[i];
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
  data._grid->globalSum(sum);
 | 
			
		||||
  return sum;
 | 
			
		||||
}
 | 
			
		||||
*/
 | 
			
		||||
template<class vobj> inline void NerscPhysicalCharacteristics(Lattice<vobj> & data,NerscField &header)
 | 
			
		||||
{
 | 
			
		||||
  header.data_type      = NerscDataType<vobj>::DataType;
 | 
			
		||||
  header.floating_point = NerscDataType<vobj>::FloatingPoint;
 | 
			
		||||
  return;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
 template<> inline void NerscPhysicalCharacteristics(LatticeGaugeField & data,NerscField &header)
 | 
			
		||||
{
 | 
			
		||||
  NerscDataType<decltype(data._odata[0])>::DataType(header.data_type);
 | 
			
		||||
  NerscDataType<decltype(data._odata[0])>::FloatingPoint(header.floating_point);
 | 
			
		||||
  header.link_trace=1.0;
 | 
			
		||||
  header.plaquette =1.0;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
template<class vobj> inline void NerscStatisics(Lattice<vobj> & data,NerscField &header)
 | 
			
		||||
{
 | 
			
		||||
  assert(data._grid->_ndimension==4);
 | 
			
		||||
 | 
			
		||||
  for(int d=0;d<4;d++)
 | 
			
		||||
    header.dimension[d] = data._grid->_fdimensions[d];
 | 
			
		||||
 | 
			
		||||
  // compute checksum and any physical properties contained for this type
 | 
			
		||||
  //  header.checksum = NerscChecksum(data);
 | 
			
		||||
 | 
			
		||||
  NerscPhysicalCharacteristics(data,header);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
// Now the meat: the object readers
 | 
			
		||||
/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
template<class vobj,class sobj,class fobj,class munger>
 | 
			
		||||
inline void readNerscObject(Lattice<vobj> &Umu,std::string file,munger munge,int offset,std::string &format)
 | 
			
		||||
 | 
			
		||||
template<class vsimd>
 | 
			
		||||
static inline void readConfiguration(Lattice<iLorentzColourMatrix<vsimd> > &Umu,NerscField& header,std::string file)
 | 
			
		||||
{
 | 
			
		||||
  typedef Lattice<iLorentzColourMatrix<vsimd> > GaugeField;
 | 
			
		||||
  typedef Lattice<iColourMatrix<vsimd> > GaugeMatrix;
 | 
			
		||||
 | 
			
		||||
  GridBase *grid = Umu._grid;
 | 
			
		||||
  int offset = readHeader(file,Umu._grid,header);
 | 
			
		||||
 | 
			
		||||
  int ieee32big = (format == std::string("IEEE32BIG"));
 | 
			
		||||
  int ieee32    = (format == std::string("IEEE32"));
 | 
			
		||||
  int ieee64big = (format == std::string("IEEE64BIG"));
 | 
			
		||||
  int ieee64    = (format == std::string("IEEE64"));
 | 
			
		||||
 | 
			
		||||
  // Find the location of each site and send to primary node
 | 
			
		||||
  //  for(int site=0; site < Layout::vol(); ++site){
 | 
			
		||||
  //     multi1d<int> coord = crtesn(site, Layout::lattSize());
 | 
			
		||||
  //     for(int dd=0; dd<Nd; dd++){        /* dir */
 | 
			
		||||
  //        cfg_in.readArray(su3_buffer, float_size, mat_size);
 | 
			
		||||
  //
 | 
			
		||||
  // Above from Chroma; defines loop order now that NERSC doc no longer
 | 
			
		||||
  // available (how short sighted is that?)
 | 
			
		||||
  {
 | 
			
		||||
    std::ifstream fin(file,std::ios::binary|std::ios::in);
 | 
			
		||||
    fin.seekg(offset);
 | 
			
		||||
 | 
			
		||||
    Umu = zero;
 | 
			
		||||
    uint32_t csum=0;
 | 
			
		||||
    fobj file_object;
 | 
			
		||||
    sobj munged;
 | 
			
		||||
    
 | 
			
		||||
    for(int t=0;t<grid->_fdimensions[3];t++){
 | 
			
		||||
    for(int z=0;z<grid->_fdimensions[2];z++){
 | 
			
		||||
    for(int y=0;y<grid->_fdimensions[1];y++){
 | 
			
		||||
    for(int x=0;x<grid->_fdimensions[0];x++){
 | 
			
		||||
 | 
			
		||||
      std::vector<int> site({x,y,z,t});
 | 
			
		||||
 | 
			
		||||
      if ( grid->IsBoss() ) {
 | 
			
		||||
	fin.read((char *)&file_object,sizeof(file_object));
 | 
			
		||||
 | 
			
		||||
	if(ieee32big) be32toh_v((void *)&file_object,sizeof(file_object));
 | 
			
		||||
	if(ieee32)    le32toh_v((void *)&file_object,sizeof(file_object));
 | 
			
		||||
	if(ieee64big) be64toh_v((void *)&file_object,sizeof(file_object));
 | 
			
		||||
	if(ieee64)    le64toh_v((void *)&file_object,sizeof(file_object));
 | 
			
		||||
 | 
			
		||||
	munge(file_object,munged,csum);
 | 
			
		||||
      }
 | 
			
		||||
      // The boss who read the file has their value poked
 | 
			
		||||
      pokeSite(munged,Umu,site);
 | 
			
		||||
    }}}}
 | 
			
		||||
  }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
template<class vobj,class sobj,class fobj,class munger>
 | 
			
		||||
inline void writeNerscObject(Lattice<vobj> &Umu,std::string file,munger munge,int offset,
 | 
			
		||||
			     int sequence,double lt,double pl)
 | 
			
		||||
{
 | 
			
		||||
  GridBase *grid = Umu._grid;
 | 
			
		||||
  NerscField header;
 | 
			
		||||
  
 | 
			
		||||
  //////////////////////////////////////////////////
 | 
			
		||||
  // First write the header; this is in wrong place
 | 
			
		||||
  //////////////////////////////////////////////////
 | 
			
		||||
  assert(grid->_ndimension == 4);
 | 
			
		||||
  for(int d=0;d<4;d++){
 | 
			
		||||
    header.dimension[d]=grid->_fdimensions[d];
 | 
			
		||||
    header.boundary [d]=std::string("PERIODIC");; 
 | 
			
		||||
  }
 | 
			
		||||
  header.hdr_version=std::string("WHATDAHECK");
 | 
			
		||||
  //  header.storage_format=storage_format<vobj>::string; // use template specialisation
 | 
			
		||||
  //  header.data_type=data_type<vobj>::string;
 | 
			
		||||
  header.storage_format=std::string("debug");
 | 
			
		||||
  header.data_type     =std::string("debug");
 | 
			
		||||
 | 
			
		||||
  //FIXME; use template specialisation to fill these out
 | 
			
		||||
  header.link_trace   =lt;
 | 
			
		||||
  header.plaquette    =pl;
 | 
			
		||||
  header.checksum     =0;
 | 
			
		||||
 | 
			
		||||
  //
 | 
			
		||||
  header.sequence_number =sequence;
 | 
			
		||||
  header.ensemble_id     =std::string("UKQCD");
 | 
			
		||||
  header.ensemble_label  =std::string("UKQCD");
 | 
			
		||||
  header.creator         =std::string("Tadahito");
 | 
			
		||||
  header.creator_hardware=std::string("BlueGene/Q");
 | 
			
		||||
  header.creation_date   =std::string("AnnoDomini");
 | 
			
		||||
  header.archive_date    =std::string("AnnoDomini");
 | 
			
		||||
  header.floating_point  =std::string("IEEE64BIG");
 | 
			
		||||
  //  header.data_start=;
 | 
			
		||||
  //  unsigned int checksum;
 | 
			
		||||
 | 
			
		||||
  //////////////////////////////////////////////////
 | 
			
		||||
  // Now write the body
 | 
			
		||||
  //////////////////////////////////////////////////
 | 
			
		||||
  {
 | 
			
		||||
    std::ofstream fout(file,std::ios::binary|std::ios::out);
 | 
			
		||||
    fout.seekp(offset);
 | 
			
		||||
 | 
			
		||||
    Umu = zero;
 | 
			
		||||
    uint32_t csum=0;
 | 
			
		||||
    fobj file_object;
 | 
			
		||||
    sobj unmunged;
 | 
			
		||||
    for(int t=0;t<grid->_fdimensions[3];t++){
 | 
			
		||||
    for(int z=0;z<grid->_fdimensions[2];z++){
 | 
			
		||||
    for(int y=0;y<grid->_fdimensions[1];y++){
 | 
			
		||||
    for(int x=0;x<grid->_fdimensions[0];x++){
 | 
			
		||||
      std::vector<int> site({x,y,z,t});
 | 
			
		||||
      peekSite(unmunged,Umu,site);
 | 
			
		||||
      munge(unmunged,file_object,csum);
 | 
			
		||||
      // broadcast & insert
 | 
			
		||||
      fout.write((char *)&file_object,sizeof(file_object));
 | 
			
		||||
    }}}}
 | 
			
		||||
  }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
inline void readNerscConfiguration(LatticeGaugeField &Umu,NerscField& header,std::string file)
 | 
			
		||||
{
 | 
			
		||||
  GridBase *grid = Umu._grid;
 | 
			
		||||
 | 
			
		||||
  int offset = readNerscHeader(file,Umu._grid,header);
 | 
			
		||||
  NerscField clone(header);
 | 
			
		||||
 | 
			
		||||
  std::string format(header.floating_point);
 | 
			
		||||
 | 
			
		||||
@@ -516,48 +312,106 @@ inline void readNerscConfiguration(LatticeGaugeField &Umu,NerscField& header,std
 | 
			
		||||
  int ieee64big = (format == std::string("IEEE64BIG"));
 | 
			
		||||
  int ieee64    = (format == std::string("IEEE64"));
 | 
			
		||||
 | 
			
		||||
  uint32_t csum;
 | 
			
		||||
  // depending on datatype, set up munger;
 | 
			
		||||
  // munger is a function of <floating point, Real, data_type>
 | 
			
		||||
  if ( header.data_type == std::string("4D_SU3_GAUGE") ) {
 | 
			
		||||
    if ( ieee32 || ieee32big ) {
 | 
			
		||||
      readNerscObject<vLorentzColourMatrix, LorentzColourMatrix, LorentzColour2x3F> 
 | 
			
		||||
	(Umu,file,
 | 
			
		||||
	 Nersc3x2munger<LorentzColour2x3F,LorentzColourMatrix>(),
 | 
			
		||||
	 offset,format);
 | 
			
		||||
      //      csum=BinaryIO::readObjectSerial<iLorentzColourMatrix<vsimd>, LorentzColour2x3F> 
 | 
			
		||||
      csum=BinaryIO::readObjectParallel<iLorentzColourMatrix<vsimd>, LorentzColour2x3F> 
 | 
			
		||||
	(Umu,file,Nersc3x2munger<LorentzColour2x3F,LorentzColourMatrix>(), offset,format);
 | 
			
		||||
    }
 | 
			
		||||
    if ( ieee64 || ieee64big ) {
 | 
			
		||||
      readNerscObject<vLorentzColourMatrix, LorentzColourMatrix, LorentzColour2x3D> 
 | 
			
		||||
	(Umu,file,
 | 
			
		||||
	 Nersc3x2munger<LorentzColour2x3D,LorentzColourMatrix>(),
 | 
			
		||||
	 offset,format);
 | 
			
		||||
      //      csum=BinaryIO::readObjectSerial<iLorentzColourMatrix<vsimd>, LorentzColour2x3D> 
 | 
			
		||||
      csum=BinaryIO::readObjectParallel<iLorentzColourMatrix<vsimd>, LorentzColour2x3D> 
 | 
			
		||||
	(Umu,file,Nersc3x2munger<LorentzColour2x3D,LorentzColourMatrix>(),offset,format);
 | 
			
		||||
    }
 | 
			
		||||
  } else if ( header.data_type == std::string("4D_SU3_GAUGE_3X3") ) {
 | 
			
		||||
    if ( ieee32 || ieee32big ) {
 | 
			
		||||
      readNerscObject<vLorentzColourMatrix,LorentzColourMatrix,LorentzColourMatrixF>
 | 
			
		||||
      //      csum=BinaryIO::readObjectSerial<iLorentzColourMatrix<vsimd>,LorentzColourMatrixF>
 | 
			
		||||
      csum=BinaryIO::readObjectParallel<iLorentzColourMatrix<vsimd>,LorentzColourMatrixF>
 | 
			
		||||
	(Umu,file,NerscSimpleMunger<LorentzColourMatrixF,LorentzColourMatrix>(),offset,format);
 | 
			
		||||
    }
 | 
			
		||||
    if ( ieee64 || ieee64big ) {
 | 
			
		||||
      readNerscObject<vLorentzColourMatrix,LorentzColourMatrix,LorentzColourMatrixD>
 | 
			
		||||
      //      csum=BinaryIO::readObjectSerial<iLorentzColourMatrix<vsimd>,LorentzColourMatrixD>
 | 
			
		||||
      csum=BinaryIO::readObjectParallel<iLorentzColourMatrix<vsimd>,LorentzColourMatrixD>
 | 
			
		||||
	(Umu,file,NerscSimpleMunger<LorentzColourMatrixD,LorentzColourMatrix>(),offset,format);
 | 
			
		||||
    }
 | 
			
		||||
  } else {
 | 
			
		||||
    assert(0);
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  NerscStatistics<GaugeMatrix,GaugeField>(Umu,clone);
 | 
			
		||||
 | 
			
		||||
  assert(fabs(clone.plaquette -header.plaquette ) < 1.0e-5 );
 | 
			
		||||
  assert(fabs(clone.link_trace-header.link_trace) < 1.0e-6 );
 | 
			
		||||
  assert(csum == header.checksum );
 | 
			
		||||
 | 
			
		||||
  std::cout<<GridLogMessage <<"Read NERSC Configuration "<<file<< " and plaquette, link trace, and checksum agree"<<std::endl;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
template<class vobj>
 | 
			
		||||
inline void writeNerscConfiguration(Lattice<vobj> &Umu,NerscField &header,std::string file)
 | 
			
		||||
template<class vsimd>
 | 
			
		||||
static inline void writeConfiguration(Lattice<iLorentzColourMatrix<vsimd> > &Umu,std::string file, int two_row,int bits32)
 | 
			
		||||
{
 | 
			
		||||
  GridBase &grid = Umu._grid;
 | 
			
		||||
  typedef Lattice<iLorentzColourMatrix<vsimd> > GaugeField;
 | 
			
		||||
  typedef Lattice<iColourMatrix<vsimd> > GaugeMatrix;
 | 
			
		||||
  typedef iLorentzColourMatrix<vsimd> vobj;
 | 
			
		||||
  typedef typename vobj::scalar_object sobj;
 | 
			
		||||
 | 
			
		||||
  // Following should become arguments
 | 
			
		||||
  NerscField header;
 | 
			
		||||
  header.sequence_number = 1;
 | 
			
		||||
  header.ensemble_id     = "UKQCD";
 | 
			
		||||
  header.ensemble_label  = "DWF";
 | 
			
		||||
 | 
			
		||||
  typedef LorentzColourMatrixD fobj3D;
 | 
			
		||||
  typedef LorentzColour2x3D    fobj2D;
 | 
			
		||||
  typedef LorentzColourMatrixF fobj3f;
 | 
			
		||||
  typedef LorentzColour2x3F    fobj2f;
 | 
			
		||||
 | 
			
		||||
  GridBase *grid = Umu._grid;
 | 
			
		||||
 | 
			
		||||
  NerscGrid(grid,header);
 | 
			
		||||
  NerscStatistics<GaugeMatrix,GaugeField>(Umu,header);
 | 
			
		||||
  NerscMachineCharacteristics(header);
 | 
			
		||||
 | 
			
		||||
  uint32_t csum;
 | 
			
		||||
  int offset;
 | 
			
		||||
  
 | 
			
		||||
  NerscStatisics(Umu,header);
 | 
			
		||||
  if ( two_row ) { 
 | 
			
		||||
 | 
			
		||||
  int offset = writeNerscHeader(header,file);
 | 
			
		||||
    header.floating_point = std::string("IEEE64BIG");
 | 
			
		||||
    header.data_type      = std::string("4D_SU3_GAUGE");
 | 
			
		||||
    Nersc3x2unmunger<fobj2D,sobj> munge;
 | 
			
		||||
    BinaryIO::Uint32Checksum<vobj,fobj2D>(Umu, munge,header.checksum);
 | 
			
		||||
    offset = writeHeader(header,file);
 | 
			
		||||
    csum=BinaryIO::writeObjectSerial<vobj,fobj2D>(Umu,file,munge,offset,header.floating_point);
 | 
			
		||||
 | 
			
		||||
  writeNerscObject(Umu,NerscSimpleMunger<vobj,vobj>(),offset);
 | 
			
		||||
}
 | 
			
		||||
    std::string file1 = file+"para";
 | 
			
		||||
    int offset1 = writeHeader(header,file1);
 | 
			
		||||
    int csum1=BinaryIO::writeObjectParallel<vobj,fobj2D>(Umu,file1,munge,offset,header.floating_point);
 | 
			
		||||
 | 
			
		||||
    
 | 
			
		||||
    std::cout << GridLogMessage << " TESTING PARALLEL WRITE offsets " << offset1 << " "<< offset << std::endl;
 | 
			
		||||
    std::cout << GridLogMessage <<std::hex<< " TESTING PARALLEL WRITE csums   " << csum1 << " "<< csum << std::endl;
 | 
			
		||||
    std::cout << std::dec;
 | 
			
		||||
 | 
			
		||||
}
 | 
			
		||||
    assert(offset1==offset);  
 | 
			
		||||
    assert(csum1==csum);  
 | 
			
		||||
 | 
			
		||||
  } else { 
 | 
			
		||||
    header.floating_point = std::string("IEEE64BIG");
 | 
			
		||||
    header.data_type      = std::string("4D_SU3_GAUGE_3X3");
 | 
			
		||||
    NerscSimpleUnmunger<fobj3D,sobj> munge;
 | 
			
		||||
    BinaryIO::Uint32Checksum<vobj,fobj3D>(Umu, munge,header.checksum);
 | 
			
		||||
    offset = writeHeader(header,file);
 | 
			
		||||
    csum=BinaryIO::writeObjectSerial<vobj,fobj3D>(Umu,file,munge,offset,header.floating_point);
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  std::cout<<GridLogMessage <<"Written NERSC Configuration "<<file<< " checksum "<<std::hex<<csum<< std::dec<<" plaq "<< header.plaquette <<std::endl;
 | 
			
		||||
 | 
			
		||||
 }
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
}}
 | 
			
		||||
#endif
 | 
			
		||||
 
 | 
			
		||||
@@ -68,6 +68,22 @@ public:
 | 
			
		||||
    
 | 
			
		||||
    return sumplaq/vol/faces/Nc; // Nd , Nc dependent... FIXME
 | 
			
		||||
  }
 | 
			
		||||
  static RealD linkTrace(const GaugeLorentz &Umu){
 | 
			
		||||
    std::vector<GaugeMat> U(4,Umu._grid);
 | 
			
		||||
 | 
			
		||||
    LatticeComplex Tr(Umu._grid); Tr=zero;
 | 
			
		||||
    for(int mu=0;mu<Nd;mu++){
 | 
			
		||||
      U[mu] = PeekIndex<LorentzIndex>(Umu,mu);
 | 
			
		||||
      Tr = Tr+trace(U[mu]);
 | 
			
		||||
    }
 | 
			
		||||
    
 | 
			
		||||
    TComplex Tp = sum(Tr);
 | 
			
		||||
    Complex p  = TensorRemove(Tp);
 | 
			
		||||
 | 
			
		||||
    double vol = Umu._grid->gSites();
 | 
			
		||||
 | 
			
		||||
    return p.real()/vol/4.0/3.0;
 | 
			
		||||
  };
 | 
			
		||||
  //////////////////////////////////////////////////
 | 
			
		||||
  // the sum over all staples on each site
 | 
			
		||||
  //////////////////////////////////////////////////
 | 
			
		||||
 
 | 
			
		||||
		Reference in New Issue
	
	Block a user