2016-01-02 14:51:32 +00:00
|
|
|
/*************************************************************************************
|
|
|
|
|
|
|
|
Grid physics library, www.github.com/paboyle/Grid
|
|
|
|
|
|
|
|
Source file: ./tests/Test_nersc_io.cc
|
|
|
|
|
|
|
|
Copyright (C) 2015
|
|
|
|
|
|
|
|
Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk>
|
|
|
|
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
|
|
|
Author: paboyle <paboyle@ph.ed.ac.uk>
|
|
|
|
|
|
|
|
This program is free software; you can redistribute it and/or modify
|
|
|
|
it under the terms of the GNU General Public License as published by
|
|
|
|
the Free Software Foundation; either version 2 of the License, or
|
|
|
|
(at your option) any later version.
|
|
|
|
|
|
|
|
This program is distributed in the hope that it will be useful,
|
|
|
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
|
|
GNU General Public License for more details.
|
|
|
|
|
|
|
|
You should have received a copy of the GNU General Public License along
|
|
|
|
with this program; if not, write to the Free Software Foundation, Inc.,
|
|
|
|
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
|
|
|
|
|
|
|
See the full license in the file "LICENSE" in the top level distribution directory
|
|
|
|
*************************************************************************************/
|
|
|
|
/* END LEGAL */
|
2016-07-07 22:31:07 +01:00
|
|
|
#include <Grid/Grid.h>
|
2015-04-22 22:46:48 +01:00
|
|
|
|
|
|
|
using namespace std;
|
|
|
|
using namespace Grid;
|
|
|
|
using namespace Grid::QCD;
|
|
|
|
|
|
|
|
|
|
|
|
int main (int argc, char ** argv)
|
|
|
|
{
|
|
|
|
Grid_init(&argc,&argv);
|
|
|
|
|
2015-05-11 12:43:10 +01:00
|
|
|
|
2015-06-30 15:01:26 +01:00
|
|
|
std::vector<int> simd_layout = GridDefaultSimd(4,vComplex::Nsimd());
|
2015-05-11 18:59:03 +01:00
|
|
|
std::vector<int> mpi_layout = GridDefaultMpi();
|
2017-05-05 16:54:44 +01:00
|
|
|
std::vector<int> latt_size ({16,16,16,16});
|
2015-04-23 20:42:30 +01:00
|
|
|
std::vector<int> clatt_size ({4,4,4,8});
|
2015-04-23 15:13:00 +01:00
|
|
|
int orthodir=3;
|
|
|
|
int orthosz =latt_size[orthodir];
|
2015-04-22 22:46:48 +01:00
|
|
|
|
|
|
|
GridCartesian Fine(latt_size,simd_layout,mpi_layout);
|
2015-04-23 20:42:30 +01:00
|
|
|
GridCartesian Coarse(clatt_size,simd_layout,mpi_layout);
|
|
|
|
|
2015-12-19 18:32:25 +00:00
|
|
|
GridParallelRNG pRNGa(&Fine);
|
|
|
|
GridParallelRNG pRNGb(&Fine);
|
|
|
|
GridSerialRNG sRNGa;
|
|
|
|
GridSerialRNG sRNGb;
|
|
|
|
|
2017-05-05 16:54:44 +01:00
|
|
|
pRNGa.SeedFixedIntegers(std::vector<int>({45,12,81,9}));
|
|
|
|
sRNGa.SeedFixedIntegers(std::vector<int>({45,12,81,9}));
|
2015-12-19 18:32:25 +00:00
|
|
|
|
|
|
|
std::string rfile("./ckpoint_rng.4000");
|
|
|
|
NerscIO::writeRNGState(sRNGa,pRNGa,rfile);
|
|
|
|
NerscField rngheader;
|
|
|
|
NerscIO::readRNGState (sRNGb,pRNGb,rngheader,rfile);
|
|
|
|
|
|
|
|
LatticeComplex tmpa(&Fine); random(pRNGa,tmpa);
|
|
|
|
LatticeComplex tmpb(&Fine); random(pRNGb,tmpb);
|
|
|
|
tmpa = tmpa - tmpb;
|
|
|
|
std::cout << " difference between restored randoms and orig "<<norm2( tmpa ) <<" / "<< norm2(tmpb)<<std::endl;
|
|
|
|
|
|
|
|
ComplexD a,b;
|
|
|
|
|
|
|
|
random(sRNGa,a);
|
|
|
|
random(sRNGb,b);
|
|
|
|
std::cout << " serial RNG numbers "<<a<<" "<<b<<std::endl;
|
|
|
|
|
2016-10-20 17:01:59 +01:00
|
|
|
|
2015-04-22 22:46:48 +01:00
|
|
|
LatticeGaugeField Umu(&Fine);
|
2015-06-16 20:23:27 +01:00
|
|
|
LatticeGaugeField Umu_diff(&Fine);
|
|
|
|
LatticeGaugeField Umu_saved(&Fine);
|
2015-04-22 22:46:48 +01:00
|
|
|
|
|
|
|
std::vector<LatticeColourMatrix> U(4,&Fine);
|
|
|
|
|
2017-05-25 13:32:24 +01:00
|
|
|
SU3::HotConfiguration(pRNGa,Umu);
|
2017-05-05 16:54:44 +01:00
|
|
|
|
2015-04-22 22:46:48 +01:00
|
|
|
NerscField header;
|
|
|
|
std::string file("./ckpoint_lat.4000");
|
2017-05-05 16:54:44 +01:00
|
|
|
|
|
|
|
int precision32 = 0;
|
|
|
|
int tworow = 0;
|
|
|
|
NerscIO::writeConfiguration(Umu,file,tworow,precision32);
|
Binary IO file for generic Grid array parallel I/O.
Number of IO MPI tasks can be varied by selecting which
dimensions use parallel IO and which dimensions use Serial send to boss
I/O.
Thus can neck down from, say 1024 nodes = 4x4x8x8 to {1,8,32,64,128,256,1024} nodes
doing the I/O.
Interpolates nicely between ALL nodes write their data, a single boss per time-plane
in processor space [old UKQCD fortran code did this], and a single node doing all I/O.
Not sure I have the transfer sizes big enough and am not overly convinced fstream
is guaranteed to not give buffer inconsistencies unless I set streambuf size to zero.
Practically it has worked on 8 tasks, 2x1x2x2 writing /cloning NERSC configurations
on my MacOS + OpenMPI and Clang environment.
It is VERY easy to switch to pwrite at a later date, and also easy to send x-strips around from
each node in order to gather bigger chunks at the syscall level.
That would push us up to the circa 8x 18*4*8 == 4KB size write chunk, and by taking, say, x/y non
parallel we get to 16MB contiguous chunks written in multi 4KB transactions
per IOnode in 64^3 lattices for configuration I/O.
I suspect this is fine for system performance.
2015-08-26 13:40:29 +01:00
|
|
|
NerscIO::readConfiguration(Umu,header,file);
|
2015-04-22 22:46:48 +01:00
|
|
|
|
|
|
|
for(int mu=0;mu<Nd;mu++){
|
2015-06-30 15:01:26 +01:00
|
|
|
U[mu] = PeekIndex<LorentzIndex>(Umu,mu);
|
2015-04-22 22:46:48 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
// Painful ; fix syntactical niceness
|
|
|
|
LatticeComplex LinkTrace(&Fine);
|
|
|
|
LinkTrace=zero;
|
|
|
|
for(int mu=0;mu<Nd;mu++){
|
|
|
|
LinkTrace = LinkTrace + trace(U[mu]);
|
|
|
|
}
|
|
|
|
|
|
|
|
// (1+2+3)=6 = N(N-1)/2 terms
|
|
|
|
LatticeComplex Plaq(&Fine);
|
2015-04-23 20:42:30 +01:00
|
|
|
LatticeComplex cPlaq(&Coarse);
|
2015-05-19 21:29:07 +01:00
|
|
|
|
2015-04-22 22:46:48 +01:00
|
|
|
Plaq = zero;
|
2015-05-19 21:29:07 +01:00
|
|
|
#if 1
|
2015-04-22 22:46:48 +01:00
|
|
|
for(int mu=1;mu<Nd;mu++){
|
|
|
|
for(int nu=0;nu<mu;nu++){
|
|
|
|
Plaq = Plaq + trace(U[mu]*Cshift(U[nu],mu,1)*adj(Cshift(U[mu],nu,1))*adj(U[nu]));
|
|
|
|
}
|
|
|
|
}
|
2015-05-19 21:29:07 +01:00
|
|
|
#endif
|
2015-04-22 22:46:48 +01:00
|
|
|
double vol = Fine.gSites();
|
|
|
|
Complex PlaqScale(1.0/vol/6.0/3.0);
|
2015-07-23 17:31:13 +01:00
|
|
|
std::cout<<GridLogMessage <<"PlaqScale" << PlaqScale<<std::endl;
|
2015-04-23 15:13:00 +01:00
|
|
|
|
|
|
|
std::vector<TComplex> Plaq_T(orthosz);
|
|
|
|
sliceSum(Plaq,Plaq_T,Nd-1);
|
|
|
|
int Nt = Plaq_T.size();
|
|
|
|
|
2015-04-26 15:51:09 +01:00
|
|
|
TComplex Plaq_T_sum;
|
|
|
|
Plaq_T_sum=zero;
|
2015-04-23 15:13:00 +01:00
|
|
|
for(int t=0;t<Nt;t++){
|
|
|
|
Plaq_T_sum = Plaq_T_sum+Plaq_T[t];
|
|
|
|
Complex Pt=TensorRemove(Plaq_T[t]);
|
2015-07-23 17:31:13 +01:00
|
|
|
std::cout<<GridLogMessage << "sliced ["<<t<<"]" <<Pt*PlaqScale*Real(Nt)<<std::endl;
|
2015-04-23 15:13:00 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
{
|
|
|
|
Complex Pt = TensorRemove(Plaq_T_sum);
|
2015-07-23 17:31:13 +01:00
|
|
|
std::cout<<GridLogMessage << "total " <<Pt*PlaqScale<<std::endl;
|
2015-04-23 15:13:00 +01:00
|
|
|
}
|
|
|
|
|
2015-06-16 20:23:27 +01:00
|
|
|
|
2015-04-22 22:46:48 +01:00
|
|
|
TComplex Tp = sum(Plaq);
|
|
|
|
Complex p = TensorRemove(Tp);
|
2015-07-23 17:31:13 +01:00
|
|
|
std::cout<<GridLogMessage << "calculated plaquettes " <<p*PlaqScale<<std::endl;
|
2015-04-22 22:46:48 +01:00
|
|
|
|
2015-04-23 15:13:00 +01:00
|
|
|
|
2015-04-22 22:46:48 +01:00
|
|
|
Complex LinkTraceScale(1.0/vol/4.0/3.0);
|
|
|
|
TComplex Tl = sum(LinkTrace);
|
|
|
|
Complex l = TensorRemove(Tl);
|
2015-07-23 17:31:13 +01:00
|
|
|
std::cout<<GridLogMessage << "calculated link trace " <<l*LinkTraceScale<<std::endl;
|
2015-04-22 22:46:48 +01:00
|
|
|
|
2015-06-08 12:04:59 +01:00
|
|
|
blockSum(cPlaq,Plaq);
|
2015-04-23 20:42:30 +01:00
|
|
|
TComplex TcP = sum(cPlaq);
|
|
|
|
Complex ll= TensorRemove(TcP);
|
2015-07-23 17:31:13 +01:00
|
|
|
std::cout<<GridLogMessage << "coarsened plaquettes sum to " <<ll*PlaqScale<<std::endl;
|
Binary IO file for generic Grid array parallel I/O.
Number of IO MPI tasks can be varied by selecting which
dimensions use parallel IO and which dimensions use Serial send to boss
I/O.
Thus can neck down from, say 1024 nodes = 4x4x8x8 to {1,8,32,64,128,256,1024} nodes
doing the I/O.
Interpolates nicely between ALL nodes write their data, a single boss per time-plane
in processor space [old UKQCD fortran code did this], and a single node doing all I/O.
Not sure I have the transfer sizes big enough and am not overly convinced fstream
is guaranteed to not give buffer inconsistencies unless I set streambuf size to zero.
Practically it has worked on 8 tasks, 2x1x2x2 writing /cloning NERSC configurations
on my MacOS + OpenMPI and Clang environment.
It is VERY easy to switch to pwrite at a later date, and also easy to send x-strips around from
each node in order to gather bigger chunks at the syscall level.
That would push us up to the circa 8x 18*4*8 == 4KB size write chunk, and by taking, say, x/y non
parallel we get to 16MB contiguous chunks written in multi 4KB transactions
per IOnode in 64^3 lattices for configuration I/O.
I suspect this is fine for system performance.
2015-08-26 13:40:29 +01:00
|
|
|
|
|
|
|
std::string clone2x3("./ckpoint_clone2x3.4000");
|
|
|
|
std::string clone3x3("./ckpoint_clone3x3.4000");
|
|
|
|
|
|
|
|
NerscIO::writeConfiguration(Umu,clone3x3,0,precision32);
|
|
|
|
NerscIO::writeConfiguration(Umu,clone2x3,1,precision32);
|
2015-04-23 20:42:30 +01:00
|
|
|
|
2015-04-22 22:46:48 +01:00
|
|
|
Grid_finalize();
|
|
|
|
}
|