mirror of https://github.com/paboyle/Grid.git synced 2025-06-15 14:27:06 +01:00

Compare commits


80 Commits

Author SHA1 Message Date
9e56c65730 Updated TODO list 2017-06-21 14:02:58 +01:00
ef4f2b8c41 todo update 2017-06-21 09:22:20 +01:00
e8b95bd35b Clean up finished. Could shrink Lanczos to around 400 lines at a push 2017-06-21 02:50:09 +01:00
7e35286860 Simplified Lanczos, added Eigen diagonalisation.
Curious if we can deprecate dependency on BLAS.
Will see when we get 48^3 running on our BG/Q port
2017-06-21 02:26:03 +01:00
0486ff8e79 Improved the Lanczos 2017-06-20 18:46:01 +01:00
e9cc21900f Block solver complete for staggered. Now stable on mass 0.003 and
gives 8x (!) speed up on Haswell laptop vs. standard CG for 8 RHS solves.

166 iterations vs. 537 iterations, so an algorithmic gain plus a 2x gain in flop rate.

Better than a slap in the face with a wet kipper.
2017-06-20 12:37:41 +01:00
0a8faac271 Fix make tests compile 2017-06-19 22:54:18 +01:00
abc4de0fd2 No compile make tests fix 2017-06-19 22:03:03 +01:00
cfe3cd76d1 Block solver improvements 2017-06-19 14:04:21 +01:00
3fa5e3109f Merge branch 'develop' of https://github.com/paboyle/Grid into develop 2017-06-19 14:01:44 +01:00
8b7049f737 Improved detection of usqcdInfo for plaq/linktr 2017-06-19 08:46:07 +01:00
c85024683e Merge branch 'feature/parallelio' into develop 2017-06-19 01:39:48 +01:00
1300b0b04b Update to enable multiple records per file, more consistent with SciDAC.
open, close, write records...
2017-06-19 01:01:48 +01:00
e6d984b484 ILDG tests 2017-06-18 00:13:22 +01:00
1d18d95d4f Class name return 2017-06-18 00:13:03 +01:00
ae39ec85a3 ComplexField defined 2017-06-18 00:12:48 +01:00
b96daf53a0 Query tensor structures 2017-06-18 00:12:15 +01:00
46879e1658 Complex defined in Impl even for gauge. 2017-06-18 00:11:45 +01:00
ae4de94798 SciDAC I/O support 2017-06-18 00:11:23 +01:00
0ab555b4f5 SciDAC I/O and ILDG improvements 2017-06-18 00:11:02 +01:00
8e9be9f84f Updates for SciDAC IO 2017-06-18 00:10:42 +01:00
d572170170 Update for SciDAC 2017-06-18 00:10:20 +01:00
12ccc73cf5 Serialisation no compile fix 2017-06-14 05:19:17 +01:00
e7564f8330 Starting a test for reading an ILDG file. 2017-06-13 12:22:50 +01:00
91199a8ea0 openmpi is not const safe 2017-06-13 12:21:29 +01:00
0494feec98 Libz dependency 2017-06-13 12:00:23 +01:00
a16b1e134e gcc 4.9 fix 2017-06-13 10:48:43 +01:00
769ad578f5 Odd new error on g++ 4.9 on Travis 2017-06-12 00:41:21 +01:00
eaac0044b5 Compile fixes 2017-06-12 00:20:49 +01:00
56042f002c New files 2017-06-11 23:19:20 +01:00
3bfd1f13e6 I/O improvements 2017-06-11 23:14:10 +01:00
70ab598c96 Move gfix into utils 2017-06-08 22:22:23 +01:00
1d0ca65e28 Move Gfix into utils 2017-06-08 22:21:50 +01:00
2bc4d0a20e Move code into utils 2017-06-08 22:21:25 +01:00
092dcd4e04 MPI I/O only if MPI compiled 2017-06-02 22:50:25 +01:00
4a8c4ccfba Test wilson flow, added maxTau for adaptive flow 2017-06-02 17:02:29 +01:00
9b44189d5a Merge branch 'develop' of https://github.com/paboyle/Grid into develop 2017-06-02 16:56:00 +01:00
7da4856e8e Wilson flow with adaptive steps 2017-06-02 16:55:53 +01:00
aaf1e33a77 Adding adaptive integration in the WilsonFlow 2017-06-02 16:32:35 +01:00
094c3d091a Improved and RNG's now survive checkpoint 2017-06-02 00:38:58 +01:00
4b98e524a0 Roll over to MPI version of I/O 2017-06-01 17:38:18 -04:00
1a1f6d55f9 Roll over to MPI IO for parallel IO 2017-06-01 17:37:26 -04:00
21421656ab Big changes improving the code to use MPI IO 2017-06-01 17:36:53 -04:00
6f687a67cd As local vols increase, use 64 bits for safety 2017-06-01 17:36:18 -04:00
b30754e762 Merge branch 'feature/parallelio' of https://github.com/paboyle/Grid into feature/parallelio 2017-05-30 23:41:28 +01:00
1e429a0d57 Added MPI version 2017-05-30 23:41:07 +01:00
d38a4de36c Beginning move to MPI IO 2017-05-30 23:40:39 +01:00
ef1b7db374 Diff comparison check 2017-05-30 23:40:11 +01:00
53a9aeb965 Cosmetic only 2017-05-30 23:39:53 +01:00
e30fa9f4b8 RankCount; need to clean up ambiguous ProcessCount 2017-05-30 23:39:16 +01:00
58e8d0a10d reverse direction lexico mapping 2017-05-30 23:38:30 +01:00
62cf9cf638 Cleaner code 2017-05-30 23:38:02 +01:00
0fb458879d Precision safe compile 2017-05-30 23:37:02 +01:00
725c513d94 Better MPI3 benchmarking 2017-05-29 16:47:32 -04:00
d8648307ff Merge branch 'develop' into feature/hadrons 2017-05-29 12:58:08 +01:00
064315c00b Hadrons: mesons gamma list fix 2017-05-29 12:57:33 +01:00
7c6cc85df6 Updating WilsonFlow test 2017-05-27 18:03:49 +01:00
a6691ef87c Merge pull request #110 from Lanny91/feature/hadrons
Hadrons: Fermion boundary conditions can now be set in measurement code.
2017-05-26 16:43:22 +01:00
8e0ced627a Hadrons: Fermion boundary conditions can now be set in measurement code. 2017-05-26 15:59:15 +01:00
0de314870d Faster derivative for WilsonGauge 2017-05-26 14:31:49 +01:00
ffb91e53d2 Merge branch 'develop' of https://github.com/paboyle/Grid into develop 2017-05-26 12:46:02 +01:00
f4e8bf2858 Fixing the topological charge. Wilson Flow tested, ok 2017-05-26 12:45:59 +01:00
a74c34315c Bootstrap script fix 2017-05-25 14:27:49 +01:00
69470ccc10 Update to do list 2017-05-25 13:41:26 +01:00
b8b5934193 Attempts to speed up the parallel IO 2017-05-25 13:32:24 +01:00
75856f2945 Compilation fix in the Tensor_exp 2017-05-25 12:44:56 +01:00
3c112a7a25 Small correction to the general exp definition 2017-05-25 12:09:00 +01:00
ab3596d4d3 Using Cayley-Hamilton form for the exponential of SU(3) matrices 2017-05-25 12:07:47 +01:00
a8c10b1933 Use a global-X x Local-Y chunksize for parallel binary I/O.
Gives O(32 x 8 x 18*8*8) chunk size on configuration I/O.

At 150KB should be getting close to packet sizes and 4MB filesystem
block sizes that are reasonably (!?) performant. We shall see once I move
this off my laptop and over to BNL and time it.
2017-05-25 11:43:33 +01:00
15e801af3f Fixing a compilation error for generic SIMD 2017-05-19 16:39:36 +01:00
0ffc235741 Adding more statistics to the Benchmark_comms. Min and max 2017-05-19 10:55:04 +01:00
8e19c99c7d Adding more statistical info in the Benchmark_comms 2017-05-18 19:07:35 +01:00
a0bc0ad06f Reverting change in Bechmark_comms. Keeping 300 iterations 2017-05-18 17:48:11 +01:00
a8fb2835ca Merge branch 'develop' of https://github.com/paboyle/Grid into develop 2017-05-18 14:45:00 +01:00
bc862ce3ab Fixing an allocation issue in Benchmark_comms 2017-05-18 14:44:56 +01:00
3267683e22 Union workaround for g++ 2017-05-17 11:26:18 +01:00
f46a67ffb3 No compile issue on clang on mac fixed.
Compiler version was clang++-3.9 under mpicxx
2017-05-17 10:51:01 +01:00
f7b8383ef5 Half precision comms mixed prec test 2017-05-16 14:52:51 +01:00
10f2872aae Faster exponentiation for lattice fields 2017-05-15 15:51:16 +01:00
cd73897b8d Merge branch 'release/v0.7.0' into develop 2017-05-12 01:16:02 +01:00
87 changed files with 4118 additions and 3900 deletions

TODO
View File

@@ -1,23 +1,30 @@
 TODO:
 ---------------
 
-Peter's work list:
-2)- Precision conversion and sort out localConvert <--
-3)- Remove DenseVector, DenseMatrix; Use Eigen instead. <-- started
-4)- Binary I/O speed up & x-strips
--- Profile CG, BlockCG, etc... Flop count/rate -- PARTIAL, time but no flop/s yet
--- Physical propagator interface
--- Conserved currents
--- GaugeFix into central location
--- Multigrid Wilson and DWF, compare to other Multigrid implementations
--- HDCR resume
+Large item work list:
+1)- MultiRHS with spread out extra dim -- Go through filesystem with SciDAC I/O
+2)- Christoph's local basis expansion Lanczos
+3)- BG/Q port and check
+4)- Precision conversion and sort out localConvert <-- partial
+  - Consistent linear solver flop count/rate -- PARTIAL, time but no flop/s yet
+5)- Physical propagator interface
+6)- Conserved currents
+7)- Multigrid Wilson and DWF, compare to other Multigrid implementations
+8)- HDCR resume
 
 Recent DONE
 
+-- Lanczos Remove DenseVector, DenseMatrix; Use Eigen instead. <-- DONE
+-- GaugeFix into central location <-- DONE
+-- Scidac and Ildg metadata handling <-- DONE
+-- Binary I/O MPI2 IO <-- DONE
+-- Binary I/O speed up & x-strips <-- DONE
 -- Cut down the exterior overhead <-- DONE
 -- Interior legs from SHM comms <-- DONE
 -- Half-precision comms <-- DONE
--- Merge high precision reduction into develop
+-- Merge high precision reduction into develop <-- DONE
+-- BlockCG, BCGrQ <-- DONE
--- multiRHS DWF; benchmark on Cori/BNL for comms elimination
+-- multiRHS DWF; benchmark on Cori/BNL for comms elimination <-- DONE
 -- slice* linalg routines for multiRHS, BlockCG
 -----

View File

@@ -31,6 +31,32 @@ using namespace std;
 using namespace Grid;
 using namespace Grid::QCD;
 
+struct time_statistics{
+  double mean;
+  double err;
+  double min;
+  double max;
+
+  void statistics(std::vector<double> v){
+    double sum = std::accumulate(v.begin(), v.end(), 0.0);
+    mean = sum / v.size();
+
+    std::vector<double> diff(v.size());
+    std::transform(v.begin(), v.end(), diff.begin(), [=](double x) { return x - mean; });
+    double sq_sum = std::inner_product(diff.begin(), diff.end(), diff.begin(), 0.0);
+    err = std::sqrt(sq_sum / (v.size()*(v.size() - 1)));
+
+    auto result = std::minmax_element(v.begin(), v.end());
+    min = *result.first;
+    max = *result.second;
+  }
+};
+
+void header(){
+  std::cout <<GridLogMessage << " L "<<"\t"<<" Ls "<<"\t"
+            <<std::setw(11)<<"bytes"<<"MB/s uni (err/min/max)"<<"\t\t"<<"MB/s bidi (err/min/max)"<<std::endl;
+};
+
 int main (int argc, char ** argv)
 {
   Grid_init(&argc,&argv);
@@ -40,15 +66,19 @@ int main (int argc, char ** argv)
   int threads = GridThread::GetThreads();
   std::cout<<GridLogMessage << "Grid is setup to use "<<threads<<" threads"<<std::endl;
 
-  int Nloop=10;
+  int Nloop=100;
   int nmu=0;
+  int maxlat=24;
   for(int mu=0;mu<Nd;mu++) if (mpi_layout[mu]>1) nmu++;
+
+  std::cout << GridLogMessage << "Number of iterations to average: "<< Nloop << std::endl;
+  std::vector<double> t_time(Nloop);
+  time_statistics timestat;
 
   std::cout<<GridLogMessage << "===================================================================================================="<<std::endl;
   std::cout<<GridLogMessage << "= Benchmarking concurrent halo exchange in "<<nmu<<" dimensions"<<std::endl;
   std::cout<<GridLogMessage << "===================================================================================================="<<std::endl;
-  std::cout<<GridLogMessage << " L "<<"\t\t"<<" Ls "<<"\t\t"<<"bytes"<<"\t\t"<<"MB/s uni"<<"\t\t"<<"MB/s bidi"<<std::endl;
-  int maxlat=24;
+  header();
 
   for(int lat=4;lat<=maxlat;lat+=4){
     for(int Ls=8;Ls<=32;Ls*=2){
@@ -58,6 +88,9 @@ int main (int argc, char ** argv)
                                      lat*mpi_layout[3]});
 
       GridCartesian Grid(latt_size,simd_layout,mpi_layout);
+      RealD Nrank = Grid._Nprocessors;
+      RealD Nnode = Grid.NodeCount();
+      RealD ppn = Nrank/Nnode;
 
       std::vector<std::vector<HalfSpinColourVectorD> > xbuf(8,std::vector<HalfSpinColourVectorD>(lat*lat*lat*Ls));
      std::vector<std::vector<HalfSpinColourVectorD> > rbuf(8,std::vector<HalfSpinColourVectorD>(lat*lat*lat*Ls));
@@ -65,8 +98,8 @@ int main (int argc, char ** argv)
       int ncomm;
       int bytes=lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD);
 
-      double start=usecond();
       for(int i=0;i<Nloop;i++){
+        double start=usecond();
 
         std::vector<CartesianCommunicator::CommsRequest_t> requests;
@@ -102,18 +135,24 @@ int main (int argc, char ** argv)
         }
         Grid.SendToRecvFromComplete(requests);
         Grid.Barrier();
-      }
         double stop=usecond();
+        t_time[i] = stop-start; // microseconds
+      }
 
-      double dbytes    = bytes;
-      double xbytes    = Nloop*dbytes*2.0*ncomm;
+      timestat.statistics(t_time);
+
+      double dbytes    = bytes*ppn;
+      double xbytes    = dbytes*2.0*ncomm;
       double rbytes    = xbytes;
       double bidibytes = xbytes+rbytes;
 
-      double time = stop-start; // microseconds
-      std::cout<<GridLogMessage << lat<<"\t\t"<<Ls<<"\t\t"<<bytes<<"\t\t"<<xbytes/time<<"\t\t"<<bidibytes/time<<std::endl;
+      std::cout<<GridLogMessage << std::setw(4) << lat<<"\t"<<Ls<<"\t"
+               <<std::setw(11) << bytes<< std::fixed << std::setprecision(1) << std::setw(7)
+               <<std::right<< xbytes/timestat.mean<<" "<< xbytes*timestat.err/(timestat.mean*timestat.mean)<< " "
+               <<xbytes/timestat.max <<" "<< xbytes/timestat.min
+               << "\t\t"<<std::setw(7)<< bidibytes/timestat.mean<< " " << bidibytes*timestat.err/(timestat.mean*timestat.mean) << " "
+               << bidibytes/timestat.max << " " << bidibytes/timestat.min << std::endl;
     }
   }
@@ -121,8 +160,7 @@ int main (int argc, char ** argv)
   std::cout<<GridLogMessage << "===================================================================================================="<<std::endl;
   std::cout<<GridLogMessage << "= Benchmarking sequential halo exchange in "<<nmu<<" dimensions"<<std::endl;
   std::cout<<GridLogMessage << "===================================================================================================="<<std::endl;
-  std::cout<<GridLogMessage << " L "<<"\t\t"<<" Ls "<<"\t\t"<<"bytes"<<"\t\t"<<"MB/s uni"<<"\t\t"<<"MB/s bidi"<<std::endl;
+  header();
 
   for(int lat=4;lat<=maxlat;lat+=4){
     for(int Ls=8;Ls<=32;Ls*=2){
@@ -130,6 +168,9 @@ int main (int argc, char ** argv)
       std::vector<int> latt_size ({lat,lat,lat,lat});
 
       GridCartesian Grid(latt_size,simd_layout,mpi_layout);
+      RealD Nrank = Grid._Nprocessors;
+      RealD Nnode = Grid.NodeCount();
+      RealD ppn = Nrank/Nnode;
 
       std::vector<std::vector<HalfSpinColourVectorD> > xbuf(8,std::vector<HalfSpinColourVectorD>(lat*lat*lat*Ls));
       std::vector<std::vector<HalfSpinColourVectorD> > rbuf(8,std::vector<HalfSpinColourVectorD>(lat*lat*lat*Ls));
@@ -138,8 +179,8 @@ int main (int argc, char ** argv)
       int ncomm;
       int bytes=lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD);
 
-      double start=usecond();
       for(int i=0;i<Nloop;i++){
+        double start=usecond();
 
         ncomm=0;
         for(int mu=0;mu<4;mu++){
@@ -178,27 +219,34 @@ int main (int argc, char ** argv)
           }
         }
         Grid.Barrier();
+        double stop=usecond();
+        t_time[i] = stop-start; // microseconds
       }
 
-      double stop=usecond();
-      double dbytes    = bytes;
-      double xbytes    = Nloop*dbytes*2.0*ncomm;
+      timestat.statistics(t_time);
+
+      double dbytes    = bytes*ppn;
+      double xbytes    = dbytes*2.0*ncomm;
       double rbytes    = xbytes;
       double bidibytes = xbytes+rbytes;
 
-      double time = stop-start;
-      std::cout<<GridLogMessage << lat<<"\t\t"<<Ls<<"\t\t"<<bytes<<"\t\t"<<xbytes/time<<"\t\t"<<bidibytes/time<<std::endl;
+      std::cout<<GridLogMessage << std::setw(4) << lat<<"\t"<<Ls<<"\t"
+               <<std::setw(11) << bytes<< std::fixed << std::setprecision(1) << std::setw(7)
+               <<std::right<< xbytes/timestat.mean<<" "<< xbytes*timestat.err/(timestat.mean*timestat.mean)<< " "
+               <<xbytes/timestat.max <<" "<< xbytes/timestat.min
+               << "\t\t"<<std::setw(7)<< bidibytes/timestat.mean<< " " << bidibytes*timestat.err/(timestat.mean*timestat.mean) << " "
+               << bidibytes/timestat.max << " " << bidibytes/timestat.min << std::endl;
     }
   }
 
+  Nloop=10;
+
   std::cout<<GridLogMessage << "===================================================================================================="<<std::endl;
   std::cout<<GridLogMessage << "= Benchmarking concurrent STENCIL halo exchange in "<<nmu<<" dimensions"<<std::endl;
   std::cout<<GridLogMessage << "===================================================================================================="<<std::endl;
-  std::cout<<GridLogMessage << " L "<<"\t\t"<<" Ls "<<"\t\t"<<"bytes"<<"\t\t"<<"MB/s uni"<<"\t\t"<<"MB/s bidi"<<std::endl;
+  header();
 
   for(int lat=4;lat<=maxlat;lat+=4){
     for(int Ls=8;Ls<=32;Ls*=2){
@@ -209,6 +257,9 @@ int main (int argc, char ** argv)
                                      lat*mpi_layout[3]});
 
       GridCartesian Grid(latt_size,simd_layout,mpi_layout);
+      RealD Nrank = Grid._Nprocessors;
+      RealD Nnode = Grid.NodeCount();
+      RealD ppn = Nrank/Nnode;
 
       std::vector<HalfSpinColourVectorD *> xbuf(8);
       std::vector<HalfSpinColourVectorD *> rbuf(8);
@@ -216,27 +267,33 @@ int main (int argc, char ** argv)
       for(int d=0;d<8;d++){
         xbuf[d] = (HalfSpinColourVectorD *)Grid.ShmBufferMalloc(lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD));
         rbuf[d] = (HalfSpinColourVectorD *)Grid.ShmBufferMalloc(lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD));
+        bzero((void *)xbuf[d],lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD));
+        bzero((void *)rbuf[d],lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD));
       }
 
       int ncomm;
       int bytes=lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD);
-      double start=usecond();
+      double dbytes;
       for(int i=0;i<Nloop;i++){
+        double start=usecond();
+        dbytes=0;
+        ncomm=0;
 
         std::vector<CartesianCommunicator::CommsRequest_t> requests;
-        ncomm=0;
         for(int mu=0;mu<4;mu++){
 
           if (mpi_layout[mu]>1 ) {
 
             ncomm++;
             int comm_proc=1;
             int xmit_to_rank;
             int recv_from_rank;
 
             Grid.ShiftedRanks(mu,comm_proc,xmit_to_rank,recv_from_rank);
+            dbytes+=
             Grid.StencilSendToRecvFromBegin(requests,
                                             (void *)&xbuf[mu][0],
                                             xmit_to_rank,
@@ -247,6 +304,7 @@ int main (int argc, char ** argv)
             comm_proc = mpi_layout[mu]-1;
 
             Grid.ShiftedRanks(mu,comm_proc,xmit_to_rank,recv_from_rank);
+            dbytes+=
             Grid.StencilSendToRecvFromBegin(requests,
                                             (void *)&xbuf[mu+4][0],
                                             xmit_to_rank,
@@ -258,28 +316,34 @@ int main (int argc, char ** argv)
         }
         Grid.StencilSendToRecvFromComplete(requests);
         Grid.Barrier();
-      }
         double stop=usecond();
+        t_time[i] = stop-start; // microseconds
+      }
 
-      double dbytes    = bytes;
-      double xbytes    = Nloop*dbytes*2.0*ncomm;
-      double rbytes    = xbytes;
-      double bidibytes = xbytes+rbytes;
-      double time = stop-start; // microseconds
-      std::cout<<GridLogMessage << lat<<"\t\t"<<Ls<<"\t\t"<<bytes<<"\t\t"<<xbytes/time<<"\t\t"<<bidibytes/time<<std::endl;
+      timestat.statistics(t_time);
+
+      dbytes=dbytes*ppn;
+      double xbytes    = dbytes*0.5;
+      double rbytes    = dbytes*0.5;
+      double bidibytes = dbytes;
+
+      std::cout<<GridLogMessage << std::setw(4) << lat<<"\t"<<Ls<<"\t"
+               <<std::setw(11) << bytes<< std::fixed << std::setprecision(1) << std::setw(7)
+               <<std::right<< xbytes/timestat.mean<<" "<< xbytes*timestat.err/(timestat.mean*timestat.mean)<< " "
+               <<xbytes/timestat.max <<" "<< xbytes/timestat.min
+               << "\t\t"<<std::setw(7)<< bidibytes/timestat.mean<< " " << bidibytes*timestat.err/(timestat.mean*timestat.mean) << " "
+               << bidibytes/timestat.max << " " << bidibytes/timestat.min << std::endl;
     }
   }
 
+  Nloop=100;
+
   std::cout<<GridLogMessage << "===================================================================================================="<<std::endl;
   std::cout<<GridLogMessage << "= Benchmarking sequential STENCIL halo exchange in "<<nmu<<" dimensions"<<std::endl;
   std::cout<<GridLogMessage << "===================================================================================================="<<std::endl;
-  std::cout<<GridLogMessage << " L "<<"\t\t"<<" Ls "<<"\t\t"<<"bytes"<<"\t\t"<<"MB/s uni"<<"\t\t"<<"MB/s bidi"<<std::endl;
+  header();
 
   for(int lat=4;lat<=maxlat;lat+=4){
     for(int Ls=8;Ls<=32;Ls*=2){
@@ -290,6 +354,9 @@ int main (int argc, char ** argv)
                                      lat*mpi_layout[3]});
 
       GridCartesian Grid(latt_size,simd_layout,mpi_layout);
+      RealD Nrank = Grid._Nprocessors;
+      RealD Nnode = Grid.NodeCount();
+      RealD ppn = Nrank/Nnode;
 
       std::vector<HalfSpinColourVectorD *> xbuf(8);
       std::vector<HalfSpinColourVectorD *> rbuf(8);
@@ -297,16 +364,18 @@ int main (int argc, char ** argv)
       for(int d=0;d<8;d++){
         xbuf[d] = (HalfSpinColourVectorD *)Grid.ShmBufferMalloc(lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD));
         rbuf[d] = (HalfSpinColourVectorD *)Grid.ShmBufferMalloc(lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD));
+        bzero((void *)xbuf[d],lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD));
+        bzero((void *)rbuf[d],lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD));
       }
 
       int ncomm;
       int bytes=lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD);
+      double dbytes;
 
-      double start=usecond();
       for(int i=0;i<Nloop;i++){
+        double start=usecond();
 
         std::vector<CartesianCommunicator::CommsRequest_t> requests;
+        dbytes=0;
         ncomm=0;
 
         for(int mu=0;mu<4;mu++){
@@ -318,6 +387,7 @@ int main (int argc, char ** argv)
            int recv_from_rank;
 
            Grid.ShiftedRanks(mu,comm_proc,xmit_to_rank,recv_from_rank);
+            dbytes+=
            Grid.StencilSendToRecvFromBegin(requests,
                                            (void *)&xbuf[mu][0],
                                            xmit_to_rank,
@@ -330,6 +400,7 @@ int main (int argc, char ** argv)
            comm_proc = mpi_layout[mu]-1;
 
            Grid.ShiftedRanks(mu,comm_proc,xmit_to_rank,recv_from_rank);
+            dbytes+=
            Grid.StencilSendToRecvFromBegin(requests,
                                            (void *)&xbuf[mu+4][0],
                                            xmit_to_rank,
@@ -342,18 +413,26 @@ int main (int argc, char ** argv)
          }
        }
        Grid.Barrier();
+        double stop=usecond();
+        t_time[i] = stop-start; // microseconds
      }
 
-      double stop=usecond();
-      double dbytes    = bytes;
-      double xbytes    = Nloop*dbytes*2.0*ncomm;
-      double rbytes    = xbytes;
-      double bidibytes = xbytes+rbytes;
-      double time = stop-start; // microseconds
-      std::cout<<GridLogMessage << lat<<"\t\t"<<Ls<<"\t\t"<<bytes<<"\t\t"<<xbytes/time<<"\t\t"<<bidibytes/time<<std::endl;
+      timestat.statistics(t_time);
+
+      dbytes=dbytes*ppn;
+      double xbytes    = dbytes*0.5;
+      double rbytes    = dbytes*0.5;
+      double bidibytes = dbytes;
+
+      std::cout<<GridLogMessage << std::setw(4) << lat<<"\t"<<Ls<<"\t"
+               <<std::setw(11) << bytes<< std::fixed << std::setprecision(1) << std::setw(7)
+               <<std::right<< xbytes/timestat.mean<<" "<< xbytes*timestat.err/(timestat.mean*timestat.mean)<< " "
+               <<xbytes/timestat.max <<" "<< xbytes/timestat.min
+               << "\t\t"<<std::setw(7)<< bidibytes/timestat.mean<< " " << bidibytes*timestat.err/(timestat.mean*timestat.mean) << " "
+               << bidibytes/timestat.max << " " << bidibytes/timestat.min << std::endl;
    }
  }
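The error columns printed by the new code follow from first-order error propagation: for a rate r = B/t with mean time t and standard error dt (as computed by time_statistics above), dr = B*dt/t^2, which is the xbytes*timestat.err/(timestat.mean*timestat.mean) term in the output. A minimal standalone sketch (the payload size and sample timings below are invented for illustration; note that bytes per microsecond is numerically MB/s, which is why the benchmark divides bytes by microseconds directly):

    #include <cmath>
    #include <iostream>
    #include <numeric>
    #include <vector>

    int main(void) {
      // Hypothetical per-iteration timings in microseconds.
      std::vector<double> t = {101.0, 99.0, 103.0, 97.0, 100.0};
      double bytes_moved = 1.0e6;                  // assumed payload per iteration

      // Mean and standard error of the mean, as in time_statistics::statistics.
      double mean = std::accumulate(t.begin(), t.end(), 0.0) / t.size();
      double sq_sum = 0.0;
      for (double x : t) sq_sum += (x - mean) * (x - mean);
      double err = std::sqrt(sq_sum / (t.size() * (t.size() - 1)));

      // r = B/t  =>  dr = B*err/mean^2, the error term printed above.
      double rate     = bytes_moved / mean;        // MB/s (bytes per microsecond)
      double rate_err = bytes_moved * err / (mean * mean);
      std::cout << rate << " +/- " << rate_err << " MB/s" << std::endl;
      return 0;
    }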

View File

@@ -55,8 +55,8 @@ int main (int argc, char ** argv)
   std::cout<<GridLogMessage << "===================================================================================================="<<std::endl;
   std::cout<<GridLogMessage << " L "<<"\t\t"<<"bytes"<<"\t\t\t"<<"GB/s"<<"\t\t"<<"Gflop/s"<<"\t\t seconds"<<std::endl;
   std::cout<<GridLogMessage << "----------------------------------------------------------"<<std::endl;
-  uint64_t lmax=44;
-#define NLOOP (1*lmax*lmax*lmax*lmax/vol)
+  uint64_t lmax=64;
+#define NLOOP (100*lmax*lmax*lmax*lmax/vol)
 
   for(int lat=4;lat<=lmax;lat+=4){
     std::vector<int> latt_size ({lat*mpi_layout[0],lat*mpi_layout[1],lat*mpi_layout[2],lat*mpi_layout[3]});

View File

@@ -35,9 +35,9 @@ using namespace Grid::QCD;
 int main (int argc, char ** argv)
 {
   Grid_init(&argc,&argv);
-#define LMAX (32)
+#define LMAX (64)
 
-  int Nloop=200;
+  int Nloop=20;
 
   std::vector<int> simd_layout = GridDefaultSimd(Nd,vComplex::Nsimd());
   std::vector<int> mpi_layout  = GridDefaultMpi();

View File

@@ -1,4 +1,4 @@
-]#!/usr/bin/env bash
+#!/usr/bin/env bash
 
 EIGEN_URL='http://bitbucket.org/eigen/eigen/get/3.3.3.tar.bz2'

View File

@@ -27,7 +27,7 @@ AX_GXX_VERSION
 AC_DEFINE_UNQUOTED([GXX_VERSION],["$GXX_VERSION"],
       [version of g++ that will compile the code])
 
-CXXFLAGS="-O3 $CXXFLAGS"
+CXXFLAGS="-g $CXXFLAGS"
 
 ############### Checks for typedefs, structures, and compiler characteristics
 
@@ -184,6 +184,10 @@ AC_SEARCH_LIBS([limeCreateReader], [lime],
                 In order to use ILGG file format please install or provide the correct path to your installation
                 Info at: http://usqcd.jlab.org/usqcd-docs/c-lime/)])
 
+AC_SEARCH_LIBS([crc32], [z],
+               [AC_DEFINE([HAVE_ZLIB], [1], [Define to 1 if you have the `LIBZ' library])]
+               [have_zlib=true],
+               [AC_MSG_ERROR(zlib library was not found in your system.)])
+
 AC_SEARCH_LIBS([H5Fopen], [hdf5_cpp],
                [AC_DEFINE([HAVE_HDF5], [1], [Define to 1 if you have the `HDF5' library])]

View File

@@ -48,7 +48,8 @@ public:
                                     std::string, gauge,
                                     unsigned int, Ls,
                                     double      , mass,
-                                    double      , M5);
+                                    double      , M5,
+                                    std::string , boundary);
 };
 
 template <typename FImpl>
@@ -116,14 +117,19 @@ void TDWF<FImpl>::execute(void)
                  << par().mass << ", M5= " << par().M5 << " and Ls= "
                  << par().Ls << " using gauge field '" << par().gauge << "'"
                  << std::endl;
+    LOG(Message) << "Fermion boundary conditions: " << par().boundary
+                 << std::endl;
     env().createGrid(par().Ls);
     auto &U    = *env().template getObject<LatticeGaugeField>(par().gauge);
     auto &g4   = *env().getGrid();
     auto &grb4 = *env().getRbGrid();
     auto &g5   = *env().getGrid(par().Ls);
     auto &grb5 = *env().getRbGrid(par().Ls);
+    std::vector<Complex> boundary = strToVec<Complex>(par().boundary);
+    typename DomainWallFermion<FImpl>::ImplParams implParams(boundary);
     FMat *fMatPt = new DomainWallFermion<FImpl>(U, g5, grb5, g4, grb4,
-                                                par().mass, par().M5);
+                                                par().mass, par().M5,
+                                                implParams);
     env().setObject(getName(), fMatPt);
 }
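The new boundary parameter (here and in the Wilson module below) is a single string of per-direction phases that strToVec<Complex> turns into the ImplParams boundary phases. A sketch of an intended value (the phases shown are an assumed example, periodic in space and antiperiodic in time, not taken from the diff):

    // Hypothetical parameter value: periodic in x,y,z, antiperiodic in t.
    std::string boundary = "1 1 1 -1";
    std::vector<Complex> phases = strToVec<Complex>(boundary);  // {1,1,1,-1}
    typename DomainWallFermion<FImpl>::ImplParams implParams(phases);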

View File

@@ -46,7 +46,8 @@ class WilsonPar: Serializable
 public:
     GRID_SERIALIZABLE_CLASS_MEMBERS(WilsonPar,
                                     std::string, gauge,
-                                    double     , mass);
+                                    double     , mass,
+                                    std::string, boundary);
 };
 
 template <typename FImpl>
@@ -112,10 +113,15 @@ void TWilson<FImpl>::execute()
 {
     LOG(Message) << "Setting up TWilson fermion matrix with m= " << par().mass
                  << " using gauge field '" << par().gauge << "'" << std::endl;
+    LOG(Message) << "Fermion boundary conditions: " << par().boundary
+                 << std::endl;
     auto &U      = *env().template getObject<LatticeGaugeField>(par().gauge);
     auto &grid   = *env().getGrid();
     auto &gridRb = *env().getRbGrid();
-    FMat *fMatPt = new WilsonFermion<FImpl>(U, grid, gridRb, par().mass);
+    std::vector<Complex> boundary = strToVec<Complex>(par().boundary);
+    typename WilsonFermion<FImpl>::ImplParams implParams(boundary);
+    FMat *fMatPt = new WilsonFermion<FImpl>(U, grid, gridRb, par().mass,
+                                            implParams);
     env().setObject(getName(), fMatPt);
 }

View File

@@ -131,12 +131,11 @@ std::vector<std::string> TMeson<FImpl1, FImpl2>::getOutput(void)
 template <typename FImpl1, typename FImpl2>
 void TMeson<FImpl1, FImpl2>::parseGammaString(std::vector<GammaPair> &gammaList)
 {
+    gammaList.clear();
     // Determine gamma matrices to insert at source/sink.
     if (par().gammas.compare("all") == 0)
     {
         // Do all contractions.
-        unsigned int n_gam = Ns * Ns;
-        gammaList.resize(n_gam*n_gam);
         for (unsigned int i = 1; i < Gamma::nGamma; i += 2)
         {
             for (unsigned int j = 1; j < Gamma::nGamma; j += 2)

View File

@@ -65,7 +65,7 @@ void TLoad::setup(void)
 // execution ///////////////////////////////////////////////////////////////////
 void TLoad::execute(void)
 {
-    NerscField header;
+    FieldMetaData header;
     std::string fileName = par().file + "."
                            + std::to_string(env().getTrajectory());
 
@@ -74,5 +74,5 @@ void TLoad::execute(void)
     LatticeGaugeField &U = *env().createLattice<LatticeGaugeField>(getName());
     NerscIO::readConfiguration(U, header, fileName);
     LOG(Message) << "NERSC header:" << std::endl;
-    dump_nersc_header(header, LOG(Message));
+    dump_meta_data(header, LOG(Message));
 }

View File

@@ -41,7 +41,9 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
 #include <Grid/GridCore.h>
 #include <Grid/GridQCDcore.h>
 #include <Grid/qcd/action/Action.h>
+#include <Grid/qcd/utils/GaugeFix.h>
 #include <Grid/qcd/smearing/Smearing.h>
+#include <Grid/parallelIO/MetaData.h>
 #include <Grid/qcd/hmc/HMC_aggregate.h>
 
 #endif

View File

@@ -7,6 +7,7 @@
 #include <cassert>
 #include <complex>
 #include <vector>
+#include <string>
 #include <iostream>
 #include <iomanip>
 #include <random>
@@ -18,6 +19,7 @@
 #include <ctime>
 #include <sys/time.h>
 #include <chrono>
+#include <zlib.h>
 
 ///////////////////
 // Grid config

View File

@@ -1,137 +0,0 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/algorithms/iterative/DenseMatrix.h
Copyright (C) 2015
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
Author: paboyle <paboyle@ph.ed.ac.uk>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#ifndef GRID_DENSE_MATRIX_H
#define GRID_DENSE_MATRIX_H
namespace Grid {
/////////////////////////////////////////////////////////////
// Matrix untils
/////////////////////////////////////////////////////////////
template<class T> using DenseVector = std::vector<T>;
template<class T> using DenseMatrix = DenseVector<DenseVector<T> >;
template<class T> void Size(DenseVector<T> & vec, int &N)
{
N= vec.size();
}
template<class T> void Size(DenseMatrix<T> & mat, int &N,int &M)
{
N= mat.size();
M= mat[0].size();
}
template<class T> void SizeSquare(DenseMatrix<T> & mat, int &N)
{
int M; Size(mat,N,M);
assert(N==M);
}
template<class T> void Resize(DenseVector<T > & mat, int N) {
mat.resize(N);
}
template<class T> void Resize(DenseMatrix<T > & mat, int N, int M) {
mat.resize(N);
for(int i=0;i<N;i++){
mat[i].resize(M);
}
}
template<class T> void Fill(DenseMatrix<T> & mat, T&val) {
int N,M;
Size(mat,N,M);
for(int i=0;i<N;i++){
for(int j=0;j<M;j++){
mat[i][j] = val;
}}
}
/** Transpose of a matrix **/
template<class T> DenseMatrix<T> Transpose(DenseMatrix<T> & mat){
int N,M;
Size(mat,N,M);
DenseMatrix<T> C; Resize(C,M,N);
for(int i=0;i<M;i++){
for(int j=0;j<N;j++){
C[i][j] = mat[j][i];
}}
return C;
}
/** Set DenseMatrix to unit matrix **/
template<class T> void Unity(DenseMatrix<T> &A){
int N; SizeSquare(A,N);
for(int i=0;i<N;i++){
for(int j=0;j<N;j++){
if ( i==j ) A[i][j] = 1;
else A[i][j] = 0;
}
}
}
/** Add C * I to matrix **/
template<class T>
void PlusUnit(DenseMatrix<T> & A,T c){
int dim; SizeSquare(A,dim);
for(int i=0;i<dim;i++){A[i][i] = A[i][i] + c;}
}
/** return the Hermitian conjugate of matrix **/
template<class T>
DenseMatrix<T> HermitianConj(DenseMatrix<T> &mat){
int dim; SizeSquare(mat,dim);
DenseMatrix<T> C; Resize(C,dim,dim);
for(int i=0;i<dim;i++){
for(int j=0;j<dim;j++){
C[i][j] = conj(mat[j][i]);
}
}
return C;
}
/**Get a square submatrix**/
template <class T>
DenseMatrix<T> GetSubMtx(DenseMatrix<T> &A,int row_st, int row_end, int col_st, int col_end)
{
DenseMatrix<T> H; Resize(H,row_end - row_st,col_end-col_st);
for(int i = row_st; i<row_end; i++){
for(int j = col_st; j<col_end; j++){
H[i-row_st][j-col_st]=A[i][j];
}}
return H;
}
}
#include "Householder.h"
#include "Francis.h"
#endif

View File

@@ -1,525 +0,0 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/algorithms/iterative/Francis.h
Copyright (C) 2015
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#ifndef FRANCIS_H
#define FRANCIS_H
#include <cstdlib>
#include <string>
#include <cmath>
#include <iostream>
#include <sstream>
#include <stdexcept>
#include <fstream>
#include <complex>
#include <algorithm>
//#include <timer.h>
//#include <lapacke.h>
//#include <Eigen/Dense>
namespace Grid {
template <class T> int SymmEigensystem(DenseMatrix<T > &Ain, DenseVector<T> &evals, DenseMatrix<T> &evecs, RealD small);
template <class T> int Eigensystem(DenseMatrix<T > &Ain, DenseVector<T> &evals, DenseMatrix<T> &evecs, RealD small);
/**
Find the eigenvalues of an upper hessenberg matrix using the Francis QR algorithm.
H =
x x x x x x x x x
x x x x x x x x x
0 x x x x x x x x
0 0 x x x x x x x
0 0 0 x x x x x x
0 0 0 0 x x x x x
0 0 0 0 0 x x x x
0 0 0 0 0 0 x x x
0 0 0 0 0 0 0 x x
Factorization is P T P^H where T is upper triangular (mod cc blocks) and P is orthagonal/unitary.
**/
template <class T>
int QReigensystem(DenseMatrix<T> &Hin, DenseVector<T> &evals, DenseMatrix<T> &evecs, RealD small)
{
DenseMatrix<T> H = Hin;
int N ; SizeSquare(H,N);
int M = N;
Fill(evals,0);
Fill(evecs,0);
T s,t,x=0,y=0,z=0;
T u,d;
T apd,amd,bc;
DenseVector<T> p(N,0);
T nrm = Norm(H); ///DenseMatrix Norm
int n, m;
int e = 0;
int it = 0;
int tot_it = 0;
int l = 0;
int r = 0;
DenseMatrix<T> P; Resize(P,N,N); Unity(P);
DenseVector<int> trows(N,0);
/// Check if the matrix is really hessenberg, if not abort
RealD sth = 0;
for(int j=0;j<N;j++){
for(int i=j+2;i<N;i++){
sth = abs(H[i][j]);
if(sth > small){
std::cout << "Non hessenberg H = " << sth << " > " << small << std::endl;
exit(1);
}
}
}
do{
std::cout << "Francis QR Step N = " << N << std::endl;
/** Check for convergence
x x x x x
0 x x x x
0 0 x x x
0 0 x x x
0 0 0 0 x
for this matrix l = 4
**/
do{
l = Chop_subdiag(H,nrm,e,small);
r = 0; ///May have converged on more than one eval
///Single eval
if(l == N-1){
evals[e] = H[l][l];
N--; e++; r++; it = 0;
}
///RealD eval
if(l == N-2){
trows[l+1] = 1; ///Needed for UTSolve
apd = H[l][l] + H[l+1][l+1];
amd = H[l][l] - H[l+1][l+1];
bc = (T)4.0*H[l+1][l]*H[l][l+1];
evals[e] = (T)0.5*( apd + sqrt(amd*amd + bc) );
evals[e+1] = (T)0.5*( apd - sqrt(amd*amd + bc) );
N-=2; e+=2; r++; it = 0;
}
} while(r>0);
if(N ==0) break;
DenseVector<T > ck; Resize(ck,3);
DenseVector<T> v; Resize(v,3);
for(int m = N-3; m >= l; m--){
///Starting vector essentially random shift.
if(it%10 == 0 && N >= 3 && it > 0){
s = (T)1.618033989*( abs( H[N-1][N-2] ) + abs( H[N-2][N-3] ) );
t = (T)0.618033989*( abs( H[N-1][N-2] ) + abs( H[N-2][N-3] ) );
x = H[m][m]*H[m][m] + H[m][m+1]*H[m+1][m] - s*H[m][m] + t;
y = H[m+1][m]*(H[m][m] + H[m+1][m+1] - s);
z = H[m+1][m]*H[m+2][m+1];
}
///Starting vector implicit Q theorem
else{
s = (H[N-2][N-2] + H[N-1][N-1]);
t = (H[N-2][N-2]*H[N-1][N-1] - H[N-2][N-1]*H[N-1][N-2]);
x = H[m][m]*H[m][m] + H[m][m+1]*H[m+1][m] - s*H[m][m] + t;
y = H[m+1][m]*(H[m][m] + H[m+1][m+1] - s);
z = H[m+1][m]*H[m+2][m+1];
}
ck[0] = x; ck[1] = y; ck[2] = z;
if(m == l) break;
/** Some stupid thing from numerical recipies, seems to work**/
// PAB.. for heaven's sake quote page, purpose, evidence it works.
// what sort of comment is that!?!?!?
u=abs(H[m][m-1])*(abs(y)+abs(z));
d=abs(x)*(abs(H[m-1][m-1])+abs(H[m][m])+abs(H[m+1][m+1]));
if ((T)abs(u+d) == (T)abs(d) ){
l = m; break;
}
//if (u < small){l = m; break;}
}
if(it > 100000){
std::cout << "QReigensystem: bugger it got stuck after 100000 iterations" << std::endl;
std::cout << "got " << e << " evals " << l << " " << N << std::endl;
exit(1);
}
normalize(ck); ///Normalization cancels in PHP anyway
T beta;
Householder_vector<T >(ck, 0, 2, v, beta);
Householder_mult<T >(H,v,beta,0,l,l+2,0);
Householder_mult<T >(H,v,beta,0,l,l+2,1);
///Accumulate eigenvector
Householder_mult<T >(P,v,beta,0,l,l+2,1);
int sw = 0; ///Are we on the last row?
for(int k=l;k<N-2;k++){
x = H[k+1][k];
y = H[k+2][k];
z = (T)0.0;
if(k+3 <= N-1){
z = H[k+3][k];
} else{
sw = 1;
v[2] = (T)0.0;
}
ck[0] = x; ck[1] = y; ck[2] = z;
normalize(ck);
Householder_vector<T >(ck, 0, 2-sw, v, beta);
Householder_mult<T >(H,v, beta,0,k+1,k+3-sw,0);
Householder_mult<T >(H,v, beta,0,k+1,k+3-sw,1);
///Accumulate eigenvector
Householder_mult<T >(P,v, beta,0,k+1,k+3-sw,1);
}
it++;
tot_it++;
}while(N > 1);
N = evals.size();
///Annoying - UT solves in reverse order;
DenseVector<T> tmp; Resize(tmp,N);
for(int i=0;i<N;i++){
tmp[i] = evals[N-i-1];
}
evals = tmp;
UTeigenvectors(H, trows, evals, evecs);
for(int i=0;i<evals.size();i++){evecs[i] = P*evecs[i]; normalize(evecs[i]);}
return tot_it;
}
template <class T>
int my_Wilkinson(DenseMatrix<T> &Hin, DenseVector<T> &evals, DenseMatrix<T> &evecs, RealD small)
{
/**
Find the eigenvalues of an upper Hessenberg matrix using the Wilkinson QR algorithm.
H =
x x 0 0 0 0
x x x 0 0 0
0 x x x 0 0
0 0 x x x 0
0 0 0 x x x
0 0 0 0 x x
Factorization is P T P^H where T is upper triangular (mod cc blocks) and P is orthagonal/unitary. **/
return my_Wilkinson(Hin, evals, evecs, small, small);
}
template <class T>
int my_Wilkinson(DenseMatrix<T> &Hin, DenseVector<T> &evals, DenseMatrix<T> &evecs, RealD small, RealD tol)
{
int N; SizeSquare(Hin,N);
int M = N;
///I don't want to modify the input but matricies must be passed by reference
//Scale a matrix by its "norm"
//RealD Hnorm = abs( Hin.LargestDiag() ); H = H*(1.0/Hnorm);
DenseMatrix<T> H; H = Hin;
RealD Hnorm = abs(Norm(Hin));
H = H * (1.0 / Hnorm);
// TODO use openmp and memset
Fill(evals,0);
Fill(evecs,0);
T s, t, x = 0, y = 0, z = 0;
T u, d;
T apd, amd, bc;
DenseVector<T> p; Resize(p,N); Fill(p,0);
T nrm = Norm(H); ///DenseMatrix Norm
int n, m;
int e = 0;
int it = 0;
int tot_it = 0;
int l = 0;
int r = 0;
DenseMatrix<T> P; Resize(P,N,N);
Unity(P);
DenseVector<int> trows(N, 0);
/// Check if the matrix is really symm tridiag
RealD sth = 0;
for(int j = 0; j < N; ++j)
{
for(int i = j + 2; i < N; ++i)
{
if(abs(H[i][j]) > tol || abs(H[j][i]) > tol)
{
std::cout << "Non Tridiagonal H(" << i << ","<< j << ") = |" << Real( real( H[j][i] ) ) << "| > " << tol << std::endl;
std::cout << "Warning tridiagonalize and call again" << std::endl;
// exit(1); // see what is going on
//return;
}
}
}
do{
do{
//Jasper
//Check if the subdiagonal term is small enough (<small)
//if true then it is converged.
//check start from H.dim - e - 1
//How to deal with more than 2 are converged?
//What if Chop_symm_subdiag return something int the middle?
//--------------
l = Chop_symm_subdiag(H,nrm, e, small);
r = 0; ///May have converged on more than one eval
//Jasper
//In this case
// x x 0 0 0 0
// x x x 0 0 0
// 0 x x x 0 0
// 0 0 x x x 0
// 0 0 0 x x 0
// 0 0 0 0 0 x <- l
//--------------
///Single eval
if(l == N - 1)
{
evals[e] = H[l][l];
N--;
e++;
r++;
it = 0;
}
//Jasper
// x x 0 0 0 0
// x x x 0 0 0
// 0 x x x 0 0
// 0 0 x x 0 0
// 0 0 0 0 x x <- l
// 0 0 0 0 x x
//--------------
///RealD eval
if(l == N - 2)
{
trows[l + 1] = 1; ///Needed for UTSolve
apd = H[l][l] + H[l + 1][ l + 1];
amd = H[l][l] - H[l + 1][l + 1];
bc = (T) 4.0 * H[l + 1][l] * H[l][l + 1];
evals[e] = (T) 0.5 * (apd + sqrt(amd * amd + bc));
evals[e + 1] = (T) 0.5 * (apd - sqrt(amd * amd + bc));
N -= 2;
e += 2;
r++;
it = 0;
}
}while(r > 0);
//Jasper
//Already converged
//--------------
if(N == 0) break;
DenseVector<T> ck,v; Resize(ck,2); Resize(v,2);
for(int m = N - 3; m >= l; m--)
{
///Starting vector essentially random shift.
if(it%10 == 0 && N >= 3 && it > 0)
{
t = abs(H[N - 1][N - 2]) + abs(H[N - 2][N - 3]);
x = H[m][m] - t;
z = H[m + 1][m];
} else {
///Starting vector implicit Q theorem
d = (H[N - 2][N - 2] - H[N - 1][N - 1]) * (T) 0.5;
t = H[N - 1][N - 1] - H[N - 1][N - 2] * H[N - 1][N - 2]
/ (d + sign(d) * sqrt(d * d + H[N - 1][N - 2] * H[N - 1][N - 2]));
x = H[m][m] - t;
z = H[m + 1][m];
}
//Jasper
//why it is here????
//-----------------------
if(m == l)
break;
u = abs(H[m][m - 1]) * (abs(y) + abs(z));
d = abs(x) * (abs(H[m - 1][m - 1]) + abs(H[m][m]) + abs(H[m + 1][m + 1]));
if ((T)abs(u + d) == (T)abs(d))
{
l = m;
break;
}
}
//Jasper
if(it > 1000000)
{
std::cout << "Wilkinson: bugger it got stuck after 100000 iterations" << std::endl;
std::cout << "got " << e << " evals " << l << " " << N << std::endl;
exit(1);
}
//
T s, c;
Givens_calc<T>(x, z, c, s);
Givens_mult<T>(H, l, l + 1, c, -s, 0);
Givens_mult<T>(H, l, l + 1, c, s, 1);
Givens_mult<T>(P, l, l + 1, c, s, 1);
//
for(int k = l; k < N - 2; ++k)
{
x = H.A[k + 1][k];
z = H.A[k + 2][k];
Givens_calc<T>(x, z, c, s);
Givens_mult<T>(H, k + 1, k + 2, c, -s, 0);
Givens_mult<T>(H, k + 1, k + 2, c, s, 1);
Givens_mult<T>(P, k + 1, k + 2, c, s, 1);
}
it++;
tot_it++;
}while(N > 1);
N = evals.size();
///Annoying - UT solves in reverse order;
DenseVector<T> tmp(N);
for(int i = 0; i < N; ++i)
tmp[i] = evals[N-i-1];
evals = tmp;
//
UTeigenvectors(H, trows, evals, evecs);
//UTSymmEigenvectors(H, trows, evals, evecs);
for(int i = 0; i < evals.size(); ++i)
{
evecs[i] = P * evecs[i];
normalize(evecs[i]);
evals[i] = evals[i] * Hnorm;
}
// // FIXME this is to test
// Hin.write("evecs3", evecs);
// Hin.write("evals3", evals);
// // check rsd
// for(int i = 0; i < M; i++) {
// vector<T> Aevec = Hin * evecs[i];
// RealD norm2(0.);
// for(int j = 0; j < M; j++) {
// norm2 += (Aevec[j] - evals[i] * evecs[i][j]) * (Aevec[j] - evals[i] * evecs[i][j]);
// }
// }
return tot_it;
}
template <class T>
void Hess(DenseMatrix<T > &A, DenseMatrix<T> &Q, int start){
/**
turn a matrix A =
x x x x x
x x x x x
x x x x x
x x x x x
x x x x x
into
x x x x x
x x x x x
0 x x x x
0 0 x x x
0 0 0 x x
with householder rotations
Slow.
*/
int N ; SizeSquare(A,N);
DenseVector<T > p; Resize(p,N); Fill(p,0);
for(int k=start;k<N-2;k++){
//cerr << "hess" << k << std::endl;
DenseVector<T > ck,v; Resize(ck,N-k-1); Resize(v,N-k-1);
for(int i=k+1;i<N;i++){ck[i-k-1] = A(i,k);} ///kth column
normalize(ck); ///Normalization cancels in PHP anyway
T beta;
Householder_vector<T >(ck, 0, ck.size()-1, v, beta); ///Householder vector
Householder_mult<T>(A,v,beta,start,k+1,N-1,0); ///A -> PA
Householder_mult<T >(A,v,beta,start,k+1,N-1,1); ///PA -> PAP^H
///Accumulate eigenvector
Householder_mult<T >(Q,v,beta,start,k+1,N-1,1); ///Q -> QP^H
}
/*for(int l=0;l<N-2;l++){
for(int k=l+2;k<N;k++){
A(0,k,l);
}
}*/
}
template <class T>
void Tri(DenseMatrix<T > &A, DenseMatrix<T> &Q, int start){
///Tridiagonalize a matrix
int N; SizeSquare(A,N);
Hess(A,Q,start);
/*for(int l=0;l<N-2;l++){
for(int k=l+2;k<N;k++){
A(0,l,k);
}
}*/
}
template <class T>
void ForceTridiagonal(DenseMatrix<T> &A){
///Tridiagonalize a matrix
int N ; SizeSquare(A,N);
for(int l=0;l<N-2;l++){
for(int k=l+2;k<N;k++){
A[l][k]=0;
A[k][l]=0;
}
}
}
template <class T>
int my_SymmEigensystem(DenseMatrix<T > &Ain, DenseVector<T> &evals, DenseVector<DenseVector<T> > &evecs, RealD small){
///Solve a symmetric eigensystem, not necessarily in tridiagonal form
int N; SizeSquare(Ain,N);
DenseMatrix<T > A; A = Ain;
DenseMatrix<T > Q; Resize(Q,N,N); Unity(Q);
Tri(A,Q,0);
int it = my_Wilkinson<T>(A, evals, evecs, small);
for(int k=0;k<N;k++){evecs[k] = Q*evecs[k];}
return it;
}
template <class T>
int Wilkinson(DenseMatrix<T> &Ain, DenseVector<T> &evals, DenseVector<DenseVector<T> > &evecs, RealD small){
return my_Wilkinson(Ain, evals, evecs, small);
}
template <class T>
int SymmEigensystem(DenseMatrix<T> &Ain, DenseVector<T> &evals, DenseVector<DenseVector<T> > &evecs, RealD small){
return my_SymmEigensystem(Ain, evals, evecs, small);
}
template <class T>
int Eigensystem(DenseMatrix<T > &Ain, DenseVector<T> &evals, DenseVector<DenseVector<T> > &evecs, RealD small){
///Solve a general eigensystem, not necessarily in tridiagonal form
int N = Ain.dim;
DenseMatrix<T > A(N); A = Ain;
DenseMatrix<T > Q(N);Q.Unity();
Hess(A,Q,0);
int it = QReigensystem<T>(A, evals, evecs, small);
for(int k=0;k<N;k++){evecs[k] = Q*evecs[k];}
return it;
}
}
#endif

View File

@@ -1,242 +0,0 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/algorithms/iterative/Householder.h
Copyright (C) 2015
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#ifndef HOUSEHOLDER_H
#define HOUSEHOLDER_H
#define TIMER(A) std::cout << GridLogMessage << __FUNC__ << " file "<< __FILE__ <<" line " << __LINE__ << std::endl;
#define ENTER() std::cout << GridLogMessage << "ENTRY "<<__FUNC__ << " file "<< __FILE__ <<" line " << __LINE__ << std::endl;
#define LEAVE() std::cout << GridLogMessage << "EXIT "<<__FUNC__ << " file "<< __FILE__ <<" line " << __LINE__ << std::endl;
#include <cstdlib>
#include <string>
#include <cmath>
#include <iostream>
#include <sstream>
#include <stdexcept>
#include <fstream>
#include <complex>
#include <algorithm>
namespace Grid {
/** Comparison function for finding the max element in a vector **/
template <class T> bool cf(T i, T j) {
return abs(i) < abs(j);
}
/**
Calculate a real Givens angle
**/
template <class T> inline void Givens_calc(T y, T z, T &c, T &s){
RealD mz = (RealD)abs(z);
if(mz==0.0){
c = 1; s = 0;
}
if(mz >= (RealD)abs(y)){
T t = -y/z;
s = (T)1.0 / sqrt ((T)1.0 + t * t);
c = s * t;
} else {
T t = -z/y;
c = (T)1.0 / sqrt ((T)1.0 + t * t);
s = c * t;
}
}
template <class T> inline void Givens_mult(DenseMatrix<T> &A, int i, int k, T c, T s, int dir)
{
int q ; SizeSquare(A,q);
if(dir == 0){
for(int j=0;j<q;j++){
T nu = A[i][j];
T w = A[k][j];
A[i][j] = (c*nu + s*w);
A[k][j] = (-s*nu + c*w);
}
}
if(dir == 1){
for(int j=0;j<q;j++){
T nu = A[j][i];
T w = A[j][k];
A[j][i] = (c*nu - s*w);
A[j][k] = (s*nu + c*w);
}
}
}
/**
from input = x;
Compute the complex Householder vector, v, such that
P = (I - b v transpose(v) )
b = 2/v.v
P | x | | x | k = 0
| x | | 0 |
| x | = | 0 |
| x | | 0 | j = 3
| x | | x |
These are the "Unreduced" Householder vectors.
**/
template <class T> inline void Householder_vector(DenseVector<T> input, int k, int j, DenseVector<T> &v, T &beta)
{
int N ; Size(input,N);
T m = *max_element(input.begin() + k, input.begin() + j + 1, cf<T> );
if(abs(m) > 0.0){
T alpha = 0;
for(int i=k; i<j+1; i++){
v[i] = input[i]/m;
alpha = alpha + v[i]*conj(v[i]);
}
alpha = sqrt(alpha);
beta = (T)1.0/(alpha*(alpha + abs(v[k]) ));
if(abs(v[k]) > 0.0) v[k] = v[k] + (v[k]/abs(v[k]))*alpha;
else v[k] = -alpha;
} else{
for(int i=k; i<j+1; i++){
v[i] = 0.0;
}
}
}
/**
from input = x;
Compute the complex Householder vector, v, such that
P = (I - b v transpose(v) )
b = 2/v.v
Px = alpha*e_dir
These are the "Unreduced" Householder vectors.
**/
template <class T> inline void Householder_vector(DenseVector<T> input, int k, int j, int dir, DenseVector<T> &v, T &beta)
{
int N = input.size();
T m = *max_element(input.begin() + k, input.begin() + j + 1, cf);
if(abs(m) > 0.0){
T alpha = 0;
for(int i=k; i<j+1; i++){
v[i] = input[i]/m;
alpha = alpha + v[i]*conj(v[i]);
}
alpha = sqrt(alpha);
beta = 1.0/(alpha*(alpha + abs(v[dir]) ));
if(abs(v[dir]) > 0.0) v[dir] = v[dir] + (v[dir]/abs(v[dir]))*alpha;
else v[dir] = -alpha;
}else{
for(int i=k; i<j+1; i++){
v[i] = 0.0;
}
}
}
/**
Compute the product PA if trans = 0
AP if trans = 1
P = (I - b v transpose(v) )
b = 2/v.v
start at element l of matrix A
v is of length j - k + 1 of v are nonzero
**/
template <class T> inline void Householder_mult(DenseMatrix<T> &A , DenseVector<T> v, T beta, int l, int k, int j, int trans)
{
int N ; SizeSquare(A,N);
if(abs(beta) > 0.0){
for(int p=l; p<N; p++){
T s = 0;
if(trans==0){
for(int i=k;i<j+1;i++) s += conj(v[i-k])*A[i][p];
s *= beta;
for(int i=k;i<j+1;i++){ A[i][p] = A[i][p]-s*conj(v[i-k]);}
} else {
for(int i=k;i<j+1;i++){ s += conj(v[i-k])*A[p][i];}
s *= beta;
for(int i=k;i<j+1;i++){ A[p][i]=A[p][i]-s*conj(v[i-k]);}
}
}
}
}
/**
Compute the product PA if trans = 0
AP if trans = 1
P = (I - b v transpose(v) )
b = 2/v.v
start at element l of matrix A
v is of length j - k + 1 of v are nonzero
A is tridiagonal
**/
template <class T> inline void Householder_mult_tri(DenseMatrix<T> &A , DenseVector<T> v, T beta, int l, int M, int k, int j, int trans)
{
if(abs(beta) > 0.0){
int N ; SizeSquare(A,N);
DenseMatrix<T> tmp; Resize(tmp,N,N); Fill(tmp,0);
T s;
for(int p=l; p<M; p++){
s = 0;
if(trans==0){
for(int i=k;i<j+1;i++) s = s + conj(v[i-k])*A[i][p];
}else{
for(int i=k;i<j+1;i++) s = s + v[i-k]*A[p][i];
}
s = beta*s;
if(trans==0){
for(int i=k;i<j+1;i++) tmp[i][p] = tmp(i,p) - s*v[i-k];
}else{
for(int i=k;i<j+1;i++) tmp[p][i] = tmp[p][i] - s*conj(v[i-k]);
}
}
for(int p=l; p<M; p++){
if(trans==0){
for(int i=k;i<j+1;i++) A[i][p] = A[i][p] + tmp[i][p];
}else{
for(int i=k;i<j+1;i++) A[p][i] = A[p][i] + tmp[p][i];
}
}
}
}
}
#endif

View File

@ -33,6 +33,8 @@ directory
namespace Grid {
enum BlockCGtype { BlockCG, BlockCGrQ, CGmultiRHS };

//////////////////////////////////////////////////////////////////////////
// Block conjugate gradient. Dimension zero should be the block direction
//////////////////////////////////////////////////////////////////////////
@ -40,25 +42,273 @@ template <class Field>
class BlockConjugateGradient : public OperatorFunction<Field> {
 public:
  typedef typename Field::scalar_type scomplex;

  int blockDim ;
  int Nblock;

  BlockCGtype CGtype;
  bool ErrorOnNoConverge;  // throw an assert when the CG fails to converge.
                           // Defaults true.
  RealD Tolerance;
  Integer MaxIterations;
  Integer IterationsToComplete; //Number of iterations the CG took to finish. Filled in upon completion

  BlockConjugateGradient(BlockCGtype cgtype,int _Orthog,RealD tol, Integer maxit, bool err_on_no_conv = true)
    : Tolerance(tol), CGtype(cgtype), blockDim(_Orthog), MaxIterations(maxit), ErrorOnNoConverge(err_on_no_conv)
  {};
////////////////////////////////////////////////////////////////////////////////////////////////////
// Thin QR factorisation (google it)
////////////////////////////////////////////////////////////////////////////////////////////////////
void ThinQRfact (Eigen::MatrixXcd &m_rr,
Eigen::MatrixXcd &C,
Eigen::MatrixXcd &Cinv,
Field & Q,
const Field & R)
{
int Orthog = blockDim; // First dimension is block dim; this is an assumption
////////////////////////////////////////////////////////////////////////////////////////////////////
//Dimensions
// R_{ferm x Nblock} = Q_{ferm x Nblock} x C_{Nblock x Nblock} -> ferm x Nblock
//
// Rdag R = m_rr = Herm = L L^dag <-- Cholesky decomposition (LLT routine in Eigen)
//
// Q C = R => Q = R C^{-1}
//
// Want Ident = Q^dag Q = C^{-dag} R^dag R C^{-1} = C^{-dag} L L^dag C^{-1} = 1_{Nblock x Nblock}
//
// Set C = L^{dag}, and then Q^dag Q = ident
//
// Checks:
// Cdag C = Rdag R ; passes.
// QdagQ = 1 ; passes
////////////////////////////////////////////////////////////////////////////////////////////////////
sliceInnerProductMatrix(m_rr,R,R,Orthog);
////////////////////////////////////////////////////////////////////////////////////////////////////
// Cholesky from Eigen
// There exists an LDLT variant that is documented as more stable
////////////////////////////////////////////////////////////////////////////////////////////////////
Eigen::MatrixXcd L = m_rr.llt().matrixL();
C = L.adjoint();
Cinv = C.inverse();
////////////////////////////////////////////////////////////////////////////////////////////////////
// Q = R C^{-1}
//
// Q_j = R_i Cinv(i,j)
//
// NB maddMatrix conventions are Right multiplication X[j] a[j,i] already
////////////////////////////////////////////////////////////////////////////////////////////////////
// FIXME:: make a sliceMulMatrix to avoid zero vector
sliceMulMatrix(Q,Cinv,R,Orthog);
}
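The same factorisation can be checked densely with Eigen alone (a sketch under the conventions above; none of it is Grid API):

#include <Eigen/Dense>
#include <iostream>
int main() {
  const int Nferm = 100, Nblock = 4;
  Eigen::MatrixXcd R    = Eigen::MatrixXcd::Random(Nferm, Nblock);
  Eigen::MatrixXcd m_rr = R.adjoint() * R;            // Hermitian, pos. def.
  Eigen::MatrixXcd L    = m_rr.llt().matrixL();       // Cholesky: m_rr = L L^dag
  Eigen::MatrixXcd C    = L.adjoint();
  Eigen::MatrixXcd Q    = R * C.inverse();            // Q = R C^{-1}
  std::cout << "|QdagQ - 1| = "
            << (Q.adjoint()*Q - Eigen::MatrixXcd::Identity(Nblock,Nblock)).norm()
            << "  |QC - R| = " << (Q*C - R).norm() << std::endl;
}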
////////////////////////////////////////////////////////////////////////////////////////////////////
// Call one of several implementations
////////////////////////////////////////////////////////////////////////////////////////////////////
void operator()(LinearOperatorBase<Field> &Linop, const Field &Src, Field &Psi)
{
  if ( CGtype == BlockCGrQ ) {
BlockCGrQsolve(Linop,Src,Psi);
} else if (CGtype == BlockCG ) {
BlockCGsolve(Linop,Src,Psi);
} else if (CGtype == CGmultiRHS ) {
CGmultiRHSsolve(Linop,Src,Psi);
} else {
assert(0);
}
}
////////////////////////////////////////////////////////////////////////////
// BlockCGrQ implementation:
//--------------------------
// X is guess/Solution
// B is RHS
// Solve A X_i = B_i ; i refers to Nblock index
////////////////////////////////////////////////////////////////////////////
void BlockCGrQsolve(LinearOperatorBase<Field> &Linop, const Field &B, Field &X)
{
int Orthog = blockDim; // First dimension is block dim; this is an assumption
Nblock = B._grid->_fdimensions[Orthog];
std::cout<<GridLogMessage<<" Block Conjugate Gradient : Orthog "<<Orthog<<" Nblock "<<Nblock<<std::endl;
X.checkerboard = B.checkerboard;
conformable(X, B);
Field tmp(B);
Field Q(B);
Field D(B);
Field Z(B);
Field AD(B);
Eigen::MatrixXcd m_DZ = Eigen::MatrixXcd::Identity(Nblock,Nblock);
Eigen::MatrixXcd m_M = Eigen::MatrixXcd::Identity(Nblock,Nblock);
Eigen::MatrixXcd m_rr = Eigen::MatrixXcd::Zero(Nblock,Nblock);
Eigen::MatrixXcd m_C = Eigen::MatrixXcd::Zero(Nblock,Nblock);
Eigen::MatrixXcd m_Cinv = Eigen::MatrixXcd::Zero(Nblock,Nblock);
Eigen::MatrixXcd m_S = Eigen::MatrixXcd::Zero(Nblock,Nblock);
Eigen::MatrixXcd m_Sinv = Eigen::MatrixXcd::Zero(Nblock,Nblock);
Eigen::MatrixXcd m_tmp = Eigen::MatrixXcd::Identity(Nblock,Nblock);
Eigen::MatrixXcd m_tmp1 = Eigen::MatrixXcd::Identity(Nblock,Nblock);
// Initial residual computation & set up
std::vector<RealD> residuals(Nblock);
std::vector<RealD> ssq(Nblock);
sliceNorm(ssq,B,Orthog);
RealD sssum=0;
for(int b=0;b<Nblock;b++) sssum+=ssq[b];
sliceNorm(residuals,B,Orthog);
for(int b=0;b<Nblock;b++){ assert(std::isnan(residuals[b])==0); }
sliceNorm(residuals,X,Orthog);
for(int b=0;b<Nblock;b++){ assert(std::isnan(residuals[b])==0); }
/************************************************************************
* Block conjugate gradient rQ (Sebastien Birk Thesis, after Dubrulle 2001)
************************************************************************
* Dimensions:
*
* X,B==(Nferm x Nblock)
* A==(Nferm x Nferm)
*
* Nferm = Nspin x Ncolour x Ncomplex x Nlattice_site
*
* QC = R = B-AX, D = Q ; QC => Thin QR factorisation (google it)
* for k:
* Z = AD
* M = [D^dag Z]^{-1}
* X = X + D MC
* QS = Q - ZM
* D = Q + D S^dag
* C = S C
*/
///////////////////////////////////////
// Initial block: initial search dir is guess
///////////////////////////////////////
std::cout << GridLogMessage<<"BlockCGrQ algorithm initialisation " <<std::endl;
//1. QC = R = B-AX, D = Q ; QC => Thin QR factorisation (google it)
Linop.HermOp(X, AD);
tmp = B - AD;
ThinQRfact (m_rr, m_C, m_Cinv, Q, tmp);
D=Q;
std::cout << GridLogMessage<<"BlockCGrQ computed initial residual and QR fact " <<std::endl;
///////////////////////////////////////
// Timers
///////////////////////////////////////
GridStopWatch sliceInnerTimer;
GridStopWatch sliceMaddTimer;
GridStopWatch QRTimer;
GridStopWatch MatrixTimer;
GridStopWatch SolverTimer;
SolverTimer.Start();
int k;
for (k = 1; k <= MaxIterations; k++) {
//3. Z = AD
MatrixTimer.Start();
Linop.HermOp(D, Z);
MatrixTimer.Stop();
//4. M = [D^dag Z]^{-1}
sliceInnerTimer.Start();
sliceInnerProductMatrix(m_DZ,D,Z,Orthog);
sliceInnerTimer.Stop();
m_M = m_DZ.inverse();
//5. X = X + D MC
m_tmp = m_M * m_C;
sliceMaddTimer.Start();
sliceMaddMatrix(X,m_tmp, D,X,Orthog);
sliceMaddTimer.Stop();
//6. QS = Q - ZM
sliceMaddTimer.Start();
sliceMaddMatrix(tmp,m_M,Z,Q,Orthog,-1.0);
sliceMaddTimer.Stop();
QRTimer.Start();
ThinQRfact (m_rr, m_S, m_Sinv, Q, tmp);
QRTimer.Stop();
//7. D = Q + D S^dag
m_tmp = m_S.adjoint();
sliceMaddTimer.Start();
sliceMaddMatrix(D,m_tmp,D,Q,Orthog);
sliceMaddTimer.Stop();
//8. C = S C
m_C = m_S*m_C;
/*********************
* convergence monitor
*********************
*/
m_rr = m_C.adjoint() * m_C;
RealD max_resid=0;
RealD rrsum=0;
RealD rr;
for(int b=0;b<Nblock;b++) {
rrsum+=real(m_rr(b,b));
rr = real(m_rr(b,b))/ssq[b];
if ( rr > max_resid ) max_resid = rr;
}
std::cout << GridLogIterative << "\titeration "<<k<<" rr_sum "<<rrsum<<" ssq_sum "<< sssum
<<" ave "<<std::sqrt(rrsum/sssum) << " max "<< max_resid <<std::endl;
if ( max_resid < Tolerance*Tolerance ) {
SolverTimer.Stop();
std::cout << GridLogMessage<<"BlockCGrQ converged in "<<k<<" iterations"<<std::endl;
for(int b=0;b<Nblock;b++){
std::cout << GridLogMessage<< "\t\tblock "<<b<<" computed resid "
<< std::sqrt(real(m_rr(b,b))/ssq[b])<<std::endl;
}
std::cout << GridLogMessage<<"\tMax residual is "<<std::sqrt(max_resid)<<std::endl;
Linop.HermOp(X, AD);
AD = AD-B;
std::cout << GridLogMessage <<"\t True residual is " << std::sqrt(norm2(AD)/norm2(B)) <<std::endl;
std::cout << GridLogMessage << "Time Breakdown "<<std::endl;
std::cout << GridLogMessage << "\tElapsed " << SolverTimer.Elapsed() <<std::endl;
std::cout << GridLogMessage << "\tMatrix " << MatrixTimer.Elapsed() <<std::endl;
std::cout << GridLogMessage << "\tInnerProd " << sliceInnerTimer.Elapsed() <<std::endl;
std::cout << GridLogMessage << "\tMaddMatrix " << sliceMaddTimer.Elapsed() <<std::endl;
std::cout << GridLogMessage << "\tThinQRfact " << QRTimer.Elapsed() <<std::endl;
IterationsToComplete = k;
return;
}
}
std::cout << GridLogMessage << "BlockConjugateGradient(rQ) did NOT converge" << std::endl;
if (ErrorOnNoConverge) assert(0);
IterationsToComplete = k;
}
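For orientation, steps 1-8 above can be prototyped densely in Eigen (a sketch assuming a Hermitian positive-definite A; thinQR here uses Eigen's HouseholderQR rather than the Cholesky trick, which serves the recursion equally well; none of these names are Grid API):

#include <Eigen/Dense>
#include <iostream>
using Mat = Eigen::MatrixXcd;

// Thin QR: T = Q C with Q^dag Q = 1 (Nblock x Nblock), C upper triangular.
static void thinQR(const Mat &T, Mat &Q, Mat &C) {
  Eigen::HouseholderQR<Mat> qr(T);
  int nb = T.cols();
  Q = qr.householderQ() * Mat::Identity(T.rows(), nb);
  C = qr.matrixQR().topRows(nb).triangularView<Eigen::Upper>();
}

int main() {
  const int N = 64, Nblock = 4;
  Mat t = Mat::Random(N, N);
  Mat A = t.adjoint() * t + N * Mat::Identity(N, N); // Hermitian pos. def.
  Mat B = Mat::Random(N, Nblock);
  Mat X = Mat::Zero(N, Nblock);

  Mat Q, C, S, Z, D, M;
  thinQR(B - A * X, Q, C);                   // 1. QC = R = B - AX
  D = Q;                                     //    D = Q
  for (int k = 1; k <= 100; ++k) {
    Z = A * D;                               // 3. Z = AD
    M = (D.adjoint() * Z).inverse();         // 4. M = [D^dag Z]^{-1}
    X += D * (M * C);                        // 5. X = X + D MC
    thinQR(Q - Z * M, Q, S);                 // 6. QS = Q - ZM
    D = Q + D * S.adjoint();                 // 7. D = Q + D S^dag
    C = S * C;                               // 8. C = S C
    // R^dag R = C^dag C, so the diagonal gives per-block residuals
    if ((C.adjoint() * C).diagonal().real().maxCoeff() < 1e-24) break;
  }
  std::cout << "true resid " << (B - A * X).norm() / B.norm() << std::endl;
}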
//////////////////////////////////////////////////////////////////////////
// Block conjugate gradient; Original O'Leary Dimension zero should be the block direction
//////////////////////////////////////////////////////////////////////////
void BlockCGsolve(LinearOperatorBase<Field> &Linop, const Field &Src, Field &Psi)
{
int Orthog = blockDim; // First dimension is block dim; this is an assumption
  Nblock = Src._grid->_fdimensions[Orthog];
  std::cout<<GridLogMessage<<" Block Conjugate Gradient : Orthog "<<Orthog<<" Nblock "<<Nblock<<std::endl;
@ -162,8 +412,9 @@ void operator()(LinearOperatorBase<Field> &Linop, const Field &Src, Field &Psi)
   *********************
   */
  RealD max_resid=0;
  RealD rr;
  for(int b=0;b<Nblock;b++){
    rr = real(m_rr(b,b))/ssq[b];
    if ( rr > max_resid ) max_resid = rr;
  }
@ -173,7 +424,8 @@ void operator()(LinearOperatorBase<Field> &Linop, const Field &Src, Field &Psi)
std::cout << GridLogMessage<<"BlockCG converged in "<<k<<" iterations"<<std::endl; std::cout << GridLogMessage<<"BlockCG converged in "<<k<<" iterations"<<std::endl;
for(int b=0;b<Nblock;b++){ for(int b=0;b<Nblock;b++){
std::cout << GridLogMessage<< "\t\tblock "<<b<<" resid "<< std::sqrt(real(m_rr(b,b))/ssq[b])<<std::endl; std::cout << GridLogMessage<< "\t\tblock "<<b<<" computed resid "
<< std::sqrt(real(m_rr(b,b))/ssq[b])<<std::endl;
} }
std::cout << GridLogMessage<<"\tMax residual is "<<std::sqrt(max_resid)<<std::endl; std::cout << GridLogMessage<<"\tMax residual is "<<std::sqrt(max_resid)<<std::endl;
@ -197,35 +449,13 @@ void operator()(LinearOperatorBase<Field> &Linop, const Field &Src, Field &Psi)
  if (ErrorOnNoConverge) assert(0);
  IterationsToComplete = k;
}
//////////////////////////////////////////////////////////////////////////
// multiRHS conjugate gradient. Dimension zero should be the block direction
// Use this for spread out across nodes
//////////////////////////////////////////////////////////////////////////
void CGmultiRHSsolve(LinearOperatorBase<Field> &Linop, const Field &Src, Field &Psi)
{
  int Orthog = blockDim; // First dimension is block dim
  Nblock = Src._grid->_fdimensions[Orthog];
  std::cout<<GridLogMessage<<"MultiRHS Conjugate Gradient : Orthog "<<Orthog<<" Nblock "<<Nblock<<std::endl;
@ -285,12 +515,10 @@ void operator()(LinearOperatorBase<Field> &Linop, const Field &Src, Field &Psi)
    MatrixTimer.Stop();

    // Alpha
    sliceInnerTimer.Start();
    sliceInnerProductVector(v_pAp,P,AP,Orthog);
    sliceInnerTimer.Stop();
    for(int b=0;b<Nblock;b++){
      v_alpha[b] = v_rr[b]/real(v_pAp[b]);
    }
@ -332,7 +560,7 @@ void operator()(LinearOperatorBase<Field> &Linop, const Field &Src, Field &Psi)
    std::cout << GridLogMessage<<"MultiRHS solver converged in " <<k<<" iterations"<<std::endl;
    for(int b=0;b<Nblock;b++){
      std::cout << GridLogMessage<< "\t\tBlock "<<b<<" computed resid "<< std::sqrt(v_rr[b]/ssq[b])<<std::endl;
    }
    std::cout << GridLogMessage<<"\tMax residual is "<<std::sqrt(max_resid)<<std::endl;
@ -358,9 +586,8 @@ void operator()(LinearOperatorBase<Field> &Linop, const Field &Src, Field &Psi)
    if (ErrorOnNoConverge) assert(0);
    IterationsToComplete = k;
  }
};
}
#endif

View File

@ -1,81 +0,0 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/algorithms/iterative/EigenSort.h
Copyright (C) 2015
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#ifndef GRID_EIGENSORT_H
#define GRID_EIGENSORT_H
namespace Grid {
/////////////////////////////////////////////////////////////
// Eigen sorter to begin with
/////////////////////////////////////////////////////////////
template<class Field>
class SortEigen {
private:
//hacking for testing for now
private:
static bool less_lmd(RealD left,RealD right){
return left > right;
}
static bool less_pair(std::pair<RealD,Field const*>& left,
std::pair<RealD,Field const*>& right){
return left.first > (right.first);
}
public:
void push(DenseVector<RealD>& lmd,
DenseVector<Field>& evec,int N) {
DenseVector<Field> cpy(lmd.size(),evec[0]._grid);
for(int i=0;i<lmd.size();i++) cpy[i] = evec[i];
DenseVector<std::pair<RealD, Field const*> > emod(lmd.size());
for(int i=0;i<lmd.size();++i)
emod[i] = std::pair<RealD,Field const*>(lmd[i],&cpy[i]);
partial_sort(emod.begin(),emod.begin()+N,emod.end(),less_pair);
typename DenseVector<std::pair<RealD, Field const*> >::iterator it = emod.begin();
for(int i=0;i<N;++i){
lmd[i]=it->first;
evec[i]=*(it->second);
++it;
}
}
void push(DenseVector<RealD>& lmd,int N) {
std::partial_sort(lmd.begin(),lmd.begin()+N,lmd.end(),less_lmd);
}
bool saturated(RealD lmd, RealD thrs) {
return fabs(lmd) > fabs(thrs);
}
};
}
#endif

File diff suppressed because it is too large

View File

@ -50,7 +50,6 @@ public:
GridBase(const std::vector<int> & processor_grid) : CartesianCommunicator(processor_grid) {};

// Physics Grid information.
std::vector<int> _simd_layout;// Which dimensions get relayed out over simd lanes.
std::vector<int> _fdimensions;// (full) Global dimensions of array prior to cb removal
@ -67,9 +66,8 @@ public:
std::vector<int> _slice_stride;
std::vector<int> _slice_nblock;

std::vector<int> _lstart; // local start of array in gcoors _processor_coor[d]*_ldimensions[d]
std::vector<int> _lend ;  // local end of array in gcoors _processor_coor[d]*_ldimensions[d]+_ldimensions_[d]-1

public:
@ -176,6 +174,7 @@ public:
inline int gSites(void) const { return _isites*_osites*_Nprocessors; };
inline int Nd    (void) const { return _ndimension;};

inline const std::vector<int> LocalStarts(void)       { return _lstart; };
inline const std::vector<int> &FullDimensions(void)   { return _fdimensions;};
inline const std::vector<int> &GlobalDimensions(void) { return _gdimensions;};
inline const std::vector<int> &LocalDimensions(void)  { return _ldimensions;};

View File

@ -76,6 +76,8 @@ public:
_ldimensions.resize(_ndimension);
_rdimensions.resize(_ndimension);
_simd_layout.resize(_ndimension);
_lstart.resize(_ndimension);
_lend.resize(_ndimension);

_ostride.resize(_ndimension);
_istride.resize(_ndimension);
@ -94,6 +96,8 @@ public:
// Use a reduced simd grid
_ldimensions[d]= _gdimensions[d]/_processors[d];  //local dimensions
_rdimensions[d]= _ldimensions[d]/_simd_layout[d]; //overdecomposition
_lstart[d] = _processor_coor[d]*_ldimensions[d];
_lend[d]   = _processor_coor[d]*_ldimensions[d]+_ldimensions[d]-1;
_osites *= _rdimensions[d];
_isites *= _simd_layout[d];

View File

@ -151,6 +151,8 @@ public:
_ldimensions.resize(_ndimension);
_rdimensions.resize(_ndimension);
_simd_layout.resize(_ndimension);
_lstart.resize(_ndimension);
_lend.resize(_ndimension);

_ostride.resize(_ndimension);
_istride.resize(_ndimension);
@ -169,6 +171,8 @@ public:
  _gdimensions[d] = _gdimensions[d]/2; // Remove a checkerboard
}
_ldimensions[d] = _gdimensions[d]/_processors[d];
_lstart[d] = _processor_coor[d]*_ldimensions[d];
_lend[d]   = _processor_coor[d]*_ldimensions[d]+_ldimensions[d]-1;

// Use a reduced simd grid
_simd_layout[d] = simd_layout[d];
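Worked example of the new bookkeeping (values assumed for illustration):

// global dimension 16 split over 4 ranks in direction d, on the rank
// with _processor_coor[d] == 2:
//   _ldimensions[d] = 16/4      = 4
//   _lstart[d]      = 2*4       = 8
//   _lend[d]        = 8 + 4 - 1 = 11   // this rank owns gcoors 8..11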

View File

@ -60,6 +60,7 @@ void CartesianCommunicator::ShmBufferFreeAll(void) {
/////////////////////////////////
// Grid information queries
/////////////////////////////////
int CartesianCommunicator::Dimensions(void)    { return _ndimension; };
int CartesianCommunicator::IsBoss(void)        { return _processor==0; };
int CartesianCommunicator::BossRank(void)      { return 0; };
int CartesianCommunicator::ThisRank(void)      { return _processor; };
@ -91,6 +92,7 @@ void CartesianCommunicator::GlobalSumVector(ComplexD *c,int N)
#if !defined( GRID_COMMS_MPI3) && !defined (GRID_COMMS_MPI3L)
int CartesianCommunicator::NodeCount(void)    { return ProcessorCount();};
int CartesianCommunicator::RankCount(void)    { return ProcessorCount();};
double CartesianCommunicator::StencilSendToRecvFromBegin(std::vector<CommsRequest_t> &list,
                                                         void *xmit,

View File

@ -148,6 +148,7 @@ class CartesianCommunicator {
int  RankFromProcessorCoor(std::vector<int> &coor);
void ProcessorCoorFromRank(int rank,std::vector<int> &coor);

int Dimensions(void) ;
int IsBoss(void) ;
int BossRank(void) ;
int ThisRank(void) ;
@ -155,6 +156,7 @@ class CartesianCommunicator {
const std::vector<int> & ProcessorGrid(void) ;
int ProcessorCount(void) ;
int NodeCount(void) ;
int RankCount(void) ;

////////////////////////////////////////////////////////////////////////////////
// very VERY rarely (Log, serial RNG) we need world without a grid
@ -175,6 +177,8 @@ class CartesianCommunicator {
void GlobalSumVector(ComplexF *c,int N);
void GlobalSum(ComplexD &c);
void GlobalSumVector(ComplexD *c,int N);
void GlobalXOR(uint32_t &);
void GlobalXOR(uint64_t &);

template<class obj> void GlobalSum(obj &o){
  typedef typename obj::scalar_type scalar_type;

View File

@ -83,6 +83,14 @@ void CartesianCommunicator::GlobalSum(uint64_t &u){
  int ierr=MPI_Allreduce(MPI_IN_PLACE,&u,1,MPI_UINT64_T,MPI_SUM,communicator);
  assert(ierr==0);
}
void CartesianCommunicator::GlobalXOR(uint32_t &u){
  int ierr=MPI_Allreduce(MPI_IN_PLACE,&u,1,MPI_UINT32_T,MPI_BXOR,communicator);
  assert(ierr==0);
}
void CartesianCommunicator::GlobalXOR(uint64_t &u){
  int ierr=MPI_Allreduce(MPI_IN_PLACE,&u,1,MPI_UINT64_T,MPI_BXOR,communicator);
  assert(ierr==0);
}
void CartesianCommunicator::GlobalSum(float &f){
  int ierr=MPI_Allreduce(MPI_IN_PLACE,&f,1,MPI_FLOAT,MPI_SUM,communicator);
  assert(ierr==0);
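The reduction used here can be exercised standalone (a sketch, not Grid code; run under an MPI launcher, e.g. mpirun -n 4):

#include <mpi.h>
#include <cstdint>
#include <cstdio>
int main(int argc, char **argv) {
  MPI_Init(&argc, &argv);
  int rank; MPI_Comm_rank(MPI_COMM_WORLD, &rank);
  uint32_t u = 0xdeadbeef ^ (uint32_t)rank;   // per-rank word
  // bitwise XOR across all ranks, in place on every rank
  MPI_Allreduce(MPI_IN_PLACE, &u, 1, MPI_UINT32_T, MPI_BXOR, MPI_COMM_WORLD);
  printf("rank %d xor-reduced %08x\n", rank, (unsigned)u);
  MPI_Finalize();
}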

View File

@ -65,6 +65,7 @@ std::vector<int> CartesianCommunicator::MyGroup;
std::vector<void *> CartesianCommunicator::ShmCommBufs;

int CartesianCommunicator::NodeCount(void)    { return GroupSize;};
int CartesianCommunicator::RankCount(void)    { return WorldSize;};

#undef FORCE_COMMS
@ -509,6 +510,14 @@ void CartesianCommunicator::GlobalSum(uint64_t &u){
  int ierr=MPI_Allreduce(MPI_IN_PLACE,&u,1,MPI_UINT64_T,MPI_SUM,communicator);
  assert(ierr==0);
}
void CartesianCommunicator::GlobalXOR(uint32_t &u){
  int ierr=MPI_Allreduce(MPI_IN_PLACE,&u,1,MPI_UINT32_T,MPI_BXOR,communicator);
  assert(ierr==0);
}
void CartesianCommunicator::GlobalXOR(uint64_t &u){
  int ierr=MPI_Allreduce(MPI_IN_PLACE,&u,1,MPI_UINT64_T,MPI_BXOR,communicator);
  assert(ierr==0);
}
void CartesianCommunicator::GlobalSum(float &f){
  int ierr=MPI_Allreduce(MPI_IN_PLACE,&f,1,MPI_FLOAT,MPI_SUM,communicator);
  assert(ierr==0);

View File

@ -59,6 +59,8 @@ void CartesianCommunicator::GlobalSum(double &){}
void CartesianCommunicator::GlobalSum(uint32_t &){}
void CartesianCommunicator::GlobalSum(uint64_t &){}
void CartesianCommunicator::GlobalSumVector(double *,int N){}
void CartesianCommunicator::GlobalXOR(uint32_t &){}
void CartesianCommunicator::GlobalXOR(uint64_t &){}

void CartesianCommunicator::SendRecvPacket(void *xmit,
                                           void *recv,

View File

@ -328,6 +328,8 @@ static void sliceMaddVector(Lattice<vobj> &R,std::vector<RealD> &a,const Lattice
typedef typename vobj::vector_type vector_type;
typedef typename vobj::tensor_reduced tensor_reduced;

scalar_type zscale(scale);

GridBase *grid  = X._grid;
int Nsimd =grid->Nsimd();
@ -353,7 +355,7 @@ static void sliceMaddVector(Lattice<vobj> &R,std::vector<RealD> &a,const Lattice
      grid->iCoorFromIindex(icoor,l);
      int ldx =r+icoor[orthogdim]*rd;
      scalar_type *as =(scalar_type *)&av;
      as[l] = scalar_type(a[ldx])*zscale;
    }
    tensor_reduced at; at=av;
@ -367,71 +369,6 @@ static void sliceMaddVector(Lattice<vobj> &R,std::vector<RealD> &a,const Lattice
  }
};
/*
template<class vobj>
static void sliceMaddVectorSlow (Lattice<vobj> &R,std::vector<RealD> &a,const Lattice<vobj> &X,const Lattice<vobj> &Y,
int Orthog,RealD scale=1.0)
{
// FIXME: Implementation is slow
// Best base the linear combination by constructing a
// set of vectors of size grid->_rdimensions[Orthog].
typedef typename vobj::scalar_object sobj;
typedef typename vobj::scalar_type scalar_type;
typedef typename vobj::vector_type vector_type;
int Nblock = X._grid->GlobalDimensions()[Orthog];
GridBase *FullGrid = X._grid;
GridBase *SliceGrid = makeSubSliceGrid(FullGrid,Orthog);
Lattice<vobj> Xslice(SliceGrid);
Lattice<vobj> Rslice(SliceGrid);
// If we based this on Cshift it would work for spread out
// but it would be even slower
for(int i=0;i<Nblock;i++){
ExtractSlice(Rslice,Y,i,Orthog);
ExtractSlice(Xslice,X,i,Orthog);
Rslice = Rslice + Xslice*(scale*a[i]);
InsertSlice(Rslice,R,i,Orthog);
}
};
template<class vobj>
static void sliceInnerProductVectorSlow( std::vector<ComplexD> & vec, const Lattice<vobj> &lhs,const Lattice<vobj> &rhs,int Orthog)
{
// FIXME: Implementation is slow
// Look at localInnerProduct implementation,
// and do inside a site loop with block strided iterators
typedef typename vobj::scalar_object sobj;
typedef typename vobj::scalar_type scalar_type;
typedef typename vobj::vector_type vector_type;
typedef typename vobj::tensor_reduced scalar;
typedef typename scalar::scalar_object scomplex;
int Nblock = lhs._grid->GlobalDimensions()[Orthog];
vec.resize(Nblock);
std::vector<scomplex> sip(Nblock);
Lattice<scalar> IP(lhs._grid);
IP=localInnerProduct(lhs,rhs);
sliceSum(IP,sip,Orthog);
for(int ss=0;ss<Nblock;ss++){
vec[ss] = TensorRemove(sip[ss]);
}
}
*/
//////////////////////////////////////////////////////////////////////////////////////////
// FIXME: Implementation is slow
// If we based this on Cshift it would work for spread out
// but it would be even slower
//
// Repeated extract slice is inefficient
//
// Best base the linear combination by constructing a
// set of vectors of size grid->_rdimensions[Orthog].
//////////////////////////////////////////////////////////////////////////////////////////
inline GridBase *makeSubSliceGrid(const GridBase *BlockSolverGrid,int Orthog)
{
  int NN    = BlockSolverGrid->_ndimension;
@ -451,7 +388,6 @@ inline GridBase *makeSubSliceGrid(const GridBase *BlockSolverGrid,int Or
  return (GridBase *)new GridCartesian(latt_phys,simd_phys,mpi_phys);
}

template<class vobj>
static void sliceMaddMatrix (Lattice<vobj> &R,Eigen::MatrixXcd &aa,const Lattice<vobj> &X,const Lattice<vobj> &Y,int Orthog,RealD scale=1.0)
{
@ -467,21 +403,96 @@ static void sliceMaddMatrix (Lattice<vobj> &R,Eigen::MatrixXcd &aa,const Lattice
  Lattice<vobj> Xslice(SliceGrid);
  Lattice<vobj> Rslice(SliceGrid);
assert( FullGrid->_simd_layout[Orthog]==1);
int nh = FullGrid->_ndimension;
int nl = SliceGrid->_ndimension;
//FIXME package in a convenient iterator
//Should loop over a plane orthogonal to direction "Orthog"
int stride=FullGrid->_slice_stride[Orthog];
int block =FullGrid->_slice_block [Orthog];
int nblock=FullGrid->_slice_nblock[Orthog];
int ostride=FullGrid->_ostride[Orthog];
#pragma omp parallel
{
std::vector<vobj> s_x(Nblock);
#pragma omp for collapse(2)
for(int n=0;n<nblock;n++){
for(int b=0;b<block;b++){
int o = n*stride + b;
      for(int i=0;i<Nblock;i++){
        s_x[i] = X[o+i*ostride];
      }

      vobj dot;
      for(int i=0;i<Nblock;i++){
        dot = Y[o+i*ostride];
        for(int j=0;j<Nblock;j++){
          dot = dot + s_x[j]*(scale*aa(j,i));
        }
        R[o+i*ostride]=dot;
      }
    }}
  }
};
template<class vobj>
static void sliceMulMatrix (Lattice<vobj> &R,Eigen::MatrixXcd &aa,const Lattice<vobj> &X,int Orthog,RealD scale=1.0)
{
typedef typename vobj::scalar_object sobj;
typedef typename vobj::scalar_type scalar_type;
typedef typename vobj::vector_type vector_type;
int Nblock = X._grid->GlobalDimensions()[Orthog];
GridBase *FullGrid = X._grid;
GridBase *SliceGrid = makeSubSliceGrid(FullGrid,Orthog);
Lattice<vobj> Xslice(SliceGrid);
Lattice<vobj> Rslice(SliceGrid);
assert( FullGrid->_simd_layout[Orthog]==1);
int nh = FullGrid->_ndimension;
int nl = SliceGrid->_ndimension;
//FIXME package in a convenient iterator
//Should loop over a plane orthogonal to direction "Orthog"
int stride=FullGrid->_slice_stride[Orthog];
int block =FullGrid->_slice_block [Orthog];
int nblock=FullGrid->_slice_nblock[Orthog];
int ostride=FullGrid->_ostride[Orthog];
#pragma omp parallel
{
std::vector<vobj> s_x(Nblock);
#pragma omp for collapse(2)
for(int n=0;n<nblock;n++){
for(int b=0;b<block;b++){
int o = n*stride + b;
for(int i=0;i<Nblock;i++){
s_x[i] = X[o+i*ostride];
}
vobj dot;
for(int i=0;i<Nblock;i++){
dot = s_x[0]*(scale*aa(0,i));
for(int j=1;j<Nblock;j++){
dot = dot + s_x[j]*(scale*aa(j,i));
}
R[o+i*ostride]=dot;
}
}}
}
};
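In dense-matrix terms, treating each block slice as a column, the two kernels above compute R = Y + scale*X*aa and R = scale*X*aa respectively. A sketch of just the semantics (not Grid code):

#include <Eigen/Dense>
// column i of the result: Y_i + scale * sum_j X_j aa(j,i)
Eigen::MatrixXcd sliceMaddDense(const Eigen::MatrixXcd &aa, const Eigen::MatrixXcd &X,
                                const Eigen::MatrixXcd &Y, double scale = 1.0) {
  return Y + scale * X * aa;
}
// column i of the result: scale * sum_j X_j aa(j,i)
Eigen::MatrixXcd sliceMulDense(const Eigen::MatrixXcd &aa, const Eigen::MatrixXcd &X,
                               double scale = 1.0) {
  return scale * X * aa;
}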
template<class vobj>
static void sliceInnerProductMatrix( Eigen::MatrixXcd &mat, const Lattice<vobj> &lhs,const Lattice<vobj> &rhs,int Orthog)
{
  typedef typename vobj::scalar_object sobj;
  typedef typename vobj::scalar_type scalar_type;
  typedef typename vobj::vector_type vector_type;
@ -496,21 +507,48 @@ static void sliceInnerProductMatrix( Eigen::MatrixXcd &mat, const Lattice<vobj>
  mat = Eigen::MatrixXcd::Zero(Nblock,Nblock);
assert( FullGrid->_simd_layout[Orthog]==1);
int nh = FullGrid->_ndimension;
int nl = SliceGrid->_ndimension;
//FIXME package in a convenient iterator
//Should loop over a plane orthogonal to direction "Orthog"
int stride=FullGrid->_slice_stride[Orthog];
int block =FullGrid->_slice_block [Orthog];
int nblock=FullGrid->_slice_nblock[Orthog];
int ostride=FullGrid->_ostride[Orthog];
typedef typename vobj::vector_typeD vector_typeD;
#pragma omp parallel
{
std::vector<vobj> Left(Nblock);
std::vector<vobj> Right(Nblock);
Eigen::MatrixXcd mat_thread = Eigen::MatrixXcd::Zero(Nblock,Nblock);
#pragma omp for collapse(2)
for(int n=0;n<nblock;n++){
for(int b=0;b<block;b++){
int o = n*stride + b;
      for(int i=0;i<Nblock;i++){
        Left [i] = lhs[o+i*ostride];
        Right[i] = rhs[o+i*ostride];
      }

      for(int i=0;i<Nblock;i++){
      for(int j=0;j<Nblock;j++){
        auto tmp = innerProduct(Left[i],Right[j]);
        vector_typeD rtmp = TensorRemove(tmp);
        mat_thread(i,j) += Reduce(rtmp);
      }}
    }}

#pragma omp critical
    {
      mat += mat_thread;
    }
  }
  return;
}

View File

@ -551,7 +551,10 @@ void Replicate(Lattice<vobj> &coarse,Lattice<vobj> & fine)
//Copy SIMD-vectorized lattice to array of scalar objects in lexicographic order
template<typename vobj, typename sobj>
typename std::enable_if<isSIMDvectorized<vobj>::value && !isSIMDvectorized<sobj>::value, void>::type
unvectorizeToLexOrdArray(std::vector<sobj> &out, const Lattice<vobj> &in)
{
  typedef typename vobj::vector_type vtype;
  GridBase* in_grid = in._grid;
@ -590,6 +593,54 @@ typename std::enable_if<isSIMDvectorized<vobj>::value && !isSIMDvectorized<sobj>
    extract1(in_vobj, out_ptrs, 0);
  }
}
//Copy an array of scalar objects in lexicographic order into a SIMD-vectorized lattice
template<typename vobj, typename sobj>
typename std::enable_if<isSIMDvectorized<vobj>::value
&& !isSIMDvectorized<sobj>::value, void>::type
vectorizeFromLexOrdArray( std::vector<sobj> &in, Lattice<vobj> &out)
{
typedef typename vobj::vector_type vtype;
GridBase* grid = out._grid;
assert(in.size()==grid->lSites());
int ndim = grid->Nd();
int nsimd = vtype::Nsimd();
std::vector<std::vector<int> > icoor(nsimd);
for(int lane=0; lane < nsimd; lane++){
icoor[lane].resize(ndim);
grid->iCoorFromIindex(icoor[lane],lane);
}
parallel_for(uint64_t oidx = 0; oidx < grid->oSites(); oidx++){ //loop over outer index
//Assemble vector of pointers to output elements
std::vector<sobj*> ptrs(nsimd);
std::vector<int> ocoor(ndim);
grid->oCoorFromOindex(ocoor, oidx);
std::vector<int> lcoor(grid->Nd());
for(int lane=0; lane < nsimd; lane++){
for(int mu=0;mu<ndim;mu++){
lcoor[mu] = ocoor[mu] + grid->_rdimensions[mu]*icoor[lane][mu];
}
int lex;
Lexicographic::IndexFromCoor(lcoor, lex, grid->_ldimensions);
ptrs[lane] = &in[lex];
}
//pack from those ptrs
vobj vecobj;
merge1(vecobj, ptrs, 0);
out._odata[oidx] = vecobj;
}
}
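Usage sketch of the pair (assumed driver code; grid, in, out and sobj as in the templates above):

//   std::vector<sobj> buf(grid->lSites());
//   unvectorizeToLexOrdArray(buf, in);    // SIMD lattice -> lex-ordered scalars
//   vectorizeFromLexOrdArray(buf, out);   // lex-ordered scalars -> SIMD lattice
//   // out now matches in, site by site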
//Convert a Lattice from one precision to another
template<class VobjOut, class VobjIn>
@ -615,7 +666,7 @@ void precisionChange(Lattice<VobjOut> &out, const Lattice<VobjIn> &in){
  std::vector<SobjOut> in_slex_conv(in_grid->lSites());
  unvectorizeToLexOrdArray(in_slex_conv, in);

  parallel_for(uint64_t out_oidx=0;out_oidx<out_grid->oSites();out_oidx++){
    std::vector<int> out_ocoor(ndim);
    out_grid->oCoorFromOindex(out_ocoor, out_oidx);

View File

@ -62,14 +62,20 @@ namespace Grid {
  return ret;
}

template<class obj> Lattice<obj> expMat(const Lattice<obj> &rhs, RealD alpha, Integer Nexp = DEFAULT_MAT_EXP){
  Lattice<obj> ret(rhs._grid);
  ret.checkerboard = rhs.checkerboard;
  conformable(ret,rhs);
  parallel_for(int ss=0;ss<rhs._grid->oSites();ss++){
    ret._odata[ss]=Exponentiate(rhs._odata[ss],alpha, Nexp);
  }
  return ret;
}

File diff suppressed because it is too large

View File

@ -27,6 +27,7 @@ directory
#ifndef GRID_ILDG_IO_H
#define GRID_ILDG_IO_H

#ifdef HAVE_LIME
#include <algorithm>
#include <fstream>
#include <iomanip>
@ -37,214 +38,678 @@ directory
#include <sys/utsname.h>
#include <unistd.h>

//C-Lime is a must have for this functionality
extern "C" {  // for linkage
#include "lime.h"
}
namespace Grid {
namespace QCD {
/////////////////////////////////
// Encode word types as strings
/////////////////////////////////
template<class word> inline std::string ScidacWordMnemonic(void){ return std::string("unknown"); }
template<>           inline std::string ScidacWordMnemonic<double>  (void){ return std::string("D"); }
template<>           inline std::string ScidacWordMnemonic<float>   (void){ return std::string("F"); }
template<>           inline std::string ScidacWordMnemonic< int32_t>(void){ return std::string("I32_t"); }
template<>           inline std::string ScidacWordMnemonic<uint32_t>(void){ return std::string("U32_t"); }
template<>           inline std::string ScidacWordMnemonic< int64_t>(void){ return std::string("I64_t"); }
template<>           inline std::string ScidacWordMnemonic<uint64_t>(void){ return std::string("U64_t"); }
/////////////////////////////////////////
// Encode a generic tensor as a string
/////////////////////////////////////////
template<class vobj> std::string ScidacRecordTypeString(int &colors, int &spins, int & typesize,int &datacount) {
typedef typename getPrecision<vobj>::real_scalar_type stype;
int _ColourN = indexRank<ColourIndex,vobj>();
int _ColourScalar = isScalar<ColourIndex,vobj>();
int _ColourVector = isVector<ColourIndex,vobj>();
int _ColourMatrix = isMatrix<ColourIndex,vobj>();
int _SpinN = indexRank<SpinIndex,vobj>();
int _SpinScalar = isScalar<SpinIndex,vobj>();
int _SpinVector = isVector<SpinIndex,vobj>();
int _SpinMatrix = isMatrix<SpinIndex,vobj>();
int _LorentzN = indexRank<LorentzIndex,vobj>();
int _LorentzScalar = isScalar<LorentzIndex,vobj>();
int _LorentzVector = isVector<LorentzIndex,vobj>();
int _LorentzMatrix = isMatrix<LorentzIndex,vobj>();
std::stringstream stream;
stream << "GRID_";
stream << ScidacWordMnemonic<stype>();
// std::cout << " Lorentz N/S/V/M : " << _LorentzN<<" "<<_LorentzScalar<<"/"<<_LorentzVector<<"/"<<_LorentzMatrix<<std::endl;
// std::cout << " Spin N/S/V/M : " << _SpinN <<" "<<_SpinScalar <<"/"<<_SpinVector <<"/"<<_SpinMatrix<<std::endl;
// std::cout << " Colour N/S/V/M : " << _ColourN <<" "<<_ColourScalar <<"/"<<_ColourVector <<"/"<<_ColourMatrix<<std::endl;
if ( _LorentzVector ) stream << "_LorentzVector"<<_LorentzN;
if ( _LorentzMatrix ) stream << "_LorentzMatrix"<<_LorentzN;
if ( _SpinVector ) stream << "_SpinVector"<<_SpinN;
if ( _SpinMatrix ) stream << "_SpinMatrix"<<_SpinN;
if ( _ColourVector ) stream << "_ColourVector"<<_ColourN;
if ( _ColourMatrix ) stream << "_ColourMatrix"<<_ColourN;
if ( _ColourScalar && _LorentzScalar && _SpinScalar ) stream << "_Complex";
typesize = sizeof(typename vobj::scalar_type);
if ( _ColourMatrix ) typesize*= _ColourN*_ColourN;
else typesize*= _ColourN;
if ( _SpinMatrix ) typesize*= _SpinN*_SpinN;
else typesize*= _SpinN;
colors = _ColourN;
spins = _SpinN;
datacount = _LorentzN;
return stream.str();
} }
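Worked example (assumed, following the branches above): for a double-precision gauge field, i.e. a Lorentz 4-vector of 3x3 colour matrices with scalar spin, the mnemonic should come out as GRID_D_LorentzVector4_ColourMatrix3, with typesize = sizeof(ComplexD)*3*3 = 144 bytes per Lorentz component (taking the spin factor as 1 for a spin scalar) and datacount = 4.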
template<class vobj> std::string ScidacRecordTypeString(Lattice<vobj> & lat,int &colors, int &spins, int & typesize,int &datacount) {
  return ScidacRecordTypeString<vobj>(colors,spins,typesize,datacount);
};
////////////////////////////////////////////////////////////
// Helper to fill out metadata
////////////////////////////////////////////////////////////
template<class vobj> void ScidacMetaData(Lattice<vobj> & field,
FieldMetaData &header,
scidacRecord & _scidacRecord,
scidacFile & _scidacFile)
{
typedef typename getPrecision<vobj>::real_scalar_type stype;
/////////////////////////////////////
// Pull Grid's metadata
/////////////////////////////////////
PrepareMetaData(field,header);
/////////////////////////////////////
// Scidac Private File structure
/////////////////////////////////////
_scidacFile = scidacFile(field._grid);
/////////////////////////////////////
// Scidac Private Record structure
/////////////////////////////////////
scidacRecord sr;
sr.datatype = ScidacRecordTypeString(field,sr.colors,sr.spins,sr.typesize,sr.datacount);
sr.date = header.creation_date;
sr.precision = ScidacWordMnemonic<stype>();
sr.recordtype = GRID_IO_FIELD;
_scidacRecord = sr;
std::cout << GridLogMessage << "Build SciDAC datatype " <<sr.datatype<<std::endl;
}
///////////////////////////////////////////////////////
// Scidac checksum
///////////////////////////////////////////////////////
static int scidacChecksumVerify(scidacChecksum &scidacChecksum_,uint32_t scidac_csuma,uint32_t scidac_csumb)
{
uint32_t scidac_checksuma = stoull(scidacChecksum_.suma,0,16);
uint32_t scidac_checksumb = stoull(scidacChecksum_.sumb,0,16);
if ( scidac_csuma !=scidac_checksuma) return 0;
if ( scidac_csumb !=scidac_checksumb) return 0;
return 1;
}
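A standalone sketch (not Grid code) of the hex round-trip these checks rely on; the writer below emits std::hex strings and stoull parses them back in base 16:

#include <cstdint>
#include <sstream>
#include <string>
#include <cassert>
int main() {
  uint32_t csum = 0x1234abcd;
  std::stringstream ss; ss << std::hex << csum;
  std::string suma = ss.str();                       // "1234abcd"
  uint32_t back = (uint32_t)std::stoull(suma, 0, 16); // parse base-16
  assert(back == csum);
}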
////////////////////////////////////////////////////////////////////////////////////
// Lime, ILDG and Scidac I/O classes
////////////////////////////////////////////////////////////////////////////////////
class GridLimeReader : public BinaryIO {
public:
///////////////////////////////////////////////////
// FIXME: format for RNG? Now just binary out instead
///////////////////////////////////////////////////
  FILE       *File;
  LimeReader *LimeR;
  std::string filename;

  /////////////////////////////////////////////
  // Open the file
  /////////////////////////////////////////////
  void open(std::string &_filename)
  {
    filename= _filename;
    File = fopen(filename.c_str(), "r");
    LimeR = limeCreateReader(File);
  }
/////////////////////////////////////////////
// Close the file
/////////////////////////////////////////////
void close(void){
fclose(File);
// limeDestroyReader(LimeR);
} }
  ////////////////////////////////////////////
// Read a generic lattice field and verify checksum
////////////////////////////////////////////
template<class vobj>
void readLimeLatticeBinaryObject(Lattice<vobj> &field,std::string record_name)
{
typedef typename vobj::scalar_object sobj;
scidacChecksum scidacChecksum_;
uint32_t nersc_csum,scidac_csuma,scidac_csumb;
    std::string format = getFormatString<vobj>();
while ( limeReaderNextRecord(LimeR) == LIME_SUCCESS ) {
std::cout << GridLogMessage << limeReaderType(LimeR) <<std::endl;
if ( strncmp(limeReaderType(LimeR), record_name.c_str(),strlen(record_name.c_str()) ) ) {
off_t offset= ftell(File);
BinarySimpleMunger<sobj,sobj> munge;
BinaryIO::readLatticeObject< sobj, sobj >(field, filename, munge, offset, format,nersc_csum,scidac_csuma,scidac_csumb);
/////////////////////////////////////////////
// Insist checksum is next record
/////////////////////////////////////////////
readLimeObject(scidacChecksum_,std::string("scidacChecksum"),record_name);
/////////////////////////////////////////////
// Verify checksums
/////////////////////////////////////////////
scidacChecksumVerify(scidacChecksum_,scidac_csuma,scidac_csumb);
return;
}
}
}
////////////////////////////////////////////
// Read a generic serialisable object
////////////////////////////////////////////
template<class serialisable_object>
void readLimeObject(serialisable_object &object,std::string object_name,std::string record_name)
{
std::string xmlstring;
// should this be a do while; can we miss a first record??
while ( limeReaderNextRecord(LimeR) == LIME_SUCCESS ) {
uint64_t nbytes = limeReaderBytes(LimeR);//size of this record (configuration)
if ( strncmp(limeReaderType(LimeR), record_name.c_str(),strlen(record_name.c_str()) ) ) {
std::vector<char> xmlc(nbytes+1,'\0');
limeReaderReadData((void *)&xmlc[0], &nbytes, LimeR);
XmlReader RD(&xmlc[0],"");
read(RD,object_name,object);
return;
}
}
assert(0);
}
};
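A usage sketch of the reader (assumed driver code; record names as emitted by the writers below):

//   GridLimeReader LR;
//   LR.open(filename);
//   FieldMetaData header;
//   LR.readLimeObject(header, std::string("FieldMetaData"), std::string(GRID_FORMAT));
//   LR.readLimeLatticeBinaryObject(Umu, std::string(ILDG_BINARY_DATA));
//   LR.close();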
class GridLimeWriter : public BinaryIO {
public:
///////////////////////////////////////////////////
// FIXME: format for RNG? Now just binary out instead
///////////////////////////////////////////////////
FILE *File;
LimeWriter *LimeW;
std::string filename;
void open(std::string &_filename) {
filename= _filename;
File = fopen(filename.c_str(), "w");
LimeW = limeCreateWriter(File); assert(LimeW != NULL );
}
/////////////////////////////////////////////
// Close the file
/////////////////////////////////////////////
void close(void) {
fclose(File);
// limeDestroyWriter(LimeW);
}
///////////////////////////////////////////////////////
// Lime utility functions
///////////////////////////////////////////////////////
int createLimeRecordHeader(std::string message, int MB, int ME, size_t PayloadSize)
{
LimeRecordHeader *h; LimeRecordHeader *h;
    h = limeCreateHeader(MB, ME, const_cast<char *>(message.c_str()), PayloadSize);
    assert(limeWriteRecordHeader(h, LimeW) >= 0);
    limeDestroyHeader(h);
    return LIME_SUCCESS;
  }
////////////////////////////////////////////
// Write a generic serialisable object
////////////////////////////////////////////
template<class serialisable_object>
void writeLimeObject(int MB,int ME,serialisable_object &object,std::string object_name,std::string record_name)
{
std::string xmlstring;
{
XmlWriter WR("","");
write(WR,object_name,object);
xmlstring = WR.XmlString();
}
uint64_t nbytes = xmlstring.size();
int err;
LimeRecordHeader *h = limeCreateHeader(MB, ME,(char *)record_name.c_str(), nbytes); assert(h!= NULL);
    err=limeWriteRecordHeader(h, LimeW);                     assert(err>=0);
    err=limeWriteRecordData(&xmlstring[0], &nbytes, LimeW);  assert(err>=0);
    err=limeWriterCloseRecord(LimeW);                        assert(err>=0);
    limeDestroyHeader(h);
}
////////////////////////////////////////////
// Write a generic lattice field and csum
////////////////////////////////////////////
template<class vobj>
void writeLimeLatticeBinaryObject(Lattice<vobj> &field,std::string record_name)
{
////////////////////////////////////////////
// Create record header
////////////////////////////////////////////
typedef typename vobj::scalar_object sobj;
int err;
uint32_t nersc_csum,scidac_csuma,scidac_csumb;
uint64_t PayloadSize = sizeof(sobj) * field._grid->_gsites;
createLimeRecordHeader(record_name, 0, 0, PayloadSize);
char message[] = "ildg-format"; ////////////////////////////////////////////////////////////////////
nbytes = strlen(message); // NB: FILE and iostream are jointly writing disjoint sequences in the
LimeHeader = limeCreateHeader(MB_flag, ME_flag, message, nbytes); // the same file through different file handles (integer units).
limeWriteRecordHeader(LimeHeader, LimeW); //
limeDestroyHeader(LimeHeader); // These are both buffered, so why I think this code is right is as follows.
// save the xml header here //
// use the xml_writer to c++ streams in pugixml // i) write record header to FILE *File, telegraphing the size.
// and convert to char message // ii) ftell reads the offset from FILE *File .
limeWriteRecordData(message, &nbytes, LimeW); // iii) iostream / MPI Open independently seek this offset. Write sequence direct to disk.
limeWriterCloseRecord(LimeW); // Closes iostream and flushes.
// iv) fseek on FILE * to end of this disjoint section.
// v) Continue writing scidac record.
////////////////////////////////////////////////////////////////////
off_t offset = ftell(File);
std::string format = getFormatString<vobj>();
BinarySimpleMunger<sobj,sobj> munge;
BinaryIO::writeLatticeObject<vobj,sobj>(field, filename, munge, offset, format,nersc_csum,scidac_csuma,scidac_csumb);
err=limeWriterCloseRecord(LimeW); assert(err>=0);
////////////////////////////////////////
// Write checksum element, propagating forward from the BinaryIO
// Always pair a checksum with a binary object, and close message
////////////////////////////////////////
scidacChecksum checksum;
std::stringstream streama; streama << std::hex << scidac_csuma;
std::stringstream streamb; streamb << std::hex << scidac_csumb;
checksum.suma= streama.str();
checksum.sumb= streamb.str();
std::cout << GridLogMessage<<" writing scidac checksums "<<std::hex<<scidac_csuma<<"/"<<scidac_csumb<<std::dec<<std::endl;
writeLimeObject(0,1,checksum,std::string("scidacChecksum" ),std::string(SCIDAC_CHECKSUM));
}
};
class ScidacWriter : public GridLimeWriter {
 public:

  template<class SerialisableUserFile>
  void writeScidacFileRecord(GridBase *grid,SerialisableUserFile &_userFile)
{
scidacFile _scidacFile(grid);
writeLimeObject(1,0,_scidacFile,_scidacFile.SerialisableClassName(),std::string(SCIDAC_PRIVATE_FILE_XML));
writeLimeObject(0,1,_userFile,_userFile.SerialisableClassName(),std::string(SCIDAC_FILE_XML));
}
////////////////////////////////////////////////
// Write generic lattice field in scidac format
////////////////////////////////////////////////
template <class vobj, class userRecord>
void writeScidacFieldRecord(Lattice<vobj> &field,userRecord _userRecord)
{
typedef typename vobj::scalar_object sobj;
uint64_t nbytes;
GridBase * grid = field._grid;
////////////////////////////////////////
// fill the Grid header
////////////////////////////////////////
FieldMetaData header;
scidacRecord _scidacRecord;
scidacFile _scidacFile;
ScidacMetaData(field,header,_scidacRecord,_scidacFile);
//////////////////////////////////////////////
// Fill the Lime file record by record
//////////////////////////////////////////////
writeLimeObject(1,0,header ,std::string("FieldMetaData"),std::string(GRID_FORMAT)); // Open message
writeLimeObject(0,0,_userRecord,_userRecord.SerialisableClassName(),std::string(SCIDAC_RECORD_XML));
writeLimeObject(0,0,_scidacRecord,_scidacRecord.SerialisableClassName(),std::string(SCIDAC_PRIVATE_RECORD_XML));
writeLimeLatticeBinaryObject(field,std::string(ILDG_BINARY_DATA)); // Closes message with checksum
}
};
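A usage sketch of the writer (assumed driver code; open/close are inherited from GridLimeWriter):

//   ScidacWriter SW;
//   SW.open(filename);
//   SW.writeScidacFileRecord (grid,  userFileXml);    // file-level metadata, once
//   SW.writeScidacFieldRecord(field, userRecordXml);  // field + private records + checksum
//   SW.close();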
class IldgWriter : public ScidacWriter {
public:
///////////////////////////////////
// A little helper
///////////////////////////////////
void writeLimeIldgLFN(std::string &LFN)
{
uint64_t PayloadSize = LFN.size();
int err;
createLimeRecordHeader(ILDG_DATA_LFN, 0 , 0, PayloadSize);
err=limeWriteRecordData(const_cast<char*>(LFN.c_str()), &PayloadSize,LimeW); assert(err>=0);
err=limeWriterCloseRecord(LimeW); assert(err>=0);
} }
////////////////////////////////////////////////////////////////
// Special ILDG operations ; gauge configs only.
// Don't require scidac records EXCEPT checksum
// Use Grid MetaData object if present.
////////////////////////////////////////////////////////////////
  template <class vsimd>
  void writeConfiguration(Lattice<iLorentzColourMatrix<vsimd> > &Umu,int sequence,std::string LFN,std::string description)
  {
    GridBase * grid = Umu._grid;
    typedef Lattice<iLorentzColourMatrix<vsimd> > GaugeField;
    typedef iLorentzColourMatrix<vsimd> vobj;
    typedef typename vobj::scalar_object sobj;

    uint64_t nbytes;

    ////////////////////////////////////////
    // fill the Grid header
    ////////////////////////////////////////
    FieldMetaData header;
    scidacRecord  _scidacRecord;
    scidacFile    _scidacFile;

    ScidacMetaData(Umu,header,_scidacRecord,_scidacFile);

    std::string format = header.floating_point;
    header.ensemble_id     = description;
    header.ensemble_label  = description;
    header.sequence_number = sequence;
    header.ildg_lfn        = LFN;

    assert ( (format == std::string("IEEE32BIG"))
           ||(format == std::string("IEEE64BIG")) );

    //////////////////////////////////////////////////////
    // Fill ILDG header data struct
    //////////////////////////////////////////////////////
    ildgFormat ildgfmt ;
    ildgfmt.field = std::string("su3gauge");

    if ( format == std::string("IEEE32BIG") ) {
      ildgfmt.precision = 32;
    } else {
      ildgfmt.precision = 64;
    }
ildgfmt.version = 1.0;
ildgfmt.lx = header.dimension[0];
ildgfmt.ly = header.dimension[1];
ildgfmt.lz = header.dimension[2];
ildgfmt.lt = header.dimension[3];
assert(header.nd==4);
assert(header.nd==header.dimension.size());
    //////////////////////////////////////////////////////////////////////////////
// Fill the USQCD info field
//////////////////////////////////////////////////////////////////////////////
usqcdInfo info;
info.version=1.0;
info.plaq = header.plaquette;
info.linktr = header.link_trace;
std::cout << GridLogMessage << " Writing config; IldgIO "<<std::endl;
//////////////////////////////////////////////
// Fill the Lime file record by record
//////////////////////////////////////////////
writeLimeObject(1,0,header ,std::string("FieldMetaData"),std::string(GRID_FORMAT)); // Open message
writeLimeObject(0,0,_scidacFile,_scidacFile.SerialisableClassName(),std::string(SCIDAC_PRIVATE_FILE_XML));
writeLimeObject(0,1,info,info.SerialisableClassName(),std::string(SCIDAC_FILE_XML));
writeLimeObject(1,0,_scidacRecord,_scidacRecord.SerialisableClassName(),std::string(SCIDAC_PRIVATE_RECORD_XML));
writeLimeObject(0,0,info,info.SerialisableClassName(),std::string(SCIDAC_RECORD_XML));
writeLimeObject(0,0,ildgfmt,std::string("ildgFormat") ,std::string(ILDG_FORMAT)); // rec
writeLimeIldgLFN(header.ildg_lfn); // rec
writeLimeLatticeBinaryObject(Umu,std::string(ILDG_BINARY_DATA)); // Closes message with checksum
// limeDestroyWriter(LimeW);
fclose(File);
}
};
class IldgReader : public GridLimeReader {
public:
////////////////////////////////////////////////////////////////
// Read either Grid/SciDAC/ILDG configuration
// Don't require scidac records EXCEPT checksum
// Use Grid MetaData object if present.
// Else use ILDG MetaData object if present.
// Else use SciDAC MetaData object if present.
////////////////////////////////////////////////////////////////
template <class vsimd>
void readConfiguration(Lattice<iLorentzColourMatrix<vsimd> > &Umu, FieldMetaData &FieldMetaData_) {
typedef Lattice<iLorentzColourMatrix<vsimd> > GaugeField;
typedef typename GaugeField::vector_object vobj;
typedef typename vobj::scalar_object sobj;
typedef LorentzColourMatrixF fobj;
typedef LorentzColourMatrixD dobj;
GridBase *grid = Umu._grid;
std::vector<int> dims = Umu._grid->FullDimensions();
assert(dims.size()==4);
// Metadata holders
ildgFormat ildgFormat_ ;
std::string ildgLFN_ ;
scidacChecksum scidacChecksum_;
usqcdInfo usqcdInfo_ ;
// track what we read from file
int found_ildgFormat =0;
int found_ildgLFN =0;
int found_scidacChecksum=0;
int found_usqcdInfo =0;
int found_ildgBinary =0;
int found_FieldMetaData =0;
uint32_t nersc_csum;
uint32_t scidac_csuma;
uint32_t scidac_csumb;
// Binary format
std::string format;
//////////////////////////////////////////////////////////////////////////
// Loop over all records
  // -- Order is poorly guaranteed, except that the ILDG header precedes the binary section.
  // -- Run like an event loop.
  // -- Impose a trust hierarchy: Grid metadata takes precedence; then look for ILDG, and
  //    failing that SciDAC.
// -- Insist on Scidac checksum record.
//////////////////////////////////////////////////////////////////////////
while ( limeReaderNextRecord(LimeR) == LIME_SUCCESS ) {
uint64_t nbytes = limeReaderBytes(LimeR);//size of this record (configuration)
//////////////////////////////////////////////////////////////////
// If not BINARY_DATA read a string and parse
//////////////////////////////////////////////////////////////////
if ( strncmp(limeReaderType(LimeR), ILDG_BINARY_DATA,strlen(ILDG_BINARY_DATA) ) ) {
// Copy out the string
std::vector<char> xmlc(nbytes+1,'\0');
limeReaderReadData((void *)&xmlc[0], &nbytes, LimeR);
std::cout << GridLogMessage<< "Non binary record :" <<limeReaderType(LimeR) <<std::endl; //<<"\n"<<(&xmlc[0])<<std::endl;
//////////////////////////////////
// ILDG format record
if ( !strncmp(limeReaderType(LimeR), ILDG_FORMAT,strlen(ILDG_FORMAT)) ) {
XmlReader RD(&xmlc[0],"");
read(RD,"ildgFormat",ildgFormat_);
if ( ildgFormat_.precision == 64 ) format = std::string("IEEE64BIG");
if ( ildgFormat_.precision == 32 ) format = std::string("IEEE32BIG");
assert( ildgFormat_.lx == dims[0]);
assert( ildgFormat_.ly == dims[1]);
assert( ildgFormat_.lz == dims[2]);
assert( ildgFormat_.lt == dims[3]);
found_ildgFormat = 1;
}
if ( !strncmp(limeReaderType(LimeR), ILDG_DATA_LFN,strlen(ILDG_DATA_LFN)) ) {
FieldMetaData_.ildg_lfn = std::string(&xmlc[0]);
found_ildgLFN = 1;
}
  if ( !strncmp(limeReaderType(LimeR), GRID_FORMAT,strlen(GRID_FORMAT)) ) {
XmlReader RD(&xmlc[0],"");
read(RD,"FieldMetaData",FieldMetaData_);
format = FieldMetaData_.floating_point;
assert(FieldMetaData_.dimension[0] == dims[0]);
assert(FieldMetaData_.dimension[1] == dims[1]);
assert(FieldMetaData_.dimension[2] == dims[2]);
assert(FieldMetaData_.dimension[3] == dims[3]);
found_FieldMetaData = 1;
}
if ( !strncmp(limeReaderType(LimeR), SCIDAC_RECORD_XML,strlen(SCIDAC_RECORD_XML)) ) {
std::string xmls(&xmlc[0]);
// is it a USQCD info field
if ( xmls.find(std::string("usqcdInfo")) != std::string::npos ) {
std::cout << GridLogMessage<<"...found a usqcdInfo field"<<std::endl;
XmlReader RD(&xmlc[0],"");
read(RD,"usqcdInfo",usqcdInfo_);
found_usqcdInfo = 1;
    }
  }
if ( !strncmp(limeReaderType(LimeR), SCIDAC_CHECKSUM,strlen(SCIDAC_CHECKSUM)) ) {
XmlReader RD(&xmlc[0],"");
read(RD,"scidacChecksum",scidacChecksum_);
found_scidacChecksum = 1;
}
} else {
/////////////////////////////////
// Binary data
/////////////////////////////////
std::cout << GridLogMessage << "ILDG Binary record found : " ILDG_BINARY_DATA << std::endl;
off_t offset= ftell(File);
if ( format == std::string("IEEE64BIG") ) {
GaugeSimpleMunger<dobj, sobj> munge;
BinaryIO::readLatticeObject< vobj, dobj >(Umu, filename, munge, offset, format,nersc_csum,scidac_csuma,scidac_csumb);
} else {
GaugeSimpleMunger<fobj, sobj> munge;
BinaryIO::readLatticeObject< vobj, fobj >(Umu, filename, munge, offset, format,nersc_csum,scidac_csuma,scidac_csumb);
}
found_ildgBinary = 1;
}
}
//////////////////////////////////////////////////////
// Minimally must find binary segment and checksum
// Since this is an ILDG reader require ILDG format
//////////////////////////////////////////////////////
assert(found_ildgBinary);
assert(found_ildgFormat);
assert(found_scidacChecksum);
// Must find something with the lattice dimensions
assert(found_FieldMetaData||found_ildgFormat);
if ( found_FieldMetaData ) {
std::cout << GridLogMessage<<"Grid MetaData was record found: configuration was probably written by Grid ! Yay ! "<<std::endl;
} else {
assert(found_ildgFormat);
assert ( ildgFormat_.field == std::string("su3gauge") );
///////////////////////////////////////////////////////////////////////////////////////
// Populate our Grid metadata as best we can
///////////////////////////////////////////////////////////////////////////////////////
std::ostringstream vers; vers << ildgFormat_.version;
FieldMetaData_.hdr_version = vers.str();
FieldMetaData_.data_type = std::string("4D_SU3_GAUGE_3X3");
FieldMetaData_.nd=4;
FieldMetaData_.dimension.resize(4);
FieldMetaData_.dimension[0] = ildgFormat_.lx ;
FieldMetaData_.dimension[1] = ildgFormat_.ly ;
FieldMetaData_.dimension[2] = ildgFormat_.lz ;
FieldMetaData_.dimension[3] = ildgFormat_.lt ;
if ( found_usqcdInfo ) {
FieldMetaData_.plaquette = usqcdInfo_.plaq;
FieldMetaData_.link_trace= usqcdInfo_.linktr;
std::cout << GridLogMessage <<"This configuration was probably written by USQCD "<<std::endl;
std::cout << GridLogMessage <<"USQCD xml record Plaquette : "<<FieldMetaData_.plaquette<<std::endl;
std::cout << GridLogMessage <<"USQCD xml record LinkTrace : "<<FieldMetaData_.link_trace<<std::endl;
} else {
FieldMetaData_.plaquette = 0.0;
FieldMetaData_.link_trace= 0.0;
std::cout << GridLogWarning << "This configuration is unsafe with no plaquette records that can verify it !!! "<<std::endl;
}
}
////////////////////////////////////////////////////////////
// Really really want to mandate a scidac checksum
////////////////////////////////////////////////////////////
if ( found_scidacChecksum ) {
FieldMetaData_.scidac_checksuma = stoull(scidacChecksum_.suma,0,16);
FieldMetaData_.scidac_checksumb = stoull(scidacChecksum_.sumb,0,16);
scidacChecksumVerify(scidacChecksum_,scidac_csuma,scidac_csumb);
assert( scidac_csuma ==FieldMetaData_.scidac_checksuma);
assert( scidac_csumb ==FieldMetaData_.scidac_checksumb);
std::cout << GridLogMessage<<"SciDAC checksums match " << std::endl;
} else {
std::cout << GridLogWarning<<"SciDAC checksums not found. This is unsafe. " << std::endl;
  assert(0); // Can we always insist on a checksum ?
}
if ( found_FieldMetaData || found_usqcdInfo ) {
FieldMetaData checker;
GaugeStatistics(Umu,checker);
assert(fabs(checker.plaquette - FieldMetaData_.plaquette )<1.0e-5);
assert(fabs(checker.link_trace - FieldMetaData_.link_trace)<1.0e-5);
std::cout << GridLogMessage<<"Plaquette and link trace match " << std::endl;
}
}
};
}}
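// Taken together, the writer and reader above give a round trip along these lines
// (added for illustration; the file name and LFN are made up, and open()/close()
// follow the same assumed record-oriented interface as above):
//
//   IldgWriter WR;
//   WR.open(std::string("ckpoint_lat.1000.ildg"));
//   WR.writeConfiguration(Umu, 1000, std::string("lfn://made/up/name"), std::string("demo"));
//   WR.close();
//
//   IldgReader RD; FieldMetaData header;
//   RD.open(std::string("ckpoint_lat.1000.ildg"));
//   RD.readConfiguration(Umu, header);  // verifies SciDAC checksum, and plaquette /
//   RD.close();                         // link trace when metadata is present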
//HAVE_LIME
#endif


@ -34,47 +34,198 @@ extern "C" { // for linkage
namespace Grid {

  /////////////////////////////////////////////////////////////////////////////////
  // Data representation of records that enter ILDG and SciDac formats
  /////////////////////////////////////////////////////////////////////////////////

#define GRID_FORMAT      "grid-format"
#define ILDG_FORMAT      "ildg-format"
#define ILDG_BINARY_DATA "ildg-binary-data"
#define ILDG_DATA_LFN    "ildg-data-lfn"
#define SCIDAC_CHECKSUM           "scidac-checksum"
#define SCIDAC_PRIVATE_FILE_XML   "scidac-private-file-xml"
#define SCIDAC_FILE_XML           "scidac-file-xml"
#define SCIDAC_PRIVATE_RECORD_XML "scidac-private-record-xml"
#define SCIDAC_RECORD_XML         "scidac-record-xml"
#define SCIDAC_BINARY_DATA        "scidac-binary-data"
// Unused SCIDAC record names; could move to support this functionality
#define SCIDAC_SITELIST "scidac-sitelist"

  ////////////////////////////////////////////////////////////
  const int GRID_IO_SINGLEFILE = 0; // hardcode lift from QIO compat
  const int GRID_IO_MULTIFILE  = 1; // hardcode lift from QIO compat
  const int GRID_IO_FIELD      = 0; // hardcode lift from QIO compat
  const int GRID_IO_GLOBAL     = 1; // hardcode lift from QIO compat
  ////////////////////////////////////////////////////////////

  /////////////////////////////////////////////////////////////////////////////////
  // QIO uses mandatory "private" records in a fixed format.
  // "Private" is in principle opaque; however, it cannot be changed now without breaking
  // compatibility with existing files, so it should be correct to assume the undocumented
  // but de facto file structure.
  /////////////////////////////////////////////////////////////////////////////////

  ////////////////////////
  // Scidac private file xml
  // <?xml version="1.0" encoding="UTF-8"?><scidacFile><version>1.1</version><spacetime>4</spacetime><dims>16 16 16 32 </dims><volfmt>0</volfmt></scidacFile>
  ////////////////////////
  struct scidacFile : Serializable {
  public:
    GRID_SERIALIZABLE_CLASS_MEMBERS(scidacFile,
				    double, version,
				    int, spacetime,
				    std::string, dims, // must convert to int
				    int, volfmt);

    std::vector<int> getDimensions(void) {
      std::stringstream stream(dims);
      std::vector<int> dimensions;
      int n;
      while(stream >> n){
	dimensions.push_back(n);
      }
      return dimensions;
    }
void setDimensions(std::vector<int> dimensions) {
char delimiter = ' ';
std::stringstream stream;
for(int i=0;i<dimensions.size();i++){
stream << dimensions[i];
if ( i != dimensions.size()-1) {
stream << delimiter <<std::endl;
}
}
dims = stream.str();
}
// Constructor provides Grid
scidacFile() =default; // default constructor
scidacFile(GridBase * grid){
version = 1.0;
spacetime = grid->_ndimension;
setDimensions(grid->FullDimensions());
volfmt = GRID_IO_SINGLEFILE;
}
};
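// Illustrative round trip through the helpers above (added for clarity; not in the
// change set). The dims field holds the lattice extents as a whitespace-separated
// string, and getDimensions() parses it back with formatted extraction:
//
//   scidacFile f;
//   f.setDimensions(std::vector<int>({16,16,16,32}));  // serialised as a whitespace-separated string
//   std::vector<int> d = f.getDimensions();            // parses back to {16,16,16,32}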
///////////////////////////////////////////////////////////////////////
// scidac-private-record-xml : example
// <scidacRecord>
// <version>1.1</version><date>Tue Jul 26 21:14:44 2011 UTC</date><recordtype>0</recordtype>
// <datatype>QDP_D3_ColorMatrix</datatype><precision>D</precision><colors>3</colors><spins>4</spins>
// <typesize>144</typesize><datacount>4</datacount>
// </scidacRecord>
///////////////////////////////////////////////////////////////////////
struct scidacRecord : Serializable {
public:
GRID_SERIALIZABLE_CLASS_MEMBERS(scidacRecord,
double, version,
std::string, date,
int, recordtype,
std::string, datatype,
std::string, precision,
int, colors,
int, spins,
int, typesize,
int, datacount);
scidacRecord() { version =1.0; }
};
////////////////////////
// ILDG format
////////////////////////
struct ildgFormat : Serializable {
public:
GRID_SERIALIZABLE_CLASS_MEMBERS(ildgFormat,
double, version,
std::string, field,
int, precision,
int, lx,
int, ly,
int, lz,
int, lt);
ildgFormat() { version=1.0; };
};
////////////////////////
// USQCD info
////////////////////////
struct usqcdInfo : Serializable {
public:
GRID_SERIALIZABLE_CLASS_MEMBERS(usqcdInfo,
double, version,
double, plaq,
double, linktr,
std::string, info);
usqcdInfo() {
version=1.0;
};
};
////////////////////////
// Scidac Checksum
////////////////////////
struct scidacChecksum : Serializable {
public:
GRID_SERIALIZABLE_CLASS_MEMBERS(scidacChecksum,
double, version,
std::string, suma,
std::string, sumb);
scidacChecksum() {
version=1.0;
};
};
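// The checksums travel as hex strings. Writer and reader sides convert along these
// lines (illustrative; mirrors writeLimeLatticeBinaryObject above and the stoull
// calls in IldgReader):
//
//   scidacChecksum sum;
//   std::stringstream a; a << std::hex << scidac_csuma; sum.suma = a.str();
//   std::stringstream b; b << std::hex << scidac_csumb; sum.sumb = b.str();
//   uint32_t csuma = stoull(sum.suma,0,16);   // reader side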
////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// Type: scidac-file-xml <title>MILC ILDG archival gauge configuration</title>
////////////////////////////////////////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// Type:
////////////////////////////////////////////////////////////////////////////////////////////////////////////////
////////////////////////
// Scidac private file xml
// <?xml version="1.0" encoding="UTF-8"?><scidacFile><version>1.1</version><spacetime>4</spacetime><dims>16 16 16 32 </dims><volfmt>0</volfmt></scidacFile>
////////////////////////
#if 0
////////////////////////////////////////////////////////////////////////////////////////
// From http://www.physics.utah.edu/~detar/scidac/qio_2p3.pdf
////////////////////////////////////////////////////////////////////////////////////////
struct usqcdPropFile : Serializable {
public:
GRID_SERIALIZABLE_CLASS_MEMBERS(usqcdPropFile,
double, version,
std::string, type,
std::string, info);
usqcdPropFile() {
version=1.0;
};
};
struct usqcdSourceInfo : Serializable {
public:
GRID_SERIALIZABLE_CLASS_MEMBERS(usqcdSourceInfo,
double, version,
std::string, info);
usqcdSourceInfo() {
version=1.0;
};
};
struct usqcdPropInfo : Serializable {
public:
GRID_SERIALIZABLE_CLASS_MEMBERS(usqcdPropInfo,
double, version,
int, spin,
int, color,
std::string, info);
usqcdPropInfo() {
version=1.0;
};
};
#endif
}
#endif
#endif

lib/parallelIO/MetaData.h (new file, 325 lines)

@ -0,0 +1,325 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/parallelIO/NerscIO.h
Copyright (C) 2015
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#include <algorithm>
#include <iostream>
#include <iomanip>
#include <fstream>
#include <map>
#include <unistd.h>
#include <sys/utsname.h>
#include <pwd.h>
namespace Grid {
///////////////////////////////////////////////////////
// Precision mapping
///////////////////////////////////////////////////////
template<class vobj> static std::string getFormatString (void)
{
std::string format;
typedef typename getPrecision<vobj>::real_scalar_type stype;
if ( sizeof(stype) == sizeof(float) ) {
format = std::string("IEEE32BIG");
}
if ( sizeof(stype) == sizeof(double) ) {
format = std::string("IEEE64BIG");
}
return format;
}
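// Usage is a compile-time dispatch on the scalar width of the vector object, e.g.
// (illustrative):
//
//   std::string f64 = getFormatString<vLorentzColourMatrixD>();  // "IEEE64BIG"
//   std::string f32 = getFormatString<vLorentzColourMatrixF>();  // "IEEE32BIG"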
////////////////////////////////////////////////////////////////////////////////
// header specification/interpretation
////////////////////////////////////////////////////////////////////////////////
class FieldMetaData : Serializable {
public:
GRID_SERIALIZABLE_CLASS_MEMBERS(FieldMetaData,
int, nd,
std::vector<int>, dimension,
std::vector<std::string>, boundary,
int, data_start,
std::string, hdr_version,
std::string, storage_format,
double, link_trace,
double, plaquette,
uint32_t, checksum,
uint32_t, scidac_checksuma,
uint32_t, scidac_checksumb,
unsigned int, sequence_number,
std::string, data_type,
std::string, ensemble_id,
std::string, ensemble_label,
std::string, ildg_lfn,
std::string, creator,
std::string, creator_hardware,
std::string, creation_date,
std::string, archive_date,
std::string, floating_point);
FieldMetaData(void) {
nd=4;
dimension.resize(4);
boundary.resize(4);
}
};
namespace QCD {
using namespace Grid;
//////////////////////////////////////////////////////////////////////
// Bit and Physical Checksumming and QA of data
//////////////////////////////////////////////////////////////////////
inline void GridMetaData(GridBase *grid,FieldMetaData &header)
{
int nd = grid->_ndimension;
header.nd = nd;
header.dimension.resize(nd);
header.boundary.resize(nd);
for(int d=0;d<nd;d++) {
header.dimension[d] = grid->_fdimensions[d];
}
for(int d=0;d<nd;d++) {
header.boundary[d] = std::string("PERIODIC");
}
}
inline void MachineCharacteristics(FieldMetaData &header)
{
// Who
struct passwd *pw = getpwuid (getuid());
if (pw) header.creator = std::string(pw->pw_name);
// When
std::time_t t = std::time(nullptr);
std::tm tm_ = *std::localtime(&t);
std::ostringstream oss;
// oss << std::put_time(&tm_, "%c %Z");
header.creation_date = oss.str();
header.archive_date = header.creation_date;
// What
struct utsname name; uname(&name);
header.creator_hardware = std::string(name.nodename)+"-";
header.creator_hardware+= std::string(name.machine)+"-";
header.creator_hardware+= std::string(name.sysname)+"-";
header.creator_hardware+= std::string(name.release);
}
#define dump_meta_data(field, s) \
s << "BEGIN_HEADER" << std::endl; \
s << "HDR_VERSION = " << field.hdr_version << std::endl; \
s << "DATATYPE = " << field.data_type << std::endl; \
s << "STORAGE_FORMAT = " << field.storage_format << std::endl; \
for(int i=0;i<4;i++){ \
s << "DIMENSION_" << i+1 << " = " << field.dimension[i] << std::endl ; \
} \
s << "LINK_TRACE = " << std::setprecision(10) << field.link_trace << std::endl; \
s << "PLAQUETTE = " << std::setprecision(10) << field.plaquette << std::endl; \
for(int i=0;i<4;i++){ \
s << "BOUNDARY_"<<i+1<<" = " << field.boundary[i] << std::endl; \
} \
\
s << "CHECKSUM = "<< std::hex << std::setw(10) << field.checksum << std::dec<<std::endl; \
s << "SCIDAC_CHECKSUMA = "<< std::hex << std::setw(10) << field.scidac_checksuma << std::dec<<std::endl; \
s << "SCIDAC_CHECKSUMB = "<< std::hex << std::setw(10) << field.scidac_checksumb << std::dec<<std::endl; \
s << "ENSEMBLE_ID = " << field.ensemble_id << std::endl; \
s << "ENSEMBLE_LABEL = " << field.ensemble_label << std::endl; \
s << "SEQUENCE_NUMBER = " << field.sequence_number << std::endl; \
s << "CREATOR = " << field.creator << std::endl; \
s << "CREATOR_HARDWARE = "<< field.creator_hardware << std::endl; \
s << "CREATION_DATE = " << field.creation_date << std::endl; \
s << "ARCHIVE_DATE = " << field.archive_date << std::endl; \
s << "FLOATING_POINT = " << field.floating_point << std::endl; \
s << "END_HEADER" << std::endl;
template<class vobj> inline void PrepareMetaData(Lattice<vobj> & field, FieldMetaData &header)
{
GridBase *grid = field._grid;
std::string format = getFormatString<vobj>();
header.floating_point = format;
header.checksum = 0x0; // Nersc checksum unused in ILDG, Scidac
GridMetaData(grid,header);
MachineCharacteristics(header);
}
inline void GaugeStatistics(Lattice<vLorentzColourMatrixF> & data,FieldMetaData &header)
{
// How to convert data precision etc...
header.link_trace=Grid::QCD::WilsonLoops<PeriodicGimplF>::linkTrace(data);
header.plaquette =Grid::QCD::WilsonLoops<PeriodicGimplF>::avgPlaquette(data);
}
inline void GaugeStatistics(Lattice<vLorentzColourMatrixD> & data,FieldMetaData &header)
{
// How to convert data precision etc...
header.link_trace=Grid::QCD::WilsonLoops<PeriodicGimplD>::linkTrace(data);
header.plaquette =Grid::QCD::WilsonLoops<PeriodicGimplD>::avgPlaquette(data);
}
template<> inline void PrepareMetaData<vLorentzColourMatrixF>(Lattice<vLorentzColourMatrixF> & field, FieldMetaData &header)
{
GridBase *grid = field._grid;
std::string format = getFormatString<vLorentzColourMatrixF>();
header.floating_point = format;
header.checksum = 0x0; // Nersc checksum unused in ILDG, Scidac
GridMetaData(grid,header);
GaugeStatistics(field,header);
MachineCharacteristics(header);
}
template<> inline void PrepareMetaData<vLorentzColourMatrixD>(Lattice<vLorentzColourMatrixD> & field, FieldMetaData &header)
{
GridBase *grid = field._grid;
std::string format = getFormatString<vLorentzColourMatrixD>();
header.floating_point = format;
header.checksum = 0x0; // Nersc checksum unused in ILDG, Scidac
GridMetaData(grid,header);
GaugeStatistics(field,header);
MachineCharacteristics(header);
}
//////////////////////////////////////////////////////////////////////
// Utilities ; these are QCD aware
//////////////////////////////////////////////////////////////////////
inline void reconstruct3(LorentzColourMatrix & cm)
{
const int x=0;
const int y=1;
const int z=2;
for(int mu=0;mu<Nd;mu++){
cm(mu)()(2,x) = adj(cm(mu)()(0,y)*cm(mu)()(1,z)-cm(mu)()(0,z)*cm(mu)()(1,y)); //x= yz-zy
cm(mu)()(2,y) = adj(cm(mu)()(0,z)*cm(mu)()(1,x)-cm(mu)()(0,x)*cm(mu)()(1,z)); //y= zx-xz
cm(mu)()(2,z) = adj(cm(mu)()(0,x)*cm(mu)()(1,y)-cm(mu)()(0,y)*cm(mu)()(1,x)); //z= xy-yx
}
}
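// reconstruct3 rebuilds the third row of each SU(3) link from the first two: for a
// unitary matrix with unit determinant, row 2 is the complex conjugate of the cross
// product of rows 0 and 1. As a quick worked check (illustrative): for the identity
// link, cm(mu)()(2,2) = adj(1*1 - 0*0) = 1 and the remaining third-row entries
// vanish, so the identity is reproduced exactly.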
////////////////////////////////////////////////////////////////////////////////
// Some data types for intermediate storage
////////////////////////////////////////////////////////////////////////////////
template<typename vtype> using iLorentzColour2x3 = iVector<iVector<iVector<vtype, Nc>, 2>, Nd >;
typedef iLorentzColour2x3<Complex> LorentzColour2x3;
typedef iLorentzColour2x3<ComplexF> LorentzColour2x3F;
typedef iLorentzColour2x3<ComplexD> LorentzColour2x3D;
/////////////////////////////////////////////////////////////////////////////////
// Simple classes for precision conversion
/////////////////////////////////////////////////////////////////////////////////
template <class fobj, class sobj>
struct BinarySimpleUnmunger {
typedef typename getPrecision<fobj>::real_scalar_type fobj_stype;
typedef typename getPrecision<sobj>::real_scalar_type sobj_stype;
void operator()(sobj &in, fobj &out) {
      // take word by word and transform according to the status
fobj_stype *out_buffer = (fobj_stype *)&out;
sobj_stype *in_buffer = (sobj_stype *)&in;
size_t fobj_words = sizeof(out) / sizeof(fobj_stype);
size_t sobj_words = sizeof(in) / sizeof(sobj_stype);
assert(fobj_words == sobj_words);
for (unsigned int word = 0; word < sobj_words; word++)
out_buffer[word] = in_buffer[word]; // type conversion on the fly
}
};
template <class fobj, class sobj>
struct BinarySimpleMunger {
typedef typename getPrecision<fobj>::real_scalar_type fobj_stype;
typedef typename getPrecision<sobj>::real_scalar_type sobj_stype;
void operator()(fobj &in, sobj &out) {
      // take word by word and transform according to the status
fobj_stype *in_buffer = (fobj_stype *)&in;
sobj_stype *out_buffer = (sobj_stype *)&out;
size_t fobj_words = sizeof(in) / sizeof(fobj_stype);
size_t sobj_words = sizeof(out) / sizeof(sobj_stype);
assert(fobj_words == sobj_words);
for (unsigned int word = 0; word < sobj_words; word++)
out_buffer[word] = in_buffer[word]; // type conversion on the fly
}
};
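// These munge functors plug into BinaryIO to convert between file and in-memory
// precision on the fly, e.g. (illustrative):
//
//   LorentzColourMatrixF fsite;   // file (single precision) site object
//   LorentzColourMatrix  ssite;   // in-memory site object
//   BinarySimpleMunger<LorentzColourMatrixF, LorentzColourMatrix> munge;
//   munge(fsite, ssite);          // word-by-word float -> double copy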
template<class fobj,class sobj>
struct GaugeSimpleMunger{
void operator()(fobj &in, sobj &out) {
for (int mu = 0; mu < Nd; mu++) {
for (int i = 0; i < Nc; i++) {
for (int j = 0; j < Nc; j++) {
out(mu)()(i, j) = in(mu)()(i, j);
}}
}
};
};
template <class fobj, class sobj>
struct GaugeSimpleUnmunger {
void operator()(sobj &in, fobj &out) {
for (int mu = 0; mu < Nd; mu++) {
for (int i = 0; i < Nc; i++) {
for (int j = 0; j < Nc; j++) {
out(mu)()(i, j) = in(mu)()(i, j);
}}
}
};
};
template<class fobj,class sobj>
struct Gauge3x2munger{
void operator() (fobj &in,sobj &out){
for(int mu=0;mu<Nd;mu++){
for(int i=0;i<2;i++){
for(int j=0;j<3;j++){
out(mu)()(i,j) = in(mu)(i)(j);
}}
}
reconstruct3(out);
}
};
template<class fobj,class sobj>
struct Gauge3x2unmunger{
void operator() (sobj &in,fobj &out){
for(int mu=0;mu<Nd;mu++){
for(int i=0;i<2;i++){
for(int j=0;j<3;j++){
out(mu)(i)(j) = in(mu)()(i,j);
}}
}
}
};
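// The 3x2 pair handles NERSC-style two-row storage: only rows 0 and 1 of each link
// go to disk, and reconstruct3() regenerates row 2 on read, e.g. (illustrative):
//
//   LorentzColour2x3F   two_row;  // file object: 2 rows x 3 colours per direction
//   LorentzColourMatrix full;     // in-memory 3x3 link
//   Gauge3x2munger<LorentzColour2x3F, LorentzColourMatrix> munge;
//   munge(two_row, full);         // copies rows 0,1 then reconstructs row 2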
}
}


@ -30,182 +30,11 @@
#ifndef GRID_NERSC_IO_H
#define GRID_NERSC_IO_H
#include <algorithm>
#include <iostream>
#include <iomanip>
#include <fstream>
#include <map>
#include <unistd.h>
#include <sys/utsname.h>
#include <pwd.h>
namespace Grid {
  namespace QCD {
    using namespace Grid;
////////////////////////////////////////////////////////////////////////////////
// Some data types for intermediate storage
////////////////////////////////////////////////////////////////////////////////
template<typename vtype> using iLorentzColour2x3 = iVector<iVector<iVector<vtype, Nc>, 2>, 4 >;
typedef iLorentzColour2x3<Complex> LorentzColour2x3;
typedef iLorentzColour2x3<ComplexF> LorentzColour2x3F;
typedef iLorentzColour2x3<ComplexD> LorentzColour2x3D;
////////////////////////////////////////////////////////////////////////////////
// header specification/interpretation
////////////////////////////////////////////////////////////////////////////////
class NerscField {
public:
// header strings (not in order)
int dimension[4];
std::string boundary[4];
int data_start;
std::string hdr_version;
std::string storage_format;
// Checks on data
double link_trace;
double plaquette;
uint32_t checksum;
unsigned int sequence_number;
std::string data_type;
std::string ensemble_id ;
std::string ensemble_label ;
std::string creator ;
std::string creator_hardware ;
std::string creation_date ;
std::string archive_date ;
std::string floating_point;
};
//////////////////////////////////////////////////////////////////////
// Bit and Physical Checksumming and QA of data
//////////////////////////////////////////////////////////////////////
inline void NerscGrid(GridBase *grid,NerscField &header)
{
assert(grid->_ndimension==4);
for(int d=0;d<4;d++) {
header.dimension[d] = grid->_fdimensions[d];
}
for(int d=0;d<4;d++) {
header.boundary[d] = std::string("PERIODIC");
}
}
template<class GaugeField>
inline void NerscStatistics(GaugeField & data,NerscField &header)
{
// How to convert data precision etc...
header.link_trace=Grid::QCD::WilsonLoops<PeriodicGimplR>::linkTrace(data);
header.plaquette =Grid::QCD::WilsonLoops<PeriodicGimplR>::avgPlaquette(data);
}
inline void NerscMachineCharacteristics(NerscField &header)
{
// Who
struct passwd *pw = getpwuid (getuid());
if (pw) header.creator = std::string(pw->pw_name);
// When
std::time_t t = std::time(nullptr);
std::tm tm = *std::localtime(&t);
std::ostringstream oss;
// oss << std::put_time(&tm, "%c %Z");
header.creation_date = oss.str();
header.archive_date = header.creation_date;
// What
struct utsname name; uname(&name);
header.creator_hardware = std::string(name.nodename)+"-";
header.creator_hardware+= std::string(name.machine)+"-";
header.creator_hardware+= std::string(name.sysname)+"-";
header.creator_hardware+= std::string(name.release);
}
//////////////////////////////////////////////////////////////////////
// Utilities ; these are QCD aware
//////////////////////////////////////////////////////////////////////
inline void NerscChecksum(uint32_t *buf,uint32_t buf_size_bytes,uint32_t &csum)
{
BinaryIO::Uint32Checksum(buf,buf_size_bytes,csum);
}
inline void reconstruct3(LorentzColourMatrix & cm)
{
const int x=0;
const int y=1;
const int z=2;
for(int mu=0;mu<4;mu++){
cm(mu)()(2,x) = adj(cm(mu)()(0,y)*cm(mu)()(1,z)-cm(mu)()(0,z)*cm(mu)()(1,y)); //x= yz-zy
cm(mu)()(2,y) = adj(cm(mu)()(0,z)*cm(mu)()(1,x)-cm(mu)()(0,x)*cm(mu)()(1,z)); //y= zx-xz
cm(mu)()(2,z) = adj(cm(mu)()(0,x)*cm(mu)()(1,y)-cm(mu)()(0,y)*cm(mu)()(1,x)); //z= xy-yx
}
}
template<class fobj,class sobj>
struct NerscSimpleMunger{
void operator()(fobj &in, sobj &out, uint32_t &csum) {
for (int mu = 0; mu < Nd; mu++) {
for (int i = 0; i < Nc; i++) {
for (int j = 0; j < Nc; j++) {
out(mu)()(i, j) = in(mu)()(i, j);
}
}
}
NerscChecksum((uint32_t *)&in, sizeof(in), csum);
};
};
template <class fobj, class sobj>
struct NerscSimpleUnmunger {
void operator()(sobj &in, fobj &out, uint32_t &csum) {
for (int mu = 0; mu < Nd; mu++) {
for (int i = 0; i < Nc; i++) {
for (int j = 0; j < Nc; j++) {
out(mu)()(i, j) = in(mu)()(i, j);
}
}
}
NerscChecksum((uint32_t *)&out, sizeof(out), csum);
};
};
template<class fobj,class sobj>
struct Nersc3x2munger{
void operator() (fobj &in,sobj &out,uint32_t &csum){
NerscChecksum((uint32_t *)&in,sizeof(in),csum);
for(int mu=0;mu<4;mu++){
for(int i=0;i<2;i++){
for(int j=0;j<3;j++){
out(mu)()(i,j) = in(mu)(i)(j);
}}
}
reconstruct3(out);
}
};
template<class fobj,class sobj>
struct Nersc3x2unmunger{
void operator() (sobj &in,fobj &out,uint32_t &csum){
for(int mu=0;mu<4;mu++){
for(int i=0;i<2;i++){
for(int j=0;j<3;j++){
out(mu)(i)(j) = in(mu)()(i,j);
}}
}
NerscChecksum((uint32_t *)&out,sizeof(out),csum);
}
};
    ////////////////////////////////////////////////////////////////////////////////
    // Write and read from fstream; compute header offset for payload
    ////////////////////////////////////////////////////////////////////////////////
@ -216,42 +45,17 @@ namespace Grid {
      std::ofstream fout(file,std::ios::out);
    }

    static inline unsigned int writeHeader(FieldMetaData &field,std::string file)
    {
      std::ofstream fout(file,std::ios::out|std::ios::in);
      fout.seekp(0,std::ios::beg);
      dump_meta_data(field, fout);
      field.data_start = fout.tellp();
      return field.data_start;
    }
    // for the header-reader
    static inline int readHeader(std::string file,GridBase *grid, FieldMetaData &field)
    {
      int offset=0;
      std::map<std::string,std::string> header;
@ -326,18 +130,18 @@ namespace Grid {
    /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
    // Now the meat: the object readers
    /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////

    template<class vsimd>
    static inline void readConfiguration(Lattice<iLorentzColourMatrix<vsimd> > &Umu,
					 FieldMetaData& header,
					 std::string file)
    {
      typedef Lattice<iLorentzColourMatrix<vsimd> > GaugeField;

      GridBase *grid = Umu._grid;
      int offset = readHeader(file,Umu._grid,header);

      FieldMetaData clone(header);

      std::string format(header.floating_point);
@ -346,76 +150,78 @@ namespace Grid {
      int ieee64big = (format == std::string("IEEE64BIG"));
      int ieee64    = (format == std::string("IEEE64"));

      uint32_t nersc_csum,scidac_csuma,scidac_csumb;

      // depending on datatype, set up munger;
      // munger is a function of <floating point, Real, data_type>
      if ( header.data_type == std::string("4D_SU3_GAUGE") ) {
	if ( ieee32 || ieee32big ) {
	  BinaryIO::readLatticeObject<iLorentzColourMatrix<vsimd>, LorentzColour2x3F>
	    (Umu,file,Gauge3x2munger<LorentzColour2x3F,LorentzColourMatrix>(), offset,format,
	     nersc_csum,scidac_csuma,scidac_csumb);
	}
	if ( ieee64 || ieee64big ) {
	  BinaryIO::readLatticeObject<iLorentzColourMatrix<vsimd>, LorentzColour2x3D>
	    (Umu,file,Gauge3x2munger<LorentzColour2x3D,LorentzColourMatrix>(),offset,format,
	     nersc_csum,scidac_csuma,scidac_csumb);
	}
      } else if ( header.data_type == std::string("4D_SU3_GAUGE_3x3") ) {
	if ( ieee32 || ieee32big ) {
	  BinaryIO::readLatticeObject<iLorentzColourMatrix<vsimd>,LorentzColourMatrixF>
	    (Umu,file,GaugeSimpleMunger<LorentzColourMatrixF,LorentzColourMatrix>(),offset,format,
	     nersc_csum,scidac_csuma,scidac_csumb);
	}
	if ( ieee64 || ieee64big ) {
	  BinaryIO::readLatticeObject<iLorentzColourMatrix<vsimd>,LorentzColourMatrixD>
	    (Umu,file,GaugeSimpleMunger<LorentzColourMatrixD,LorentzColourMatrix>(),offset,format,
	     nersc_csum,scidac_csuma,scidac_csumb);
	}
      } else {
	assert(0);
      }

      GaugeStatistics(Umu,clone);

      std::cout<<GridLogMessage <<"NERSC Configuration "<<file<<" checksum "<<std::hex<<nersc_csum<< std::dec
	       <<" header   "<<std::hex<<header.checksum<<std::dec <<std::endl;
      std::cout<<GridLogMessage <<"NERSC Configuration "<<file<<" plaquette "<<clone.plaquette
	       <<" header    "<<header.plaquette<<std::endl;
      std::cout<<GridLogMessage <<"NERSC Configuration "<<file<<" link_trace "<<clone.link_trace
	       <<" header    "<<header.link_trace<<std::endl;
if ( fabs(clone.plaquette -header.plaquette ) >= 1.0e-5 ) {
std::cout << " Plaquette mismatch "<<std::endl;
std::cout << Umu[0]<<std::endl;
std::cout << Umu[1]<<std::endl;
}
if ( nersc_csum != header.checksum ) {
std::cerr << " checksum mismatch " << std::endl;
std::cerr << " plaqs " << clone.plaquette << " " << header.plaquette << std::endl;
std::cerr << " trace " << clone.link_trace<< " " << header.link_trace<< std::endl;
std::cerr << " nersc_csum " <<std::hex<< nersc_csum << " " << header.checksum<< std::dec<< std::endl;
exit(0);
}
      assert(fabs(clone.plaquette -header.plaquette ) < 1.0e-5 );
      assert(fabs(clone.link_trace-header.link_trace) < 1.0e-6 );
      assert(nersc_csum == header.checksum );

      std::cout<<GridLogMessage <<"NERSC Configuration "<<file<< " and plaquette, link trace, and checksum agree"<<std::endl;
    }
    template<class vsimd>
    static inline void writeConfiguration(Lattice<iLorentzColourMatrix<vsimd> > &Umu,
					  std::string file,
					  int two_row,
					  int bits32)
    {
      typedef Lattice<iLorentzColourMatrix<vsimd> > GaugeField;

      typedef iLorentzColourMatrix<vsimd> vobj;
      typedef typename vobj::scalar_object sobj;

      FieldMetaData header;
      ///////////////////////////////////////////
      // Following should become arguments
      ///////////////////////////////////////////
      header.sequence_number = 1;
      header.ensemble_id     = "UKQCD";
      header.ensemble_label  = "DWF";
@ -425,45 +231,32 @@ namespace Grid {
      GridBase *grid = Umu._grid;

      GridMetaData(grid,header);
      assert(header.nd==4);
      GaugeStatistics(Umu,header);
      MachineCharacteristics(header);

      int offset;

      truncate(file);
      // Sod it -- always write 3x3 double
      header.floating_point = std::string("IEEE64BIG");
      header.data_type      = std::string("4D_SU3_GAUGE_3x3");
      GaugeSimpleUnmunger<fobj3D,sobj> munge;
      offset = writeHeader(header,file);

      uint32_t nersc_csum,scidac_csuma,scidac_csumb;
      BinaryIO::writeLatticeObject<vobj,fobj3D>(Umu,file,munge,offset,header.floating_point,
						nersc_csum,scidac_csuma,scidac_csumb);
      header.checksum = nersc_csum;
      writeHeader(header,file);

      std::cout<<GridLogMessage <<"Written NERSC Configuration on "<< file << " checksum "
	       <<std::hex<<header.checksum
	       <<std::dec<<" plaq "<< header.plaquette <<std::endl;
    }
    ///////////////////////////////
    // RNG state
    ///////////////////////////////
@ -472,19 +265,19 @@ namespace Grid {
      typedef typename GridParallelRNG::RngStateType RngStateType;

      // Following should become arguments
      FieldMetaData header;
      header.sequence_number = 1;
      header.ensemble_id     = "UKQCD";
      header.ensemble_label  = "DWF";

      GridBase *grid = parallel._grid;

      GridMetaData(grid,header);
      assert(header.nd==4);
      header.link_trace=0.0;
      header.plaquette=0.0;
      MachineCharacteristics(header);

      int offset;

#ifdef RNG_RANLUX
@ -502,15 +295,19 @@ namespace Grid {
      truncate(file);
      offset = writeHeader(header,file);

      uint32_t nersc_csum,scidac_csuma,scidac_csumb;
      BinaryIO::writeRNG(serial,parallel,file,offset,nersc_csum,scidac_csuma,scidac_csumb);
      header.checksum = nersc_csum;
      offset = writeHeader(header,file);

      std::cout<<GridLogMessage
	       <<"Written NERSC RNG STATE "<<file<< " checksum "
	       <<std::hex<<header.checksum
	       <<std::dec<<std::endl;
    }
    static inline void readRNGState(GridSerialRNG &serial,GridParallelRNG & parallel,FieldMetaData& header,std::string file)
    {
      typedef typename GridParallelRNG::RngStateType RngStateType;
@ -518,7 +315,7 @@ namespace Grid {
      int offset = readHeader(file,grid,header);

      FieldMetaData clone(header);

      std::string format(header.floating_point);
      std::string data_type(header.data_type);
@ -538,15 +335,19 @@ namespace Grid {
      // depending on datatype, set up munger;
      // munger is a function of <floating point, Real, data_type>

      uint32_t nersc_csum,scidac_csuma,scidac_csumb;
      BinaryIO::readRNG(serial,parallel,file,offset,nersc_csum,scidac_csuma,scidac_csumb);

      if ( nersc_csum != header.checksum ) {
	std::cerr << "checksum mismatch "<<std::hex<< nersc_csum <<" "<<header.checksum<<std::dec<<std::endl;
	exit(0);
      }
      assert(nersc_csum == header.checksum );

      std::cout<<GridLogMessage <<"Read NERSC RNG file "<<file<< " format "<< data_type <<std::endl;
    }
  };
}}
#endif


@ -644,19 +644,16 @@ class StaggeredImpl : public PeriodicGaugeImpl<GaugeImplTypes<S, Representation:
  INHERIT_GIMPL_TYPES(Gimpl);

  template <typename vtype> using iImplSpinor            = iScalar<iScalar<iVector<vtype, Dimension> > >;
  template <typename vtype> using iImplHalfSpinor        = iScalar<iScalar<iVector<vtype, Dimension> > >;
  template <typename vtype> using iImplDoubledGaugeField = iVector<iScalar<iMatrix<vtype, Dimension> >, Nds>;
  template <typename vtype> using iImplPropagator        = iScalar<iScalar<iMatrix<vtype, Dimension> > >;

  typedef iImplSpinor<Simd>            SiteSpinor;
  typedef iImplHalfSpinor<Simd>        SiteHalfSpinor;
  typedef iImplDoubledGaugeField<Simd> SiteDoubledGaugeField;
  typedef iImplPropagator<Simd>        SitePropagator;

  typedef Lattice<SiteSpinor>            FermionField;
  typedef Lattice<SiteDoubledGaugeField> DoubledGaugeField;
  typedef Lattice<SitePropagator>        PropagatorField;

@ -775,7 +772,6 @@ class StaggeredImpl : public PeriodicGaugeImpl<GaugeImplTypes<S, Representation:

  INHERIT_GIMPL_TYPES(Gimpl);

  template <typename vtype> using iImplSpinor            = iScalar<iScalar<iVector<vtype, Dimension> > >;
  template <typename vtype> using iImplHalfSpinor        = iScalar<iScalar<iVector<vtype, Dimension> > >;
  template <typename vtype> using iImplDoubledGaugeField = iVector<iScalar<iMatrix<vtype, Dimension> >, Nds>;

@ -792,12 +788,10 @@ class StaggeredImpl : public PeriodicGaugeImpl<GaugeImplTypes<S, Representation:

  typedef Lattice<SiteDoubledGaugeField> DoubledGaugeField;
  typedef Lattice<SitePropagator>        PropagatorField;

  typedef iImplSpinor<Simd>     SiteSpinor;
  typedef iImplHalfSpinor<Simd> SiteHalfSpinor;

  typedef Lattice<SiteSpinor> FermionField;

  typedef SimpleCompressor<SiteSpinor> Compressor;


@ -40,11 +40,14 @@ namespace QCD {
  typedef typename GImpl::Simd Simd;                 \
  typedef typename GImpl::LinkField GaugeLinkField;  \
  typedef typename GImpl::Field GaugeField;          \
  typedef typename GImpl::ComplexField ComplexField; \
  typedef typename GImpl::SiteField SiteGaugeField;  \
  typedef typename GImpl::SiteComplex SiteComplex;   \
  typedef typename GImpl::SiteLink SiteGaugeLink;

#define INHERIT_FIELD_TYPES(Impl)                    \
  typedef typename Impl::Simd Simd;                  \
  typedef typename Impl::ComplexField ComplexField;  \
  typedef typename Impl::SiteField SiteField;        \
  typedef typename Impl::Field Field;
@ -53,12 +56,15 @@ template <class S, int Nrepresentation = Nc, int Nexp = 12 > class GaugeImplType
public:
  typedef S Simd;

  template <typename vtype> using iImplScalar     = iScalar<iScalar<iScalar<vtype> > >;
  template <typename vtype> using iImplGaugeLink  = iScalar<iScalar<iMatrix<vtype, Nrepresentation> > >;
  template <typename vtype> using iImplGaugeField = iVector<iScalar<iMatrix<vtype, Nrepresentation> >, Nd>;

  typedef iImplScalar<Simd>     SiteComplex;
  typedef iImplGaugeLink<Simd>  SiteLink;
  typedef iImplGaugeField<Simd> SiteField;

  typedef Lattice<SiteComplex> ComplexField;
  typedef Lattice<SiteLink>    LinkField;
  typedef Lattice<SiteField>   Field;
@ -94,12 +100,17 @@ public:
  static inline Field projectForce(Field &P) { return Ta(P); }

  static inline void update_field(Field& P, Field& U, double ep){
    //static std::chrono::duration<double> diff;

    //auto start = std::chrono::high_resolution_clock::now();
    parallel_for(int ss=0;ss<P._grid->oSites();ss++){
      for (int mu = 0; mu < Nd; mu++)
	U[ss]._internal[mu] = ProjectOnGroup(Exponentiate(P[ss]._internal[mu], ep, Nexp) * U[ss]._internal[mu]);
    }

    //auto end = std::chrono::high_resolution_clock::now();
    // diff += end - start;
    // std::cout << "Time to exponentiate matrix " << diff.count() << " s\n";
  }

  static inline RealD FieldSquareNorm(Field& U){


@ -71,14 +71,18 @@ class WilsonGaugeAction : public Action<typename Gimpl::GaugeField> {
    RealD factor = 0.5 * beta / RealD(Nc);

    //GaugeLinkField Umu(U._grid);
    GaugeLinkField dSdU_mu(U._grid);
    for (int mu = 0; mu < Nd; mu++) {
      //Umu = PeekIndex<LorentzIndex>(U, mu);

      // Staple in direction mu
      //WilsonLoops<Gimpl>::Staple(dSdU_mu, U, mu);
      //dSdU_mu = Ta(Umu * dSdU_mu) * factor;

      WilsonLoops<Gimpl>::StapleMult(dSdU_mu, U, mu);
      dSdU_mu = Ta(dSdU_mu) * factor;

      PokeIndex<LorentzIndex>(dSdU, dSdU_mu, mu);
    }


@ -15,6 +15,8 @@ namespace Grid {
      typedef iImplField<Simd> SiteField;

      template <typename vtype> using iImplScalar = iScalar<iScalar<iScalar<vtype > > >;
      typedef iImplScalar<Simd> ComplexField;

      typedef Lattice<SiteField> Field;

@ -51,12 +53,15 @@ namespace Grid {
    public:
      typedef S Simd;

      template <typename vtype> using iImplField = iScalar<iScalar<iMatrix<vtype, N> > >;
      typedef iImplField<Simd> SiteField;
      typedef Lattice<SiteField> Field;

      template <typename vtype> using iImplScalar = iScalar<iScalar<iScalar<vtype > > >;
      typedef iImplScalar<Simd> ComplexField;

      static inline void generate_momenta(Field& P, GridParallelRNG& pRNG){
	gaussian(pRNG, P);
      }

View File

@@ -62,36 +62,50 @@ class BinaryHmcCheckpointer : public BaseHmcCheckpointer<Impl> {
     fout.close();
   }

-  void TrajectoryComplete(int traj, Field &U, GridSerialRNG &sRNG,
-                          GridParallelRNG &pRNG) {
+  void TrajectoryComplete(int traj, Field &U, GridSerialRNG &sRNG, GridParallelRNG &pRNG) {
    if ((traj % Params.saveInterval) == 0) {
      std::string config, rng;
      this->build_filenames(traj, Params, config, rng);

-     BinaryIO::BinarySimpleUnmunger<sobj_double, sobj> munge;
+     uint32_t nersc_csum;
+     uint32_t scidac_csuma;
+     uint32_t scidac_csumb;
+
+     BinarySimpleUnmunger<sobj_double, sobj> munge;
      truncate(rng);
-     BinaryIO::writeRNGSerial(sRNG, pRNG, rng, 0);
+     BinaryIO::writeRNG(sRNG, pRNG, rng, 0, nersc_csum, scidac_csuma, scidac_csumb);
      truncate(config);
-     uint32_t csum = BinaryIO::writeObjectParallel<vobj, sobj_double>(
-         U, config, munge, 0, Params.format);
+
+     BinaryIO::writeLatticeObject<vobj, sobj_double>(U, config, munge, 0, Params.format,
+                                                     nersc_csum, scidac_csuma, scidac_csumb);

      std::cout << GridLogMessage << "Written Binary Configuration " << config
-               << " checksum " << std::hex << csum << std::dec << std::endl;
+               << " checksum " << std::hex
+               << nersc_csum   << "/"
+               << scidac_csuma << "/"
+               << scidac_csumb
+               << std::dec << std::endl;
    }
  };

-  void CheckpointRestore(int traj, Field &U, GridSerialRNG &sRNG,
-                         GridParallelRNG &pRNG) {
+  void CheckpointRestore(int traj, Field &U, GridSerialRNG &sRNG, GridParallelRNG &pRNG) {
    std::string config, rng;
    this->build_filenames(traj, Params, config, rng);

-   BinaryIO::BinarySimpleMunger<sobj_double, sobj> munge;
-   BinaryIO::readRNGSerial(sRNG, pRNG, rng, 0);
-   uint32_t csum = BinaryIO::readObjectParallel<vobj, sobj_double>(
-       U, config, munge, 0, Params.format);
+   BinarySimpleMunger<sobj_double, sobj> munge;
+
+   uint32_t nersc_csum;
+   uint32_t scidac_csuma;
+   uint32_t scidac_csumb;
+   BinaryIO::readRNG(sRNG, pRNG, rng, 0, nersc_csum, scidac_csuma, scidac_csumb);
+   BinaryIO::readLatticeObject<vobj, sobj_double>(U, config, munge, 0, Params.format,
+                                                  nersc_csum, scidac_csuma, scidac_csumb);

    std::cout << GridLogMessage << "Read Binary Configuration " << config
-             << " checksum " << std::hex << csum << std::dec << std::endl;
+             << " checksums " << std::hex << nersc_csum << "/" << scidac_csuma << "/" << scidac_csumb
+             << std::dec << std::endl;
  };
 };
 }

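The single csum is replaced by three values: the legacy NERSC checksum plus the two SciDAC checksums, so a configuration written through any I/O path can be cross-checked against all of them. As a rough illustration of the kind of quantity nersc_csum holds (this is a sketch, not Grid's I/O code), a NERSC-style checksum is a plain 32-bit wraparound sum over the raw data words:

    #include <cstdint>
    #include <cstddef>

    // Illustrative only: 32-bit wraparound word sum, the style of checksum
    // carried by NERSC-format lattices. Being a plain sum, its value does not
    // depend on the order in which ranks write their pieces of the file.
    static uint32_t word_sum_checksum(const void *data, size_t bytes) {
      const uint32_t *w = static_cast<const uint32_t *>(data);
      uint32_t csum = 0;
      for (size_t i = 0; i < bytes / sizeof(uint32_t); ++i)
        csum += w[i];              // overflow wraps mod 2^32 by design
      return csum;
    }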
View File

@@ -75,12 +75,19 @@ class ILDGHmcCheckpointer : public BaseHmcCheckpointer<Implementation> {
      std::string config, rng;
      this->build_filenames(traj, Params, config, rng);

-     ILDGIO IO(config, ILDGwrite);
-     BinaryIO::writeRNGSerial(sRNG, pRNG, rng, 0);
-     uint32_t csum = IO.writeConfiguration(U, Params.format);
+     uint32_t nersc_csum, scidac_csuma, scidac_csumb;
+     BinaryIO::writeRNG(sRNG, pRNG, rng, 0, nersc_csum, scidac_csuma, scidac_csumb);
+
+     IldgWriter _IldgWriter;
+     _IldgWriter.open(config);
+     _IldgWriter.writeConfiguration(U, traj, config, config);
+     _IldgWriter.close();

      std::cout << GridLogMessage << "Written ILDG Configuration on " << config
-               << " checksum " << std::hex << csum << std::dec << std::endl;
+               << " checksum " << std::hex
+               << nersc_csum   << "/"
+               << scidac_csuma << "/"
+               << scidac_csumb
+               << std::dec << std::endl;
    }
  };

@@ -89,12 +96,21 @@ class ILDGHmcCheckpointer : public BaseHmcCheckpointer<Implementation> {
    std::string config, rng;
    this->build_filenames(traj, Params, config, rng);

-   ILDGIO IO(config, ILDGread);
-   BinaryIO::readRNGSerial(sRNG, pRNG, rng, 0);
-   uint32_t csum = IO.readConfiguration(U); // format from the header
+   uint32_t nersc_csum, scidac_csuma, scidac_csumb;
+   BinaryIO::readRNG(sRNG, pRNG, rng, 0, nersc_csum, scidac_csuma, scidac_csumb);
+
+   FieldMetaData header;
+   IldgReader _IldgReader;
+   _IldgReader.open(config);
+   _IldgReader.readConfiguration(U, header); // format from the header
+   _IldgReader.close();

    std::cout << GridLogMessage << "Read ILDG Configuration from " << config
-             << " checksum " << std::hex << csum << std::dec << std::endl;
+             << " checksum " << std::hex
+             << nersc_csum   << "/"
+             << scidac_csuma << "/"
+             << scidac_csumb
+             << std::dec << std::endl;
  };
 };
 }

View File

@@ -70,7 +70,7 @@ class NerscHmcCheckpointer : public BaseHmcCheckpointer<Gimpl> {
    std::string config, rng;
    this->build_filenames(traj, Params, config, rng);

-   NerscField header;
+   FieldMetaData header;
    NerscIO::readRNGState(sRNG, pRNG, header, rng);
    NerscIO::readConfiguration(U, header, config);
  };

View File

@@ -58,6 +58,8 @@ class Smear_Stout : public Smear<Gimpl> {
     SmearBase->smear(C, U);
   };

+  // Repetion of code here (use the Tensor_exp.h function)
   void exponentiate_iQ(GaugeLinkField& e_iQ, const GaugeLinkField& iQ) const {
     // Put this outside
     // only valid for SU(3) matrices

View File

@@ -36,20 +36,23 @@ namespace QCD {
 template <class Gimpl>
 class WilsonFlow: public Smear<Gimpl>{
     unsigned int Nstep;
-    RealD epsilon;
+    unsigned int measure_interval;
+    mutable RealD epsilon, taus;

     mutable WilsonGaugeAction<Gimpl> SG;

     void evolve_step(typename Gimpl::GaugeField&) const;
+    void evolve_step_adaptive(typename Gimpl::GaugeField&, RealD);
     RealD tau(unsigned int t)const {return epsilon*(t+1.0); }

 public:
     INHERIT_GIMPL_TYPES(Gimpl)

-    explicit WilsonFlow(unsigned int Nstep, RealD epsilon):
+    explicit WilsonFlow(unsigned int Nstep, RealD epsilon, unsigned int interval = 1):
         Nstep(Nstep),
         epsilon(epsilon),
+        measure_interval(interval),
         SG(WilsonGaugeAction<Gimpl>(3.0)) {
             // WilsonGaugeAction with beta 3.0
             assert(epsilon > 0.0);
@@ -72,7 +75,9 @@ class WilsonFlow: public Smear<Gimpl>{
         // undefined for WilsonFlow
     }

+    void smear_adaptive(GaugeField&, const GaugeField&, RealD maxTau);
     RealD energyDensityPlaquette(unsigned int step, const GaugeField& U) const;
+    RealD energyDensityPlaquette(const GaugeField& U) const;
 };

@@ -98,22 +103,110 @@ void WilsonFlow<Gimpl>::evolve_step(typename Gimpl::GaugeField &U) const{
     Gimpl::update_field(Z, U, -2.0*epsilon);    // V(t+e) = exp(ep*Z)*W2
 }

+template <class Gimpl>
+void WilsonFlow<Gimpl>::evolve_step_adaptive(typename Gimpl::GaugeField &U, RealD maxTau) {
+    if (maxTau - taus < epsilon){
+        epsilon = maxTau-taus;
+    }
+    std::cout << GridLogMessage << "Integration epsilon : " << epsilon << std::endl;
+    GaugeField Z(U._grid);
+    GaugeField Zprime(U._grid);
+    GaugeField tmp(U._grid), Uprime(U._grid);
+    Uprime = U;
+    SG.deriv(U, Z);
+    Zprime = -Z;
+    Z *= 0.25;                                  // Z0 = 1/4 * F(U)
+    Gimpl::update_field(Z, U, -2.0*epsilon);    // U = W1 = exp(ep*Z0)*W0
+
+    Z *= -17.0/8.0;
+    SG.deriv(U, tmp); Z += tmp;                 // -17/32*Z0 +Z1
+    Zprime += 2.0*tmp;
+    Z *= 8.0/9.0;                               // Z = -17/36*Z0 +8/9*Z1
+    Gimpl::update_field(Z, U, -2.0*epsilon);    // U_= W2 = exp(ep*Z)*W1
+
+    Z *= -4.0/3.0;
+    SG.deriv(U, tmp); Z += tmp;                 // 4/3*(17/36*Z0 -8/9*Z1) +Z2
+    Z *= 3.0/4.0;                               // Z = 17/36*Z0 -8/9*Z1 +3/4*Z2
+    Gimpl::update_field(Z, U, -2.0*epsilon);    // V(t+e) = exp(ep*Z)*W2
+
+    // Ramos
+    Gimpl::update_field(Zprime, Uprime, -2.0*epsilon); // V'(t+e) = exp(ep*Z')*W0
+    // Compute distance as norm^2 of the difference
+    GaugeField diffU = U - Uprime;
+    RealD diff = norm2(diffU);
+    // adjust integration step
+    taus += epsilon;
+    std::cout << GridLogMessage << "Adjusting integration step with distance: " << diff << std::endl;
+
+    epsilon = epsilon*0.95*std::pow(1e-4/diff,1./3.);
+    std::cout << GridLogMessage << "New epsilon : " << epsilon << std::endl;
+}
+
 template <class Gimpl>
 RealD WilsonFlow<Gimpl>::energyDensityPlaquette(unsigned int step, const GaugeField& U) const {
     RealD td = tau(step);
     return 2.0 * td * td * SG.S(U)/U._grid->gSites();
 }

+template <class Gimpl>
+RealD WilsonFlow<Gimpl>::energyDensityPlaquette(const GaugeField& U) const {
+    return 2.0 * taus * taus * SG.S(U)/U._grid->gSites();
+}
+
+//#define WF_TIMING
+
 template <class Gimpl>
 void WilsonFlow<Gimpl>::smear(GaugeField& out, const GaugeField& in) const {
     out = in;
-    for (unsigned int step = 0; step < Nstep; step++) {
+    for (unsigned int step = 1; step <= Nstep; step++) {
+        auto start = std::chrono::high_resolution_clock::now();
+        std::cout << GridLogMessage << "Evolution time :"<< tau(step) << std::endl;
         evolve_step(out);
+        auto end = std::chrono::high_resolution_clock::now();
+        std::chrono::duration<double> diff = end - start;
+#ifdef WF_TIMING
+        std::cout << "Time to evolve " << diff.count() << " s\n";
+#endif
         std::cout << GridLogMessage << "[WilsonFlow] Energy density (plaq) : "
                   << step << "  "
                   << energyDensityPlaquette(step,out) << std::endl;
+        if( step % measure_interval == 0){
+            std::cout << GridLogMessage << "[WilsonFlow] Top. charge          : "
+                      << step << "  "
+                      << WilsonLoops<PeriodicGimplR>::TopologicalCharge(out) << std::endl;
+        }
     }
 }

+template <class Gimpl>
+void WilsonFlow<Gimpl>::smear_adaptive(GaugeField& out, const GaugeField& in, RealD maxTau){
+    out = in;
+    taus = epsilon;
+    unsigned int step = 0;
+    do{
+        step++;
+        std::cout << GridLogMessage << "Evolution time :"<< taus << std::endl;
+        evolve_step_adaptive(out, maxTau);
+        std::cout << GridLogMessage << "[WilsonFlow] Energy density (plaq) : "
+                  << step << "  "
+                  << energyDensityPlaquette(out) << std::endl;
+        if( step % measure_interval == 0){
+            std::cout << GridLogMessage << "[WilsonFlow] Top. charge          : "
+                      << step << "  "
+                      << WilsonLoops<PeriodicGimplR>::TopologicalCharge(out) << std::endl;
+        }
+    } while (taus < maxTau);
+}

 } // namespace QCD
 } // namespace Grid

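The adaptive integrator advances with a variable step: each call to evolve_step_adaptive also evolves a cheaper lower-order estimate (the Zprime update, credited to Ramos in the comment), takes norm2 of the difference as the error diff, and rescales the step as epsilon <- 0.95 * epsilon * (1e-4/diff)^(1/3). A minimal driver for the new entry point could look like the following sketch, with placeholder parameter values:

    // Sketch: flow an existing LatticeGaugeField U to flow time maxTau = 1.0.
    // Constructor is WilsonFlow(Nstep, epsilon, measure_interval); for
    // smear_adaptive the epsilon passed here is only the initial step size.
    WilsonFlow<PeriodicGimplR> WF(200, 0.01, 10);
    LatticeGaugeField Uflow(U._grid);
    WF.smear_adaptive(Uflow, U, 1.0);              // integrates until taus >= maxTau
    RealD t2E = WF.energyDensityPlaquette(Uflow);  // 2 t^2 <E> at the final taus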
lib/qcd/utils/GaugeFix.h (new file, 188 lines)

@@ -0,0 +1,188 @@
/*************************************************************************************
    Grid physics library, www.github.com/paboyle/Grid
Copyright (C) 2015
Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk>
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
//#include <Grid/Grid.h>
using namespace Grid;
using namespace Grid::QCD;
template <class Gimpl>
class FourierAcceleratedGaugeFixer : public Gimpl {
public:
INHERIT_GIMPL_TYPES(Gimpl);
typedef typename Gimpl::GaugeLinkField GaugeMat;
typedef typename Gimpl::GaugeField GaugeLorentz;
static void GaugeLinkToLieAlgebraField(const std::vector<GaugeMat> &U,std::vector<GaugeMat> &A) {
for(int mu=0;mu<Nd;mu++){
Complex cmi(0.0,-1.0);
A[mu] = Ta(U[mu]) * cmi;
}
}
static void DmuAmu(const std::vector<GaugeMat> &A,GaugeMat &dmuAmu) {
dmuAmu=zero;
for(int mu=0;mu<Nd;mu++){
dmuAmu = dmuAmu + A[mu] - Cshift(A[mu],mu,-1);
}
}
static void SteepestDescentGaugeFix(GaugeLorentz &Umu,Real & alpha,int maxiter,Real Omega_tol, Real Phi_tol,bool Fourier=false) {
GridBase *grid = Umu._grid;
Real org_plaq =WilsonLoops<Gimpl>::avgPlaquette(Umu);
Real org_link_trace=WilsonLoops<Gimpl>::linkTrace(Umu);
Real old_trace = org_link_trace;
Real trG;
std::vector<GaugeMat> U(Nd,grid);
GaugeMat dmuAmu(grid);
for(int i=0;i<maxiter;i++){
for(int mu=0;mu<Nd;mu++) U[mu]= PeekIndex<LorentzIndex>(Umu,mu);
if ( Fourier==false ) {
trG = SteepestDescentStep(U,alpha,dmuAmu);
} else {
trG = FourierAccelSteepestDescentStep(U,alpha,dmuAmu);
}
for(int mu=0;mu<Nd;mu++) PokeIndex<LorentzIndex>(Umu,U[mu],mu);
// Monitor progress and convergence test
// infrequently to minimise cost overhead
if ( i %20 == 0 ) {
Real plaq =WilsonLoops<Gimpl>::avgPlaquette(Umu);
Real link_trace=WilsonLoops<Gimpl>::linkTrace(Umu);
if (Fourier)
std::cout << GridLogMessage << "Fourier Iteration "<<i<< " plaq= "<<plaq<< " dmuAmu " << norm2(dmuAmu)<< std::endl;
else
std::cout << GridLogMessage << " Iteration "<<i<< " plaq= "<<plaq<< " dmuAmu " << norm2(dmuAmu)<< std::endl;
Real Phi = 1.0 - old_trace / link_trace ;
Real Omega= 1.0 - trG;
std::cout << GridLogMessage << " Iteration "<<i<< " Phi= "<<Phi<< " Omega= " << Omega<< " trG " << trG <<std::endl;
if ( (Omega < Omega_tol) && ( ::fabs(Phi) < Phi_tol) ) {
std::cout << GridLogMessage << "Converged ! "<<std::endl;
return;
}
old_trace = link_trace;
}
}
};
static Real SteepestDescentStep(std::vector<GaugeMat> &U,Real & alpha, GaugeMat & dmuAmu) {
GridBase *grid = U[0]._grid;
std::vector<GaugeMat> A(Nd,grid);
GaugeMat g(grid);
GaugeLinkToLieAlgebraField(U,A);
ExpiAlphaDmuAmu(A,g,alpha,dmuAmu);
Real vol = grid->gSites();
Real trG = TensorRemove(sum(trace(g))).real()/vol/Nc;
SU<Nc>::GaugeTransform(U,g);
return trG;
}
static Real FourierAccelSteepestDescentStep(std::vector<GaugeMat> &U,Real & alpha, GaugeMat & dmuAmu) {
GridBase *grid = U[0]._grid;
Real vol = grid->gSites();
FFT theFFT((GridCartesian *)grid);
LatticeComplex Fp(grid);
LatticeComplex psq(grid); psq=zero;
LatticeComplex pmu(grid);
LatticeComplex one(grid); one = Complex(1.0,0.0);
GaugeMat g(grid);
GaugeMat dmuAmu_p(grid);
std::vector<GaugeMat> A(Nd,grid);
GaugeLinkToLieAlgebraField(U,A);
DmuAmu(A,dmuAmu);
theFFT.FFT_all_dim(dmuAmu_p,dmuAmu,FFT::forward);
//////////////////////////////////
// Work out Fp = psq_max/ psq...
//////////////////////////////////
std::vector<int> latt_size = grid->GlobalDimensions();
std::vector<int> coor(grid->_ndimension,0);
for(int mu=0;mu<Nd;mu++) {
Real TwoPiL = M_PI * 2.0/ latt_size[mu];
LatticeCoordinate(pmu,mu);
pmu = TwoPiL * pmu ;
psq = psq + 4.0*sin(pmu*0.5)*sin(pmu*0.5);
}
Complex psqMax(16.0);
Fp = psqMax*one/psq;
/*
static int once;
if ( once == 0 ) {
std::cout << " Fp " << Fp <<std::endl;
once ++;
}*/
pokeSite(TComplex(1.0),Fp,coor);
dmuAmu_p = dmuAmu_p * Fp;
theFFT.FFT_all_dim(dmuAmu,dmuAmu_p,FFT::backward);
GaugeMat ciadmam(grid);
Complex cialpha(0.0,-alpha);
ciadmam = dmuAmu*cialpha;
SU<Nc>::taExp(ciadmam,g);
Real trG = TensorRemove(sum(trace(g))).real()/vol/Nc;
SU<Nc>::GaugeTransform(U,g);
return trG;
}
static void ExpiAlphaDmuAmu(const std::vector<GaugeMat> &A,GaugeMat &g,Real & alpha, GaugeMat &dmuAmu) {
GridBase *grid = g._grid;
Complex cialpha(0.0,-alpha);
GaugeMat ciadmam(grid);
DmuAmu(A,dmuAmu);
ciadmam = dmuAmu*cialpha;
SU<Nc>::taExp(ciadmam,g);
}
};

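Typical use of the new utility, sketched for the periodic gauge implementation (Umu is an existing LatticeGaugeField; the iteration count and tolerances are placeholders):

    Real alpha = 0.1;   // steepest-descent step size (passed by reference)
    FourierAcceleratedGaugeFixer<PeriodicGimplR>::SteepestDescentGaugeFix(
        Umu,            // gauge field, fixed in place
        alpha,
        10000,          // maxiter
        1.0e-12,        // Omega_tol: gauge-transform convergence
        1.0e-12,        // Phi_tol:   link-trace stationarity
        true);          // true => Fourier-accelerated descent

Convergence is monitored every 20 sweeps via the plaquette, the link trace change Phi, and Omega = 1 - trG, exactly as in the loop above.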
View File

@@ -12,7 +12,4 @@
 #include <Grid/qcd/utils/SUnAdjoint.h>
 #include <Grid/qcd/utils/SUnTwoIndex.h>
 #endif

View File

@ -73,7 +73,7 @@ public:
////////////////////////////////////////////////// //////////////////////////////////////////////////
// trace of directed plaquette oriented in mu,nu plane // trace of directed plaquette oriented in mu,nu plane
////////////////////////////////////////////////// //////////////////////////////////////////////////
static void traceDirPlaquette(LatticeComplex &plaq, static void traceDirPlaquette(ComplexField &plaq,
const std::vector<GaugeMat> &U, const int mu, const std::vector<GaugeMat> &U, const int mu,
const int nu) { const int nu) {
GaugeMat sp(U[0]._grid); GaugeMat sp(U[0]._grid);
@ -83,9 +83,9 @@ public:
////////////////////////////////////////////////// //////////////////////////////////////////////////
// sum over all planes of plaquette // sum over all planes of plaquette
////////////////////////////////////////////////// //////////////////////////////////////////////////
static void sitePlaquette(LatticeComplex &Plaq, static void sitePlaquette(ComplexField &Plaq,
const std::vector<GaugeMat> &U) { const std::vector<GaugeMat> &U) {
LatticeComplex sitePlaq(U[0]._grid); ComplexField sitePlaq(U[0]._grid);
Plaq = zero; Plaq = zero;
for (int mu = 1; mu < Nd; mu++) { for (int mu = 1; mu < Nd; mu++) {
for (int nu = 0; nu < mu; nu++) { for (int nu = 0; nu < mu; nu++) {
@ -104,11 +104,11 @@ public:
U[mu] = PeekIndex<LorentzIndex>(Umu, mu); U[mu] = PeekIndex<LorentzIndex>(Umu, mu);
} }
LatticeComplex Plaq(Umu._grid); ComplexField Plaq(Umu._grid);
sitePlaquette(Plaq, U); sitePlaquette(Plaq, U);
TComplex Tp = sum(Plaq); auto Tp = sum(Plaq);
Complex p = TensorRemove(Tp); auto p = TensorRemove(Tp);
return p.real(); return p.real();
} }
@ -129,15 +129,15 @@ public:
static RealD linkTrace(const GaugeLorentz &Umu) { static RealD linkTrace(const GaugeLorentz &Umu) {
std::vector<GaugeMat> U(Nd, Umu._grid); std::vector<GaugeMat> U(Nd, Umu._grid);
LatticeComplex Tr(Umu._grid); ComplexField Tr(Umu._grid);
Tr = zero; Tr = zero;
for (int mu = 0; mu < Nd; mu++) { for (int mu = 0; mu < Nd; mu++) {
U[mu] = PeekIndex<LorentzIndex>(Umu, mu); U[mu] = PeekIndex<LorentzIndex>(Umu, mu);
Tr = Tr + trace(U[mu]); Tr = Tr + trace(U[mu]);
} }
TComplex Tp = sum(Tr); auto Tp = sum(Tr);
Complex p = TensorRemove(Tp); auto p = TensorRemove(Tp);
double vol = Umu._grid->gSites(); double vol = Umu._grid->gSites();
@ -188,6 +188,32 @@ public:
} }
} }
// For the force term
static void StapleMult(GaugeMat &staple, const GaugeLorentz &Umu, int mu) {
GridBase *grid = Umu._grid;
std::vector<GaugeMat> U(Nd, grid);
for (int d = 0; d < Nd; d++) {
// this operation is taking too much time
U[d] = PeekIndex<LorentzIndex>(Umu, d);
}
staple = zero;
GaugeMat tmp1(grid);
GaugeMat tmp2(grid);
for (int nu = 0; nu < Nd; nu++) {
if (nu != mu) {
// this is ~10% faster than the Staple
tmp1 = Cshift(U[nu], mu, 1);
tmp2 = Cshift(U[mu], nu, 1);
staple += tmp1* adj(U[nu]*tmp2);
tmp2 = adj(U[mu]*tmp1)*U[nu];
staple += Cshift(tmp2, nu, -1);
}
}
staple = U[mu]*staple;
}
////////////////////////////////////////////////// //////////////////////////////////////////////////
// the sum over all staples on each site // the sum over all staples on each site
////////////////////////////////////////////////// //////////////////////////////////////////////////
@ -200,7 +226,6 @@ public:
U[d] = PeekIndex<LorentzIndex>(Umu, d); U[d] = PeekIndex<LorentzIndex>(Umu, d);
} }
staple = zero; staple = zero;
GaugeMat tmp(grid);
for (int nu = 0; nu < Nd; nu++) { for (int nu = 0; nu < Nd; nu++) {
@ -227,6 +252,7 @@ public:
// |__ // |__
// //
// //
staple += Gimpl::ShiftStaple( staple += Gimpl::ShiftStaple(
Gimpl::CovShiftBackward(U[nu], nu, Gimpl::CovShiftBackward(U[nu], nu,
Gimpl::CovShiftBackward(U[mu], mu, U[nu])), mu); Gimpl::CovShiftBackward(U[mu], mu, U[nu])), mu);
@ -289,8 +315,7 @@ public:
// //
staple = Gimpl::ShiftStaple( staple = Gimpl::ShiftStaple(
Gimpl::CovShiftBackward(U[nu], nu, Gimpl::CovShiftBackward(U[nu], nu,
Gimpl::CovShiftBackward(U[mu], mu, U[nu])), Gimpl::CovShiftBackward(U[mu], mu, U[nu])), mu);
mu);
} }
} }
@ -307,10 +332,10 @@ public:
GaugeMat Vup(Umu._grid), Vdn(Umu._grid); GaugeMat Vup(Umu._grid), Vdn(Umu._grid);
StapleUpper(Vup, Umu, mu, nu); StapleUpper(Vup, Umu, mu, nu);
StapleLower(Vdn, Umu, mu, nu); StapleLower(Vdn, Umu, mu, nu);
GaugeMat v = adj(Vup) - adj(Vdn); GaugeMat v = Vup - Vdn;
GaugeMat u = PeekIndex<LorentzIndex>(Umu, mu); // some redundant copies GaugeMat u = PeekIndex<LorentzIndex>(Umu, mu); // some redundant copies
GaugeMat vu = v*u; GaugeMat vu = v*u;
FS = 0.25*Ta(u*v + Cshift(vu, mu, +1)); FS = 0.25*Ta(u*v + Cshift(vu, mu, -1));
} }
static Real TopologicalCharge(GaugeLorentz &U){ static Real TopologicalCharge(GaugeLorentz &U){
@ -330,8 +355,8 @@ public:
double coeff = 8.0/(32.0*M_PI*M_PI); double coeff = 8.0/(32.0*M_PI*M_PI);
LatticeComplex qfield = coeff*trace(Bx*Ex + By*Ey + Bz*Ez); ComplexField qfield = coeff*trace(Bx*Ex + By*Ey + Bz*Ez);
TComplex Tq = sum(qfield); auto Tq = sum(qfield);
return TensorRemove(Tq).real(); return TensorRemove(Tq).real();
} }
@ -350,16 +375,16 @@ public:
adj(Gimpl::CovShiftForward( adj(Gimpl::CovShiftForward(
U[nu], nu, Gimpl::CovShiftForward(U[nu], nu, U[mu]))); U[nu], nu, Gimpl::CovShiftForward(U[nu], nu, U[mu])));
} }
static void traceDirRectangle(LatticeComplex &rect, static void traceDirRectangle(ComplexField &rect,
const std::vector<GaugeMat> &U, const int mu, const std::vector<GaugeMat> &U, const int mu,
const int nu) { const int nu) {
GaugeMat sp(U[0]._grid); GaugeMat sp(U[0]._grid);
dirRectangle(sp, U, mu, nu); dirRectangle(sp, U, mu, nu);
rect = trace(sp); rect = trace(sp);
} }
static void siteRectangle(LatticeComplex &Rect, static void siteRectangle(ComplexField &Rect,
const std::vector<GaugeMat> &U) { const std::vector<GaugeMat> &U) {
LatticeComplex siteRect(U[0]._grid); ComplexField siteRect(U[0]._grid);
Rect = zero; Rect = zero;
for (int mu = 1; mu < Nd; mu++) { for (int mu = 1; mu < Nd; mu++) {
for (int nu = 0; nu < mu; nu++) { for (int nu = 0; nu < mu; nu++) {
@ -379,12 +404,12 @@ public:
U[mu] = PeekIndex<LorentzIndex>(Umu, mu); U[mu] = PeekIndex<LorentzIndex>(Umu, mu);
} }
LatticeComplex Rect(Umu._grid); ComplexField Rect(Umu._grid);
siteRectangle(Rect, U); siteRectangle(Rect, U);
TComplex Tp = sum(Rect); auto Tp = sum(Rect);
Complex p = TensorRemove(Tp); auto p = TensorRemove(Tp);
return p.real(); return p.real();
} }
////////////////////////////////////////////////// //////////////////////////////////////////////////

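Reading off the Cshift calls, the quantity StapleMult assembles (link times summed staples, before the caller applies the trace-antihermitian projection) is

    S_\mu(x) \;=\; U_\mu(x)\sum_{\nu\neq\mu}\Big[\,
        U_\nu(x{+}\hat\mu)\,U_\mu^\dagger(x{+}\hat\nu)\,U_\nu^\dagger(x)
      \;+\; U_\nu^\dagger(x{+}\hat\mu{-}\hat\nu)\,U_\mu^\dagger(x{-}\hat\nu)\,U_\nu(x{-}\hat\nu)\,\Big]

so the Wilson force in the action file earlier reduces to dSdU_mu = Ta(S_mu) * beta/(2 Nc), with one fewer PeekIndex and one fewer full matrix multiply than the old Staple-then-multiply path.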
View File

@@ -115,6 +115,7 @@ THE SOFTWARE.
 #define GRID_MACRO_WRITE_MEMBER(A,B) Grid::write(WR,#B,obj. B);

 #define GRID_SERIALIZABLE_CLASS_MEMBERS(cname,...)\
+  std::string SerialisableClassName(void) {return std::string(#cname);} \
   GRID_MACRO_EVAL(GRID_MACRO_MAP(GRID_MACRO_MEMBER,__VA_ARGS__))\
   template <typename T>\
   static inline void write(Writer<T> &WR,const std::string &s, const cname &obj){ \

View File

@@ -32,17 +32,22 @@ using namespace Grid;
 using namespace std;

 // Writer implementation ///////////////////////////////////////////////////////
-XmlWriter::XmlWriter(const string &fileName)
-: fileName_(fileName)
+XmlWriter::XmlWriter(const string &fileName, string toplev) : fileName_(fileName)
 {
+  if ( toplev == std::string("") ) {
+    node_=doc_;
+  } else {
     node_=doc_.append_child();
-    node_.set_name("grid");
+    node_.set_name(toplev.c_str());
+  }
 }

 XmlWriter::~XmlWriter(void)
 {
+  if ( fileName_ != std::string("") ) {
     doc_.save_file(fileName_.c_str(), "  ");
+  }
 }

 void XmlWriter::push(const string &s)
 {
@@ -53,21 +58,44 @@ void XmlWriter::pop(void)
 {
   node_ = node_.parent();
 }
+std::string XmlWriter::XmlString(void)
+{
+  std::ostringstream oss;
+  doc_.save(oss);
+  return oss.str();
+}

-// Reader implementation ///////////////////////////////////////////////////////
-XmlReader::XmlReader(const string &fileName)
-: fileName_(fileName)
+XmlReader::XmlReader(const char *xmlstring, string toplev) : fileName_("")
 {
-  pugi::xml_parse_result result = doc_.load_file(fileName_.c_str());
-
-  if ( !result )
-  {
+  pugi::xml_parse_result result;
+  result = doc_.load_string(xmlstring);
+  if ( !result ) {
     cerr << "XML error description: " << result.description() << "\n";
     cerr << "XML error offset     : " << result.offset << "\n";
     abort();
   }
+  if ( toplev == std::string("") ) {
+    node_ = doc_;
+  } else {
+    node_ = doc_.child(toplev.c_str());
+  }
+}

-  node_ = doc_.child("grid");
+// Reader implementation ///////////////////////////////////////////////////////
+XmlReader::XmlReader(const string &fileName, string toplev) : fileName_(fileName)
+{
+  pugi::xml_parse_result result;
+  result = doc_.load_file(fileName_.c_str());
+  if ( !result ) {
+    cerr << "XML error description: " << result.description() << "\n";
+    cerr << "XML error offset     : " << result.offset << "\n";
+    abort();
+  }
+  if ( toplev == std::string("") ) {
+    node_ = doc_;
+  } else {
+    node_ = doc_.child(toplev.c_str());
+  }
 }

 bool XmlReader::push(const string &s)

View File

@@ -45,9 +45,8 @@ namespace Grid
   class XmlWriter: public Writer<XmlWriter>
   {
   public:
-    XmlWriter(const std::string &fileName);
+    XmlWriter(const std::string &fileName, std::string toplev = std::string("grid") );
     virtual ~XmlWriter(void);
     void push(const std::string &s);
     void pop(void);
@@ -55,6 +54,7 @@ namespace Grid
     void writeDefault(const std::string &s, const U &x);
     template <typename U>
     void writeDefault(const std::string &s, const std::vector<U> &x);
+    std::string XmlString(void);
   private:
     pugi::xml_document doc_;
     pugi::xml_node     node_;
@@ -64,7 +64,8 @@ namespace Grid
   class XmlReader: public Reader<XmlReader>
   {
   public:
-    XmlReader(const std::string &fileName);
+    XmlReader(const char *xmlstring, std::string toplev = std::string("grid") );
+    XmlReader(const std::string &fileName, std::string toplev = std::string("grid") );
     virtual ~XmlReader(void) = default;
     bool push(const std::string &s);
     void pop(void);
@@ -118,7 +119,7 @@ namespace Grid
     std::string buf;
     readDefault(s, buf);
-    std::cout << s << "  " << buf << std::endl;
+    //    std::cout << s << "  " << buf << std::endl;
     fromString(output, buf);
   }

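A sketch of the round trip the new hooks enable, for any Serializable object obj ("params" is a made-up top-level node name, and obj2 a placeholder for the destination object):

    XmlWriter WR("", "params");        // empty fileName_: nothing saved on destruction
    write(WR, "obj", obj);
    std::string xml = WR.XmlString();  // serialise to an in-memory string

    XmlReader RD(xml.c_str(), "params");
    decltype(obj) obj2;
    read(RD, "obj", obj2);             // parse straight from the string

In-memory XML like this is what lets the SciDAC/ILDG records above carry serialised metadata inside file records rather than on disk.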
View File

@@ -281,8 +281,8 @@ namespace Optimization {
   struct PrecisionChange {
     static inline vech StoH (const vecf &a,const vecf &b) {
-#ifdef USE_FP16
       vech ret;
+#ifdef USE_FP16
       vech *ha = (vech *)&a;
       vech *hb = (vech *)&b;
       const int nf = W<float>::r;
@@ -493,6 +493,8 @@ namespace Optimization {
     return a;
   }

+#undef acc  // EIGEN compatibility
+
 }

View File

@@ -327,18 +327,16 @@ class Grid_simd {
   // provides support
   ///////////////////////////////////////

-#if (__GNUC__ == 5 ) || ( ( __GNUC__ == 6 ) && __GNUC_MINOR__ < 3 )
-#pragma GCC push_options
-#pragma GCC optimize ("O0")
-#endif
   template <class functor>
   friend inline Grid_simd SimdApply(const functor &func, const Grid_simd &v) {
     Grid_simd ret;
     Grid_simd::conv_t conv;
+    Grid_simd::scalar_type s;

     conv.v = v.v;
     for (int i = 0; i < Nsimd(); i++) {
-      conv.s[i] = func(conv.s[i]);
+      s = conv.s[i];
+      conv.s[i] = func(s);
     }
     ret.v = conv.v;
     return ret;
@@ -350,18 +348,18 @@ class Grid_simd {
     Grid_simd ret;
     Grid_simd::conv_t cx;
     Grid_simd::conv_t cy;
+    Grid_simd::scalar_type sx,sy;

     cx.v = x.v;
     cy.v = y.v;
     for (int i = 0; i < Nsimd(); i++) {
-      cx.s[i] = func(cx.s[i], cy.s[i]);
+      sx = cx.s[i];
+      sy = cy.s[i];
+      cx.s[i] = func(sx,sy);
     }
     ret.v = cx.v;
     return ret;
   }
-#if (__GNUC__ == 5 ) || ( ( __GNUC__ == 6 ) && __GNUC_MINOR__ < 3 )
-#pragma GCC pop_options
-#endif
   ///////////////////////
   // Exchange
   // Al Ah , Bl Bh -> Al Bl Ah,Bh
@@ -423,7 +421,6 @@ class Grid_simd {
 };  // end of Grid_simd class definition
-
 inline void permute(ComplexD &y,ComplexD b, int perm) { y=b; }
 inline void permute(ComplexF &y,ComplexF b, int perm) { y=b; }
 inline void permute(RealD &y,RealD b, int perm)       { y=b; }
@@ -754,8 +751,8 @@ inline Grid_simd<std::complex<R>, V> toComplex(const Grid_simd<R, V> &in) {
   conv.v = in.v;
   for (int i = 0; i < Rsimd::Nsimd(); i += 2) {
-    assert(conv.s[i + 1] ==
-           conv.s[i]);  // trap any cases where real was not duplicated
+    assert(conv.s[i + 1] == conv.s[i]);
+    // trap any cases where real was not duplicated
     // indicating the SIMD grids of real and imag assignment did not correctly
     // match
     conv.s[i + 1] = 0.0;  // zero imaginary parts
@@ -833,8 +830,6 @@ inline void precisionChange(vComplexD *out,vComplexF *in,int nvec){ ... }
 inline void precisionChange(vComplexD *out,vComplexH *in,int nvec){ precisionChange((vRealD *)out,(vRealH *)in,nvec);}
 inline void precisionChange(vComplexF *out,vComplexH *in,int nvec){ precisionChange((vRealF *)out,(vRealH *)in,nvec);}
-
-
 // Check our vector types are of an appropriate size.
 #if defined QPX
 static_assert(2*sizeof(SIMD_Ftype) == sizeof(SIMD_Dtype), "SIMD vector lengths incorrect");
@@ -849,21 +844,14 @@ static_assert(sizeof(SIMD_Ftype) == sizeof(SIMD_Itype), "SIMD vector lengths incorrect");
 /////////////////////////////////////////
 template <typename T>
 struct is_simd : public std::false_type {};
-template <>
-struct is_simd<vRealF> : public std::true_type {};
-template <>
-struct is_simd<vRealD> : public std::true_type {};
-template <>
-struct is_simd<vComplexF> : public std::true_type {};
-template <>
-struct is_simd<vComplexD> : public std::true_type {};
-template <>
-struct is_simd<vInteger> : public std::true_type {};
+template <> struct is_simd<vRealF>    : public std::true_type {};
+template <> struct is_simd<vRealD>    : public std::true_type {};
+template <> struct is_simd<vComplexF> : public std::true_type {};
+template <> struct is_simd<vComplexD> : public std::true_type {};
+template <> struct is_simd<vInteger>  : public std::true_type {};

-template <typename T>
-using IfSimd = Invoke<std::enable_if<is_simd<T>::value, int> >;
-template <typename T>
-using IfNotSimd = Invoke<std::enable_if<!is_simd<T>::value, unsigned> >;
+template <typename T> using IfSimd    = Invoke<std::enable_if<is_simd<T>::value, int> >;
+template <typename T> using IfNotSimd = Invoke<std::enable_if<!is_simd<T>::value, unsigned> >;
 }
 #endif

View File

@@ -179,13 +179,6 @@ inline Grid_simd<S, V> div(const Grid_simd<S, V> &r, Integer y) {
 ////////////////////////////////////////////////////////////////////////////
 // Allows us to assign into **conformable** real vectors from complex
 ////////////////////////////////////////////////////////////////////////////
-// template < class S, class V >
-// inline auto ComplexRemove(const Grid_simd<S,V> &c) ->
-//     Grid_simd<Grid_simd<S,V>::Real,V> {
-//   Grid_simd<Grid_simd<S,V>::Real,V> ret;
-//   ret.v = c.v;
-//   return ret;
-// }
 template <class scalar>
 struct AndFunctor {
   scalar operator()(const scalar &x, const scalar &y) const { return x & y; }

View File

@@ -161,6 +161,13 @@ class iScalar {
     return *this;
   }

+  // Convert elements
+  template <class ttype>
+  strong_inline iScalar<vtype> operator=(iScalar<ttype> &&arg) {
+    _internal = arg._internal;
+    return *this;
+  }
+
   friend std::ostream &operator<<(std::ostream &stream,const iScalar<vtype> &o) {
     stream << "S {" << o._internal << "}";
     return stream;

View File

@@ -37,30 +37,108 @@ namespace Grid {
   ///////////////////////////////////////////////
-  template<class vtype> inline iScalar<vtype> Exponentiate(const iScalar<vtype>&r, ComplexD alpha , Integer Nexp = DEFAULT_MAT_EXP)
+  template<class vtype> inline iScalar<vtype> Exponentiate(const iScalar<vtype>&r, RealD alpha , Integer Nexp = DEFAULT_MAT_EXP)
   {
     iScalar<vtype> ret;
     ret._internal = Exponentiate(r._internal, alpha, Nexp);
     return ret;
   }

-  template<class vtype,int N, typename std::enable_if< GridTypeMapper<vtype>::TensorLevel == 0 >::type * =nullptr>
-  inline iMatrix<vtype,N> Exponentiate(const iMatrix<vtype,N> &arg, ComplexD alpha , Integer Nexp = DEFAULT_MAT_EXP )
+  template<class vtype, int N> inline iVector<vtype, N> Exponentiate(const iVector<vtype,N>&r, RealD alpha , Integer Nexp = DEFAULT_MAT_EXP)
   {
-    iMatrix<vtype,N> unit(1.0);
-    iMatrix<vtype,N> temp(unit);
-
-    for(int i=Nexp; i>=1;--i){
-      temp *= alpha/ComplexD(i);
-      temp = unit + temp*arg;
-    }
+    iVector<vtype, N> ret;
+    for (int i = 0; i < N; i++)
+      ret._internal[i] = Exponentiate(r._internal[i], alpha, Nexp);
+    return ret;
+  }

+  // Specialisation: Cayley-Hamilton exponential for SU(3)
+  template<class vtype, typename std::enable_if< GridTypeMapper<vtype>::TensorLevel == 0>::type * =nullptr>
+  inline iMatrix<vtype,3> Exponentiate(const iMatrix<vtype,3> &arg, RealD alpha , Integer Nexp = DEFAULT_MAT_EXP )
+  {
+    // for SU(3) 2x faster than the std implementation using Nexp=12
+    // notice that it actually computes
+    // exp ( input matrix )
+    // the i sign is coming from outside
+    // input matrix is anti-hermitian NOT hermitian
+    typedef iMatrix<vtype,3> mat;
+    typedef iScalar<vtype> scalar;
+    mat unit(1.0);
+    mat temp(unit);
+    const Complex one_over_three = 1.0 / 3.0;
+    const Complex one_over_two = 1.0 / 2.0;
+
+    scalar c0, c1, tmp, c0max, theta, u, w;
+    scalar xi0, u2, w2, cosw;
+    scalar fden, h0, h1, h2;
+    scalar e2iu, emiu, ixi0, qt;
+    scalar f0, f1, f2;
+    scalar unity(1.0);
+
+    mat iQ2 = arg*arg*alpha*alpha;
+    mat iQ3 = arg*iQ2*alpha;
+    // sign in c0 from the conventions on the Ta
+    scalar imQ3, reQ2;
+    imQ3 = imag( trace(iQ3) );
+    reQ2 = real( trace(iQ2) );
+    c0 = -imQ3 * one_over_three;
+    c1 = -reQ2 * one_over_two;
+
+    // Cayley Hamilton checks to machine precision, tested
+    tmp = c1 * one_over_three;
+    c0max = 2.0 * pow(tmp, 1.5);
+
+    theta = acos(c0 / c0max) * one_over_three;
+    u = sqrt(tmp) * cos(theta);
+    w = sqrt(c1) * sin(theta);
+
+    xi0 = sin(w) / w;
+    u2 = u * u;
+    w2 = w * w;
+    cosw = cos(w);
+
+    ixi0 = timesI(xi0);
+    emiu = cos(u) - timesI(sin(u));
+    e2iu = cos(2.0 * u) + timesI(sin(2.0 * u));
+
+    h0 = e2iu * (u2 - w2) +
+         emiu * ((8.0 * u2 * cosw) + (2.0 * u * (3.0 * u2 + w2) * ixi0));
+    h1 = e2iu * (2.0 * u) - emiu * ((2.0 * u * cosw) - (3.0 * u2 - w2) * ixi0);
+    h2 = e2iu - emiu * (cosw + (3.0 * u) * ixi0);
+
+    fden = unity / (9.0 * u2 - w2);  // reals
+    f0 = h0 * fden;
+    f1 = h1 * fden;
+    f2 = h2 * fden;
+
+    return (f0 * unit + timesMinusI(f1) * arg*alpha - f2 * iQ2);
+  }
+
+  // General exponential
+  template<class vtype,int N, typename std::enable_if< GridTypeMapper<vtype>::TensorLevel == 0 >::type * =nullptr>
+  inline iMatrix<vtype,N> Exponentiate(const iMatrix<vtype,N> &arg, RealD alpha , Integer Nexp = DEFAULT_MAT_EXP )
+  {
+    // notice that it actually computes
+    // exp ( input matrix )
+    // the i sign is coming from outside
+    // input matrix is anti-hermitian NOT hermitian
+    typedef iMatrix<vtype,N> mat;
+    mat unit(1.0);
+    mat temp(unit);
+    for(int i=Nexp; i>=1;--i){
+      temp *= alpha/RealD(i);
+      temp = unit + temp*arg;
+    }
     return temp;
   }
 }
 #endif

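The SU(3) specialisation is the standard Cayley-Hamilton construction (cf. Morningstar and Peardon): a traceless hermitian 3x3 matrix Q satisfies its own cubic characteristic equation, so the exponential of iQ collapses to a quadratic,

    e^{iQ} \;=\; f_0(c_0,c_1)\,\mathbf{1} \;+\; f_1(c_0,c_1)\,(iQ) \;+\; f_2(c_0,c_1)\,(iQ)^2,
    \qquad c_0 = \tfrac{1}{3}\,\mathrm{tr}\,Q^3,\quad c_1 = \tfrac{1}{2}\,\mathrm{tr}\,Q^2,

with u = \sqrt{c_1/3}\,\cos\theta, w = \sqrt{c_1}\,\sin\theta, \theta = \tfrac{1}{3}\arccos(c_0/c_0^{\max}) and c_0^{\max} = 2\,(c_1/3)^{3/2}; the h0, h1, h2 and fden lines in the code are exactly these coefficient functions.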
View File

@@ -47,6 +47,28 @@ template<int Level>
 class TensorIndexRecursion {

  public:
+  ////////////////////////////////////////////////////
+  // Type Queries
+  ////////////////////////////////////////////////////
+  template<class vtype>       static inline int indexRank(const iScalar<vtype> tmp)  { return TensorIndexRecursion<Level-1>::indexRank(tmp._internal); }
+  template<class vtype,int N> static inline int indexRank(const iVector<vtype,N> tmp){ return TensorIndexRecursion<Level-1>::indexRank(tmp._internal[0]); }
+  template<class vtype,int N> static inline int indexRank(const iMatrix<vtype,N> tmp){ return TensorIndexRecursion<Level-1>::indexRank(tmp._internal[0][0]); }
+
+  template<class vtype>       static inline int isScalar(const iScalar<vtype> tmp)  { return TensorIndexRecursion<Level-1>::isScalar(tmp._internal); }
+  template<class vtype,int N> static inline int isScalar(const iVector<vtype,N> tmp){ return TensorIndexRecursion<Level-1>::isScalar(tmp._internal[0]); }
+  template<class vtype,int N> static inline int isScalar(const iMatrix<vtype,N> tmp){ return TensorIndexRecursion<Level-1>::isScalar(tmp._internal[0][0]); }
+
+  template<class vtype>       static inline int isVector(const iScalar<vtype> tmp)  { return TensorIndexRecursion<Level-1>::isVector(tmp._internal); }
+  template<class vtype,int N> static inline int isVector(const iVector<vtype,N> tmp){ return TensorIndexRecursion<Level-1>::isVector(tmp._internal[0]); }
+  template<class vtype,int N> static inline int isVector(const iMatrix<vtype,N> tmp){ return TensorIndexRecursion<Level-1>::isVector(tmp._internal[0][0]); }
+
+  template<class vtype>       static inline int isMatrix(const iScalar<vtype> tmp)  { return TensorIndexRecursion<Level-1>::isMatrix(tmp._internal); }
+  template<class vtype,int N> static inline int isMatrix(const iVector<vtype,N> tmp){ return TensorIndexRecursion<Level-1>::isMatrix(tmp._internal[0]); }
+  template<class vtype,int N> static inline int isMatrix(const iMatrix<vtype,N> tmp){ return TensorIndexRecursion<Level-1>::isMatrix(tmp._internal[0][0]); }
+
+  ////////////////////////////////////////////////////
+  // Trace
+  ////////////////////////////////////////////////////
   template<class vtype>
   static auto traceIndex(const iScalar<vtype> arg) ->  iScalar<decltype(TensorIndexRecursion<Level-1>::traceIndex(arg._internal))>
   {
@@ -215,6 +237,24 @@ class TensorIndexRecursion {
 template<>
 class TensorIndexRecursion<0> {
  public:
+  ////////////////////////////////////////////////////
+  // Type Queries
+  ////////////////////////////////////////////////////
+  template<class vtype>       static inline int indexRank(const iScalar<vtype> tmp)  { return 1; }
+  template<class vtype,int N> static inline int indexRank(const iVector<vtype,N> tmp){ return N; }
+  template<class vtype,int N> static inline int indexRank(const iMatrix<vtype,N> tmp){ return N; }
+
+  template<class vtype>       static inline int isScalar(const iScalar<vtype> tmp)  { return true;}
+  template<class vtype,int N> static inline int isScalar(const iVector<vtype,N> tmp){ return false;}
+  template<class vtype,int N> static inline int isScalar(const iMatrix<vtype,N> tmp){ return false;}
+
+  template<class vtype>       static inline int isVector(const iScalar<vtype> tmp)  { return false;}
+  template<class vtype,int N> static inline int isVector(const iVector<vtype,N> tmp){ return true;}
+  template<class vtype,int N> static inline int isVector(const iMatrix<vtype,N> tmp){ return false;}
+
+  template<class vtype>       static inline int isMatrix(const iScalar<vtype> tmp)  { return false;}
+  template<class vtype,int N> static inline int isMatrix(const iVector<vtype,N> tmp){ return false;}
+  template<class vtype,int N> static inline int isMatrix(const iMatrix<vtype,N> tmp){ return true;}
+
   /////////////////////////////////////////
   // Ends recursion for trace (scalar/vector/matrix)
@@ -302,6 +342,26 @@ class TensorIndexRecursion<0> {
 ////////////////////////////////////////////////////////////////////////////////////////////////////////
 // External wrappers
 ////////////////////////////////////////////////////////////////////////////////////////////////////////
+template<int Level,class vtype> inline int indexRank(void)
+{
+  vtype tmp;
+  return TensorIndexRecursion<Level>::indexRank(tmp);
+}
+template<int Level,class vtype> inline int isScalar(void)
+{
+  vtype tmp;
+  return TensorIndexRecursion<Level>::isScalar(tmp);
+}
+template<int Level,class vtype> inline int isVector(void)
+{
+  vtype tmp;
+  return TensorIndexRecursion<Level>::isVector(tmp);
+}
+template<int Level,class vtype> inline int isMatrix(void)
+{
+  vtype tmp;
+  return TensorIndexRecursion<Level>::isMatrix(tmp);
+}
+
 template<int Level,class vtype> inline auto traceIndex (const vtype &arg) -> RemoveCRV(TensorIndexRecursion<Level>::traceIndex(arg))
 {

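A sketch of how the new wrappers answer structural questions at a given index level (types and index constants from the QCD layer; Level counts from the outermost index of the nested tensor):

    // SpinColourMatrix = scalar(Lorentz) x matrix(spin, Ns) x matrix(colour, Nc)
    assert(  isMatrix<ColourIndex, SpinColourMatrix>() );
    assert(  indexRank<ColourIndex, SpinColourMatrix>() == Nc );
    assert( !isVector<SpinIndex,  SpinColourMatrix>() );

Queries like these are what the SciDAC/ILDG record writers use to describe the tensor structure of a field in its metadata.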
View File

@@ -281,8 +281,8 @@ namespace Grid {
   template<typename T>
   class getPrecision{
   public:
-    typedef typename getVectorType<T>::type vector_obj; //get the vector_obj (i.e. a grid Tensor) if its a Lattice<vobj>, do nothing otherwise (i.e. if fundamental or grid Tensor)
+    //get the vector_obj (i.e. a grid Tensor) if its a Lattice<vobj>, do nothing otherwise (i.e. if fundamental or grid Tensor)
+    typedef typename getVectorType<T>::type vector_obj;
     typedef typename GridTypeMapper<vector_obj>::scalar_type scalar_type; //get the associated scalar type. Works on fundamental and tensor types
     typedef typename GridTypeMapper<scalar_type>::Realified real_scalar_type; //remove any std::complex wrapper, should get us to the fundamental type
tests/IO/Test_ildg_io.cc (new file, 99 lines)

@@ -0,0 +1,99 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./tests/Test_nersc_io.cc
Copyright (C) 2015
Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk>
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
Author: paboyle <paboyle@ph.ed.ac.uk>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#include <Grid/Grid.h>
using namespace std;
using namespace Grid;
using namespace Grid::QCD;
int main (int argc, char ** argv)
{
Grid_init(&argc,&argv);
std::cout <<GridLogMessage<< " main "<<std::endl;
std::vector<int> simd_layout = GridDefaultSimd(4,vComplex::Nsimd());
std::vector<int> mpi_layout = GridDefaultMpi();
//std::vector<int> latt_size ({48,48,48,96});
//std::vector<int> latt_size ({32,32,32,32});
std::vector<int> latt_size ({16,16,16,32});
std::vector<int> clatt_size ({4,4,4,8});
int orthodir=3;
int orthosz =latt_size[orthodir];
GridCartesian Fine(latt_size,simd_layout,mpi_layout);
GridCartesian Coarse(clatt_size,simd_layout,mpi_layout);
GridParallelRNG pRNGa(&Fine);
GridParallelRNG pRNGb(&Fine);
GridSerialRNG sRNGa;
GridSerialRNG sRNGb;
std::cout <<GridLogMessage<< " seeding... "<<std::endl;
pRNGa.SeedFixedIntegers(std::vector<int>({45,12,81,9}));
sRNGa.SeedFixedIntegers(std::vector<int>({45,12,81,9}));
std::cout <<GridLogMessage<< " ...done "<<std::endl;
LatticeGaugeField Umu(&Fine);
LatticeGaugeField Umu_diff(&Fine);
LatticeGaugeField Umu_saved(&Fine);
std::vector<LatticeColourMatrix> U(4,&Fine);
SU3::HotConfiguration(pRNGa,Umu);
FieldMetaData header;
std::cout <<GridLogMessage<<"**************************************"<<std::endl;
std::cout <<GridLogMessage<<"** Writing out ILDG conf *********"<<std::endl;
std::cout <<GridLogMessage<<"**************************************"<<std::endl;
std::string file("./ckpoint_ildg.4000");
IldgWriter _IldgWriter;
_IldgWriter.open(file);
_IldgWriter.writeConfiguration(Umu,4000,std::string("dummy_ildg_LFN"),std::string("dummy_config"));
_IldgWriter.close();
Umu_saved = Umu;
std::cout <<GridLogMessage<<"**************************************"<<std::endl;
std::cout <<GridLogMessage<<"** Reading back ILDG conf *********"<<std::endl;
std::cout <<GridLogMessage<<"**************************************"<<std::endl;
IldgReader _IldgReader;
_IldgReader.open(file);
_IldgReader.readConfiguration(Umu,header);
_IldgReader.close();
Umu_diff = Umu - Umu_saved;
std::cout <<GridLogMessage<< "norm2 Gauge Diff = "<<norm2(Umu_diff)<<std::endl;
Grid_finalize();
}

tests/IO/Test_ildg_read.cc (new file, 115 lines)

@@ -0,0 +1,115 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./tests/Test_nersc_io.cc
Copyright (C) 2015
Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk>
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
Author: paboyle <paboyle@ph.ed.ac.uk>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#include <Grid/Grid.h>
using namespace std;
using namespace Grid;
using namespace Grid::QCD;
int main (int argc, char ** argv)
{
Grid_init(&argc,&argv);
std::vector<int> simd_layout = GridDefaultSimd(4,vComplex::Nsimd());
std::vector<int> mpi_layout = GridDefaultMpi();
std::vector<int> latt_size = GridDefaultLatt();
int orthodir=3;
int orthosz =latt_size[orthodir];
GridCartesian Fine(latt_size,simd_layout,mpi_layout);
LatticeGaugeField Umu(&Fine);
std::vector<LatticeColourMatrix> U(4,&Fine);
FieldMetaData header;
std::string file("./ildg.file");
IldgReader IR;
IR.open(file);
IR.readConfiguration(Umu,header);
IR.close();
for(int mu=0;mu<Nd;mu++){
U[mu] = PeekIndex<LorentzIndex>(Umu,mu);
}
// Painful ; fix syntactical niceness
LatticeComplex LinkTrace(&Fine);
LinkTrace=zero;
for(int mu=0;mu<Nd;mu++){
LinkTrace = LinkTrace + trace(U[mu]);
}
// (1+2+3)=6 = N(N-1)/2 terms
LatticeComplex Plaq(&Fine);
Plaq = zero;
for(int mu=1;mu<Nd;mu++){
for(int nu=0;nu<mu;nu++){
Plaq = Plaq + trace(U[mu]*Cshift(U[nu],mu,1)*adj(Cshift(U[mu],nu,1))*adj(U[nu]));
}
}
double vol = Fine.gSites();
Complex PlaqScale(1.0/vol/6.0/3.0);
std::cout<<GridLogMessage <<"PlaqScale" << PlaqScale<<std::endl;
std::vector<TComplex> Plaq_T(orthosz);
sliceSum(Plaq,Plaq_T,Nd-1);
int Nt = Plaq_T.size();
TComplex Plaq_T_sum;
Plaq_T_sum=zero;
for(int t=0;t<Nt;t++){
Plaq_T_sum = Plaq_T_sum+Plaq_T[t];
Complex Pt=TensorRemove(Plaq_T[t]);
std::cout<<GridLogMessage << "sliced ["<<t<<"]" <<Pt*PlaqScale*Real(Nt)<<std::endl;
}
{
Complex Pt = TensorRemove(Plaq_T_sum);
std::cout<<GridLogMessage << "total " <<Pt*PlaqScale<<std::endl;
}
TComplex Tp = sum(Plaq);
Complex p = TensorRemove(Tp);
std::cout<<GridLogMessage << "calculated plaquettes " <<p*PlaqScale<<std::endl;
Complex LinkTraceScale(1.0/vol/4.0/3.0);
TComplex Tl = sum(LinkTrace);
Complex l = TensorRemove(Tl);
std::cout<<GridLogMessage << "calculated link trace " <<l*LinkTraceScale<<std::endl;
Grid_finalize();
}

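The two scale factors in the test encode the usual counting for Nd = 4, Nc = 3:

    \mathrm{PlaqScale} = \frac{1}{V\cdot 6\cdot 3},\qquad
    6 = \frac{N_d(N_d-1)}{2}\ \text{plaquette planes per site},\qquad
    \mathrm{LinkTraceScale} = \frac{1}{V\cdot 4\cdot 3}\quad(4\ \text{links per site}).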
View File

@@ -38,10 +38,13 @@ int main (int argc, char ** argv)
 {
   Grid_init(&argc,&argv);

+  std::cout <<GridLogMessage<< " main "<<std::endl;
+
   std::vector<int> simd_layout = GridDefaultSimd(4,vComplex::Nsimd());
   std::vector<int> mpi_layout  = GridDefaultMpi();
-  std::vector<int> latt_size  ({16,16,16,16});
+  //std::vector<int> latt_size  ({48,48,48,96});
+  //std::vector<int> latt_size  ({32,32,32,32});
+  std::vector<int> latt_size  ({16,16,16,32});
   std::vector<int> clatt_size  ({4,4,4,8});
   int orthodir=3;
   int orthosz =latt_size[orthodir];
@@ -49,30 +52,32 @@ int main (int argc, char ** argv)
   GridCartesian     Fine  (latt_size,simd_layout,mpi_layout);
   GridCartesian     Coarse(clatt_size,simd_layout,mpi_layout);

   GridParallelRNG   pRNGa(&Fine);
   GridParallelRNG   pRNGb(&Fine);
   GridSerialRNG     sRNGa;
   GridSerialRNG     sRNGb;

+  std::cout <<GridLogMessage<< " seeding... "<<std::endl;
   pRNGa.SeedFixedIntegers(std::vector<int>({45,12,81,9}));
   sRNGa.SeedFixedIntegers(std::vector<int>({45,12,81,9}));
+  std::cout <<GridLogMessage<< " ...done "<<std::endl;

   std::string rfile("./ckpoint_rng.4000");
+  FieldMetaData rngheader;
   NerscIO::writeRNGState(sRNGa,pRNGa,rfile);
-  NerscField rngheader;
   NerscIO::readRNGState (sRNGb,pRNGb,rngheader,rfile);

   LatticeComplex tmpa(&Fine); random(pRNGa,tmpa);
   LatticeComplex tmpb(&Fine); random(pRNGb,tmpb);
   tmpa = tmpa - tmpb;
-  std::cout << " difference between restored randoms and orig "<<norm2( tmpa ) <<" / "<< norm2(tmpb)<<std::endl;
+  std::cout <<GridLogMessage<< " difference between restored randoms and orig "<<norm2( tmpa ) <<" / "<< norm2(tmpb)<<std::endl;

   ComplexD a,b;

   random(sRNGa,a);
   random(sRNGb,b);
-  std::cout << " serial RNG numbers "<<a<<" "<<b<<std::endl;
+  std::cout <<GridLogMessage<< " serial RNG numbers "<<a<<" "<<b<<std::endl;

   LatticeGaugeField Umu(&Fine);
   LatticeGaugeField Umu_diff(&Fine);
@@ -80,15 +85,20 @@ int main (int argc, char ** argv)
   std::vector<LatticeColourMatrix> U(4,&Fine);

-  SU3::ColdConfiguration(pRNGa,Umu);
+  SU3::HotConfiguration(pRNGa,Umu);

-  NerscField header;
+  FieldMetaData header;
   std::string file("./ckpoint_lat.4000");

   int precision32 = 0;
   int tworow      = 0;
   NerscIO::writeConfiguration(Umu,file,tworow,precision32);
+  Umu_saved = Umu;
   NerscIO::readConfiguration(Umu,header,file);
+  Umu_diff  = Umu - Umu_saved;
+  //std::cout << "Umu_save "<<Umu_saved[0]<<std::endl;
+  //std::cout << "Umu_read "<<Umu[0]<<std::endl;
+  std::cout <<GridLogMessage<< "norm2 Gauge Diff = "<<norm2(Umu_diff)<<std::endl;

   for(int mu=0;mu<Nd;mu++){
     U[mu] = PeekIndex<LorentzIndex>(Umu,mu);
@@ -115,7 +125,6 @@ int main (int argc, char ** argv)
 #endif
   double vol = Fine.gSites();
   Complex PlaqScale(1.0/vol/6.0/3.0);
-  std::cout<<GridLogMessage <<"PlaqScale" << PlaqScale<<std::endl;

   std::vector<TComplex> Plaq_T(orthosz);
   sliceSum(Plaq,Plaq_T,Nd-1);
@@ -139,7 +148,6 @@ int main (int argc, char ** argv)
   Complex p = TensorRemove(Tp);
   std::cout<<GridLogMessage << "calculated plaquettes " <<p*PlaqScale<<std::endl;
-
   Complex LinkTraceScale(1.0/vol/4.0/3.0);
   TComplex Tl = sum(LinkTrace);
   Complex l = TensorRemove(Tl);
View File

@@ -50,7 +50,7 @@ int main (int argc, char ** argv)
   LatticeGaugeField Umu(&Fine);
   std::vector<LatticeColourMatrix> U(4,&Fine);

-  NerscField header;
+  FieldMetaData header;
   std::string file("./ckpoint_lat");
   NerscIO::readConfiguration(Umu,header,file);


@@ -31,6 +31,7 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk>
using namespace Grid;
+using namespace Grid::QCD;
GRID_SERIALIZABLE_ENUM(myenum, undef, red, 1, blue, 2, green, 3);


@@ -0,0 +1,110 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./tests/Test_dwf_cg_prec.cc
Copyright (C) 2015
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#include <Grid/Grid.h>
using namespace std;
using namespace Grid;
using namespace Grid::QCD;
template<class d>
struct scal {
d internal;
};
Gamma::Algebra Gmu [] = {
Gamma::Algebra::GammaX,
Gamma::Algebra::GammaY,
Gamma::Algebra::GammaZ,
Gamma::Algebra::GammaT
};
int main (int argc, char ** argv)
{
Grid_init(&argc,&argv);
const int Ls=24;
GridCartesian * UGrid = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(), GridDefaultSimd(Nd,vComplexD::Nsimd()),GridDefaultMpi());
GridRedBlackCartesian * UrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid);
GridCartesian * FGrid = SpaceTimeGrid::makeFiveDimGrid(Ls,UGrid);
GridRedBlackCartesian * FrbGrid = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,UGrid);
GridCartesian * UGrid_f = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(), GridDefaultSimd(Nd,vComplexF::Nsimd()),GridDefaultMpi());
GridRedBlackCartesian * UrbGrid_f = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid_f);
GridCartesian * FGrid_f = SpaceTimeGrid::makeFiveDimGrid(Ls,UGrid_f);
GridRedBlackCartesian * FrbGrid_f = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,UGrid_f);
std::vector<int> seeds4({1,2,3,4});
std::vector<int> seeds5({5,6,7,8});
GridParallelRNG RNG5(FGrid); RNG5.SeedFixedIntegers(seeds5);
GridParallelRNG RNG4(UGrid); RNG4.SeedFixedIntegers(seeds4);
LatticeFermionD src(FGrid); random(RNG5,src);
LatticeFermionD result(FGrid); result=zero;
LatticeGaugeFieldD Umu(UGrid);
LatticeGaugeFieldF Umu_f(UGrid_f);
SU3::HotConfiguration(RNG4,Umu);
precisionChange(Umu_f,Umu);
RealD mass=0.1;
RealD M5=1.8;
DomainWallFermionD Ddwf(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5);
DomainWallFermionFH Ddwf_f(Umu_f,*FGrid_f,*FrbGrid_f,*UGrid_f,*UrbGrid_f,mass,M5);
LatticeFermionD src_o(FrbGrid);
LatticeFermionD result_o(FrbGrid);
LatticeFermionD result_o_2(FrbGrid);
pickCheckerboard(Odd,src_o,src);
result_o.checkerboard = Odd;
result_o = zero;
result_o_2.checkerboard = Odd;
result_o_2 = zero;
SchurDiagMooeeOperator<DomainWallFermionD,LatticeFermionD> HermOpEO(Ddwf);
SchurDiagMooeeOperator<DomainWallFermionFH,LatticeFermionF> HermOpEO_f(Ddwf_f);
std::cout << "Starting mixed CG" << std::endl;
MixedPrecisionConjugateGradient<LatticeFermionD,LatticeFermionF> mCG(1.0e-8, 10000, 50, FrbGrid_f, HermOpEO_f, HermOpEO);
mCG.InnerTolerance = 3.0e-5;
mCG(src_o,result_o);
std::cout << "Starting regular CG" << std::endl;
ConjugateGradient<LatticeFermionD> CG(1.0e-8,10000);
CG(HermOpEO,src_o,result_o_2);
LatticeFermionD diff_o(FrbGrid);
RealD diff = axpy_norm(diff_o, -1.0, result_o, result_o_2);
std::cout << "Diff between mixed and regular CG: " << diff << std::endl;
Grid_finalize();
}
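A hedged reading of the mixed-precision solver set up in this new test; the argument roles in the comments are inferred from the call site, not quoted from the class:

// MixedPrecisionConjugateGradient<FieldD,FieldF>
//   (tol, maxInnerIter, maxOuterIter, SinglePrecGrid, LinOp_f, LinOp_d)  -- assumed roles
MixedPrecisionConjugateGradient<LatticeFermionD,LatticeFermionF>
    mCG(1.0e-8,      // outer, true-residual target in double precision
        10000,       // cap on single precision iterations per cycle
        50,          // cap on outer restart cycles
        FrbGrid_f,   // grid carrying the single precision fields
        HermOpEO_f,  // single precision operator for the inner solves
        HermOpEO);   // double precision operator for the residual checks
mCG.InnerTolerance = 3.0e-5; // where each inner solve is cut off
mCG(src_o,result_o);

The two-level tolerance is the point of the test: cheap single precision sweeps do the bulk of the work, and the double precision operator restores the true residual to 1.0e-8, which the final comparison against the all-double CG then verifies.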


@@ -73,7 +73,7 @@ int main (int argc, char ** argv)
std::vector<LatticeColourMatrix> U(4,&Fine);
-NerscField header;
+FieldMetaData header;
std::string file("./ckpoint_lat.4000");
NerscIO::readConfiguration(Umu,header,file);


@@ -90,7 +90,7 @@ int main (int argc, char ** argv)
std::vector<LatticeColourMatrix> U(4,&Fine);
-NerscField header;
+FieldMetaData header;
std::string file("./ckpoint_lat.4000");
NerscIO::readConfiguration(Umu,header,file);


@@ -28,212 +28,6 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk>
/* END LEGAL */
#include <Grid/Grid.h>
-using namespace Grid;
-using namespace Grid::QCD;
-template <class Gimpl>
-class FourierAcceleratedGaugeFixer : public Gimpl {
-public:
-INHERIT_GIMPL_TYPES(Gimpl);
-typedef typename Gimpl::GaugeLinkField GaugeMat;
-typedef typename Gimpl::GaugeField GaugeLorentz;
-static void GaugeLinkToLieAlgebraField(const std::vector<GaugeMat> &U,std::vector<GaugeMat> &A) {
-for(int mu=0;mu<Nd;mu++){
-// ImplComplex cmi(0.0,-1.0);
-Complex cmi(0.0,-1.0);
-A[mu] = Ta(U[mu]) * cmi;
-}
-}
-static void DmuAmu(const std::vector<GaugeMat> &A,GaugeMat &dmuAmu) {
-dmuAmu=zero;
-for(int mu=0;mu<Nd;mu++){
-dmuAmu = dmuAmu + A[mu] - Cshift(A[mu],mu,-1);
-}
-}
-static void SteepestDescentGaugeFix(GaugeLorentz &Umu,Real & alpha,int maxiter,Real Omega_tol, Real Phi_tol) {
-GridBase *grid = Umu._grid;
-Real org_plaq =WilsonLoops<Gimpl>::avgPlaquette(Umu);
-Real org_link_trace=WilsonLoops<Gimpl>::linkTrace(Umu);
-Real old_trace = org_link_trace;
-Real trG;
-std::vector<GaugeMat> U(Nd,grid);
-GaugeMat dmuAmu(grid);
-for(int i=0;i<maxiter;i++){
-for(int mu=0;mu<Nd;mu++) U[mu]= PeekIndex<LorentzIndex>(Umu,mu);
-//trG = SteepestDescentStep(U,alpha,dmuAmu);
-trG = FourierAccelSteepestDescentStep(U,alpha,dmuAmu);
-for(int mu=0;mu<Nd;mu++) PokeIndex<LorentzIndex>(Umu,U[mu],mu);
-// Monitor progress and convergence test
-// infrequently to minimise cost overhead
-if ( i %20 == 0 ) {
-Real plaq =WilsonLoops<Gimpl>::avgPlaquette(Umu);
-Real link_trace=WilsonLoops<Gimpl>::linkTrace(Umu);
-std::cout << GridLogMessage << " Iteration "<<i<< " plaq= "<<plaq<< " dmuAmu " << norm2(dmuAmu)<< std::endl;
-Real Phi = 1.0 - old_trace / link_trace ;
-Real Omega= 1.0 - trG;
-std::cout << GridLogMessage << " Iteration "<<i<< " Phi= "<<Phi<< " Omega= " << Omega<< " trG " << trG <<std::endl;
-if ( (Omega < Omega_tol) && ( ::fabs(Phi) < Phi_tol) ) {
-std::cout << GridLogMessage << "Converged ! "<<std::endl;
-return;
-}
-old_trace = link_trace;
-}
-}
-};
-static Real SteepestDescentStep(std::vector<GaugeMat> &U,Real & alpha, GaugeMat & dmuAmu) {
-GridBase *grid = U[0]._grid;
-std::vector<GaugeMat> A(Nd,grid);
-GaugeMat g(grid);
-GaugeLinkToLieAlgebraField(U,A);
-ExpiAlphaDmuAmu(A,g,alpha,dmuAmu);
-Real vol = grid->gSites();
-Real trG = TensorRemove(sum(trace(g))).real()/vol/Nc;
-SU<Nc>::GaugeTransform(U,g);
-return trG;
-}
-static Real FourierAccelSteepestDescentStep(std::vector<GaugeMat> &U,Real & alpha, GaugeMat & dmuAmu) {
-GridBase *grid = U[0]._grid;
-Real vol = grid->gSites();
-FFT theFFT((GridCartesian *)grid);
-LatticeComplex Fp(grid);
-LatticeComplex psq(grid); psq=zero;
-LatticeComplex pmu(grid);
-LatticeComplex one(grid); one = Complex(1.0,0.0);
-GaugeMat g(grid);
-GaugeMat dmuAmu_p(grid);
-std::vector<GaugeMat> A(Nd,grid);
-GaugeLinkToLieAlgebraField(U,A);
-DmuAmu(A,dmuAmu);
-theFFT.FFT_all_dim(dmuAmu_p,dmuAmu,FFT::forward);
-//////////////////////////////////
-// Work out Fp = psq_max/ psq...
-//////////////////////////////////
-std::vector<int> latt_size = grid->GlobalDimensions();
-std::vector<int> coor(grid->_ndimension,0);
-for(int mu=0;mu<Nd;mu++) {
-Real TwoPiL = M_PI * 2.0/ latt_size[mu];
-LatticeCoordinate(pmu,mu);
-pmu = TwoPiL * pmu ;
-psq = psq + 4.0*sin(pmu*0.5)*sin(pmu*0.5);
-}
-Complex psqMax(16.0);
-Fp = psqMax*one/psq;
-/*
-static int once;
-if ( once == 0 ) {
-std::cout << " Fp " << Fp <<std::endl;
-once ++;
-}*/
-pokeSite(TComplex(1.0),Fp,coor);
-dmuAmu_p = dmuAmu_p * Fp;
-theFFT.FFT_all_dim(dmuAmu,dmuAmu_p,FFT::backward);
-GaugeMat ciadmam(grid);
-Complex cialpha(0.0,-alpha);
-ciadmam = dmuAmu*cialpha;
-SU<Nc>::taExp(ciadmam,g);
-Real trG = TensorRemove(sum(trace(g))).real()/vol/Nc;
-SU<Nc>::GaugeTransform(U,g);
-return trG;
-}
-static void ExpiAlphaDmuAmu(const std::vector<GaugeMat> &A,GaugeMat &g,Real & alpha, GaugeMat &dmuAmu) {
-GridBase *grid = g._grid;
-Complex cialpha(0.0,-alpha);
-GaugeMat ciadmam(grid);
-DmuAmu(A,dmuAmu);
-ciadmam = dmuAmu*cialpha;
-SU<Nc>::taExp(ciadmam,g);
-}
-/*
-////////////////////////////////////////////////////////////////
-// NB The FT for fields living on links has an extra phase in it
-// Could add these to the FFT class as a later task since this code
-// might be reused elsewhere ????
-////////////////////////////////////////////////////////////////
-static void InverseFourierTransformAmu(FFT &theFFT,const std::vector<GaugeMat> &Ap,std::vector<GaugeMat> &Ax) {
-GridBase * grid = theFFT.Grid();
-std::vector<int> latt_size = grid->GlobalDimensions();
-ComplexField pmu(grid);
-ComplexField pha(grid);
-GaugeMat Apha(grid);
-Complex ci(0.0,1.0);
-for(int mu=0;mu<Nd;mu++){
-Real TwoPiL = M_PI * 2.0/ latt_size[mu];
-LatticeCoordinate(pmu,mu);
-pmu = TwoPiL * pmu ;
-pha = exp(pmu * (0.5 *ci)); // e(ipmu/2) since Amu(x+mu/2)
-Apha = Ap[mu] * pha;
-theFFT.FFT_all_dim(Apha,Ax[mu],FFT::backward);
-}
-}
-static void FourierTransformAmu(FFT & theFFT,const std::vector<GaugeMat> &Ax,std::vector<GaugeMat> &Ap) {
-GridBase * grid = theFFT.Grid();
-std::vector<int> latt_size = grid->GlobalDimensions();
-ComplexField pmu(grid);
-ComplexField pha(grid);
-Complex ci(0.0,1.0);
-// Sign convention for FFTW calls:
-// A(x)= Sum_p e^ipx A(p) / V
-// A(p)= Sum_p e^-ipx A(x)
-for(int mu=0;mu<Nd;mu++){
-Real TwoPiL = M_PI * 2.0/ latt_size[mu];
-LatticeCoordinate(pmu,mu);
-pmu = TwoPiL * pmu ;
-pha = exp(-pmu * (0.5 *ci)); // e(+ipmu/2) since Amu(x+mu/2)
-theFFT.FFT_all_dim(Ax[mu],Ap[mu],FFT::backward);
-Ap[mu] = Ap[mu] * pha;
-}
-}
-*/
-};
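All of the removed lines above are the gauge-fixer implementation this change deletes from the test; the driver below still calls FourierAcceleratedGaugeFixer, so the class has evidently been promoted into the library proper. A hedged gloss of the new call, with argument names taken from the deleted implementation and the final flag's meaning inferred from the rewritten driver:

// SteepestDescentGaugeFix(Umu, alpha, maxiter, Omega_tol, Phi_tol, accel)
// 'accel' appears to choose plain steepest descent (false) or the
// Fourier-accelerated step (true); the tolerances tighten from 1.0e-10
// to 1.0e-12 in the same change.
Real alpha = 0.1;
FourierAcceleratedGaugeFixer<PeriodicGimplR>::SteepestDescentGaugeFix(
    Umu, alpha, 10000, 1.0e-12, 1.0e-12, true);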
int main (int argc, char ** argv)
{
std::vector<int> seeds({1,2,3,4});
@@ -264,22 +58,24 @@ int main (int argc, char ** argv)
std::cout<< "*****************************************************************" <<std::endl;
LatticeGaugeField Umu(&GRID);
+LatticeGaugeField Urnd(&GRID);
LatticeGaugeField Uorg(&GRID);
LatticeColourMatrix g(&GRID); // Gauge xform
SU3::ColdConfiguration(pRNG,Umu); // Unit gauge
Uorg=Umu;
+Urnd=Umu;
-SU3::RandomGaugeTransform(pRNG,Umu,g); // Unit gauge
+SU3::RandomGaugeTransform(pRNG,Urnd,g); // Unit gauge
Real plaq=WilsonLoops<PeriodicGimplR>::avgPlaquette(Umu);
std::cout << " Initial plaquette "<<plaq << std::endl;
Real alpha=0.1;
-FourierAcceleratedGaugeFixer<PeriodicGimplR>::SteepestDescentGaugeFix(Umu,alpha,10000,1.0e-10, 1.0e-10);
+Umu = Urnd;
+FourierAcceleratedGaugeFixer<PeriodicGimplR>::SteepestDescentGaugeFix(Umu,alpha,10000,1.0e-12, 1.0e-12,false);
plaq=WilsonLoops<PeriodicGimplR>::avgPlaquette(Umu);
std::cout << " Final plaquette "<<plaq << std::endl;
@@ -288,14 +84,28 @@ int main (int argc, char ** argv)
std::cout << " Norm Difference "<< norm2(Uorg) << std::endl;
-// std::cout<< "*****************************************************************" <<std::endl;
-// std::cout<< "* Testing Fourier accelerated fixing *" <<std::endl;
-// std::cout<< "*****************************************************************" <<std::endl;
-// std::cout<< "*****************************************************************" <<std::endl;
-// std::cout<< "* Testing non-unit configuration *" <<std::endl;
-// std::cout<< "*****************************************************************" <<std::endl;
+std::cout<< "*****************************************************************" <<std::endl;
+std::cout<< "* Testing Fourier accelerated fixing *" <<std::endl;
+std::cout<< "*****************************************************************" <<std::endl;
+Umu=Urnd;
+FourierAcceleratedGaugeFixer<PeriodicGimplR>::SteepestDescentGaugeFix(Umu,alpha,10000,1.0e-12, 1.0e-12,true);
+plaq=WilsonLoops<PeriodicGimplR>::avgPlaquette(Umu);
+std::cout << " Final plaquette "<<plaq << std::endl;
+std::cout<< "*****************************************************************" <<std::endl;
+std::cout<< "* Testing non-unit configuration *" <<std::endl;
+std::cout<< "*****************************************************************" <<std::endl;
+SU3::HotConfiguration(pRNG,Umu); // Unit gauge
+plaq=WilsonLoops<PeriodicGimplR>::avgPlaquette(Umu);
+std::cout << " Initial plaquette "<<plaq << std::endl;
+FourierAcceleratedGaugeFixer<PeriodicGimplR>::SteepestDescentGaugeFix(Umu,alpha,10000,1.0e-12, 1.0e-12,true);
+plaq=WilsonLoops<PeriodicGimplR>::avgPlaquette(Umu);
+std::cout << " Final plaquette "<<plaq << std::endl;
Grid_finalize();


@@ -336,7 +336,7 @@ int main(int argc, char **argv) {
std::cout << GridLogMessage << "norm cMmat : " << norm2(cMat)
<< std::endl;
-cMat = expMat(cMat, ComplexD(1.0, 0.0));
+cMat = expMat(cMat,1.0);// ComplexD(1.0, 0.0));
std::cout << GridLogMessage << "norm expMat: " << norm2(cMat)
<< std::endl;
peekSite(cm, cMat, mysite);


@@ -67,7 +67,7 @@ int main (int argc, char ** argv)
LatticeFermion err(FGrid);
LatticeGaugeField Umu(UGrid);
-NerscField header;
+FieldMetaData header;
std::string file("./ckpoint_lat.400");
NerscIO::readConfiguration(Umu,header,file);


@@ -133,8 +133,8 @@ int main (int argc, char ** argv)
int Nconv;
RealD eresid = 1.0e-6;
-ImplicitlyRestartedLanczos<LatticeComplex> IRL(HermOp,X,Nk,Nm,eresid,Nit);
-ImplicitlyRestartedLanczos<LatticeComplex> ChebyIRL(HermOp,Cheby,Nk,Nm,eresid,Nit);
+ImplicitlyRestartedLanczos<LatticeComplex> IRL(HermOp,X,Nk,Nk,Nm,eresid,Nit);
+ImplicitlyRestartedLanczos<LatticeComplex> ChebyIRL(HermOp,Cheby,Nk,Nk,Nm,eresid,Nit);
LatticeComplex src(grid); gaussian(RNG,src);
{


@@ -139,7 +139,7 @@ int main (int argc, char ** argv)
}
-Complex dSpred = sum(dS);
+ComplexD dSpred = sum(dS);
std::cout << GridLogMessage << " S "<<S<<std::endl;
std::cout << GridLogMessage << " Sprime "<<Sprime<<std::endl;


@@ -150,7 +150,7 @@ int main (int argc, char ** argv)
}
-Complex dSpred = sum(dS);
+ComplexD dSpred = sum(dS);
std::cout << GridLogMessage << " S "<<S<<std::endl;
std::cout << GridLogMessage << " Sprime "<<Sprime<<std::endl;


@@ -194,9 +194,9 @@ int main (int argc, char ** argv)
}
-Complex dSpred = sum(dS);
-Complex dSm = sum(dSmom);
-Complex dSm2 = sum(dSmom2);
+ComplexD dSpred = sum(dS);
+ComplexD dSm = sum(dSmom);
+ComplexD dSm2 = sum(dSmom2);
std::cout << GridLogMessage <<"Initial mom hamiltonian is "<< Hmom <<std::endl;


@@ -113,7 +113,7 @@ int main (int argc, char ** argv)
dS = dS - trace(mommu*UdSdUmu)*dt*2.0;
}
-Complex dSpred = sum(dS);
+ComplexD dSpred = sum(dS);
std::cout << GridLogMessage << " S "<<S<<std::endl;
std::cout << GridLogMessage << " Sprime "<<Sprime<<std::endl;


@@ -143,7 +143,7 @@ int main (int argc, char ** argv)
dS = dS+trace(mommu*forcemu)*dt;
}
-Complex dSpred = sum(dS);
+ComplexD dSpred = sum(dS);
// From TwoFlavourPseudoFermion:
//////////////////////////////////////////////////////


@@ -143,7 +143,7 @@ int main (int argc, char ** argv)
dS = dS+trace(mommu*forcemu)*dt;
}
-Complex dSpred = sum(dS);
+ComplexD dSpred = sum(dS);
std::cout << GridLogMessage << " S "<<S<<std::endl;
std::cout << GridLogMessage << " Sprime "<<Sprime<<std::endl;


@@ -128,7 +128,7 @@ int main (int argc, char ** argv)
dS = dS + trace(mommu*UdSdUmu)*dt*2.0;
}
-Complex dSpred = sum(dS);
+ComplexD dSpred = sum(dS);
std::cout << GridLogMessage << " S "<<S<<std::endl;
std::cout << GridLogMessage << " Sprime "<<Sprime<<std::endl;


@@ -141,7 +141,7 @@ int main (int argc, char ** argv)
}
-Complex dSpred = sum(dS);
+ComplexD dSpred = sum(dS);
std::cout << GridLogMessage << " -- S "<<S<<std::endl;
std::cout << GridLogMessage << " -- Sprime "<<Sprime<<std::endl;


@@ -141,7 +141,7 @@ int main (int argc, char ** argv)
}
-Complex dSpred = sum(dS);
+ComplexD dSpred = sum(dS);
std::cout << GridLogMessage << " S "<<S<<std::endl;
std::cout << GridLogMessage << " Sprime "<<Sprime<<std::endl;


@@ -112,7 +112,7 @@ int main (int argc, char ** argv)
dS = dS - trace(mommu*UdSdUmu)*dt*2.0;
}
-Complex dSpred = sum(dS);
+ComplexD dSpred = sum(dS);
std::cout << GridLogMessage << " S "<<S<<std::endl;
std::cout << GridLogMessage << " Sprime "<<Sprime<<std::endl;


@@ -178,9 +178,9 @@ int main (int argc, char ** argv)
}
-Complex dSpred = sum(dS);
-Complex dSm = sum(dSmom);
-Complex dSm2 = sum(dSmom2);
+ComplexD dSpred = sum(dS);
+ComplexD dSm = sum(dSmom);
+ComplexD dSm2 = sum(dSmom2);
std::cout << GridLogMessage <<"Initial mom hamiltonian is "<< Hmom <<std::endl;


@@ -155,7 +155,7 @@ int main (int argc, char ** argv)
}
-Complex dSpred = sum(dS);
+ComplexD dSpred = sum(dS);
std::cout << GridLogMessage << " S "<<S<<std::endl;
std::cout << GridLogMessage << " Sprime "<<Sprime<<std::endl;


@@ -61,6 +61,10 @@ int main(int argc, char *argv[])
// gauge field
application.createModule<MGauge::Unit>("gauge");
+// set fermion boundary conditions to be periodic space, antiperiodic time.
+std::string boundary = "1 1 1 -1";
for (unsigned int i = 0; i < flavour.size(); ++i)
{
// actions
@@ -69,6 +73,7 @@ int main(int argc, char *argv[])
actionPar.Ls = 12;
actionPar.M5 = 1.8;
actionPar.mass = mass[i];
+actionPar.boundary = boundary;
application.createModule<MAction::DWF>("DWF_" + flavour[i], actionPar);
// solvers
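The same four-phase boundary string is threaded into the next two tests as well. A hedged sketch of what the new field expresses, one phase per direction; the Par type and the module name "DWF_l" are illustrative assumptions following the hunk, not quotations from Hadrons:

// +1 = periodic, -1 = antiperiodic; order is x y z t.
std::string boundary = "1 1 1 -1"; // periodic in space, antiperiodic in time
MAction::DWF::Par actionPar;       // assumed Par type of the DWF module
actionPar.Ls       = 12;
actionPar.M5       = 1.8;
actionPar.mass     = 0.01;         // illustrative value
actionPar.boundary = boundary;     // the parameter this change introduces
application.createModule<MAction::DWF>("DWF_l", actionPar);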


@@ -98,6 +98,10 @@ int main(int argc, char *argv[])
gaugePar.file = configStem;
application.createModule<MGauge::Load>("gauge", gaugePar);
}
+// set fermion boundary conditions to be periodic space, antiperiodic time.
+std::string boundary = "1 1 1 -1";
for (unsigned int i = 0; i < flavour.size(); ++i)
{
// actions
@@ -106,6 +110,7 @@ int main(int argc, char *argv[])
actionPar.Ls = 16;
actionPar.M5 = 1.8;
actionPar.mass = mass[i];
+actionPar.boundary = boundary;
application.createModule<MAction::DWF>("DWF_" + flavour[i], actionPar);
// solvers


@@ -63,6 +63,10 @@ int main(int argc, char *argv[])
MSource::Point::Par ptPar;
ptPar.position = "0 0 0 0";
application.createModule<MSource::Point>("pt", ptPar);
+// set fermion boundary conditions to be periodic space, antiperiodic time.
+std::string boundary = "1 1 1 -1";
for (unsigned int i = 0; i < flavour.size(); ++i)
{
// actions
@@ -71,6 +75,7 @@ int main(int argc, char *argv[])
actionPar.Ls = 12;
actionPar.M5 = 1.8;
actionPar.mass = mass[i];
+actionPar.boundary = boundary;
application.createModule<MAction::DWF>("DWF_" + flavour[i], actionPar);
// solvers


@@ -28,6 +28,38 @@ directory
/* END LEGAL */
#include <Grid/Grid.h>
+namespace Grid{
+struct WFParameters: Serializable {
+  GRID_SERIALIZABLE_CLASS_MEMBERS(WFParameters,
+    int, steps,
+    double, step_size,
+    int, meas_interval,
+    double, maxTau); // for the adaptive algorithm
+  template <class ReaderClass >
+  WFParameters(Reader<ReaderClass>& Reader){
+    read(Reader, "WilsonFlow", *this);
+  }
+};
+struct ConfParameters: Serializable {
+  GRID_SERIALIZABLE_CLASS_MEMBERS(ConfParameters,
+    std::string, conf_prefix,
+    std::string, rng_prefix,
+    int, StartConfiguration,
+    int, EndConfiguration,
+    int, Skip);
+  template <class ReaderClass >
+  ConfParameters(Reader<ReaderClass>& Reader){
+    read(Reader, "Configurations", *this);
+  }
+};
+}
int main(int argc, char **argv) {
using namespace Grid;
using namespace Grid::QCD;
@@ -42,22 +74,38 @@ int main(int argc, char **argv) {
GridRedBlackCartesian RBGrid(latt_size, simd_layout, mpi_layout);
std::vector<int> seeds({1, 2, 3, 4, 5});
+GridSerialRNG sRNG;
GridParallelRNG pRNG(&Grid);
pRNG.SeedFixedIntegers(seeds);
LatticeGaugeField Umu(&Grid), Uflow(&Grid);
SU<Nc>::HotConfiguration(pRNG, Umu);
+typedef Grid::JSONReader Serialiser;
+Serialiser Reader("input.json");
+WFParameters WFPar(Reader);
+ConfParameters CPar(Reader);
+CheckpointerParameters CPPar(CPar.conf_prefix, CPar.rng_prefix);
+BinaryHmcCheckpointer<PeriodicGimplR> CPBin(CPPar);
+for (int conf = CPar.StartConfiguration; conf <= CPar.EndConfiguration; conf+= CPar.Skip){
+CPBin.CheckpointRestore(conf, Umu, sRNG, pRNG);
std::cout << std::setprecision(15);
-std::cout << GridLogMessage << "Plaquette: "
+std::cout << GridLogMessage << "Initial plaquette: "
<< WilsonLoops<PeriodicGimplR>::avgPlaquette(Umu) << std::endl;
-WilsonFlow<PeriodicGimplR> WF(200, 0.01);
-WF.smear(Uflow, Umu);
+WilsonFlow<PeriodicGimplR> WF(WFPar.steps, WFPar.step_size, WFPar.meas_interval);
+WF.smear_adaptive(Uflow, Umu, WFPar.maxTau);
RealD WFlow_plaq = WilsonLoops<PeriodicGimplR>::avgPlaquette(Uflow);
-std::cout << GridLogMessage << "Plaquette: "<< WFlow_plaq << std::endl;
+RealD WFlow_TC = WilsonLoops<PeriodicGimplR>::TopologicalCharge(Uflow);
+RealD WFlow_T0 = WF.energyDensityPlaquette(Uflow);
+std::cout << GridLogMessage << "Plaquette "<< conf << " " << WFlow_plaq << std::endl;
+std::cout << GridLogMessage << "T0 "<< conf << " " << WFlow_T0 << std::endl;
+std::cout << GridLogMessage << "TopologicalCharge "<< conf << " " << WFlow_TC << std::endl;
std::cout<< GridLogMessage << " Admissibility check:\n";
const double sp_adm = 0.067; // admissible threshold
@@ -73,6 +121,32 @@ int main(int argc, char **argv) {
std::cout<< GridLogMessage << " (sp_admissible = "<< sp_adm <<")\n";
//std::cout<< GridLogMessage << " sp_admissible - sp_max = "<<sp_adm-sp_max <<"\n";
std::cout<< GridLogMessage << " sp_admissible - sp_ave = "<<sp_adm-sp_ave <<"\n";
+}
Grid_finalize();
} // main
+/*
+Input file example
+JSON
+{
+  "WilsonFlow":{
+    "steps": 200,
+    "step_size": 0.01,
+    "meas_interval": 50,
+    "maxTau": 2.0
+  },
+  "Configurations":{
+    "conf_prefix": "ckpoint_lat",
+    "rng_prefix": "ckpoint_rng",
+    "StartConfiguration": 3000,
+    "EndConfiguration": 3000,
+    "Skip": 5
+  }
+}
+*/
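Tying the example input back to the code: each Serializable struct added at the top of this file pulls its own named record out of the same JSON, so one file drives both the flow parameters and the configuration loop. A minimal sketch of the read path, using only calls that appear in the hunks above:

typedef Grid::JSONReader Serialiser;
Serialiser Reader("input.json");
WFParameters WFPar(Reader);   // reads the "WilsonFlow" record
ConfParameters CPar(Reader);  // reads the "Configurations" record
// WFPar.maxTau then bounds the adaptive flow via WF.smear_adaptive(Uflow, Umu, WFPar.maxTau)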


@@ -516,7 +516,7 @@ int main (int argc, char ** argv)
LatticeColourMatrix U(UGrid);
LatticeColourMatrix zz(UGrid);
-NerscField header;
+FieldMetaData header;
std::string file("./ckpoint_lat.4000");
NerscIO::readConfiguration(Umu,header,file);


@@ -54,7 +54,7 @@ int main (int argc, char ** argv)
GridParallelRNG RNG5rb(FrbGrid); RNG5.SeedFixedIntegers(seeds5);
LatticeGaugeField Umu(UGrid);
-SU3::TepidConfiguration(RNG4, Umu);
+SU3::HotConfiguration(RNG4, Umu);
std::vector<LatticeColourMatrix> U(4,UGrid);
for(int mu=0;mu<Nd;mu++){
@@ -92,16 +92,15 @@ int main (int argc, char ** argv)
std::vector<RealD> eval(Nm);
-FermionField src(FrbGrid); gaussian(RNG5rb,src);
+FermionField src(FrbGrid);
+gaussian(RNG5rb,src);
std::vector<FermionField> evec(Nm,FrbGrid);
for(int i=0;i<1;i++){
-std::cout << i<<" / "<< Nm<< " grid pointer "<<evec[i]._grid<<std::endl;
+std::cout << GridLogMessage <<i<<" / "<< Nm<< " grid pointer "<<evec[i]._grid<<std::endl;
};
int Nconv;
-IRL.calc(eval,evec,
-src,
-Nconv);
+IRL.calc(eval,evec,src,Nconv);
Grid_finalize();


@@ -51,7 +51,7 @@ int main (int argc, char ** argv)
typedef typename ImprovedStaggeredFermion5DR::ComplexField ComplexField;
typename ImprovedStaggeredFermion5DR::ImplParams params;
-const int Ls=4;
+const int Ls=8;
Grid_init(&argc,&argv);
@@ -74,17 +74,19 @@ int main (int argc, char ** argv)
LatticeGaugeField Umu(UGrid); SU3::HotConfiguration(pRNG,Umu);
-RealD mass=0.01;
+RealD mass=0.003;
ImprovedStaggeredFermion5DR Ds(Umu,Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass);
MdagMLinearOperator<ImprovedStaggeredFermion5DR,FermionField> HermOp(Ds);
ConjugateGradient<FermionField> CG(1.0e-8,10000);
-BlockConjugateGradient<FermionField> BCG(1.0e-8,10000);
-MultiRHSConjugateGradient<FermionField> mCG(1.0e-8,10000);
-std::cout << GridLogMessage << "************************************************************************ "<<std::endl;
+int blockDim = 0;
+BlockConjugateGradient<FermionField> BCGrQ(BlockCGrQ,blockDim,1.0e-8,10000);
+BlockConjugateGradient<FermionField> BCG (BlockCG,blockDim,1.0e-8,10000);
+BlockConjugateGradient<FermionField> mCG (CGmultiRHS,blockDim,1.0e-8,10000);
+std::cout << GridLogMessage << "****************************************************************** "<<std::endl;
std::cout << GridLogMessage << " Calling 4d CG "<<std::endl;
-std::cout << GridLogMessage << "************************************************************************ "<<std::endl;
+std::cout << GridLogMessage << "****************************************************************** "<<std::endl;
ImprovedStaggeredFermionR Ds4d(Umu,Umu,*UGrid,*UrbGrid,mass);
MdagMLinearOperator<ImprovedStaggeredFermionR,FermionField> HermOp4d(Ds4d);
FermionField src4d(UGrid); random(pRNG,src4d);
@@ -111,7 +113,7 @@ int main (int argc, char ** argv)
std::cout << GridLogMessage << " Calling Block CG for "<<Ls <<" right hand sides" <<std::endl;
std::cout << GridLogMessage << "************************************************************************ "<<std::endl;
result=zero;
-BCG(HermOp,src,result);
+BCGrQ(HermOp,src,result);
std::cout << GridLogMessage << "************************************************************************ "<<std::endl;
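A closing gloss on the reworked solver interface in this last hunk: BlockConjugateGradient now hosts three algorithms behind one class, selected by the leading enum, with the second argument naming the dimension to block over. blockDim = 0 here presumably addresses the fifth 's' direction in which the Ls right-hand sides are stacked; that reading of blockDim is an assumption, as is the gloss on each variant:

// BlockConjugateGradient<Field>(variant, blockDim, tol, maxIter)
int blockDim = 0;
BlockConjugateGradient<FermionField> BCGrQ(BlockCGrQ,  blockDim,1.0e-8,10000); // rQ variant, per the enum name
BlockConjugateGradient<FermionField> BCG  (BlockCG,    blockDim,1.0e-8,10000); // plain block CG
BlockConjugateGradient<FermionField> mCG  (CGmultiRHS, blockDim,1.0e-8,10000); // independent multi-RHS CG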