
Merge branch 'develop' of https://github.com/paboyle/Grid into merge2

Chulwoo Jung 2018-06-12 19:08:06 -04:00
commit 855b249f57
67 changed files with 5028 additions and 2406 deletions

View File

@@ -1,4 +1,4 @@
-Version : 0.7.0
+Version : 0.8.0
- Clang 3.5 and above, ICPC v16 and above, GCC 6.3 and above recommended
- MPI and MPI3 comms optimisations for KNL and OPA finished

benchmarks/Benchmark_IO.cc Normal file
View File

@@ -0,0 +1,108 @@
#include <Grid/Grid.h>
#ifdef HAVE_LIME
using namespace std;
using namespace Grid;
using namespace Grid::QCD;
#define MSG cout << GridLogMessage
#define SEP \
"============================================================================="
#ifndef BENCH_IO_LMAX
#define BENCH_IO_LMAX 40
#endif
typedef function<void(const string, LatticeFermion &)> WriterFn;
typedef function<void(LatticeFermion &, const string)> ReaderFn;
string filestem(const int l)
{
return "iobench_l" + to_string(l);
}
void limeWrite(const string filestem, LatticeFermion &vec)
{
emptyUserRecord record;
ScidacWriter binWriter(vec._grid->IsBoss());
binWriter.open(filestem + ".bin");
binWriter.writeScidacFieldRecord(vec, record);
binWriter.close();
}
void limeRead(LatticeFermion &vec, const string filestem)
{
emptyUserRecord record;
ScidacReader binReader;
binReader.open(filestem + ".bin");
binReader.readScidacFieldRecord(vec, record);
binReader.close();
}
void writeBenchmark(const int l, const WriterFn &write)
{
auto mpi = GridDefaultMpi();
auto simd = GridDefaultSimd(Nd, vComplex::Nsimd());
vector<int> latt = {l*mpi[0], l*mpi[1], l*mpi[2], l*mpi[3]};
unique_ptr<GridCartesian> gPt(SpaceTimeGrid::makeFourDimGrid(latt, simd, mpi));
GridCartesian *g = gPt.get();
GridParallelRNG rng(g);
LatticeFermion vec(g);
emptyUserRecord record;
ScidacWriter binWriter(g->IsBoss());
cout << "-- Local volume " << l << "^4" << endl;
random(rng, vec);
write(filestem(l), vec);
}
void readBenchmark(const int l, const ReaderFn &read)
{
auto mpi = GridDefaultMpi();
auto simd = GridDefaultSimd(Nd, vComplex::Nsimd());
vector<int> latt = {l*mpi[0], l*mpi[1], l*mpi[2], l*mpi[3]};
unique_ptr<GridCartesian> gPt(SpaceTimeGrid::makeFourDimGrid(latt, simd, mpi));
GridCartesian *g = gPt.get();
LatticeFermion vec(g);
emptyUserRecord record;
ScidacReader binReader;
cout << "-- Local volume " << l << "^4" << endl;
read(vec, filestem(l));
}
int main (int argc, char ** argv)
{
Grid_init(&argc,&argv);
auto simd = GridDefaultSimd(Nd,vComplex::Nsimd());
auto mpi = GridDefaultMpi();
int64_t threads = GridThread::GetThreads();
MSG << "Grid is setup to use " << threads << " threads" << endl;
MSG << SEP << endl;
MSG << "Benchmark Lime write" << endl;
MSG << SEP << endl;
for (int l = 4; l <= BENCH_IO_LMAX; l += 2)
{
writeBenchmark(l, limeWrite);
}
MSG << "Benchmark Lime read" << endl;
MSG << SEP << endl;
for (int l = 4; l <= BENCH_IO_LMAX; l += 2)
{
readBenchmark(l, limeRead);
}
Grid_finalize();
return EXIT_SUCCESS;
}
#else
int main (int argc, char ** argv)
{
return EXIT_SUCCESS;
}
#endif
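The benchmark is deliberately parameterised over its I/O backend: anything matching the WriterFn/ReaderFn signatures can be timed by writeBenchmark/readBenchmark. A minimal sketch of reusing that hook (the "run1_" stem prefix below is illustrative, not part of the commit):

// Any callable with the WriterFn signature plugs into writeBenchmark,
// e.g. a lambda delegating to the limeWrite defined above.
WriterFn prefixedWrite = [](const string stem, LatticeFermion &vec)
{
    limeWrite("run1_" + stem, vec); // hypothetical stem prefix
};
// writeBenchmark(8, prefixedWrite);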

View File

@@ -158,8 +158,10 @@ public:
dbytes=0;
ncomm=0;
-parallel_for(int dir=0;dir<8;dir++){
+#ifdef GRID_OMP
+#pragma omp parallel for num_threads(Grid::CartesianCommunicator::nCommThreads)
+#endif
+for(int dir=0;dir<8;dir++){
double tbytes;
int mu =dir % 4;
@@ -175,9 +177,14 @@ public:
int comm_proc = mpi_layout[mu]-1;
Grid.ShiftedRanks(mu,comm_proc,xmit_to_rank,recv_from_rank);
}
+#ifdef GRID_OMP
+int tid = omp_get_thread_num();
+#else
+int tid = dir;
+#endif
tbytes= Grid.StencilSendToRecvFrom((void *)&xbuf[dir][0], xmit_to_rank,
(void *)&rbuf[dir][0], recv_from_rank,
-bytes,dir);
+bytes,tid);
#ifdef GRID_OMP
#pragma omp atomic

View File

@@ -169,7 +169,11 @@ int main (int argc, char ** argv)
for(int lat=4;lat<=maxlat;lat+=4){
for(int Ls=8;Ls<=8;Ls*=2){
-std::vector<int> latt_size ({lat,lat,lat,lat});
+std::vector<int> latt_size ({lat*mpi_layout[0],
+                             lat*mpi_layout[1],
+                             lat*mpi_layout[2],
+                             lat*mpi_layout[3]});
GridCartesian Grid(latt_size,simd_layout,mpi_layout);
RealD Nrank = Grid._Nprocessors;
@@ -446,7 +450,7 @@ int main (int argc, char ** argv)
}
+#ifdef GRID_OMP
std::cout<<GridLogMessage << "===================================================================================================="<<std::endl;
std::cout<<GridLogMessage << "= Benchmarking threaded STENCIL halo exchange in "<<nmu<<" dimensions"<<std::endl;
std::cout<<GridLogMessage << "===================================================================================================="<<std::endl;
@@ -485,7 +489,8 @@ int main (int argc, char ** argv)
dbytes=0;
ncomm=0;
-parallel_for(int dir=0;dir<8;dir++){
+#pragma omp parallel for num_threads(Grid::CartesianCommunicator::nCommThreads)
+for(int dir=0;dir<8;dir++){
double tbytes;
int mu =dir % 4;
@@ -502,9 +507,9 @@ int main (int argc, char ** argv)
int comm_proc = mpi_layout[mu]-1;
Grid.ShiftedRanks(mu,comm_proc,xmit_to_rank,recv_from_rank);
}
+int tid = omp_get_thread_num();
tbytes= Grid.StencilSendToRecvFrom((void *)&xbuf[dir][0], xmit_to_rank,
-(void *)&rbuf[dir][0], recv_from_rank, bytes,dir);
+(void *)&rbuf[dir][0], recv_from_rank, bytes,tid);
#pragma omp atomic
dbytes+=tbytes;
@@ -532,7 +537,7 @@ int main (int argc, char ** argv)
}
}
+#endif
std::cout<<GridLogMessage << "===================================================================================================="<<std::endl;
std::cout<<GridLogMessage << "= All done; Bye Bye"<<std::endl;
std::cout<<GridLogMessage << "===================================================================================================="<<std::endl;
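Both stencil benchmarks now use the same scheme: the eight halo directions are driven by a plain OpenMP loop over a fixed pool of communication threads, and the thread id rather than the direction indexes the communicator channel passed to StencilSendToRecvFrom. A standalone sketch of that pattern, assuming only OpenMP (the 4 stands in for CartesianCommunicator::nCommThreads):

#include <omp.h>
#include <cstdio>
int main(void)
{
    #pragma omp parallel for num_threads(4)
    for (int dir = 0; dir < 8; dir++) {
        int tid = omp_get_thread_num(); // channel index is per thread, not per direction
        printf("direction %d -> comms channel %d\n", dir, tid);
    }
    return 0;
}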

View File

@@ -55,7 +55,7 @@ int main (int argc, char ** argv)
std::cout<<GridLogMessage << "===================================================================================================="<<std::endl;
std::cout<<GridLogMessage << " L "<<"\t\t"<<"bytes"<<"\t\t\t"<<"GB/s"<<"\t\t"<<"Gflop/s"<<"\t\t seconds"<<std::endl;
std::cout<<GridLogMessage << "----------------------------------------------------------"<<std::endl;
-uint64_t lmax=96;
+uint64_t lmax=64;
#define NLOOP (10*lmax*lmax*lmax*lmax/vol)
for(int lat=8;lat<=lmax;lat+=8){

View File

@@ -35,9 +35,11 @@ using namespace Grid::QCD;
int main (int argc, char ** argv)
{
Grid_init(&argc,&argv);
-#define LMAX (64)
+#define LMAX (32)
+#define LMIN (16)
+#define LINC (4)
-int64_t Nloop=20;
+int64_t Nloop=2000;
std::vector<int> simd_layout = GridDefaultSimd(Nd,vComplex::Nsimd());
std::vector<int> mpi_layout = GridDefaultMpi();
@@ -51,7 +53,7 @@ int main (int argc, char ** argv)
std::cout<<GridLogMessage << " L "<<"\t\t"<<"bytes"<<"\t\t\t"<<"GB/s\t\t GFlop/s"<<std::endl;
std::cout<<GridLogMessage << "----------------------------------------------------------"<<std::endl;
-for(int lat=2;lat<=LMAX;lat+=2){
+for(int lat=LMIN;lat<=LMAX;lat+=LINC){
std::vector<int> latt_size ({lat*mpi_layout[0],lat*mpi_layout[1],lat*mpi_layout[2],lat*mpi_layout[3]});
int64_t vol = latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3];
@@ -83,7 +85,7 @@ int main (int argc, char ** argv)
std::cout<<GridLogMessage << " L "<<"\t\t"<<"bytes"<<"\t\t\t"<<"GB/s\t\t GFlop/s"<<std::endl;
std::cout<<GridLogMessage << "----------------------------------------------------------"<<std::endl;
-for(int lat=2;lat<=LMAX;lat+=2){
+for(int lat=LMIN;lat<=LMAX;lat+=LINC){
std::vector<int> latt_size ({lat*mpi_layout[0],lat*mpi_layout[1],lat*mpi_layout[2],lat*mpi_layout[3]});
int64_t vol = latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3];
@@ -114,7 +116,7 @@ int main (int argc, char ** argv)
std::cout<<GridLogMessage << " L "<<"\t\t"<<"bytes"<<"\t\t\t"<<"GB/s\t\t GFlop/s"<<std::endl;
std::cout<<GridLogMessage << "----------------------------------------------------------"<<std::endl;
-for(int lat=2;lat<=LMAX;lat+=2){
+for(int lat=LMIN;lat<=LMAX;lat+=LINC){
std::vector<int> latt_size ({lat*mpi_layout[0],lat*mpi_layout[1],lat*mpi_layout[2],lat*mpi_layout[3]});
int64_t vol = latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3];
@@ -145,7 +147,7 @@ int main (int argc, char ** argv)
std::cout<<GridLogMessage << " L "<<"\t\t"<<"bytes"<<"\t\t\t"<<"GB/s\t\t GFlop/s"<<std::endl;
std::cout<<GridLogMessage << "----------------------------------------------------------"<<std::endl;
-for(int lat=2;lat<=LMAX;lat+=2){
+for(int lat=LMIN;lat<=LMAX;lat+=LINC){
std::vector<int> latt_size ({lat*mpi_layout[0],lat*mpi_layout[1],lat*mpi_layout[2],lat*mpi_layout[3]});
int64_t vol = latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3];
@@ -165,10 +167,87 @@ int main (int argc, char ** argv)
double time = (stop-start)/Nloop*1000.0;
double bytes=3*vol*Nc*Nc*sizeof(Complex);
-double flops=Nc*Nc*(8+8+8)*vol;
+double flops=Nc*Nc*(6+8+8)*vol;
std::cout<<GridLogMessage<<std::setprecision(3) << lat<<"\t\t"<<bytes<<" \t\t"<<bytes/time<<"\t\t" << flops/time<<std::endl;
}
std::cout<<GridLogMessage << "===================================================================================================="<<std::endl;
std::cout<<GridLogMessage << "= Benchmarking SU3xSU3 CovShiftForward(z,x,y)"<<std::endl;
std::cout<<GridLogMessage << "===================================================================================================="<<std::endl;
std::cout<<GridLogMessage << " L "<<"\t\t"<<"bytes"<<"\t\t\t"<<"GB/s\t\t GFlop/s"<<std::endl;
std::cout<<GridLogMessage << "----------------------------------------------------------"<<std::endl;
for(int lat=LMIN;lat<=LMAX;lat+=LINC){
std::vector<int> latt_size ({lat*mpi_layout[0],lat*mpi_layout[1],lat*mpi_layout[2],lat*mpi_layout[3]});
int64_t vol = latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3];
GridCartesian Grid(latt_size,simd_layout,mpi_layout);
GridParallelRNG pRNG(&Grid); pRNG.SeedFixedIntegers(std::vector<int>({45,12,81,9}));
LatticeColourMatrix z(&Grid); random(pRNG,z);
LatticeColourMatrix x(&Grid); random(pRNG,x);
LatticeColourMatrix y(&Grid); random(pRNG,y);
for(int mu=0;mu<4;mu++){
double start=usecond();
for(int64_t i=0;i<Nloop;i++){
z = PeriodicBC::CovShiftForward(x,mu,y);
}
double stop=usecond();
double time = (stop-start)/Nloop*1000.0;
double bytes=3*vol*Nc*Nc*sizeof(Complex);
double flops=Nc*Nc*(6+8+8)*vol;
std::cout<<GridLogMessage<<std::setprecision(3) << lat<<"\t\t"<<bytes<<" \t\t"<<bytes/time<<"\t\t" << flops/time<<std::endl;
}
}
#if 1
std::cout<<GridLogMessage << "===================================================================================================="<<std::endl;
std::cout<<GridLogMessage << "= Benchmarking SU3xSU3 z= x * Cshift(y)"<<std::endl;
std::cout<<GridLogMessage << "===================================================================================================="<<std::endl;
std::cout<<GridLogMessage << " L "<<"\t\t"<<"bytes"<<"\t\t\t"<<"GB/s\t\t GFlop/s"<<std::endl;
std::cout<<GridLogMessage << "----------------------------------------------------------"<<std::endl;
for(int lat=LMIN;lat<=LMAX;lat+=LINC){
std::vector<int> latt_size ({lat*mpi_layout[0],lat*mpi_layout[1],lat*mpi_layout[2],lat*mpi_layout[3]});
int64_t vol = latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3];
GridCartesian Grid(latt_size,simd_layout,mpi_layout);
GridParallelRNG pRNG(&Grid); pRNG.SeedFixedIntegers(std::vector<int>({45,12,81,9}));
LatticeColourMatrix z(&Grid); random(pRNG,z);
LatticeColourMatrix x(&Grid); random(pRNG,x);
LatticeColourMatrix y(&Grid); random(pRNG,y);
LatticeColourMatrix tmp(&Grid);
for(int mu=0;mu<4;mu++){
double tshift=0;
double tmult =0;
double start=usecond();
for(int64_t i=0;i<Nloop;i++){
tshift-=usecond();
tmp = Cshift(y,mu,-1);
tshift+=usecond();
tmult-=usecond();
z = x*tmp;
tmult+=usecond();
}
double stop=usecond();
double time = (stop-start)/Nloop;
tshift = tshift/Nloop;
tmult = tmult /Nloop;
double bytes=3*vol*Nc*Nc*sizeof(Complex);
double flops=Nc*Nc*(6+8+8)*vol;
std::cout<<GridLogMessage<<std::setprecision(3) << "total us "<<time<<" shift "<<tshift <<" mult "<<tmult<<std::endl;
time = time * 1000; // convert to NS for GB/s
std::cout<<GridLogMessage<<std::setprecision(3) << lat<<"\t\t"<<bytes<<" \t\t"<<bytes/time<<"\t\t" << flops/time<<std::endl;
}
}
#endif
Grid_finalize();
}

View File

@@ -340,7 +340,7 @@ case ${ac_PRECISION} in
esac
###################### Shared memory allocation technique under MPI3 ######################
-AC_ARG_ENABLE([shm],[AC_HELP_STRING([--enable-shm=shmopen|hugetlbfs],
+AC_ARG_ENABLE([shm],[AC_HELP_STRING([--enable-shm=shmopen|shmget|hugetlbfs|shmnone],
[Select SHM allocation technique])],[ac_SHM=${enable_shm}],[ac_SHM=shmopen])
case ${ac_SHM} in
@@ -349,6 +349,14 @@ case ${ac_SHM} in
AC_DEFINE([GRID_MPI3_SHMOPEN],[1],[GRID_MPI3_SHMOPEN] )
;;
+shmget)
+AC_DEFINE([GRID_MPI3_SHMGET],[1],[GRID_MPI3_SHMGET] )
+;;
+shmnone)
+AC_DEFINE([GRID_MPI3_SHM_NONE],[1],[GRID_MPI3_SHM_NONE] )
+;;
hugetlbfs)
AC_DEFINE([GRID_MPI3_SHMMMAP],[1],[GRID_MPI3_SHMMMAP] )
;;
@@ -362,7 +370,7 @@ esac
AC_ARG_ENABLE([shmpath],[AC_HELP_STRING([--enable-shmpath=path],
[Select SHM mmap base path for hugetlbfs])],
[ac_SHMPATH=${enable_shmpath}],
-[ac_SHMPATH=/var/lib/hugetlbfs/pagesize-2MB/])
+[ac_SHMPATH=/var/lib/hugetlbfs/global/pagesize-2MB/])
AC_DEFINE_UNQUOTED([GRID_SHM_PATH],["$ac_SHMPATH"],[Path to a hugetlbfs filesystem for MMAPing])
############### communication type selection
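With these configure.ac additions the shared-memory backend is selected at configure time, for example (values exactly as wired in above; the hugetlbfs path shown is the new default):

./configure --enable-shm=shmget
./configure --enable-shm=shmnone
./configure --enable-shm=hugetlbfs --enable-shmpath=/var/lib/hugetlbfs/global/pagesize-2MB/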

View File

@@ -479,15 +479,13 @@ until convergence
Field B(grid); B.checkerboard = evec[0].checkerboard;
// power of two search pattern; not every evalue in eval2 is assessed.
+int allconv =1;
for(int jj = 1; jj<=Nstop; jj*=2){
int j = Nstop-jj;
RealD e = eval2_copy[j]; // Discard the evalue
basisRotateJ(B,evec,Qt,j,0,Nk,Nm);
-if( _Tester.TestConvergence(j,eresid,B,e,evalMaxApprox) ) {
+if( !_Tester.TestConvergence(j,eresid,B,e,evalMaxApprox) ) {
-if ( j > Nconv ) {
+allconv=0;
-Nconv=j+1;
-jj=Nstop; // Terminate the scan
-}
}
}
// Do evec[0] for good measure
@@ -495,8 +493,10 @@ until convergence
int j=0;
RealD e = eval2_copy[0];
basisRotateJ(B,evec,Qt,j,0,Nk,Nm);
-_Tester.TestConvergence(j,eresid,B,e,evalMaxApprox);
+if( !_Tester.TestConvergence(j,eresid,B,e,evalMaxApprox) ) allconv=0;
}
+if ( allconv ) Nconv = Nstop;
// test if we converged, if so, terminate
std::cout<<GridLogIRL<<" #modes converged: >= "<<Nconv<<"/"<<Nstop<<std::endl;
// if( Nconv>=Nstop || beta_k < betastp){
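The net effect of the change: rather than promoting Nconv to the first converged index found by the power-of-two scan, the scan now only records whether every probed mode (including evec[0]) passed, and Nconv jumps to Nstop only in that case. In outline, with converged(j) as shorthand for the _Tester.TestConvergence call:

int allconv = 1;
for (int jj = 1; jj <= Nstop; jj *= 2) {
    if (!converged(Nstop - jj)) allconv = 0; // any failure vetoes promotion
}
if (!converged(0)) allconv = 0;              // evec[0] checked "for good measure"
if (allconv) Nconv = Nstop;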

View File

@@ -48,6 +48,7 @@ struct LanczosParams : Serializable {
struct LocalCoherenceLanczosParams : Serializable {
public:
GRID_SERIALIZABLE_CLASS_MEMBERS(LocalCoherenceLanczosParams,
+bool, saveEvecs,
bool, doFine,
bool, doFineRead,
bool, doCoarse,

View File

@@ -277,7 +277,9 @@ public:
uint8_t *cp = (uint8_t *)ptr;
if ( ptr ) {
// One touch per 4k page, static OMP loop to catch same loop order
+#ifdef GRID_OMP
#pragma omp parallel for schedule(static)
+#endif
for(size_type n=0;n<bytes;n+=4096){
cp[n]=0;
}

View File

@@ -27,6 +27,7 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk>
/* END LEGAL */
#include <Grid/GridCore.h>
+#include <pwd.h>
namespace Grid {
@@ -113,19 +114,151 @@ void GlobalSharedMemory::Init(Grid_MPI_Comm comm)
assert(WorldNode!=-1);
_ShmSetup=1;
}
+// Gray encode support
-void GlobalSharedMemory::OptimalCommunicator(const std::vector<int> &processors,Grid_MPI_Comm & optimal_comm)
+int BinaryToGray (int binary) {
+int gray = (binary>>1)^binary;
+return gray;
+}
+int Log2Size(int TwoToPower,int MAXLOG2)
{
-////////////////////////////////////////////////////////////////
-// Assert power of two shm_size.
-////////////////////////////////////////////////////////////////
int log2size = -1;
-for(int i=0;i<=MAXLOG2RANKSPERNODE;i++){
+for(int i=0;i<=MAXLOG2;i++){
-if ( (0x1<<i) == WorldShmSize ) {
+if ( (0x1<<i) == TwoToPower ) {
log2size = i;
break;
}
}
+return log2size;
+}
+void GlobalSharedMemory::OptimalCommunicator(const std::vector<int> &processors,Grid_MPI_Comm & optimal_comm)
+{
#undef HYPERCUBE
#ifdef HYPERCUBE
////////////////////////////////////////////////////////////////
// Assert power of two shm_size.
////////////////////////////////////////////////////////////////
int log2size = Log2Size(WorldShmSize,MAXLOG2RANKSPERNODE);
assert(log2size != -1);
////////////////////////////////////////////////////////////////
// Identify the hypercube coordinate of this node using hostname
////////////////////////////////////////////////////////////////
// n runs 0...7 9...16 18...25 27...34 (8*4) 5 bits
// i runs 0..7 3 bits
// r runs 0..3 2 bits
// 2^10 = 1024 nodes
const int maxhdim = 10;
std::vector<int> HyperCubeCoords(maxhdim,0);
std::vector<int> RootHyperCubeCoords(maxhdim,0);
int R;
int I;
int N;
const int namelen = _POSIX_HOST_NAME_MAX;
char name[namelen];
// Parse ICE-XA hostname to get hypercube location
gethostname(name,namelen);
int nscan = sscanf(name,"r%di%dn%d",&R,&I,&N) ;
assert(nscan==3);
int nlo = N%9;
int nhi = N/9;
uint32_t hypercoor = (R<<8)|(I<<5)|(nhi<<3)|nlo ;
uint32_t rootcoor = hypercoor;
//////////////////////////////////////////////////////////////////
// Print debug info
//////////////////////////////////////////////////////////////////
for(int d=0;d<maxhdim;d++){
HyperCubeCoords[d] = (hypercoor>>d)&0x1;
}
std::string hname(name);
std::cout << "hostname "<<hname<<std::endl;
std::cout << "R " << R << " I " << I << " N "<< N
          << " hypercoor 0x"<<std::hex<<hypercoor<<std::dec<<std::endl;
//////////////////////////////////////////////////////////////////
// broadcast node 0's base coordinate for this partition.
//////////////////////////////////////////////////////////////////
MPI_Bcast(&rootcoor, sizeof(rootcoor), MPI_BYTE, 0, WorldComm);
hypercoor=hypercoor-rootcoor;
assert(hypercoor<WorldSize);
assert(hypercoor>=0);
//////////////////////////////////////
// Printing
//////////////////////////////////////
for(int d=0;d<maxhdim;d++){
HyperCubeCoords[d] = (hypercoor>>d)&0x1;
}
////////////////////////////////////////////////////////////////
// Identify subblock of ranks on node spreading across dims
// in a maximally symmetrical way
////////////////////////////////////////////////////////////////
int ndimension = processors.size();
std::vector<int> processor_coor(ndimension);
std::vector<int> WorldDims = processors; std::vector<int> ShmDims (ndimension,1); std::vector<int> NodeDims (ndimension);
std::vector<int> ShmCoor (ndimension); std::vector<int> NodeCoor (ndimension); std::vector<int> WorldCoor(ndimension);
std::vector<int> HyperCoor(ndimension);
int dim = 0;
for(int l2=0;l2<log2size;l2++){
while ( (WorldDims[dim] / ShmDims[dim]) <= 1 ) dim=(dim+1)%ndimension;
ShmDims[dim]*=2;
dim=(dim+1)%ndimension;
}
////////////////////////////////////////////////////////////////
// Establish torus of processes and nodes with sub-blockings
////////////////////////////////////////////////////////////////
for(int d=0;d<ndimension;d++){
NodeDims[d] = WorldDims[d]/ShmDims[d];
}
////////////////////////////////////////////////////////////////
// Map Hcube according to physical lattice
// must partition. Loop over dims and find out who would join.
////////////////////////////////////////////////////////////////
int hcoor = hypercoor;
for(int d=0;d<ndimension;d++){
int bits = Log2Size(NodeDims[d],MAXLOG2RANKSPERNODE);
int msk = (0x1<<bits)-1;
HyperCoor[d]=hcoor & msk;
HyperCoor[d]=BinaryToGray(HyperCoor[d]); // Space filling curve magic
hcoor = hcoor >> bits;
}
////////////////////////////////////////////////////////////////
// Check processor counts match
////////////////////////////////////////////////////////////////
int Nprocessors=1;
for(int i=0;i<ndimension;i++){
Nprocessors*=processors[i];
}
assert(WorldSize==Nprocessors);
////////////////////////////////////////////////////////////////
// Establish mapping between lexico physics coord and WorldRank
////////////////////////////////////////////////////////////////
int rank;
Lexicographic::CoorFromIndexReversed(NodeCoor,WorldNode ,NodeDims);
for(int d=0;d<ndimension;d++) NodeCoor[d]=HyperCoor[d];
Lexicographic::CoorFromIndexReversed(ShmCoor ,WorldShmRank,ShmDims);
for(int d=0;d<ndimension;d++) WorldCoor[d] = NodeCoor[d]*ShmDims[d]+ShmCoor[d];
Lexicographic::IndexFromCoorReversed(WorldCoor,rank,WorldDims);
/////////////////////////////////////////////////////////////////
// Build the new communicator
/////////////////////////////////////////////////////////////////
int ierr= MPI_Comm_split(WorldComm,0,rank,&optimal_comm);
assert(ierr==0);
#else
////////////////////////////////////////////////////////////////
// Assert power of two shm_size.
////////////////////////////////////////////////////////////////
int log2size = Log2Size(WorldShmSize,MAXLOG2RANKSPERNODE);
assert(log2size != -1);
////////////////////////////////////////////////////////////////
@@ -174,7 +307,69 @@ void GlobalSharedMemory::OptimalCommunicator(const std::vector<int> &processors,
/////////////////////////////////////////////////////////////////
int ierr= MPI_Comm_split(WorldComm,0,rank,&optimal_comm);
assert(ierr==0);
+#endif
}
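The Gray encoding is what makes the hypercube mapping above work: BinaryToGray(n) = n ^ (n >> 1), and consecutive integers map to codes differing in exactly one bit, i.e. to nearest-neighbour nodes of the hypercube. A self-contained check of that property:

#include <cstdio>
static int BinaryToGray(int binary) { return (binary >> 1) ^ binary; }
int main(void)
{
    // the XOR of successive Gray codes is always a single set bit
    for (int n = 0; n < 7; n++) {
        int flip = BinaryToGray(n) ^ BinaryToGray(n + 1);
        printf("%d -> %d (flip mask 0x%x)\n", n, BinaryToGray(n), flip);
    }
    return 0;
}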
////////////////////////////////////////////////////////////////////////////////////////////
// SHMGET
////////////////////////////////////////////////////////////////////////////////////////////
#ifdef GRID_MPI3_SHMGET
void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags)
{
std::cout << "SharedMemoryAllocate "<< bytes<< " shmget implementation "<<std::endl;
assert(_ShmSetup==1);
assert(_ShmAlloc==0);
//////////////////////////////////////////////////////////////////////////////////////////////////////////
// allocate the shared windows for our group
//////////////////////////////////////////////////////////////////////////////////////////////////////////
MPI_Barrier(WorldShmComm);
WorldShmCommBufs.resize(WorldShmSize);
std::vector<int> shmids(WorldShmSize);
if ( WorldShmRank == 0 ) {
for(int r=0;r<WorldShmSize;r++){
size_t size = bytes;
key_t key = IPC_PRIVATE;
int flags = IPC_CREAT | SHM_R | SHM_W;
#ifdef SHM_HUGETLB
if (Hugepages) flags|=SHM_HUGETLB;
#endif
if ((shmids[r]= shmget(key,size, flags)) ==-1) {
int errsv = errno;
printf("Errno %d\n",errsv);
printf("key %d\n",key);
printf("size %lld\n",size);
printf("flags %d\n",flags);
perror("shmget");
exit(1);
}
}
}
MPI_Barrier(WorldShmComm);
MPI_Bcast(&shmids[0],WorldShmSize*sizeof(int),MPI_BYTE,0,WorldShmComm);
MPI_Barrier(WorldShmComm);
for(int r=0;r<WorldShmSize;r++){
WorldShmCommBufs[r] = (uint64_t *)shmat(shmids[r], NULL,0);
if (WorldShmCommBufs[r] == (uint64_t *)-1) {
perror("Shared memory attach failure");
shmctl(shmids[r], IPC_RMID, NULL);
exit(2);
}
}
MPI_Barrier(WorldShmComm);
///////////////////////////////////
// Mark for clean up
///////////////////////////////////
for(int r=0;r<WorldShmSize;r++){
shmctl(shmids[r], IPC_RMID,(struct shmid_ds *)NULL);
}
MPI_Barrier(WorldShmComm);
_ShmAlloc=1;
_ShmAllocBytes = bytes;
}
#endif
////////////////////////////////////////////////////////////////////////////////////////////
// Hugetlbfs mapping intended
@@ -182,7 +377,7 @@ void GlobalSharedMemory::OptimalCommunicator(const std::vector<int> &processors,
#ifdef GRID_MPI3_SHMMMAP
void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags)
{
-std::cout << "SharedMemoryAllocate "<< bytes<< " MMAP implementation "<<std::endl;
+std::cout << "SharedMemoryAllocate "<< bytes<< " MMAP implementation "<< GRID_SHM_PATH <<std::endl;
assert(_ShmSetup==1);
assert(_ShmAlloc==0);
//////////////////////////////////////////////////////////////////////////////////////////////////////////
@@ -192,7 +387,7 @@ void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags)
WorldShmCommBufs.resize(WorldShmSize);
////////////////////////////////////////////////////////////////////////////////////////////
-// Hugetlbf and others map filesystems as mappable huge pages
+// Hugetlbfs and others map filesystems as mappable huge pages
////////////////////////////////////////////////////////////////////////////////////////////
char shm_name [NAME_MAX];
for(int r=0;r<WorldShmSize;r++){
@@ -226,6 +421,48 @@ void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags)
};
#endif // MMAP
#ifdef GRID_MPI3_SHM_NONE
void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags)
{
std::cout << "SharedMemoryAllocate "<< bytes<< " MMAP anonymous implementation "<<std::endl;
assert(_ShmSetup==1);
assert(_ShmAlloc==0);
//////////////////////////////////////////////////////////////////////////////////////////////////////////
// allocate the shared windows for our group
//////////////////////////////////////////////////////////////////////////////////////////////////////////
MPI_Barrier(WorldShmComm);
WorldShmCommBufs.resize(WorldShmSize);
////////////////////////////////////////////////////////////////////////////////////////////
// Hugetlbfs and others map filesystems as mappable huge pages
////////////////////////////////////////////////////////////////////////////////////////////
char shm_name [NAME_MAX];
assert(WorldShmSize == 1);
for(int r=0;r<WorldShmSize;r++){
int fd=-1;
int mmap_flag = MAP_SHARED |MAP_ANONYMOUS ;
#ifdef MAP_POPULATE
mmap_flag|=MAP_POPULATE;
#endif
#ifdef MAP_HUGETLB
if ( flags ) mmap_flag |= MAP_HUGETLB;
#endif
void *ptr = (void *) mmap(NULL, bytes, PROT_READ | PROT_WRITE, mmap_flag,fd, 0);
if ( ptr == (void *)MAP_FAILED ) {
printf("mmap %s failed\n",shm_name);
perror("failed mmap"); assert(0);
}
assert(((uint64_t)ptr&0x3F)==0);
close(fd);
WorldShmCommBufs[r] =ptr;
std::cout << "Set WorldShmCommBufs["<<r<<"]="<<ptr<< "("<< bytes<< "bytes)"<<std::endl;
}
_ShmAlloc=1;
_ShmAllocBytes = bytes;
};
#endif // MMAP
#ifdef GRID_MPI3_SHMOPEN
////////////////////////////////////////////////////////////////////////////////////////////
// POSIX SHMOPEN ; as far as I know Linux does not allow EXPLICIT HugePages with this case
@@ -246,7 +483,8 @@ void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags)
size_t size = bytes;
-sprintf(shm_name,"/Grid_mpi3_shm_%d_%d",WorldNode,r);
+struct passwd *pw = getpwuid (getuid());
+sprintf(shm_name,"/Grid_%s_mpi3_shm_%d_%d",pw->pw_name,WorldNode,r);
shm_unlink(shm_name);
int fd=shm_open(shm_name,O_RDWR|O_CREAT,0666);
@@ -281,7 +519,8 @@ void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags)
size_t size = bytes ;
-sprintf(shm_name,"/Grid_mpi3_shm_%d_%d",WorldNode,r);
+struct passwd *pw = getpwuid (getuid());
+sprintf(shm_name,"/Grid_%s_mpi3_shm_%d_%d",pw->pw_name,WorldNode,r);
int fd=shm_open(shm_name,O_RDWR,0666);
if ( fd<0 ) { perror("failed shm_open"); assert(0); }
@@ -299,6 +538,9 @@ void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags)
}
#endif
////////////////////////////////////////////////////////
// Global shared functionality finished
// Now move to per communicator functionality

View File

@@ -45,31 +45,33 @@ Gather_plane_simple (const Lattice<vobj> &rhs,commVector<vobj> &buffer,int dimension,...)
int so=plane*rhs._grid->_ostride[dimension]; // base offset for start of plane
int e1=rhs._grid->_slice_nblock[dimension];
int e2=rhs._grid->_slice_block[dimension];
+int ent = 0;
+static std::vector<std::pair<int,int> > table; table.resize(e1*e2);
int stride=rhs._grid->_slice_stride[dimension];
if ( cbmask == 0x3 ) {
-parallel_for_nest2(int n=0;n<e1;n++){
+for(int n=0;n<e1;n++){
for(int b=0;b<e2;b++){
int o = n*stride;
int bo = n*e2;
-buffer[off+bo+b]=rhs._odata[so+o+b];
+table[ent++] = std::pair<int,int>(off+bo+b,so+o+b);
}
}
} else {
int bo=0;
-std::vector<std::pair<int,int> > table;
for(int n=0;n<e1;n++){
for(int b=0;b<e2;b++){
int o = n*stride;
int ocb=1<<rhs._grid->CheckerBoardFromOindex(o+b);
if ( ocb &cbmask ) {
-table.push_back(std::pair<int,int> (bo++,o+b));
+table[ent++]=std::pair<int,int> (off+bo++,so+o+b);
}
}
}
-parallel_for(int i=0;i<table.size();i++){
-buffer[off+table[i].first]=rhs._odata[so+table[i].second];
}
+parallel_for(int i=0;i<ent;i++){
+buffer[table[i].first]=rhs._odata[table[i].second];
+}
}
@@ -141,32 +143,36 @@ template<class vobj> void Scatter_plane_simple (Lattice<vobj> &rhs,commVector<vo
int e2=rhs._grid->_slice_block[dimension];
int stride=rhs._grid->_slice_stride[dimension];
+static std::vector<std::pair<int,int> > table; table.resize(e1*e2);
+int ent =0;
if ( cbmask ==0x3 ) {
-parallel_for_nest2(int n=0;n<e1;n++){
+for(int n=0;n<e1;n++){
for(int b=0;b<e2;b++){
int o =n*rhs._grid->_slice_stride[dimension];
int bo =n*rhs._grid->_slice_block[dimension];
-rhs._odata[so+o+b]=buffer[bo+b];
+table[ent++] = std::pair<int,int>(so+o+b,bo+b);
}
}
} else {
-std::vector<std::pair<int,int> > table;
int bo=0;
for(int n=0;n<e1;n++){
for(int b=0;b<e2;b++){
int o =n*rhs._grid->_slice_stride[dimension];
int ocb=1<<rhs._grid->CheckerBoardFromOindex(o+b);// Could easily be a table lookup
if ( ocb & cbmask ) {
-table.push_back(std::pair<int,int> (so+o+b,bo++));
+table[ent++]=std::pair<int,int> (so+o+b,bo++);
}
}
}
-parallel_for(int i=0;i<table.size();i++){
+}
// std::cout << "Rcv"<< table[i].first << " " << table[i].second << " " <<buffer[table[i].second]<<std::endl;
+parallel_for(int i=0;i<ent;i++){
rhs._odata[table[i].first]=buffer[table[i].second];
}
}
}
////////////////////////////////////////////////////// //////////////////////////////////////////////////////
// Scatter for when there *is* need to SIMD split // Scatter for when there *is* need to SIMD split
@@ -228,29 +234,32 @@ template<class vobj> void Copy_plane(Lattice<vobj>& lhs,const Lattice<vobj> &rhs
int e1=rhs._grid->_slice_nblock[dimension]; // clearly loop invariant for icpc
int e2=rhs._grid->_slice_block[dimension];
int stride = rhs._grid->_slice_stride[dimension];
+static std::vector<std::pair<int,int> > table; table.resize(e1*e2);
+int ent=0;
if(cbmask == 0x3 ){
-parallel_for_nest2(int n=0;n<e1;n++){
+for(int n=0;n<e1;n++){
for(int b=0;b<e2;b++){
int o =n*stride+b;
-//lhs._odata[lo+o]=rhs._odata[ro+o];
-vstream(lhs._odata[lo+o],rhs._odata[ro+o]);
+table[ent++] = std::pair<int,int>(lo+o,ro+o);
}
}
} else {
-parallel_for_nest2(int n=0;n<e1;n++){
+for(int n=0;n<e1;n++){
for(int b=0;b<e2;b++){
int o =n*stride+b;
int ocb=1<<lhs._grid->CheckerBoardFromOindex(o);
if ( ocb&cbmask ) {
-//lhs._odata[lo+o]=rhs._odata[ro+o];
-vstream(lhs._odata[lo+o],rhs._odata[ro+o]);
+table[ent++] = std::pair<int,int>(lo+o,ro+o);
}
}
}
}
+parallel_for(int i=0;i<ent;i++){
+lhs._odata[table[i].first]=rhs._odata[table[i].second];
+}
}
template<class vobj> void Copy_plane_permute(Lattice<vobj>& lhs,const Lattice<vobj> &rhs, int dimension,int lplane,int rplane,int cbmask,int permute_type) template<class vobj> void Copy_plane_permute(Lattice<vobj>& lhs,const Lattice<vobj> &rhs, int dimension,int lplane,int rplane,int cbmask,int permute_type)
@@ -269,16 +278,28 @@ template<class vobj> void Copy_plane_permute(Lattice<vobj>& lhs,const Lattice<vo
int e2=rhs._grid->_slice_block [dimension];
int stride = rhs._grid->_slice_stride[dimension];
-parallel_for_nest2(int n=0;n<e1;n++){
-for(int b=0;b<e2;b++){
+static std::vector<std::pair<int,int> > table; table.resize(e1*e2);
+int ent=0;
+double t_tab,t_perm;
+if ( cbmask == 0x3 ) {
+for(int n=0;n<e1;n++){
+for(int b=0;b<e2;b++){
+int o =n*stride;
+table[ent++] = std::pair<int,int>(lo+o+b,ro+o+b);
+}}
+} else {
+for(int n=0;n<e1;n++){
+for(int b=0;b<e2;b++){
int o =n*stride;
int ocb=1<<lhs._grid->CheckerBoardFromOindex(o+b);
-if ( ocb&cbmask ) {
-permute(lhs._odata[lo+o+b],rhs._odata[ro+o+b],permute_type);
+if ( ocb&cbmask ) table[ent++] = std::pair<int,int>(lo+o+b,ro+o+b);
+}}
}
-}}
+parallel_for(int i=0;i<ent;i++){
+permute(lhs._odata[table[i].first],rhs._odata[table[i].second],permute_type);
+}
}
////////////////////////////////////////////////////// //////////////////////////////////////////////////////
@@ -291,6 +312,8 @@ template<class vobj> void Cshift_local(Lattice<vobj>& ret,const Lattice<vobj> &r
sshift[0] = rhs._grid->CheckerBoardShiftForCB(rhs.checkerboard,dimension,shift,Even);
sshift[1] = rhs._grid->CheckerBoardShiftForCB(rhs.checkerboard,dimension,shift,Odd);
+double t_local;
if ( sshift[0] == sshift[1] ) {
Cshift_local(ret,rhs,dimension,shift,0x3);
} else {
@@ -299,7 +322,7 @@ template<class vobj> void Cshift_local(Lattice<vobj>& ret,const Lattice<vobj> &r
}
}
-template<class vobj> Lattice<vobj> Cshift_local(Lattice<vobj> &ret,const Lattice<vobj> &rhs,int dimension,int shift,int cbmask)
+template<class vobj> void Cshift_local(Lattice<vobj> &ret,const Lattice<vobj> &rhs,int dimension,int shift,int cbmask)
{
GridBase *grid = rhs._grid;
int fd = grid->_fdimensions[dimension];
@@ -326,10 +349,6 @@ template<class vobj> Lattice<vobj> Cshift_local(Lattice<vobj> &ret,const Lattice
int sshift = grid->CheckerBoardShiftForCB(rhs.checkerboard,dimension,shift,cb);
int sx = (x+sshift)%rd;
-// FIXME : This must change where we have a
-// Rotate slice.
-// Document how this works ; why didn't I do this when I first wrote it...
// wrap is whether sshift > rd.
// num is sshift mod rd.
//
@@ -366,9 +385,7 @@ template<class vobj> Lattice<vobj> Cshift_local(Lattice<vobj> &ret,const Lattice
if ( permute_slice ) Copy_plane_permute(ret,rhs,dimension,x,sx,cbmask,permute_type_dist);
else Copy_plane(ret,rhs,dimension,x,sx,cbmask);
}
-return ret;
}
}
#endif
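All four routines above share one refactoring: the checkerboard branching is paid once, serially, while filling a (destination, source) index table, and the data movement becomes a flat, branch-free loop that is safe to parallelise. The shape of the pattern, stripped of Grid types (a sketch, not the library code):

#include <utility>
#include <vector>
void tableCopy(double *dst, const double *src,
               const std::vector<std::pair<int,int> > &table)
{
    // copy phase: no checkerboard test in the inner body, entries are disjoint
    #pragma omp parallel for
    for (long i = 0; i < (long)table.size(); i++)
        dst[table[i].first] = src[table[i].second];
}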

View File

@@ -54,13 +54,13 @@ template<class vobj> Lattice<vobj> Cshift(const Lattice<vobj> &rhs,int dimension
if ( !comm_dim ) {
-// std::cout << "Cshift_local" <<std::endl;
+//std::cout << "CSHIFT: Cshift_local" <<std::endl;
Cshift_local(ret,rhs,dimension,shift); // Handles checkerboarding
} else if ( splice_dim ) {
-// std::cout << "Cshift_comms_simd" <<std::endl;
+//std::cout << "CSHIFT: Cshift_comms_simd call - splice_dim = " << splice_dim << " shift " << shift << " dimension = " << dimension << std::endl;
Cshift_comms_simd(ret,rhs,dimension,shift);
} else {
-// std::cout << "Cshift_comms" <<std::endl;
+//std::cout << "CSHIFT: Cshift_comms" <<std::endl;
Cshift_comms(ret,rhs,dimension,shift);
}
return ret;
@@ -91,9 +91,12 @@ template<class vobj> void Cshift_comms_simd(Lattice<vobj>& ret,const Lattice<vob
sshift[0] = rhs._grid->CheckerBoardShiftForCB(rhs.checkerboard,dimension,shift,Even);
sshift[1] = rhs._grid->CheckerBoardShiftForCB(rhs.checkerboard,dimension,shift,Odd);
+//std::cout << "Cshift_comms_simd dim "<<dimension<<"cb "<<rhs.checkerboard<<"shift "<<shift<<" sshift " << sshift[0]<<" "<<sshift[1]<<std::endl;
if ( sshift[0] == sshift[1] ) {
+//std::cout << "Single pass Cshift_comms" <<std::endl;
Cshift_comms_simd(ret,rhs,dimension,shift,0x3);
} else {
+//std::cout << "Two pass Cshift_comms" <<std::endl;
Cshift_comms_simd(ret,rhs,dimension,shift,0x1);// if checkerboard is unfavourable take two passes
Cshift_comms_simd(ret,rhs,dimension,shift,0x2);// both with block stride loop iteration
}
@@ -175,6 +178,10 @@ template<class vobj> void Cshift_comms_simd(Lattice<vobj> &ret,const Lattice<vo
int simd_layout = grid->_simd_layout[dimension];
int comm_dim = grid->_processors[dimension] >1 ;
+//std::cout << "Cshift_comms_simd dim "<< dimension << " fd "<<fd<<" rd "<<rd
+// << " ld "<<ld<<" pd " << pd<<" simd_layout "<<simd_layout
+// << " comm_dim " << comm_dim << " cbmask " << cbmask <<std::endl;
assert(comm_dim==1);
assert(simd_layout==2);
assert(shift>=0);

View File

@@ -257,7 +257,40 @@ public:
}
}
Lattice(Lattice&& r){ // move constructor
_grid = r._grid;
checkerboard = r.checkerboard;
_odata=std::move(r._odata);
}
inline Lattice<vobj> & operator = (Lattice<vobj> && r)
{
_grid = r._grid;
checkerboard = r.checkerboard;
_odata =std::move(r._odata);
return *this;
}
inline Lattice<vobj> & operator = (const Lattice<vobj> & r){
_grid = r._grid;
checkerboard = r.checkerboard;
_odata.resize(_grid->oSites());// essential
parallel_for(int ss=0;ss<_grid->oSites();ss++){
_odata[ss]=r._odata[ss];
}
return *this;
}
template<class robj> strong_inline Lattice<vobj> & operator = (const Lattice<robj> & r){
this->checkerboard = r.checkerboard;
conformable(*this,r);
parallel_for(int ss=0;ss<_grid->oSites();ss++){
this->_odata[ss]=r._odata[ss];
}
return *this;
}
virtual ~Lattice(void) = default;
@@ -277,15 +310,6 @@ public:
return *this;
}
-template<class robj> strong_inline Lattice<vobj> & operator = (const Lattice<robj> & r){
-this->checkerboard = r.checkerboard;
-conformable(*this,r);
-parallel_for(int ss=0;ss<_grid->oSites();ss++){
-this->_odata[ss]=r._odata[ss];
-}
-return *this;
-}
// *=,+=,-= operators inherit behaviour from corresponding */+/- operation
template<class T> strong_inline Lattice<vobj> &operator *=(const T &r) {
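What the new move operations buy: a Lattice returned by value can now hand over its _odata storage instead of running the per-site parallel copy in the copy assignment. A sketch, assuming the usual Grid headers and an existing grid pointer:

LatticeColourMatrix makeField(GridCartesian *grid)
{
    LatticeColourMatrix tmp(grid);
    // ... fill tmp ...
    return tmp; // move-eligible: the vector of site data is transferred, not copied
}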

View File

@@ -179,7 +179,7 @@ namespace Grid {
return ret;
}
-#define DECLARE_RELATIONAL(op,functor) \
+#define DECLARE_RELATIONAL_EQ(op,functor) \
template<class vsimd,IfSimd<vsimd> = 0>\
inline vInteger operator op (const vsimd & lhs, const vsimd & rhs)\
{\
@@ -198,11 +198,6 @@ namespace Grid {
typedef typename vsimd::scalar_type scalar;\
return Comparison(functor<scalar,scalar>(),lhs,rhs);\
}\
-template<class vsimd,IfSimd<vsimd> = 0>\
-inline vInteger operator op(const iScalar<vsimd> &lhs,const iScalar<vsimd> &rhs)\
-{ \
-return lhs._internal op rhs._internal; \
-} \
template<class vsimd>\
inline vInteger operator op(const iScalar<vsimd> &lhs,const typename vsimd::scalar_type &rhs) \
{ \
@@ -212,14 +207,21 @@ namespace Grid {
inline vInteger operator op(const typename vsimd::scalar_type &lhs,const iScalar<vsimd> &rhs) \
{ \
return lhs op rhs._internal; \
-}
+} \
+
+#define DECLARE_RELATIONAL(op,functor) \
+DECLARE_RELATIONAL_EQ(op,functor) \
+template<class vsimd>\
+inline vInteger operator op(const iScalar<vsimd> &lhs,const iScalar<vsimd> &rhs)\
+{ \
+return lhs._internal op rhs._internal; \
+}
DECLARE_RELATIONAL(<,slt);
DECLARE_RELATIONAL(<=,sle);
DECLARE_RELATIONAL(>,sgt);
DECLARE_RELATIONAL(>=,sge);
-DECLARE_RELATIONAL(==,seq);
+DECLARE_RELATIONAL_EQ(==,seq);
DECLARE_RELATIONAL(!=,sne);
#undef DECLARE_RELATIONAL

View File

@@ -52,23 +52,5 @@ namespace Grid {
}
};
-// LatticeCoordinate();
-// FIXME for debug; deprecate this; made obscelete by
-template<class vobj> void lex_sites(Lattice<vobj> &l){
-Real *v_ptr = (Real *)&l._odata[0];
-size_t o_len = l._grid->oSites();
-size_t v_len = sizeof(vobj)/sizeof(vRealF);
-size_t vec_len = vRealF::Nsimd();
-for(int i=0;i<o_len;i++){
-for(int j=0;j<v_len;j++){
-for(int vv=0;vv<vec_len;vv+=2){
-v_ptr[i*v_len*vec_len+j*vec_len+vv ]= i+vv*500;
-v_ptr[i*v_len*vec_len+j*vec_len+vv+1]= i+vv*500;
-}
-}}
-}
}
#endif

View File

@@ -599,6 +599,51 @@ unvectorizeToLexOrdArray(std::vector<sobj> &out, const Lattice<vobj> &in)
extract1(in_vobj, out_ptrs, 0);
}
}
template<typename vobj, typename sobj>
typename std::enable_if<isSIMDvectorized<vobj>::value && !isSIMDvectorized<sobj>::value, void>::type
unvectorizeToRevLexOrdArray(std::vector<sobj> &out, const Lattice<vobj> &in)
{
typedef typename vobj::vector_type vtype;
GridBase* in_grid = in._grid;
out.resize(in_grid->lSites());
int ndim = in_grid->Nd();
int in_nsimd = vtype::Nsimd();
std::vector<std::vector<int> > in_icoor(in_nsimd);
for(int lane=0; lane < in_nsimd; lane++){
in_icoor[lane].resize(ndim);
in_grid->iCoorFromIindex(in_icoor[lane], lane);
}
parallel_for(int in_oidx = 0; in_oidx < in_grid->oSites(); in_oidx++){ //loop over outer index
//Assemble vector of pointers to output elements
std::vector<sobj*> out_ptrs(in_nsimd);
std::vector<int> in_ocoor(ndim);
in_grid->oCoorFromOindex(in_ocoor, in_oidx);
std::vector<int> lcoor(in_grid->Nd());
for(int lane=0; lane < in_nsimd; lane++){
for(int mu=0;mu<ndim;mu++)
lcoor[mu] = in_ocoor[mu] + in_grid->_rdimensions[mu]*in_icoor[lane][mu];
int lex;
Lexicographic::IndexFromCoorReversed(lcoor, lex, in_grid->_ldimensions);
out_ptrs[lane] = &out[lex];
}
//Unpack into those ptrs
const vobj & in_vobj = in._odata[in_oidx];
extract1(in_vobj, out_ptrs, 0);
}
}
//Copy SIMD-vectorized lattice to array of scalar objects in lexicographic order
template<typename vobj, typename sobj>
typename std::enable_if<isSIMDvectorized<vobj>::value
@@ -648,10 +693,59 @@ vectorizeFromLexOrdArray( std::vector<sobj> &in, Lattice<vobj> &out)
}
}
template<typename vobj, typename sobj>
typename std::enable_if<isSIMDvectorized<vobj>::value
&& !isSIMDvectorized<sobj>::value, void>::type
vectorizeFromRevLexOrdArray( std::vector<sobj> &in, Lattice<vobj> &out)
{
typedef typename vobj::vector_type vtype;
GridBase* grid = out._grid;
assert(in.size()==grid->lSites());
int ndim = grid->Nd();
int nsimd = vtype::Nsimd();
std::vector<std::vector<int> > icoor(nsimd);
for(int lane=0; lane < nsimd; lane++){
icoor[lane].resize(ndim);
grid->iCoorFromIindex(icoor[lane],lane);
}
parallel_for(uint64_t oidx = 0; oidx < grid->oSites(); oidx++){ //loop over outer index
//Assemble vector of pointers to output elements
std::vector<sobj*> ptrs(nsimd);
std::vector<int> ocoor(ndim);
grid->oCoorFromOindex(ocoor, oidx);
std::vector<int> lcoor(grid->Nd());
for(int lane=0; lane < nsimd; lane++){
for(int mu=0;mu<ndim;mu++){
lcoor[mu] = ocoor[mu] + grid->_rdimensions[mu]*icoor[lane][mu];
}
int lex;
Lexicographic::IndexFromCoorReversed(lcoor, lex, grid->_ldimensions);
ptrs[lane] = &in[lex];
}
//pack from those ptrs
vobj vecobj;
merge1(vecobj, ptrs, 0);
out._odata[oidx] = vecobj;
}
}
//Convert a Lattice from one precision to another
template<class VobjOut, class VobjIn>
void precisionChange(Lattice<VobjOut> &out, const Lattice<VobjIn> &in){
assert(out._grid->Nd() == in._grid->Nd());
+assert(out._grid->FullDimensions() == in._grid->FullDimensions());
out.checkerboard = in.checkerboard;
GridBase *in_grid=in._grid;
GridBase *out_grid = out._grid;
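
The new Rev variants differ from their LexOrd counterparts only in using Lexicographic::IndexFromCoorReversed, i.e. the last coordinate runs fastest when flattening local coordinates. A sketch of that indexing, under the stated assumption:

#include <vector>
int indexFromCoorReversed(const std::vector<int> &coor,
                          const std::vector<int> &dims)
{
    int index = 0;
    for (unsigned d = 0; d < dims.size(); d++)
        index = index * dims[d] + coor[d]; // coor[nd-1] varies fastest
    return index;
}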

View File

@@ -91,7 +91,7 @@ class BinaryIO {
typedef typename vobj::scalar_object sobj;
GridBase *grid = lat._grid;
-int lsites = grid->lSites();
+uint64_t lsites = grid->lSites();
std::vector<sobj> scalardata(lsites);
unvectorizeToLexOrdArray(scalardata,lat);
@@ -110,11 +110,11 @@
lsites = 1;
}
-#pragma omp parallel
+PARALLEL_REGION
{
uint32_t nersc_csum_thr = 0;
-#pragma omp for
+PARALLEL_FOR_LOOP_INTERN
for (uint64_t local_site = 0; local_site < lsites; local_site++)
{
uint32_t *site_buf = (uint32_t *)&fbuf[local_site];
@@ -124,7 +124,7 @@
}
}
-#pragma omp critical
+PARALLEL_CRITICAL
{
nersc_csum += nersc_csum_thr;
}
@@ -146,21 +146,23 @@
std::vector<int> local_start =grid->LocalStarts();
std::vector<int> global_vol =grid->FullDimensions();
-#pragma omp parallel
+PARALLEL_REGION
{
std::vector<int> coor(nd);
uint32_t scidac_csuma_thr=0;
uint32_t scidac_csumb_thr=0;
uint32_t site_crc=0;
-#pragma omp for
+PARALLEL_FOR_LOOP_INTERN
for(uint64_t local_site=0;local_site<lsites;local_site++){
uint32_t * site_buf = (uint32_t *)&fbuf[local_site];
/*
* Scidac csum is rather more heavyweight
+* FIXME -- 128^3 x 256 x 16 will overflow.
*/
int global_site;
Lexicographic::CoorFromIndex(coor,local_site,local_vol);
@@ -181,7 +183,7 @@
scidac_csumb_thr ^= site_crc<<gsite31 | site_crc>>(32-gsite31);
}
-#pragma omp critical
+PARALLEL_CRITICAL
{
scidac_csuma^= scidac_csuma_thr;
scidac_csumb^= scidac_csumb_thr;
@@ -261,7 +263,7 @@
GridBase *grid,
std::vector<fobj> &iodata,
std::string file,
-Integer offset,
+uint64_t& offset,
const std::string &format, int control,
uint32_t &nersc_csum,
uint32_t &scidac_csuma,
@@ -429,14 +431,20 @@
MPI_Abort(MPI_COMM_WORLD, 1); //assert(ierr == 0);
}
-std::cout << GridLogDebug << "MPI read I/O set view " << file << std::endl;
+std::cout << GridLogDebug << "MPI write I/O set view " << file << std::endl;
ierr = MPI_File_set_view(fh, disp, mpiObject, fileArray, "native", MPI_INFO_NULL);
assert(ierr == 0);
-std::cout << GridLogDebug << "MPI read I/O write all " << file << std::endl;
+std::cout << GridLogDebug << "MPI write I/O write all " << file << std::endl;
ierr = MPI_File_write_all(fh, &iodata[0], 1, localArray, &status);
assert(ierr == 0);
+MPI_Offset os;
+MPI_File_get_position(fh, &os);
+MPI_File_get_byte_offset(fh, os, &disp);
+offset = disp;
MPI_File_close(&fh);
MPI_Type_free(&fileArray);
MPI_Type_free(&localArray);
@@ -446,16 +454,20 @@
} else {
std::cout << GridLogMessage << "IOobject: C++ write I/O " << file << " : "
-<< iodata.size() * sizeof(fobj) << " bytes" << std::endl;
+<< iodata.size() * sizeof(fobj) << " bytes and offset " << offset << std::endl;
std::ofstream fout;
fout.exceptions ( std::fstream::failbit | std::fstream::badbit );
try {
+if (offset) { // Must already exist and contain data
fout.open(file,std::ios::binary|std::ios::out|std::ios::in);
+} else { // Allow create
+fout.open(file,std::ios::binary|std::ios::out);
+}
} catch (const std::fstream::failure& exc) {
std::cout << GridLogError << "Error in opening the file " << file << " for output" <<std::endl;
std::cout << GridLogError << "Exception description: " << exc.what() << std::endl;
-std::cout << GridLogError << "Probable cause: wrong path, inaccessible location "<< std::endl;
+// std::cout << GridLogError << "Probable cause: wrong path, inaccessible location "<< std::endl;
#ifdef USE_MPI_IO
MPI_Abort(MPI_COMM_WORLD,1);
#else
@@ -489,6 +501,7 @@
exit(1);
#endif
}
+offset = fout.tellp();
fout.close();
}
timer.Stop();
@ -523,7 +536,7 @@ class BinaryIO {
static inline void readLatticeObject(Lattice<vobj> &Umu, static inline void readLatticeObject(Lattice<vobj> &Umu,
std::string file, std::string file,
munger munge, munger munge,
Integer offset, uint64_t offset,
const std::string &format, const std::string &format,
uint32_t &nersc_csum, uint32_t &nersc_csum,
uint32_t &scidac_csuma, uint32_t &scidac_csuma,
@ -533,7 +546,7 @@ class BinaryIO {
typedef typename vobj::Realified::scalar_type word; word w=0; typedef typename vobj::Realified::scalar_type word; word w=0;
GridBase *grid = Umu._grid; GridBase *grid = Umu._grid;
int lsites = grid->lSites(); uint64_t lsites = grid->lSites();
std::vector<sobj> scalardata(lsites); std::vector<sobj> scalardata(lsites);
std::vector<fobj> iodata(lsites); // Munge, checksum, byte order in here std::vector<fobj> iodata(lsites); // Munge, checksum, byte order in here
@ -544,7 +557,7 @@ class BinaryIO {
GridStopWatch timer; GridStopWatch timer;
timer.Start(); timer.Start();
parallel_for(int x=0;x<lsites;x++) munge(iodata[x], scalardata[x]); parallel_for(uint64_t x=0;x<lsites;x++) munge(iodata[x], scalardata[x]);
vectorizeFromLexOrdArray(scalardata,Umu); vectorizeFromLexOrdArray(scalardata,Umu);
grid->Barrier(); grid->Barrier();
@ -560,7 +573,7 @@ class BinaryIO {
static inline void writeLatticeObject(Lattice<vobj> &Umu, static inline void writeLatticeObject(Lattice<vobj> &Umu,
std::string file, std::string file,
munger munge, munger munge,
Integer offset, uint64_t offset,
const std::string &format, const std::string &format,
uint32_t &nersc_csum, uint32_t &nersc_csum,
uint32_t &scidac_csuma, uint32_t &scidac_csuma,
@ -569,7 +582,7 @@ class BinaryIO {
typedef typename vobj::scalar_object sobj; typedef typename vobj::scalar_object sobj;
typedef typename vobj::Realified::scalar_type word; word w=0; typedef typename vobj::Realified::scalar_type word; word w=0;
GridBase *grid = Umu._grid; GridBase *grid = Umu._grid;
int lsites = grid->lSites(); uint64_t lsites = grid->lSites();
std::vector<sobj> scalardata(lsites); std::vector<sobj> scalardata(lsites);
std::vector<fobj> iodata(lsites); // Munge, checksum, byte order in here std::vector<fobj> iodata(lsites); // Munge, checksum, byte order in here
@ -580,7 +593,7 @@ class BinaryIO {
GridStopWatch timer; timer.Start(); GridStopWatch timer; timer.Start();
unvectorizeToLexOrdArray(scalardata,Umu); unvectorizeToLexOrdArray(scalardata,Umu);
parallel_for(int x=0;x<lsites;x++) munge(scalardata[x],iodata[x]); parallel_for(uint64_t x=0;x<lsites;x++) munge(scalardata[x],iodata[x]);
grid->Barrier(); grid->Barrier();
timer.Stop(); timer.Stop();
@ -597,7 +610,7 @@ class BinaryIO {
static inline void readRNG(GridSerialRNG &serial, static inline void readRNG(GridSerialRNG &serial,
GridParallelRNG &parallel, GridParallelRNG &parallel,
std::string file, std::string file,
Integer offset, uint64_t offset,
uint32_t &nersc_csum, uint32_t &nersc_csum,
uint32_t &scidac_csuma, uint32_t &scidac_csuma,
uint32_t &scidac_csumb) uint32_t &scidac_csumb)
@ -610,8 +623,8 @@ class BinaryIO {
std::string format = "IEEE32BIG"; std::string format = "IEEE32BIG";
GridBase *grid = parallel._grid; GridBase *grid = parallel._grid;
int gsites = grid->gSites(); uint64_t gsites = grid->gSites();
int lsites = grid->lSites(); uint64_t lsites = grid->lSites();
uint32_t nersc_csum_tmp = 0; uint32_t nersc_csum_tmp = 0;
uint32_t scidac_csuma_tmp = 0; uint32_t scidac_csuma_tmp = 0;
@ -626,7 +639,7 @@ class BinaryIO {
nersc_csum,scidac_csuma,scidac_csumb); nersc_csum,scidac_csuma,scidac_csumb);
timer.Start(); timer.Start();
parallel_for(int lidx=0;lidx<lsites;lidx++){ parallel_for(uint64_t lidx=0;lidx<lsites;lidx++){
std::vector<RngStateType> tmp(RngStateCount); std::vector<RngStateType> tmp(RngStateCount);
std::copy(iodata[lidx].begin(),iodata[lidx].end(),tmp.begin()); std::copy(iodata[lidx].begin(),iodata[lidx].end(),tmp.begin());
parallel.SetState(tmp,lidx); parallel.SetState(tmp,lidx);
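The int to uint64_t promotions through this file are not cosmetic: byte offsets overflow 32-bit arithmetic at volumes already in use, as the FIXME above hints. A hedged back-of-envelope (gauge field, double precision):

#include <cstdint>
#include <cstdio>

int main() {
  // 128^3 x 256 lattice: the site count still fits in 32 bits...
  uint64_t gsites = 128ULL * 128 * 128 * 256;  // 536,870,912 sites
  // ...but byte offsets do not: 4 links x 3x3 complex doubles per site.
  uint64_t bytes = gsites * 4 * 18 * 8;        // 309,237,645,312 bytes
  std::printf("%llu sites, %llu bytes\n",
              (unsigned long long)gsites, (unsigned long long)bytes);
  return 0;  // ~309 GB, far beyond the 2^31-1 reach of a 32-bit offset
}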
@ -659,7 +672,7 @@ class BinaryIO {
static inline void writeRNG(GridSerialRNG &serial, static inline void writeRNG(GridSerialRNG &serial,
GridParallelRNG &parallel, GridParallelRNG &parallel,
std::string file, std::string file,
Integer offset, uint64_t offset,
uint32_t &nersc_csum, uint32_t &nersc_csum,
uint32_t &scidac_csuma, uint32_t &scidac_csuma,
uint32_t &scidac_csumb) uint32_t &scidac_csumb)
@ -670,8 +683,8 @@ class BinaryIO {
typedef std::array<RngStateType,RngStateCount> RNGstate; typedef std::array<RngStateType,RngStateCount> RNGstate;
GridBase *grid = parallel._grid; GridBase *grid = parallel._grid;
int gsites = grid->gSites(); uint64_t gsites = grid->gSites();
int lsites = grid->lSites(); uint64_t lsites = grid->lSites();
uint32_t nersc_csum_tmp; uint32_t nersc_csum_tmp;
uint32_t scidac_csuma_tmp; uint32_t scidac_csuma_tmp;
@ -684,7 +697,7 @@ class BinaryIO {
timer.Start(); timer.Start();
std::vector<RNGstate> iodata(lsites); std::vector<RNGstate> iodata(lsites);
parallel_for(int lidx=0;lidx<lsites;lidx++){ parallel_for(uint64_t lidx=0;lidx<lsites;lidx++){
std::vector<RngStateType> tmp(RngStateCount); std::vector<RngStateType> tmp(RngStateCount);
parallel.GetState(tmp,lidx); parallel.GetState(tmp,lidx);
std::copy(tmp.begin(),tmp.end(),iodata[lidx].begin()); std::copy(tmp.begin(),tmp.end(),iodata[lidx].begin());
@ -693,7 +706,6 @@ class BinaryIO {
IOobject(w,grid,iodata,file,offset,format,BINARYIO_WRITE|BINARYIO_LEXICOGRAPHIC, IOobject(w,grid,iodata,file,offset,format,BINARYIO_WRITE|BINARYIO_LEXICOGRAPHIC,
nersc_csum,scidac_csuma,scidac_csumb); nersc_csum,scidac_csuma,scidac_csumb);
iodata.resize(1); iodata.resize(1);
{ {
std::vector<RngStateType> tmp(RngStateCount); std::vector<RngStateType> tmp(RngStateCount);

View File

@ -182,6 +182,11 @@ class GridLimeReader : public BinaryIO {
{ {
filename= _filename; filename= _filename;
File = fopen(filename.c_str(), "r"); File = fopen(filename.c_str(), "r");
if (File == nullptr)
{
std::cerr << "cannot open file '" << filename << "'" << std::endl;
abort();
}
LimeR = limeCreateReader(File); LimeR = limeCreateReader(File);
} }
///////////////////////////////////////////// /////////////////////////////////////////////
@ -248,7 +253,6 @@ class GridLimeReader : public BinaryIO {
template<class serialisable_object> template<class serialisable_object>
void readLimeObject(serialisable_object &object,std::string object_name,std::string record_name) void readLimeObject(serialisable_object &object,std::string object_name,std::string record_name)
{ {
std::string xmlstring;
// should this be a do while; can we miss a first record?? // should this be a do while; can we miss a first record??
while ( limeReaderNextRecord(LimeR) == LIME_SUCCESS ) { while ( limeReaderNextRecord(LimeR) == LIME_SUCCESS ) {
@ -262,7 +266,8 @@ class GridLimeReader : public BinaryIO {
limeReaderReadData((void *)&xmlc[0], &nbytes, LimeR); limeReaderReadData((void *)&xmlc[0], &nbytes, LimeR);
// std::cout << GridLogMessage<< " readLimeObject matches XML " << &xmlc[0] <<std::endl; // std::cout << GridLogMessage<< " readLimeObject matches XML " << &xmlc[0] <<std::endl;
XmlReader RD(&xmlc[0],""); std::string xmlstring(&xmlc[0]);
XmlReader RD(xmlstring, true, "");
read(RD,object_name,object); read(RD,object_name,object);
return; return;
} }
@ -272,8 +277,10 @@ class GridLimeReader : public BinaryIO {
} }
}; };
class GridLimeWriter : public BinaryIO { class GridLimeWriter : public BinaryIO
{
public: public:
/////////////////////////////////////////////////// ///////////////////////////////////////////////////
// FIXME: format for RNG? Now just binary out instead // FIXME: format for RNG? Now just binary out instead
// FIXME: collective calls or not ? // FIXME: collective calls or not ?
@ -282,17 +289,24 @@ class GridLimeWriter : public BinaryIO {
FILE *File; FILE *File;
LimeWriter *LimeW; LimeWriter *LimeW;
std::string filename; std::string filename;
bool boss_node;
GridLimeWriter( bool isboss = true) {
boss_node = isboss;
}
void open(const std::string &_filename) { void open(const std::string &_filename) {
filename= _filename; filename= _filename;
if ( boss_node ) {
File = fopen(filename.c_str(), "w"); File = fopen(filename.c_str(), "w");
LimeW = limeCreateWriter(File); assert(LimeW != NULL ); LimeW = limeCreateWriter(File); assert(LimeW != NULL );
} }
}
///////////////////////////////////////////// /////////////////////////////////////////////
// Close the file // Close the file
///////////////////////////////////////////// /////////////////////////////////////////////
void close(void) { void close(void) {
if ( boss_node ) {
fclose(File); fclose(File);
}
// limeDestroyWriter(LimeW); // limeDestroyWriter(LimeW);
} }
/////////////////////////////////////////////////////// ///////////////////////////////////////////////////////
@ -300,10 +314,12 @@ class GridLimeWriter : public BinaryIO {
/////////////////////////////////////////////////////// ///////////////////////////////////////////////////////
int createLimeRecordHeader(std::string message, int MB, int ME, size_t PayloadSize) int createLimeRecordHeader(std::string message, int MB, int ME, size_t PayloadSize)
{ {
if ( boss_node ) {
LimeRecordHeader *h; LimeRecordHeader *h;
h = limeCreateHeader(MB, ME, const_cast<char *>(message.c_str()), PayloadSize); h = limeCreateHeader(MB, ME, const_cast<char *>(message.c_str()), PayloadSize);
assert(limeWriteRecordHeader(h, LimeW) >= 0); assert(limeWriteRecordHeader(h, LimeW) >= 0);
limeDestroyHeader(h); limeDestroyHeader(h);
}
return LIME_SUCCESS; return LIME_SUCCESS;
} }
//////////////////////////////////////////// ////////////////////////////////////////////
@ -312,6 +328,7 @@ class GridLimeWriter : public BinaryIO {
template<class serialisable_object> template<class serialisable_object>
void writeLimeObject(int MB,int ME,serialisable_object &object,std::string object_name,std::string record_name) void writeLimeObject(int MB,int ME,serialisable_object &object,std::string object_name,std::string record_name)
{ {
if ( boss_node ) {
std::string xmlstring; std::string xmlstring;
{ {
XmlWriter WR("",""); XmlWriter WR("","");
@ -329,48 +346,81 @@ class GridLimeWriter : public BinaryIO {
err=limeWriteRecordData(&xmlstring[0], &nbytes, LimeW); assert(err>=0); err=limeWriteRecordData(&xmlstring[0], &nbytes, LimeW); assert(err>=0);
err=limeWriterCloseRecord(LimeW); assert(err>=0); err=limeWriterCloseRecord(LimeW); assert(err>=0);
limeDestroyHeader(h); limeDestroyHeader(h);
// std::cout << " File offset is now"<<ftello(File) << std::endl;
} }
//////////////////////////////////////////// }
////////////////////////////////////////////////////
// Write a generic lattice field and csum // Write a generic lattice field and csum
//////////////////////////////////////////// // This routine is Collectively called by all nodes
// in communicator used by the field._grid
////////////////////////////////////////////////////
template<class vobj> template<class vobj>
void writeLimeLatticeBinaryObject(Lattice<vobj> &field,std::string record_name) void writeLimeLatticeBinaryObject(Lattice<vobj> &field,std::string record_name)
{ {
////////////////////////////////////////////
// Create record header
////////////////////////////////////////////
typedef typename vobj::scalar_object sobj;
int err;
uint32_t nersc_csum,scidac_csuma,scidac_csumb;
uint64_t PayloadSize = sizeof(sobj) * field._grid->_gsites;
createLimeRecordHeader(record_name, 0, 0, PayloadSize);
// std::cout << "W sizeof(sobj)" <<sizeof(sobj)<<std::endl;
// std::cout << "W Gsites " <<field._grid->_gsites<<std::endl;
// std::cout << "W Payload expected " <<PayloadSize<<std::endl;
//////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////
// NB: FILE and iostream are jointly writing disjoint sequences in the // NB: FILE and iostream are jointly writing disjoint sequences in the
// the same file through different file handles (integer units). // the same file through different file handles (integer units).
// //
// These are both buffered, so why I think this code is right is as follows. // These are both buffered, so why I think this code is right is as follows.
// //
// i) write record header to FILE *File, telegraphing the size. // i) write record header to FILE *File, telegraphing the size; flush
// ii) ftello reads the offset from FILE *File . // ii) ftello reads the offset from FILE *File .
// iii) iostream / MPI Open independently seek this offset. Write sequence direct to disk. // iii) iostream / MPI Open independently seek this offset. Write sequence direct to disk.
// Closes iostream and flushes. // Closes iostream and flushes.
// iv) fseek on FILE * to end of this disjoint section. // iv) fseek on FILE * to end of this disjoint section.
// v) Continue writing scidac record. // v) Continue writing scidac record.
//////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////
uint64_t offset = ftello(File);
// std::cout << " Writing to offset "<<offset << std::endl; GridBase *grid = field._grid;
assert(boss_node == field._grid->IsBoss() );
////////////////////////////////////////////
// Create record header
////////////////////////////////////////////
typedef typename vobj::scalar_object sobj;
int err;
uint32_t nersc_csum,scidac_csuma,scidac_csumb;
uint64_t PayloadSize = sizeof(sobj) * grid->_gsites;
if ( boss_node ) {
createLimeRecordHeader(record_name, 0, 0, PayloadSize);
fflush(File);
}
// std::cout << "W sizeof(sobj)" <<sizeof(sobj)<<std::endl;
// std::cout << "W Gsites " <<field._grid->_gsites<<std::endl;
// std::cout << "W Payload expected " <<PayloadSize<<std::endl;
////////////////////////////////////////////////
// Check all nodes agree on file position
////////////////////////////////////////////////
uint64_t offset1;
if ( boss_node ) {
offset1 = ftello(File);
}
grid->Broadcast(0,(void *)&offset1,sizeof(offset1));
///////////////////////////////////////////
// The above is collective. Write by other means into the binary record
///////////////////////////////////////////
std::string format = getFormatString<vobj>(); std::string format = getFormatString<vobj>();
BinarySimpleMunger<sobj,sobj> munge; BinarySimpleMunger<sobj,sobj> munge;
BinaryIO::writeLatticeObject<vobj,sobj>(field, filename, munge, offset, format,nersc_csum,scidac_csuma,scidac_csumb); BinaryIO::writeLatticeObject<vobj,sobj>(field, filename, munge, offset1, format,nersc_csum,scidac_csuma,scidac_csumb);
// fseek(File,0,SEEK_END); offset = ftello(File);std::cout << " offset now "<<offset << std::endl;
err=limeWriterCloseRecord(LimeW); assert(err>=0);
///////////////////////////////////////////
// Wind forward and close the record
///////////////////////////////////////////
if ( boss_node ) {
fseek(File,0,SEEK_END);
uint64_t offset2 = ftello(File); // std::cout << " now at offset "<<offset2 << std::endl;
assert( (offset2-offset1) == PayloadSize);
}
/////////////////////////////////////////////////////////////
// Check MPI-2 I/O did what we expect to file
/////////////////////////////////////////////////////////////
if ( boss_node ) {
err=limeWriterCloseRecord(LimeW); assert(err>=0);
}
//////////////////////////////////////// ////////////////////////////////////////
// Write checksum element, propagating forward from the BinaryIO // Write checksum element, propagating forward from the BinaryIO
// Always pair a checksum with a binary object, and close message // Always pair a checksum with a binary object, and close message
@ -380,21 +430,26 @@ class GridLimeWriter : public BinaryIO {
std::stringstream streamb; streamb << std::hex << scidac_csumb; std::stringstream streamb; streamb << std::hex << scidac_csumb;
checksum.suma= streama.str(); checksum.suma= streama.str();
checksum.sumb= streamb.str(); checksum.sumb= streamb.str();
// std::cout << GridLogMessage<<" writing scidac checksums "<<std::hex<<scidac_csuma<<"/"<<scidac_csumb<<std::dec<<std::endl; if ( boss_node ) {
writeLimeObject(0,1,checksum,std::string("scidacChecksum"),std::string(SCIDAC_CHECKSUM)); writeLimeObject(0,1,checksum,std::string("scidacChecksum"),std::string(SCIDAC_CHECKSUM));
} }
}
}; };
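Steps i)-v) above hinge on a simple handshake: the boss learns the payload offset through its buffered FILE*, every rank writes the payload through an independent handle, and the boss then winds the FILE* past the payload. A minimal sketch with illustrative names (not the Grid API), assuming POSIX ftello/fseeko and an MPI communicator spanning the field's grid:

#include <cstdint>
#include <cstdio>
#include <mpi.h>

// Hedged sketch of the offset handshake around a LIME binary record.
void writeRecordPayload(FILE *File, bool boss, MPI_Comm comm,
                        uint64_t payload_bytes) {
  uint64_t offset = 0;
  if (boss) {
    fflush(File);                      // i) header safely on disk
    offset = (uint64_t)ftello(File);   // ii) where the payload starts
  }
  MPI_Bcast(&offset, 1, MPI_UINT64_T, 0, comm);  // all ranks agree
  // iii) collective binary write of the payload at 'offset' goes here,
  //      via MPI-IO or iostream, never through the buffered FILE*.
  if (boss) {                          // iv) wind FILE* past the payload
    fseeko(File, (off_t)(offset + payload_bytes), SEEK_SET);
  }
  // v) the boss resumes writing LIME records through File as normal.
}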
class ScidacWriter : public GridLimeWriter { class ScidacWriter : public GridLimeWriter {
public: public:
ScidacWriter(bool isboss =true ) : GridLimeWriter(isboss) { };
template<class SerialisableUserFile> template<class SerialisableUserFile>
void writeScidacFileRecord(GridBase *grid,SerialisableUserFile &_userFile) void writeScidacFileRecord(GridBase *grid,SerialisableUserFile &_userFile)
{ {
scidacFile _scidacFile(grid); scidacFile _scidacFile(grid);
if ( this->boss_node ) {
writeLimeObject(1,0,_scidacFile,_scidacFile.SerialisableClassName(),std::string(SCIDAC_PRIVATE_FILE_XML)); writeLimeObject(1,0,_scidacFile,_scidacFile.SerialisableClassName(),std::string(SCIDAC_PRIVATE_FILE_XML));
writeLimeObject(0,1,_userFile,_userFile.SerialisableClassName(),std::string(SCIDAC_FILE_XML)); writeLimeObject(0,1,_userFile,_userFile.SerialisableClassName(),std::string(SCIDAC_FILE_XML));
} }
}
//////////////////////////////////////////////// ////////////////////////////////////////////////
// Write generic lattice field in scidac format // Write generic lattice field in scidac format
//////////////////////////////////////////////// ////////////////////////////////////////////////
@ -415,9 +470,12 @@ class ScidacWriter : public GridLimeWriter {
////////////////////////////////////////////// //////////////////////////////////////////////
// Fill the Lime file record by record // Fill the Lime file record by record
////////////////////////////////////////////// //////////////////////////////////////////////
if ( this->boss_node ) {
writeLimeObject(1,0,header ,std::string("FieldMetaData"),std::string(GRID_FORMAT)); // Open message writeLimeObject(1,0,header ,std::string("FieldMetaData"),std::string(GRID_FORMAT)); // Open message
writeLimeObject(0,0,_userRecord,_userRecord.SerialisableClassName(),std::string(SCIDAC_RECORD_XML)); writeLimeObject(0,0,_userRecord,_userRecord.SerialisableClassName(),std::string(SCIDAC_RECORD_XML));
writeLimeObject(0,0,_scidacRecord,_scidacRecord.SerialisableClassName(),std::string(SCIDAC_PRIVATE_RECORD_XML)); writeLimeObject(0,0,_scidacRecord,_scidacRecord.SerialisableClassName(),std::string(SCIDAC_PRIVATE_RECORD_XML));
}
// Collective call
writeLimeLatticeBinaryObject(field,std::string(ILDG_BINARY_DATA)); // Closes message with checksum writeLimeLatticeBinaryObject(field,std::string(ILDG_BINARY_DATA)); // Closes message with checksum
} }
}; };
@ -485,6 +543,8 @@ class ScidacReader : public GridLimeReader {
class IldgWriter : public ScidacWriter { class IldgWriter : public ScidacWriter {
public: public:
IldgWriter(bool isboss) : ScidacWriter(isboss) {};
/////////////////////////////////// ///////////////////////////////////
// A little helper // A little helper
/////////////////////////////////// ///////////////////////////////////
@ -568,7 +628,6 @@ class IldgWriter : public ScidacWriter {
writeLimeIldgLFN(header.ildg_lfn); // rec writeLimeIldgLFN(header.ildg_lfn); // rec
writeLimeLatticeBinaryObject(Umu,std::string(ILDG_BINARY_DATA)); // Closes message with checksum writeLimeLatticeBinaryObject(Umu,std::string(ILDG_BINARY_DATA)); // Closes message with checksum
// limeDestroyWriter(LimeW); // limeDestroyWriter(LimeW);
fclose(File);
} }
}; };
@ -644,9 +703,11 @@ class IldgReader : public GridLimeReader {
////////////////////////////////// //////////////////////////////////
// ILDG format record // ILDG format record
std::string xmlstring(&xmlc[0]);
if ( !strncmp(limeReaderType(LimeR), ILDG_FORMAT,strlen(ILDG_FORMAT)) ) { if ( !strncmp(limeReaderType(LimeR), ILDG_FORMAT,strlen(ILDG_FORMAT)) ) {
XmlReader RD(&xmlc[0],""); XmlReader RD(xmlstring, true, "");
read(RD,"ildgFormat",ildgFormat_); read(RD,"ildgFormat",ildgFormat_);
if ( ildgFormat_.precision == 64 ) format = std::string("IEEE64BIG"); if ( ildgFormat_.precision == 64 ) format = std::string("IEEE64BIG");
@ -661,13 +722,13 @@ class IldgReader : public GridLimeReader {
} }
if ( !strncmp(limeReaderType(LimeR), ILDG_DATA_LFN,strlen(ILDG_DATA_LFN)) ) { if ( !strncmp(limeReaderType(LimeR), ILDG_DATA_LFN,strlen(ILDG_DATA_LFN)) ) {
FieldMetaData_.ildg_lfn = std::string(&xmlc[0]); FieldMetaData_.ildg_lfn = xmlstring;
found_ildgLFN = 1; found_ildgLFN = 1;
} }
if ( !strncmp(limeReaderType(LimeR), GRID_FORMAT,strlen(ILDG_FORMAT)) ) { if ( !strncmp(limeReaderType(LimeR), GRID_FORMAT,strlen(ILDG_FORMAT)) ) {
XmlReader RD(&xmlc[0],""); XmlReader RD(xmlstring, true, "");
read(RD,"FieldMetaData",FieldMetaData_); read(RD,"FieldMetaData",FieldMetaData_);
format = FieldMetaData_.floating_point; format = FieldMetaData_.floating_point;
@ -681,18 +742,17 @@ class IldgReader : public GridLimeReader {
} }
if ( !strncmp(limeReaderType(LimeR), SCIDAC_RECORD_XML,strlen(SCIDAC_RECORD_XML)) ) { if ( !strncmp(limeReaderType(LimeR), SCIDAC_RECORD_XML,strlen(SCIDAC_RECORD_XML)) ) {
std::string xmls(&xmlc[0]);
// is it a USQCD info field // is it a USQCD info field
if ( xmls.find(std::string("usqcdInfo")) != std::string::npos ) { if ( xmlstring.find(std::string("usqcdInfo")) != std::string::npos ) {
// std::cout << GridLogMessage<<"...found a usqcdInfo field"<<std::endl; // std::cout << GridLogMessage<<"...found a usqcdInfo field"<<std::endl;
XmlReader RD(&xmlc[0],""); XmlReader RD(xmlstring, true, "");
read(RD,"usqcdInfo",usqcdInfo_); read(RD,"usqcdInfo",usqcdInfo_);
found_usqcdInfo = 1; found_usqcdInfo = 1;
} }
} }
if ( !strncmp(limeReaderType(LimeR), SCIDAC_CHECKSUM,strlen(SCIDAC_CHECKSUM)) ) { if ( !strncmp(limeReaderType(LimeR), SCIDAC_CHECKSUM,strlen(SCIDAC_CHECKSUM)) ) {
XmlReader RD(&xmlc[0],""); XmlReader RD(xmlstring, true, "");
read(RD,"scidacChecksum",scidacChecksum_); read(RD,"scidacChecksum",scidacChecksum_);
found_scidacChecksum = 1; found_scidacChecksum = 1;
} }

View File

@ -136,8 +136,9 @@ struct scidacRecord : Serializable {
int, typesize, int, typesize,
int, datacount); int, datacount);
scidacRecord() { version =1.0; } scidacRecord()
: version(1.0), recordtype(0), colors(0), spins(0), typesize(0), datacount(0)
{}
}; };
//////////////////////// ////////////////////////

View File

@ -81,18 +81,16 @@ namespace Grid {
std::string, creation_date, std::string, creation_date,
std::string, archive_date, std::string, archive_date,
std::string, floating_point); std::string, floating_point);
FieldMetaData(void) { // WARNING: non-initialised values might lead to twisted parallel IO
nd=4; // issues, std::string members are fine because they initialise to size 0
dimension.resize(4); // as per C++ standard.
boundary.resize(4); FieldMetaData(void)
scidac_checksuma=0; : nd(4), dimension(4,0), boundary(4, ""), data_start(0),
scidac_checksumb=0; link_trace(0.), plaquette(0.), checksum(0),
checksum=0; scidac_checksuma(0), scidac_checksumb(0), sequence_number(0)
} {}
}; };
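The member-initialiser list matters for the warning above: indeterminate POD members differ from rank to rank, so a header that is broadcast, compared, or checksummed across ranks diverges nondeterministically. A minimal illustration with a hypothetical struct:

// Hedged illustration: 'BadHeader' leaves data_start indeterminate, so
// two MPI ranks can serialise different bytes from logically identical
// state; 'GoodHeader' value-initialises every member, as above.
struct BadHeader  { int data_start; BadHeader() {} };
struct GoodHeader { int data_start; GoodHeader() : data_start(0) {} };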
namespace QCD { namespace QCD {
using namespace Grid; using namespace Grid;

View File

@ -57,7 +57,7 @@ namespace Grid {
// for the header-reader // for the header-reader
static inline int readHeader(std::string file,GridBase *grid, FieldMetaData &field) static inline int readHeader(std::string file,GridBase *grid, FieldMetaData &field)
{ {
int offset=0; uint64_t offset=0;
std::map<std::string,std::string> header; std::map<std::string,std::string> header;
std::string line; std::string line;
@ -139,7 +139,7 @@ namespace Grid {
typedef Lattice<iLorentzColourMatrix<vsimd> > GaugeField; typedef Lattice<iLorentzColourMatrix<vsimd> > GaugeField;
GridBase *grid = Umu._grid; GridBase *grid = Umu._grid;
int offset = readHeader(file,Umu._grid,header); uint64_t offset = readHeader(file,Umu._grid,header);
FieldMetaData clone(header); FieldMetaData clone(header);
@ -236,21 +236,25 @@ namespace Grid {
GaugeStatistics(Umu,header); GaugeStatistics(Umu,header);
MachineCharacteristics(header); MachineCharacteristics(header);
int offset; uint64_t offset;
truncate(file);
// Sod it -- always write 3x3 double // Sod it -- always write 3x3 double
header.floating_point = std::string("IEEE64BIG"); header.floating_point = std::string("IEEE64BIG");
header.data_type = std::string("4D_SU3_GAUGE_3x3"); header.data_type = std::string("4D_SU3_GAUGE_3x3");
GaugeSimpleUnmunger<fobj3D,sobj> munge; GaugeSimpleUnmunger<fobj3D,sobj> munge;
if ( grid->IsBoss() ) {
truncate(file);
offset = writeHeader(header,file); offset = writeHeader(header,file);
}
grid->Broadcast(0,(void *)&offset,sizeof(offset));
uint32_t nersc_csum,scidac_csuma,scidac_csumb; uint32_t nersc_csum,scidac_csuma,scidac_csumb;
BinaryIO::writeLatticeObject<vobj,fobj3D>(Umu,file,munge,offset,header.floating_point, BinaryIO::writeLatticeObject<vobj,fobj3D>(Umu,file,munge,offset,header.floating_point,
nersc_csum,scidac_csuma,scidac_csumb); nersc_csum,scidac_csuma,scidac_csumb);
header.checksum = nersc_csum; header.checksum = nersc_csum;
if ( grid->IsBoss() ) {
writeHeader(header,file); writeHeader(header,file);
}
std::cout<<GridLogMessage <<"Written NERSC Configuration on "<< file << " checksum " std::cout<<GridLogMessage <<"Written NERSC Configuration on "<< file << " checksum "
<<std::hex<<header.checksum <<std::hex<<header.checksum
@ -278,7 +282,7 @@ namespace Grid {
header.plaquette=0.0; header.plaquette=0.0;
MachineCharacteristics(header); MachineCharacteristics(header);
int offset; uint64_t offset;
#ifdef RNG_RANLUX #ifdef RNG_RANLUX
header.floating_point = std::string("UINT64"); header.floating_point = std::string("UINT64");
@ -293,12 +297,18 @@ namespace Grid {
header.data_type = std::string("SITMO"); header.data_type = std::string("SITMO");
#endif #endif
if ( grid->IsBoss() ) {
truncate(file); truncate(file);
offset = writeHeader(header,file); offset = writeHeader(header,file);
}
grid->Broadcast(0,(void *)&offset,sizeof(offset));
uint32_t nersc_csum,scidac_csuma,scidac_csumb; uint32_t nersc_csum,scidac_csuma,scidac_csumb;
BinaryIO::writeRNG(serial,parallel,file,offset,nersc_csum,scidac_csuma,scidac_csumb); BinaryIO::writeRNG(serial,parallel,file,offset,nersc_csum,scidac_csuma,scidac_csumb);
header.checksum = nersc_csum; header.checksum = nersc_csum;
if ( grid->IsBoss() ) {
offset = writeHeader(header,file); offset = writeHeader(header,file);
}
std::cout<<GridLogMessage std::cout<<GridLogMessage
<<"Written NERSC RNG STATE "<<file<< " checksum " <<"Written NERSC RNG STATE "<<file<< " checksum "
@ -313,7 +323,7 @@ namespace Grid {
GridBase *grid = parallel._grid; GridBase *grid = parallel._grid;
int offset = readHeader(file,grid,header); uint64_t offset = readHeader(file,grid,header);
FieldMetaData clone(header); FieldMetaData clone(header);

View File

@ -1,44 +0,0 @@
pugixml [![Build Status](https://travis-ci.org/zeux/pugixml.svg?branch=master)](https://travis-ci.org/zeux/pugixml) [![Build status](https://ci.appveyor.com/api/projects/status/9hdks1doqvq8pwe7/branch/master?svg=true)](https://ci.appveyor.com/project/zeux/pugixml)
=======
pugixml is a C++ XML processing library, which consists of a DOM-like interface with rich traversal/modification
capabilities, an extremely fast XML parser which constructs the DOM tree from an XML file/buffer, and an XPath 1.0
implementation for complex data-driven tree queries. Full Unicode support is also available, with Unicode interface
variants and conversions between different Unicode encodings (which happen automatically during parsing/saving).
pugixml is used by a lot of projects, both open-source and proprietary, for performance and easy-to-use interface.
## Documentation
Documentation for the current release of pugixml is available on-line as two separate documents:
* [Quick-start guide](http://pugixml.org/docs/quickstart.html), that aims to provide enough information to start using the library;
* [Complete reference manual](http://pugixml.org/docs/manual.html), that describes all features of the library in detail.
You're advised to start with the quick-start guide; however, many important library features are either not described in it at all or only mentioned briefly; if you require more information you should read the complete manual.
## License
This library is available to anybody free of charge, under the terms of MIT License:
Copyright (c) 2006-2015 Arseny Kapoulkine
Permission is hereby granted, free of charge, to any person
obtaining a copy of this software and associated documentation
files (the "Software"), to deal in the Software without
restriction, including without limitation the rights to use,
copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the
Software is furnished to do so, subject to the following
conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
OTHER DEALINGS IN THE SOFTWARE.

View File

@ -1,7 +1,7 @@
/** /**
* pugixml parser - version 1.6 * pugixml parser - version 1.9
* -------------------------------------------------------- * --------------------------------------------------------
* Copyright (C) 2006-2015, by Arseny Kapoulkine (arseny.kapoulkine@gmail.com) * Copyright (C) 2006-2018, by Arseny Kapoulkine (arseny.kapoulkine@gmail.com)
* Report bugs and download new versions at http://pugixml.org/ * Report bugs and download new versions at http://pugixml.org/
* *
* This library is distributed under the MIT License. See notice at the end * This library is distributed under the MIT License. See notice at the end
@ -17,6 +17,9 @@
// Uncomment this to enable wchar_t mode // Uncomment this to enable wchar_t mode
// #define PUGIXML_WCHAR_MODE // #define PUGIXML_WCHAR_MODE
// Uncomment this to enable compact mode
// #define PUGIXML_COMPACT
// Uncomment this to disable XPath // Uncomment this to disable XPath
// #define PUGIXML_NO_XPATH // #define PUGIXML_NO_XPATH
@ -46,7 +49,7 @@
#endif #endif
/** /**
* Copyright (c) 2006-2015 Arseny Kapoulkine * Copyright (c) 2006-2018 Arseny Kapoulkine
* *
* Permission is hereby granted, free of charge, to any person * Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation * obtaining a copy of this software and associated documentation

File diff suppressed because it is too large

View File

@ -1,7 +1,7 @@
/** /**
* pugixml parser - version 1.6 * pugixml parser - version 1.9
* -------------------------------------------------------- * --------------------------------------------------------
* Copyright (C) 2006-2015, by Arseny Kapoulkine (arseny.kapoulkine@gmail.com) * Copyright (C) 2006-2018, by Arseny Kapoulkine (arseny.kapoulkine@gmail.com)
* Report bugs and download new versions at http://pugixml.org/ * Report bugs and download new versions at http://pugixml.org/
* *
* This library is distributed under the MIT License. See notice at the end * This library is distributed under the MIT License. See notice at the end
@ -13,7 +13,7 @@
#ifndef PUGIXML_VERSION #ifndef PUGIXML_VERSION
// Define version macro; evaluates to major * 100 + minor so that it's safe to use in less-than comparisons // Define version macro; evaluates to major * 100 + minor so that it's safe to use in less-than comparisons
# define PUGIXML_VERSION 160 # define PUGIXML_VERSION 190
#endif #endif
// Include user configuration file (this can define various configuration macros) // Include user configuration file (this can define various configuration macros)
@ -72,6 +72,44 @@
# endif # endif
#endif #endif
// If the platform is known to have move semantics support, compile move ctor/operator implementation
#ifndef PUGIXML_HAS_MOVE
# if __cplusplus >= 201103
# define PUGIXML_HAS_MOVE
# elif defined(_MSC_VER) && _MSC_VER >= 1600
# define PUGIXML_HAS_MOVE
# endif
#endif
// If C++ is 2011 or higher, add 'noexcept' specifiers
#ifndef PUGIXML_NOEXCEPT
# if __cplusplus >= 201103
# define PUGIXML_NOEXCEPT noexcept
# elif defined(_MSC_VER) && _MSC_VER >= 1900
# define PUGIXML_NOEXCEPT noexcept
# else
# define PUGIXML_NOEXCEPT
# endif
#endif
// Some functions can not be noexcept in compact mode
#ifdef PUGIXML_COMPACT
# define PUGIXML_NOEXCEPT_IF_NOT_COMPACT
#else
# define PUGIXML_NOEXCEPT_IF_NOT_COMPACT PUGIXML_NOEXCEPT
#endif
// If C++ is 2011 or higher, add 'override' qualifiers
#ifndef PUGIXML_OVERRIDE
# if __cplusplus >= 201103
# define PUGIXML_OVERRIDE override
# elif defined(_MSC_VER) && _MSC_VER >= 1700
# define PUGIXML_OVERRIDE override
# else
# define PUGIXML_OVERRIDE
# endif
#endif
// Character interface macros // Character interface macros
#ifdef PUGIXML_WCHAR_MODE #ifdef PUGIXML_WCHAR_MODE
# define PUGIXML_TEXT(t) L ## t # define PUGIXML_TEXT(t) L ## t
@ -158,6 +196,11 @@ namespace pugi
// is a valid document. This flag is off by default. // is a valid document. This flag is off by default.
const unsigned int parse_fragment = 0x1000; const unsigned int parse_fragment = 0x1000;
// This flag determines if plain character data is stored in the parent element's value. This significantly changes the structure of
// the document; this flag is only recommended for parsing documents with many PCDATA nodes in memory-constrained environments.
// This flag is off by default.
const unsigned int parse_embed_pcdata = 0x2000;
// The default parsing mode. // The default parsing mode.
// Elements, PCDATA and CDATA sections are added to the DOM tree, character/reference entities are expanded, // Elements, PCDATA and CDATA sections are added to the DOM tree, character/reference entities are expanded,
// End-of-Line characters are normalized, attribute values are normalized using CDATA normalization rules. // End-of-Line characters are normalized, attribute values are normalized using CDATA normalization rules.
@ -206,6 +249,9 @@ namespace pugi
// Write every attribute on a new line with appropriate indentation. This flag is off by default. // Write every attribute on a new line with appropriate indentation. This flag is off by default.
const unsigned int format_indent_attributes = 0x40; const unsigned int format_indent_attributes = 0x40;
// Don't output empty element tags, instead writing an explicit start and end tag even if there are no children. This flag is off by default.
const unsigned int format_no_empty_element_tags = 0x80;
// The default set of formatting flags. // The default set of formatting flags.
// Nodes are indented depending on their depth in DOM tree, a default declaration is output if document has none. // Nodes are indented depending on their depth in DOM tree, a default declaration is output if document has none.
const unsigned int format_default = format_indent; const unsigned int format_default = format_indent;
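A hedged usage example of the new flag (file name illustrative):

#include "pugixml.hpp"

int main() {
  pugi::xml_document doc;
  doc.append_child("empty");
  // Without the flag this saves as <empty/>; with it, <empty></empty>.
  doc.save_file("out.xml", "\t",
                pugi::format_default | pugi::format_no_empty_element_tags);
  return 0;
}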
@ -268,7 +314,7 @@ namespace pugi
// Construct writer from a FILE* object; void* is used to avoid header dependencies on stdio // Construct writer from a FILE* object; void* is used to avoid header dependencies on stdio
xml_writer_file(void* file); xml_writer_file(void* file);
virtual void write(const void* data, size_t size); virtual void write(const void* data, size_t size) PUGIXML_OVERRIDE;
private: private:
void* file; void* file;
@ -283,7 +329,7 @@ namespace pugi
xml_writer_stream(std::basic_ostream<char, std::char_traits<char> >& stream); xml_writer_stream(std::basic_ostream<char, std::char_traits<char> >& stream);
xml_writer_stream(std::basic_ostream<wchar_t, std::char_traits<wchar_t> >& stream); xml_writer_stream(std::basic_ostream<wchar_t, std::char_traits<wchar_t> >& stream);
virtual void write(const void* data, size_t size); virtual void write(const void* data, size_t size) PUGIXML_OVERRIDE;
private: private:
std::basic_ostream<char, std::char_traits<char> >* narrow_stream; std::basic_ostream<char, std::char_traits<char> >* narrow_stream;
@ -354,6 +400,8 @@ namespace pugi
// Set attribute value with type conversion (numbers are converted to strings, boolean is converted to "true"/"false") // Set attribute value with type conversion (numbers are converted to strings, boolean is converted to "true"/"false")
bool set_value(int rhs); bool set_value(int rhs);
bool set_value(unsigned int rhs); bool set_value(unsigned int rhs);
bool set_value(long rhs);
bool set_value(unsigned long rhs);
bool set_value(double rhs); bool set_value(double rhs);
bool set_value(float rhs); bool set_value(float rhs);
bool set_value(bool rhs); bool set_value(bool rhs);
@ -367,6 +415,8 @@ namespace pugi
xml_attribute& operator=(const char_t* rhs); xml_attribute& operator=(const char_t* rhs);
xml_attribute& operator=(int rhs); xml_attribute& operator=(int rhs);
xml_attribute& operator=(unsigned int rhs); xml_attribute& operator=(unsigned int rhs);
xml_attribute& operator=(long rhs);
xml_attribute& operator=(unsigned long rhs);
xml_attribute& operator=(double rhs); xml_attribute& operator=(double rhs);
xml_attribute& operator=(float rhs); xml_attribute& operator=(float rhs);
xml_attribute& operator=(bool rhs); xml_attribute& operator=(bool rhs);
@ -601,8 +651,8 @@ namespace pugi
xpath_node_set select_nodes(const xpath_query& query) const; xpath_node_set select_nodes(const xpath_query& query) const;
// (deprecated: use select_node instead) Select single node by evaluating XPath query. // (deprecated: use select_node instead) Select single node by evaluating XPath query.
xpath_node select_single_node(const char_t* query, xpath_variable_set* variables = 0) const; PUGIXML_DEPRECATED xpath_node select_single_node(const char_t* query, xpath_variable_set* variables = 0) const;
xpath_node select_single_node(const xpath_query& query) const; PUGIXML_DEPRECATED xpath_node select_single_node(const xpath_query& query) const;
#endif #endif
@ -701,6 +751,8 @@ namespace pugi
// Set text with type conversion (numbers are converted to strings, boolean is converted to "true"/"false") // Set text with type conversion (numbers are converted to strings, boolean is converted to "true"/"false")
bool set(int rhs); bool set(int rhs);
bool set(unsigned int rhs); bool set(unsigned int rhs);
bool set(long rhs);
bool set(unsigned long rhs);
bool set(double rhs); bool set(double rhs);
bool set(float rhs); bool set(float rhs);
bool set(bool rhs); bool set(bool rhs);
@ -714,6 +766,8 @@ namespace pugi
xml_text& operator=(const char_t* rhs); xml_text& operator=(const char_t* rhs);
xml_text& operator=(int rhs); xml_text& operator=(int rhs);
xml_text& operator=(unsigned int rhs); xml_text& operator=(unsigned int rhs);
xml_text& operator=(long rhs);
xml_text& operator=(unsigned long rhs);
xml_text& operator=(double rhs); xml_text& operator=(double rhs);
xml_text& operator=(float rhs); xml_text& operator=(float rhs);
xml_text& operator=(bool rhs); xml_text& operator=(bool rhs);
@ -945,10 +999,11 @@ namespace pugi
// Non-copyable semantics // Non-copyable semantics
xml_document(const xml_document&); xml_document(const xml_document&);
const xml_document& operator=(const xml_document&); xml_document& operator=(const xml_document&);
void create(); void _create();
void destroy(); void _destroy();
void _move(xml_document& rhs) PUGIXML_NOEXCEPT_IF_NOT_COMPACT;
public: public:
// Default constructor, makes empty document // Default constructor, makes empty document
@ -957,6 +1012,12 @@ namespace pugi
// Destructor, invalidates all node/attribute handles to this document // Destructor, invalidates all node/attribute handles to this document
~xml_document(); ~xml_document();
#ifdef PUGIXML_HAS_MOVE
// Move semantics support
xml_document(xml_document&& rhs) PUGIXML_NOEXCEPT_IF_NOT_COMPACT;
xml_document& operator=(xml_document&& rhs) PUGIXML_NOEXCEPT_IF_NOT_COMPACT;
#endif
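Move support lets a document be handed out of a factory function without deep-copying the DOM. A hedged sketch (compiles only where PUGIXML_HAS_MOVE is defined; before this version the pattern was rejected because copying is private):

#include "pugixml.hpp"

pugi::xml_document loadConfig(const char *path) {
  pugi::xml_document doc;
  doc.load_file(path);  // real code should check the parse result
  return doc;           // moved (or elided), never deep-copied
}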
// Removes all nodes, leaving the empty document // Removes all nodes, leaving the empty document
void reset(); void reset();
@ -970,7 +1031,7 @@ namespace pugi
#endif #endif
// (deprecated: use load_string instead) Load document from zero-terminated string. No encoding conversions are applied. // (deprecated: use load_string instead) Load document from zero-terminated string. No encoding conversions are applied.
xml_parse_result load(const char_t* contents, unsigned int options = parse_default); PUGIXML_DEPRECATED xml_parse_result load(const char_t* contents, unsigned int options = parse_default);
// Load document from zero-terminated string. No encoding conversions are applied. // Load document from zero-terminated string. No encoding conversions are applied.
xml_parse_result load_string(const char_t* contents, unsigned int options = parse_default); xml_parse_result load_string(const char_t* contents, unsigned int options = parse_default);
@ -1095,10 +1156,10 @@ namespace pugi
xpath_variable_set(const xpath_variable_set& rhs); xpath_variable_set(const xpath_variable_set& rhs);
xpath_variable_set& operator=(const xpath_variable_set& rhs); xpath_variable_set& operator=(const xpath_variable_set& rhs);
#if __cplusplus >= 201103 #ifdef PUGIXML_HAS_MOVE
// Move semantics support // Move semantics support
xpath_variable_set(xpath_variable_set&& rhs); xpath_variable_set(xpath_variable_set&& rhs) PUGIXML_NOEXCEPT;
xpath_variable_set& operator=(xpath_variable_set&& rhs); xpath_variable_set& operator=(xpath_variable_set&& rhs) PUGIXML_NOEXCEPT;
#endif #endif
// Add a new variable or get the existing one, if the types match // Add a new variable or get the existing one, if the types match
@ -1139,10 +1200,10 @@ namespace pugi
// Destructor // Destructor
~xpath_query(); ~xpath_query();
#if __cplusplus >= 201103 #ifdef PUGIXML_HAS_MOVE
// Move semantics support // Move semantics support
xpath_query(xpath_query&& rhs); xpath_query(xpath_query&& rhs) PUGIXML_NOEXCEPT;
xpath_query& operator=(xpath_query&& rhs); xpath_query& operator=(xpath_query&& rhs) PUGIXML_NOEXCEPT;
#endif #endif
// Get query expression return type // Get query expression return type
@ -1201,7 +1262,7 @@ namespace pugi
explicit xpath_exception(const xpath_parse_result& result); explicit xpath_exception(const xpath_parse_result& result);
// Get error message // Get error message
virtual const char* what() const throw(); virtual const char* what() const throw() PUGIXML_OVERRIDE;
// Get parse result // Get parse result
const xpath_parse_result& result() const; const xpath_parse_result& result() const;
@ -1280,10 +1341,10 @@ namespace pugi
xpath_node_set(const xpath_node_set& ns); xpath_node_set(const xpath_node_set& ns);
xpath_node_set& operator=(const xpath_node_set& ns); xpath_node_set& operator=(const xpath_node_set& ns);
#if __cplusplus >= 201103 #ifdef PUGIXML_HAS_MOVE
// Move semantics support // Move semantics support
xpath_node_set(xpath_node_set&& rhs); xpath_node_set(xpath_node_set&& rhs) PUGIXML_NOEXCEPT;
xpath_node_set& operator=(xpath_node_set&& rhs); xpath_node_set& operator=(xpath_node_set&& rhs) PUGIXML_NOEXCEPT;
#endif #endif
// Get collection type // Get collection type
@ -1317,7 +1378,7 @@ namespace pugi
xpath_node* _end; xpath_node* _end;
void _assign(const_iterator begin, const_iterator end, type_t type); void _assign(const_iterator begin, const_iterator end, type_t type);
void _move(xpath_node_set& rhs); void _move(xpath_node_set& rhs) PUGIXML_NOEXCEPT;
}; };
#endif #endif
@ -1375,7 +1436,7 @@ namespace std
#endif #endif
/** /**
* Copyright (c) 2006-2015 Arseny Kapoulkine * Copyright (c) 2006-2018 Arseny Kapoulkine
* *
* Permission is hereby granted, free of charge, to any person * Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation * obtaining a copy of this software and associated documentation

View File

@ -1,6 +1,6 @@
pugixml 1.6 - an XML processing library pugixml 1.9 - an XML processing library
Copyright (C) 2006-2015, by Arseny Kapoulkine (arseny.kapoulkine@gmail.com) Copyright (C) 2006-2018, by Arseny Kapoulkine (arseny.kapoulkine@gmail.com)
Report bugs and download new versions at http://pugixml.org/ Report bugs and download new versions at http://pugixml.org/
This is the distribution of pugixml, which is a C++ XML processing library, This is the distribution of pugixml, which is a C++ XML processing library,
@ -28,7 +28,7 @@ The distribution contains the following folders:
This library is distributed under the MIT License: This library is distributed under the MIT License:
Copyright (c) 2006-2015 Arseny Kapoulkine Copyright (c) 2006-2018 Arseny Kapoulkine
Permission is hereby granted, free of charge, to any person Permission is hereby granted, free of charge, to any person
obtaining a copy of this software and associated documentation obtaining a copy of this software and associated documentation

View File

@ -52,6 +52,35 @@ namespace QCD {
{ {
} }
///////////////////////////////////////////////////////////////
// Physical surface field utilities
///////////////////////////////////////////////////////////////
template<class Impl>
void CayleyFermion5D<Impl>::ExportPhysicalFermionSolution(const FermionField &solution5d,FermionField &exported4d)
{
int Ls = this->Ls;
FermionField tmp(this->FermionGrid());
tmp = solution5d;
conformable(solution5d._grid,this->FermionGrid());
conformable(exported4d._grid,this->GaugeGrid());
axpby_ssp_pminus(tmp, 0., solution5d, 1., solution5d, 0, 0);
axpby_ssp_pplus (tmp, 1., tmp , 1., solution5d, 0, Ls-1);
ExtractSlice(exported4d, tmp, 0, 0);
}
template<class Impl>
void CayleyFermion5D<Impl>::ImportPhysicalFermionSource(const FermionField &input4d,FermionField &imported5d)
{
int Ls = this->Ls;
FermionField tmp(this->FermionGrid());
conformable(imported5d._grid,this->FermionGrid());
conformable(input4d._grid ,this->GaugeGrid());
tmp = zero;
InsertSlice(input4d, tmp, 0 , 0);
InsertSlice(input4d, tmp, Ls-1, 0);
axpby_ssp_pplus (tmp, 0., tmp, 1., tmp, 0, 0);
axpby_ssp_pminus(tmp, 0., tmp, 1., tmp, Ls-1, Ls-1);
Dminus(tmp,imported5d);
}
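Schematically, with P_\pm = (1 \pm \gamma_5)/2 and standard domain-wall conventions, the pair of routines above implements

  q(x) = P_- \psi(x, s=0) + P_+ \psi(x, s=L_s-1)                  (export)

  \rho(x, s=0) = P_+ \eta(x),  \rho(x, s=L_s-1) = P_- \eta(x),
  imported5d = D_- \rho                                           (import)

so the physical quark field is assembled from the two chiral walls of the 5d solution.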
template<class Impl> template<class Impl>
void CayleyFermion5D<Impl>::Dminus(const FermionField &psi, FermionField &chi) void CayleyFermion5D<Impl>::Dminus(const FermionField &psi, FermionField &chi)
{ {

View File

@ -83,8 +83,13 @@ namespace Grid {
virtual void M5D (const FermionField &psi, FermionField &chi); virtual void M5D (const FermionField &psi, FermionField &chi);
virtual void M5Ddag(const FermionField &psi, FermionField &chi); virtual void M5Ddag(const FermionField &psi, FermionField &chi);
///////////////////////////////////////////////////////////////
// Physical surface field utilities
///////////////////////////////////////////////////////////////
virtual void Dminus(const FermionField &psi, FermionField &chi); virtual void Dminus(const FermionField &psi, FermionField &chi);
virtual void DminusDag(const FermionField &psi, FermionField &chi); virtual void DminusDag(const FermionField &psi, FermionField &chi);
virtual void ExportPhysicalFermionSolution(const FermionField &solution5d,FermionField &exported4d);
virtual void ImportPhysicalFermionSource(const FermionField &input4d,FermionField &imported5d);
///////////////////////////////////////////////////// /////////////////////////////////////////////////////
// Instantiate different versions depending on Impl // Instantiate different versions depending on Impl

View File

@ -295,6 +295,27 @@ namespace Grid {
assert((Ls&0x1)==1); // Odd Ls required assert((Ls&0x1)==1); // Odd Ls required
} }
template<class Impl>
void ContinuedFractionFermion5D<Impl>::ExportPhysicalFermionSolution(const FermionField &solution5d,FermionField &exported4d)
{
int Ls = this->Ls;
conformable(solution5d._grid,this->FermionGrid());
conformable(exported4d._grid,this->GaugeGrid());
ExtractSlice(exported4d, solution5d, Ls-1, Ls-1);
}
template<class Impl>
void ContinuedFractionFermion5D<Impl>::ImportPhysicalFermionSource(const FermionField &input4d,FermionField &imported5d)
{
int Ls = this->Ls;
conformable(imported5d._grid,this->FermionGrid());
conformable(input4d._grid ,this->GaugeGrid());
FermionField tmp(this->FermionGrid());
tmp=zero;
InsertSlice(input4d, tmp, Ls-1, Ls-1);
tmp=Gamma(Gamma::Algebra::Gamma5)*tmp;
this->Dminus(tmp,imported5d);
}
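For the overlap-like representations (continued fraction here, partial fraction below) the convention differs: the physical field sits on the single wall s = L_s - 1, with a \gamma_5 rotation on import. Schematically,

  q(x) = \psi(x, s=L_s-1),   \rho(x, s=L_s-1) = \gamma_5 \eta(x),
  imported5d = D_- \rho .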
FermOpTemplateInstantiate(ContinuedFractionFermion5D); FermOpTemplateInstantiate(ContinuedFractionFermion5D);
} }

View File

@ -65,6 +65,14 @@ namespace Grid {
// Efficient support for multigrid coarsening // Efficient support for multigrid coarsening
virtual void Mdir (const FermionField &in, FermionField &out,int dir,int disp); virtual void Mdir (const FermionField &in, FermionField &out,int dir,int disp);
///////////////////////////////////////////////////////////////
// Physical surface field utilities
///////////////////////////////////////////////////////////////
// virtual void Dminus(const FermionField &psi, FermionField &chi); // Inherit trivial case
// virtual void DminusDag(const FermionField &psi, FermionField &chi); // Inherit trivial case
virtual void ExportPhysicalFermionSolution(const FermionField &solution5d,FermionField &exported4d);
virtual void ImportPhysicalFermionSource (const FermionField &input4d,FermionField &imported5d);
// Constructors // Constructors
ContinuedFractionFermion5D(GaugeField &_Umu, ContinuedFractionFermion5D(GaugeField &_Umu,
GridCartesian &FiveDimGrid, GridCartesian &FiveDimGrid,

View File

@ -128,6 +128,19 @@ namespace Grid {
std::vector<Real> mom, std::vector<Real> mom,
unsigned int tmin, unsigned int tmin,
unsigned int tmax)=0; unsigned int tmax)=0;
///////////////////////////////////////////////
// Physical field import/export
///////////////////////////////////////////////
virtual void Dminus(const FermionField &psi, FermionField &chi) { chi=psi; }
virtual void DminusDag(const FermionField &psi, FermionField &chi) { chi=psi; }
virtual void ImportPhysicalFermionSource(const FermionField &input,FermionField &imported)
{
imported = input;
};
virtual void ExportPhysicalFermionSolution(const FermionField &solution,FermionField &exported)
{
exported=solution;
};
}; };
} }
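These virtuals give every action one uniform 4d/5d interface: 4d operators inherit the identity defaults above, while the 5d actions override them. A hedged usage sketch (the inversion itself is elided; names follow the interface declared above):

// Hedged sketch: build a 5d source from a 4d one, solve, then project
// the 5d solution back to the physical 4d surface field.
template <class FermOp>
void physicalSolve(FermOp &D,
                   const typename FermOp::FermionField &src4d,
                   typename FermOp::FermionField &sol4d,
                   typename FermOp::FermionField &src5d,
                   typename FermOp::FermionField &sol5d) {
  D.ImportPhysicalFermionSource(src4d, src5d);
  /* ... invert D on src5d to obtain sol5d ... */
  D.ExportPhysicalFermionSolution(sol5d, sol4d);
}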

View File

@ -396,6 +396,27 @@ namespace Grid {
amax=zolo_hi; amax=zolo_hi;
} }
template<class Impl>
void PartialFractionFermion5D<Impl>::ExportPhysicalFermionSolution(const FermionField &solution5d,FermionField &exported4d)
{
int Ls = this->Ls;
conformable(solution5d._grid,this->FermionGrid());
conformable(exported4d._grid,this->GaugeGrid());
ExtractSlice(exported4d, solution5d, Ls-1, Ls-1);
}
template<class Impl>
void PartialFractionFermion5D<Impl>::ImportPhysicalFermionSource(const FermionField &input4d,FermionField &imported5d)
{
int Ls = this->Ls;
conformable(imported5d._grid,this->FermionGrid());
conformable(input4d._grid ,this->GaugeGrid());
FermionField tmp(this->FermionGrid());
tmp=zero;
InsertSlice(input4d, tmp, Ls-1, Ls-1);
tmp=Gamma(Gamma::Algebra::Gamma5)*tmp;
this->Dminus(tmp,imported5d);
}
// Constructors // Constructors
template<class Impl> template<class Impl>
PartialFractionFermion5D<Impl>::PartialFractionFermion5D(GaugeField &_Umu, PartialFractionFermion5D<Impl>::PartialFractionFermion5D(GaugeField &_Umu,

View File

@ -70,6 +70,12 @@ namespace Grid {
// Efficient support for multigrid coarsening // Efficient support for multigrid coarsening
virtual void Mdir (const FermionField &in, FermionField &out,int dir,int disp); virtual void Mdir (const FermionField &in, FermionField &out,int dir,int disp);
///////////////////////////////////////////////////////////////
// Physical surface field utilities
///////////////////////////////////////////////////////////////
virtual void ExportPhysicalFermionSolution(const FermionField &solution5d,FermionField &exported4d);
virtual void ImportPhysicalFermionSource (const FermionField &input4d,FermionField &imported5d);
// Constructors // Constructors
PartialFractionFermion5D(GaugeField &_Umu, PartialFractionFermion5D(GaugeField &_Umu,
GridCartesian &FiveDimGrid, GridCartesian &FiveDimGrid,

View File

@ -69,39 +69,47 @@ class WilsonCompressorTemplate< _HCspinor, _Hspinor, _Spinor, projector,
/*****************************************************/ /*****************************************************/
/* Compress includes precision change if mpi data is not same */ /* Compress includes precision change if mpi data is not same */
/*****************************************************/ /*****************************************************/
inline void Compress(SiteHalfSpinor *buf,Integer o,const SiteSpinor &in) { inline void Compress(SiteHalfSpinor * __restrict__ buf,Integer o,const SiteSpinor &in) {
projector::Proj(buf[o],in,mu,dag); SiteHalfSpinor tmp;
projector::Proj(tmp,in,mu,dag);
vstream(buf[o],tmp);
} }
/*****************************************************/ /*****************************************************/
/* Exchange includes precision change if mpi data is not same */ /* Exchange includes precision change if mpi data is not same */
/*****************************************************/ /*****************************************************/
inline void Exchange(SiteHalfSpinor *mp, inline void Exchange(SiteHalfSpinor * __restrict__ mp,
SiteHalfSpinor *vp0, const SiteHalfSpinor * __restrict__ vp0,
SiteHalfSpinor *vp1, const SiteHalfSpinor * __restrict__ vp1,
Integer type,Integer o){ Integer type,Integer o){
exchange(mp[2*o],mp[2*o+1],vp0[o],vp1[o],type); SiteHalfSpinor tmp1;
SiteHalfSpinor tmp2;
exchange(tmp1,tmp2,vp0[o],vp1[o],type);
vstream(mp[2*o ],tmp1);
vstream(mp[2*o+1],tmp2);
} }
/*****************************************************/ /*****************************************************/
/* Have a decompression step if mpi data is not same */ /* Have a decompression step if mpi data is not same */
/*****************************************************/ /*****************************************************/
inline void Decompress(SiteHalfSpinor *out, inline void Decompress(SiteHalfSpinor * __restrict__ out,
SiteHalfSpinor *in, Integer o) { SiteHalfSpinor * __restrict__ in, Integer o) {
assert(0); assert(0);
} }
/*****************************************************/ /*****************************************************/
/* Compress Exchange */ /* Compress Exchange */
/*****************************************************/ /*****************************************************/
inline void CompressExchange(SiteHalfSpinor *out0, inline void CompressExchange(SiteHalfSpinor * __restrict__ out0,
SiteHalfSpinor *out1, SiteHalfSpinor * __restrict__ out1,
const SiteSpinor *in, const SiteSpinor * __restrict__ in,
Integer j,Integer k, Integer m,Integer type){ Integer j,Integer k, Integer m,Integer type){
SiteHalfSpinor temp1, temp2,temp3,temp4; SiteHalfSpinor temp1, temp2,temp3,temp4;
projector::Proj(temp1,in[k],mu,dag); projector::Proj(temp1,in[k],mu,dag);
projector::Proj(temp2,in[m],mu,dag); projector::Proj(temp2,in[m],mu,dag);
exchange(out0[j],out1[j],temp1,temp2,type); exchange(temp3,temp4,temp1,temp2,type);
vstream(out0[j],temp3);
vstream(out1[j],temp4);
} }
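The pattern throughout this compressor is: do the arithmetic into a stack temporary, then vstream the result into the communications buffer, with __restrict__ telling the compiler the pointers cannot alias. The streaming store avoids reading the destination cache line only to overwrite it. A standalone x86 illustration of the same idea (hedged; Grid's vstream abstracts this per SIMD target, and both pointers are assumed 16-byte aligned):

#include <emmintrin.h>

// Hedged SSE2 sketch: load/compute in registers, then non-temporal
// store so the halo buffer bypasses the cache hierarchy.
void stream_copy(__m128i *__restrict__ dst,
                 const __m128i *__restrict__ src, int n) {
  for (int i = 0; i < n; ++i) {
    __m128i tmp = _mm_load_si128(src + i);
    _mm_stream_si128(dst + i, tmp);
  }
  _mm_sfence();  // order/flush the streaming stores
}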
/*****************************************************/ /*****************************************************/

View File

@ -30,147 +30,33 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
#define REGISTER #define REGISTER
#define LOAD_CHIMU_BODY(F) \ #define LOAD_CHIMU \
Chimu_00=ref(F)(0)(0); \
Chimu_01=ref(F)(0)(1); \
Chimu_02=ref(F)(0)(2); \
Chimu_10=ref(F)(1)(0); \
Chimu_11=ref(F)(1)(1); \
Chimu_12=ref(F)(1)(2); \
Chimu_20=ref(F)(2)(0); \
Chimu_21=ref(F)(2)(1); \
Chimu_22=ref(F)(2)(2); \
Chimu_30=ref(F)(3)(0); \
Chimu_31=ref(F)(3)(1); \
Chimu_32=ref(F)(3)(2)
#define LOAD_CHIMU(DIR,F,PERM) \
{ const SiteSpinor & ref (in._odata[offset]); LOAD_CHIMU_BODY(F); }
#define LOAD_CHI_BODY(F) \
Chi_00 = ref(F)(0)(0);\
Chi_01 = ref(F)(0)(1);\
Chi_02 = ref(F)(0)(2);\
Chi_10 = ref(F)(1)(0);\
Chi_11 = ref(F)(1)(1);\
Chi_12 = ref(F)(1)(2)
#define LOAD_CHI(DIR,F,PERM) \
{const SiteHalfSpinor &ref(buf[offset]); LOAD_CHI_BODY(F); }
//G-parity implementations using in-place intrinsic ops
//1l 1h -> 1h 1l
//0l 0h , 1h 1l -> 0l 1h 0h,1l
//0h,1l -> 1l,0h
//if( (distance == 1 && !perm_will_occur) || (distance == -1 && perm_will_occur) )
//Pulled fermion through forwards face, GPBC on upper component
//Need 0= 0l 1h 1= 1l 0h
//else if( (distance == -1 && !perm) || (distance == 1 && perm) )
//Pulled fermion through backwards face, GPBC on lower component
//Need 0= 1l 0h 1= 0l 1h
//1l 1h -> 1h 1l
//0l 0h , 1h 1l -> 0l 1h 0h,1l
#define DO_TWIST_0L_1H(INTO,S,C,F, PERM, tmp1, tmp2, tmp3) \
permute##PERM(tmp1, ref(1)(S)(C)); \
exchange##PERM(tmp2,tmp3, ref(0)(S)(C), tmp1); \
INTO = tmp2;
//0l 0h -> 0h 0l
//1l 1h, 0h 0l -> 1l 0h, 1h 0l
#define DO_TWIST_1L_0H(INTO,S,C,F, PERM, tmp1, tmp2, tmp3) \
permute##PERM(tmp1, ref(0)(S)(C)); \
exchange##PERM(tmp2,tmp3, ref(1)(S)(C), tmp1); \
INTO = tmp2;
#define LOAD_CHI_SETUP(DIR,F) \
g = F; \
direction = st._directions[DIR]; \
distance = st._distances[DIR]; \
sl = st._grid->_simd_layout[direction]; \
inplace_twist = 0; \
if(SE->_around_the_world && this->Params.twists[DIR % 4]){ \
if(sl == 1){ \
g = (F+1) % 2; \
}else{ \
inplace_twist = 1; \
} \
}
#define LOAD_CHIMU_GPARITY_INPLACE_TWIST(DIR,F,PERM) \
{const SiteSpinor & ref (in._odata[offset]); \ {const SiteSpinor & ref (in._odata[offset]); \
LOAD_CHI_SETUP(DIR,F); \ Chimu_00=ref()(0)(0);\
if(!inplace_twist){ \ Chimu_01=ref()(0)(1);\
LOAD_CHIMU_BODY(g); \ Chimu_02=ref()(0)(2);\
}else{ \ Chimu_10=ref()(1)(0);\
if( ( F==0 && ((distance == 1 && !perm) || (distance == -1 && perm)) ) || \ Chimu_11=ref()(1)(1);\
( F==1 && ((distance == -1 && !perm) || (distance == 1 && perm)) ) ){ \ Chimu_12=ref()(1)(2);\
DO_TWIST_0L_1H(Chimu_00,0,0,F,PERM, U_00,U_01,U_10); \ Chimu_20=ref()(2)(0);\
DO_TWIST_0L_1H(Chimu_01,0,1,F,PERM, U_11,U_20,U_21); \ Chimu_21=ref()(2)(1);\
DO_TWIST_0L_1H(Chimu_02,0,2,F,PERM, U_00,U_01,U_10); \ Chimu_22=ref()(2)(2);\
DO_TWIST_0L_1H(Chimu_10,1,0,F,PERM, U_11,U_20,U_21); \ Chimu_30=ref()(3)(0);\
DO_TWIST_0L_1H(Chimu_11,1,1,F,PERM, U_00,U_01,U_10); \ Chimu_31=ref()(3)(1);\
DO_TWIST_0L_1H(Chimu_12,1,2,F,PERM, U_11,U_20,U_21); \ Chimu_32=ref()(3)(2);}
DO_TWIST_0L_1H(Chimu_20,2,0,F,PERM, U_00,U_01,U_10); \
DO_TWIST_0L_1H(Chimu_21,2,1,F,PERM, U_11,U_20,U_21); \
DO_TWIST_0L_1H(Chimu_22,2,2,F,PERM, U_00,U_01,U_10); \
DO_TWIST_0L_1H(Chimu_30,3,0,F,PERM, U_11,U_20,U_21); \
DO_TWIST_0L_1H(Chimu_31,3,1,F,PERM, U_00,U_01,U_10); \
DO_TWIST_0L_1H(Chimu_32,3,2,F,PERM, U_11,U_20,U_21); \
}else{ \
DO_TWIST_1L_0H(Chimu_00,0,0,F,PERM, U_00,U_01,U_10); \
DO_TWIST_1L_0H(Chimu_01,0,1,F,PERM, U_11,U_20,U_21); \
DO_TWIST_1L_0H(Chimu_02,0,2,F,PERM, U_00,U_01,U_10); \
DO_TWIST_1L_0H(Chimu_10,1,0,F,PERM, U_11,U_20,U_21); \
DO_TWIST_1L_0H(Chimu_11,1,1,F,PERM, U_00,U_01,U_10); \
DO_TWIST_1L_0H(Chimu_12,1,2,F,PERM, U_11,U_20,U_21); \
DO_TWIST_1L_0H(Chimu_20,2,0,F,PERM, U_00,U_01,U_10); \
DO_TWIST_1L_0H(Chimu_21,2,1,F,PERM, U_11,U_20,U_21); \
DO_TWIST_1L_0H(Chimu_22,2,2,F,PERM, U_00,U_01,U_10); \
DO_TWIST_1L_0H(Chimu_30,3,0,F,PERM, U_11,U_20,U_21); \
DO_TWIST_1L_0H(Chimu_31,3,1,F,PERM, U_00,U_01,U_10); \
DO_TWIST_1L_0H(Chimu_32,3,2,F,PERM, U_11,U_20,U_21); \
} \
} \
}
#define LOAD_CHI\
#define LOAD_CHI_GPARITY_INPLACE_TWIST(DIR,F,PERM) \
{const SiteHalfSpinor &ref(buf[offset]); \ {const SiteHalfSpinor &ref(buf[offset]); \
LOAD_CHI_SETUP(DIR,F); \ Chi_00 = ref()(0)(0);\
if(!inplace_twist){ \ Chi_01 = ref()(0)(1);\
LOAD_CHI_BODY(g); \ Chi_02 = ref()(0)(2);\
}else{ \ Chi_10 = ref()(1)(0);\
if( ( F==0 && ((distance == 1 && !perm) || (distance == -1 && perm)) ) || \ Chi_11 = ref()(1)(1);\
( F==1 && ((distance == -1 && !perm) || (distance == 1 && perm)) ) ){ \ Chi_12 = ref()(1)(2);}
DO_TWIST_0L_1H(Chi_00,0,0,F,PERM, U_00,U_01,U_10); \
DO_TWIST_0L_1H(Chi_01,0,1,F,PERM, U_11,U_20,U_21); \
DO_TWIST_0L_1H(Chi_02,0,2,F,PERM, UChi_00,UChi_01,UChi_02); \
DO_TWIST_0L_1H(Chi_10,1,0,F,PERM, UChi_10,UChi_11,UChi_12); \
DO_TWIST_0L_1H(Chi_11,1,1,F,PERM, U_00,U_01,U_10); \
DO_TWIST_0L_1H(Chi_12,1,2,F,PERM, U_11,U_20,U_21); \
}else{ \
DO_TWIST_1L_0H(Chi_00,0,0,F,PERM, U_00,U_01,U_10); \
DO_TWIST_1L_0H(Chi_01,0,1,F,PERM, U_11,U_20,U_21); \
DO_TWIST_1L_0H(Chi_02,0,2,F,PERM, UChi_00,UChi_01,UChi_02); \
DO_TWIST_1L_0H(Chi_10,1,0,F,PERM, UChi_10,UChi_11,UChi_12); \
DO_TWIST_1L_0H(Chi_11,1,1,F,PERM, U_00,U_01,U_10); \
DO_TWIST_1L_0H(Chi_12,1,2,F,PERM, U_11,U_20,U_21); \
} \
} \
}
#define LOAD_CHI_GPARITY(DIR,F,PERM) LOAD_CHI_GPARITY_INPLACE_TWIST(DIR,F,PERM)
#define LOAD_CHIMU_GPARITY(DIR,F,PERM) LOAD_CHIMU_GPARITY_INPLACE_TWIST(DIR,F,PERM)
// To splat or not to splat depends on the implementation // To splat or not to splat depends on the implementation
#define MULT_2SPIN_BODY \ #define MULT_2SPIN(A)\
{auto & ref(U._odata[sU](A)); \
Impl::loadLinkElement(U_00,ref()(0,0)); \ Impl::loadLinkElement(U_00,ref()(0,0)); \
Impl::loadLinkElement(U_10,ref()(1,0)); \ Impl::loadLinkElement(U_10,ref()(1,0)); \
Impl::loadLinkElement(U_20,ref()(2,0)); \ Impl::loadLinkElement(U_20,ref()(2,0)); \
@ -197,14 +83,7 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
UChi_01+= U_10*Chi_02;\ UChi_01+= U_10*Chi_02;\
UChi_11+= U_10*Chi_12;\ UChi_11+= U_10*Chi_12;\
UChi_02+= U_20*Chi_02;\ UChi_02+= U_20*Chi_02;\
UChi_12+= U_20*Chi_12 UChi_12+= U_20*Chi_12;}
#define MULT_2SPIN(A,F) \
{auto & ref(U._odata[sU](A)); MULT_2SPIN_BODY; }
#define MULT_2SPIN_GPARITY(A,F) \
{auto & ref(U._odata[sU](F)(A)); MULT_2SPIN_BODY; }
#define PERMUTE_DIR(dir) \ #define PERMUTE_DIR(dir) \
@ -428,87 +307,84 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
result_31-= UChi_11; \ result_31-= UChi_11; \
result_32-= UChi_12; result_32-= UChi_12;
#define HAND_STENCIL_LEG(PROJ,PERM,DIR,RECON,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL) \ #define HAND_STENCIL_LEG(PROJ,PERM,DIR,RECON) \
SE=st.GetEntry(ptype,DIR,ss); \ SE=st.GetEntry(ptype,DIR,ss); \
offset = SE->_offset; \ offset = SE->_offset; \
local = SE->_is_local; \ local = SE->_is_local; \
perm = SE->_permute; \ perm = SE->_permute; \
if ( local ) { \ if ( local ) { \
LOAD_CHIMU_IMPL(DIR,F,PERM); \ LOAD_CHIMU; \
PROJ; \ PROJ; \
if ( perm) { \ if ( perm) { \
PERMUTE_DIR(PERM); \ PERMUTE_DIR(PERM); \
} \ } \
} else { \ } else { \
LOAD_CHI_IMPL(DIR,F,PERM); \ LOAD_CHI; \
} \ } \
MULT_2SPIN_IMPL(DIR,F); \ MULT_2SPIN(DIR); \
RECON; RECON;
#define HAND_STENCIL_LEG_INT(PROJ,PERM,DIR,RECON) \
#define HAND_STENCIL_LEG_INT(PROJ,PERM,DIR,RECON,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL) \
SE=st.GetEntry(ptype,DIR,ss); \ SE=st.GetEntry(ptype,DIR,ss); \
offset = SE->_offset; \ offset = SE->_offset; \
local = SE->_is_local; \ local = SE->_is_local; \
perm = SE->_permute; \ perm = SE->_permute; \
if ( local ) { \ if ( local ) { \
LOAD_CHIMU_IMPL(DIR,F,PERM); \ LOAD_CHIMU; \
PROJ; \ PROJ; \
if ( perm) { \ if ( perm) { \
PERMUTE_DIR(PERM); \ PERMUTE_DIR(PERM); \
} \ } \
} else if ( st.same_node[DIR] ) { \ } else if ( st.same_node[DIR] ) { \
LOAD_CHI_IMPL(DIR,F,PERM); \ LOAD_CHI; \
} \ } \
if (local || st.same_node[DIR] ) { \ if (local || st.same_node[DIR] ) { \
MULT_2SPIN_IMPL(DIR,F); \ MULT_2SPIN(DIR); \
RECON; \ RECON; \
} }
#define HAND_STENCIL_LEG_EXT(PROJ,PERM,DIR,RECON,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL) \ #define HAND_STENCIL_LEG_EXT(PROJ,PERM,DIR,RECON) \
SE=st.GetEntry(ptype,DIR,ss); \ SE=st.GetEntry(ptype,DIR,ss); \
offset = SE->_offset; \ offset = SE->_offset; \
local = SE->_is_local; \
perm = SE->_permute; \
if((!SE->_is_local)&&(!st.same_node[DIR]) ) { \ if((!SE->_is_local)&&(!st.same_node[DIR]) ) { \
LOAD_CHI_IMPL(DIR,F,PERM); \ LOAD_CHI; \
MULT_2SPIN_IMPL(DIR,F); \ MULT_2SPIN(DIR); \
RECON; \ RECON; \
nmu++; \ nmu++; \
} }
#define HAND_RESULT(ss,F) \ #define HAND_RESULT(ss) \
{ \ { \
SiteSpinor & ref (out._odata[ss]); \ SiteSpinor & ref (out._odata[ss]); \
vstream(ref(F)(0)(0),result_00); \ vstream(ref()(0)(0),result_00); \
vstream(ref(F)(0)(1),result_01); \ vstream(ref()(0)(1),result_01); \
vstream(ref(F)(0)(2),result_02); \ vstream(ref()(0)(2),result_02); \
vstream(ref(F)(1)(0),result_10); \ vstream(ref()(1)(0),result_10); \
vstream(ref(F)(1)(1),result_11); \ vstream(ref()(1)(1),result_11); \
vstream(ref(F)(1)(2),result_12); \ vstream(ref()(1)(2),result_12); \
vstream(ref(F)(2)(0),result_20); \ vstream(ref()(2)(0),result_20); \
vstream(ref(F)(2)(1),result_21); \ vstream(ref()(2)(1),result_21); \
vstream(ref(F)(2)(2),result_22); \ vstream(ref()(2)(2),result_22); \
vstream(ref(F)(3)(0),result_30); \ vstream(ref()(3)(0),result_30); \
vstream(ref(F)(3)(1),result_31); \ vstream(ref()(3)(1),result_31); \
vstream(ref(F)(3)(2),result_32); \ vstream(ref()(3)(2),result_32); \
} }
#define HAND_RESULT_EXT(ss,F) \ #define HAND_RESULT_EXT(ss) \
if (nmu){ \ if (nmu){ \
SiteSpinor & ref (out._odata[ss]); \ SiteSpinor & ref (out._odata[ss]); \
ref(F)(0)(0)+=result_00; \ ref()(0)(0)+=result_00; \
ref(F)(0)(1)+=result_01; \ ref()(0)(1)+=result_01; \
ref(F)(0)(2)+=result_02; \ ref()(0)(2)+=result_02; \
ref(F)(1)(0)+=result_10; \ ref()(1)(0)+=result_10; \
ref(F)(1)(1)+=result_11; \ ref()(1)(1)+=result_11; \
ref(F)(1)(2)+=result_12; \ ref()(1)(2)+=result_12; \
ref(F)(2)(0)+=result_20; \ ref()(2)(0)+=result_20; \
ref(F)(2)(1)+=result_21; \ ref()(2)(1)+=result_21; \
ref(F)(2)(2)+=result_22; \ ref()(2)(2)+=result_22; \
ref(F)(3)(0)+=result_30; \ ref()(3)(0)+=result_30; \
ref(F)(3)(1)+=result_31; \ ref()(3)(1)+=result_31; \
ref(F)(3)(2)+=result_32; \ ref()(3)(2)+=result_32; \
} }
@ -587,18 +463,15 @@ WilsonKernels<Impl>::HandDhopSite(StencilImpl &st,LebesgueOrder &lo,DoubledGauge
int offset,local,perm, ptype; int offset,local,perm, ptype;
StencilEntry *SE; StencilEntry *SE;
#define HAND_DOP_SITE(F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL) \ HAND_STENCIL_LEG(XM_PROJ,3,Xp,XM_RECON);
HAND_STENCIL_LEG(XM_PROJ,3,Xp,XM_RECON,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \ HAND_STENCIL_LEG(YM_PROJ,2,Yp,YM_RECON_ACCUM);
HAND_STENCIL_LEG(YM_PROJ,2,Yp,YM_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \ HAND_STENCIL_LEG(ZM_PROJ,1,Zp,ZM_RECON_ACCUM);
HAND_STENCIL_LEG(ZM_PROJ,1,Zp,ZM_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \ HAND_STENCIL_LEG(TM_PROJ,0,Tp,TM_RECON_ACCUM);
HAND_STENCIL_LEG(TM_PROJ,0,Tp,TM_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \ HAND_STENCIL_LEG(XP_PROJ,3,Xm,XP_RECON_ACCUM);
HAND_STENCIL_LEG(XP_PROJ,3,Xm,XP_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \ HAND_STENCIL_LEG(YP_PROJ,2,Ym,YP_RECON_ACCUM);
HAND_STENCIL_LEG(YP_PROJ,2,Ym,YP_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \ HAND_STENCIL_LEG(ZP_PROJ,1,Zm,ZP_RECON_ACCUM);
HAND_STENCIL_LEG(ZP_PROJ,1,Zm,ZP_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \ HAND_STENCIL_LEG(TP_PROJ,0,Tm,TP_RECON_ACCUM);
HAND_STENCIL_LEG(TP_PROJ,0,Tm,TP_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \ HAND_RESULT(ss);
HAND_RESULT(ss,F)
HAND_DOP_SITE(, LOAD_CHI,LOAD_CHIMU,MULT_2SPIN);
} }
template<class Impl> template<class Impl>
@ -613,18 +486,15 @@ void WilsonKernels<Impl>::HandDhopSiteDag(StencilImpl &st,LebesgueOrder &lo,Doub
StencilEntry *SE; StencilEntry *SE;
int offset,local,perm, ptype; int offset,local,perm, ptype;
#define HAND_DOP_SITE_DAG(F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL) \ HAND_STENCIL_LEG(XP_PROJ,3,Xp,XP_RECON);
HAND_STENCIL_LEG(XP_PROJ,3,Xp,XP_RECON,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \ HAND_STENCIL_LEG(YP_PROJ,2,Yp,YP_RECON_ACCUM);
HAND_STENCIL_LEG(YP_PROJ,2,Yp,YP_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \ HAND_STENCIL_LEG(ZP_PROJ,1,Zp,ZP_RECON_ACCUM);
HAND_STENCIL_LEG(ZP_PROJ,1,Zp,ZP_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \ HAND_STENCIL_LEG(TP_PROJ,0,Tp,TP_RECON_ACCUM);
HAND_STENCIL_LEG(TP_PROJ,0,Tp,TP_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \ HAND_STENCIL_LEG(XM_PROJ,3,Xm,XM_RECON_ACCUM);
HAND_STENCIL_LEG(XM_PROJ,3,Xm,XM_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \ HAND_STENCIL_LEG(YM_PROJ,2,Ym,YM_RECON_ACCUM);
HAND_STENCIL_LEG(YM_PROJ,2,Ym,YM_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \ HAND_STENCIL_LEG(ZM_PROJ,1,Zm,ZM_RECON_ACCUM);
HAND_STENCIL_LEG(ZM_PROJ,1,Zm,ZM_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \ HAND_STENCIL_LEG(TM_PROJ,0,Tm,TM_RECON_ACCUM);
HAND_STENCIL_LEG(TM_PROJ,0,Tm,TM_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \ HAND_RESULT(ss);
HAND_RESULT(ss,F)
HAND_DOP_SITE_DAG(, LOAD_CHI,LOAD_CHIMU,MULT_2SPIN);
} }
template<class Impl> void template<class Impl> void
@ -639,20 +509,16 @@ WilsonKernels<Impl>::HandDhopSiteInt(StencilImpl &st,LebesgueOrder &lo,DoubledGa
int offset,local,perm, ptype; int offset,local,perm, ptype;
StencilEntry *SE; StencilEntry *SE;
ZERO_RESULT;
#define HAND_DOP_SITE_INT(F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL) \ HAND_STENCIL_LEG_INT(XM_PROJ,3,Xp,XM_RECON_ACCUM);
ZERO_RESULT; \ HAND_STENCIL_LEG_INT(YM_PROJ,2,Yp,YM_RECON_ACCUM);
HAND_STENCIL_LEG_INT(XM_PROJ,3,Xp,XM_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \ HAND_STENCIL_LEG_INT(ZM_PROJ,1,Zp,ZM_RECON_ACCUM);
HAND_STENCIL_LEG_INT(YM_PROJ,2,Yp,YM_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \ HAND_STENCIL_LEG_INT(TM_PROJ,0,Tp,TM_RECON_ACCUM);
HAND_STENCIL_LEG_INT(ZM_PROJ,1,Zp,ZM_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \ HAND_STENCIL_LEG_INT(XP_PROJ,3,Xm,XP_RECON_ACCUM);
HAND_STENCIL_LEG_INT(TM_PROJ,0,Tp,TM_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \ HAND_STENCIL_LEG_INT(YP_PROJ,2,Ym,YP_RECON_ACCUM);
HAND_STENCIL_LEG_INT(XP_PROJ,3,Xm,XP_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \ HAND_STENCIL_LEG_INT(ZP_PROJ,1,Zm,ZP_RECON_ACCUM);
HAND_STENCIL_LEG_INT(YP_PROJ,2,Ym,YP_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \ HAND_STENCIL_LEG_INT(TP_PROJ,0,Tm,TP_RECON_ACCUM);
HAND_STENCIL_LEG_INT(ZP_PROJ,1,Zm,ZP_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \ HAND_RESULT(ss);
HAND_STENCIL_LEG_INT(TP_PROJ,0,Tm,TP_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
HAND_RESULT(ss,F)
HAND_DOP_SITE_INT(, LOAD_CHI,LOAD_CHIMU,MULT_2SPIN);
} }
template<class Impl> template<class Impl>
@ -666,20 +532,16 @@ void WilsonKernels<Impl>::HandDhopSiteDagInt(StencilImpl &st,LebesgueOrder &lo,D
StencilEntry *SE; StencilEntry *SE;
int offset,local,perm, ptype; int offset,local,perm, ptype;
ZERO_RESULT;
#define HAND_DOP_SITE_DAG_INT(F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL) \ HAND_STENCIL_LEG_INT(XP_PROJ,3,Xp,XP_RECON_ACCUM);
ZERO_RESULT; \ HAND_STENCIL_LEG_INT(YP_PROJ,2,Yp,YP_RECON_ACCUM);
HAND_STENCIL_LEG_INT(XP_PROJ,3,Xp,XP_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \ HAND_STENCIL_LEG_INT(ZP_PROJ,1,Zp,ZP_RECON_ACCUM);
HAND_STENCIL_LEG_INT(YP_PROJ,2,Yp,YP_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \ HAND_STENCIL_LEG_INT(TP_PROJ,0,Tp,TP_RECON_ACCUM);
HAND_STENCIL_LEG_INT(ZP_PROJ,1,Zp,ZP_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \ HAND_STENCIL_LEG_INT(XM_PROJ,3,Xm,XM_RECON_ACCUM);
HAND_STENCIL_LEG_INT(TP_PROJ,0,Tp,TP_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \ HAND_STENCIL_LEG_INT(YM_PROJ,2,Ym,YM_RECON_ACCUM);
HAND_STENCIL_LEG_INT(XM_PROJ,3,Xm,XM_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \ HAND_STENCIL_LEG_INT(ZM_PROJ,1,Zm,ZM_RECON_ACCUM);
HAND_STENCIL_LEG_INT(YM_PROJ,2,Ym,YM_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \ HAND_STENCIL_LEG_INT(TM_PROJ,0,Tm,TM_RECON_ACCUM);
HAND_STENCIL_LEG_INT(ZM_PROJ,1,Zm,ZM_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \ HAND_RESULT(ss);
HAND_STENCIL_LEG_INT(TM_PROJ,0,Tm,TM_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
HAND_RESULT(ss,F)
HAND_DOP_SITE_DAG_INT(, LOAD_CHI,LOAD_CHIMU,MULT_2SPIN);
} }
template<class Impl> void template<class Impl> void
@ -695,20 +557,16 @@ WilsonKernels<Impl>::HandDhopSiteExt(StencilImpl &st,LebesgueOrder &lo,DoubledGa
int offset,local,perm, ptype; int offset,local,perm, ptype;
StencilEntry *SE; StencilEntry *SE;
int nmu=0; int nmu=0;
ZERO_RESULT;
#define HAND_DOP_SITE_EXT(F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL) \ HAND_STENCIL_LEG_EXT(XM_PROJ,3,Xp,XM_RECON_ACCUM);
ZERO_RESULT; \ HAND_STENCIL_LEG_EXT(YM_PROJ,2,Yp,YM_RECON_ACCUM);
HAND_STENCIL_LEG_EXT(XM_PROJ,3,Xp,XM_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \ HAND_STENCIL_LEG_EXT(ZM_PROJ,1,Zp,ZM_RECON_ACCUM);
HAND_STENCIL_LEG_EXT(YM_PROJ,2,Yp,YM_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \ HAND_STENCIL_LEG_EXT(TM_PROJ,0,Tp,TM_RECON_ACCUM);
HAND_STENCIL_LEG_EXT(ZM_PROJ,1,Zp,ZM_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \ HAND_STENCIL_LEG_EXT(XP_PROJ,3,Xm,XP_RECON_ACCUM);
HAND_STENCIL_LEG_EXT(TM_PROJ,0,Tp,TM_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \ HAND_STENCIL_LEG_EXT(YP_PROJ,2,Ym,YP_RECON_ACCUM);
HAND_STENCIL_LEG_EXT(XP_PROJ,3,Xm,XP_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \ HAND_STENCIL_LEG_EXT(ZP_PROJ,1,Zm,ZP_RECON_ACCUM);
HAND_STENCIL_LEG_EXT(YP_PROJ,2,Ym,YP_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \ HAND_STENCIL_LEG_EXT(TP_PROJ,0,Tm,TP_RECON_ACCUM);
HAND_STENCIL_LEG_EXT(ZP_PROJ,1,Zm,ZP_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \ HAND_RESULT_EXT(ss);
HAND_STENCIL_LEG_EXT(TP_PROJ,0,Tm,TP_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
HAND_RESULT_EXT(ss,F)
HAND_DOP_SITE_EXT(, LOAD_CHI,LOAD_CHIMU,MULT_2SPIN);
} }
template<class Impl> template<class Impl>
@ -723,193 +581,18 @@ void WilsonKernels<Impl>::HandDhopSiteDagExt(StencilImpl &st,LebesgueOrder &lo,D
StencilEntry *SE; StencilEntry *SE;
int offset,local,perm, ptype; int offset,local,perm, ptype;
int nmu=0; int nmu=0;
ZERO_RESULT;
#define HAND_DOP_SITE_DAG_EXT(F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL) \ HAND_STENCIL_LEG_EXT(XP_PROJ,3,Xp,XP_RECON_ACCUM);
ZERO_RESULT; \ HAND_STENCIL_LEG_EXT(YP_PROJ,2,Yp,YP_RECON_ACCUM);
HAND_STENCIL_LEG_EXT(XP_PROJ,3,Xp,XP_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \ HAND_STENCIL_LEG_EXT(ZP_PROJ,1,Zp,ZP_RECON_ACCUM);
HAND_STENCIL_LEG_EXT(YP_PROJ,2,Yp,YP_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \ HAND_STENCIL_LEG_EXT(TP_PROJ,0,Tp,TP_RECON_ACCUM);
HAND_STENCIL_LEG_EXT(ZP_PROJ,1,Zp,ZP_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \ HAND_STENCIL_LEG_EXT(XM_PROJ,3,Xm,XM_RECON_ACCUM);
HAND_STENCIL_LEG_EXT(TP_PROJ,0,Tp,TP_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \ HAND_STENCIL_LEG_EXT(YM_PROJ,2,Ym,YM_RECON_ACCUM);
HAND_STENCIL_LEG_EXT(XM_PROJ,3,Xm,XM_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \ HAND_STENCIL_LEG_EXT(ZM_PROJ,1,Zm,ZM_RECON_ACCUM);
HAND_STENCIL_LEG_EXT(YM_PROJ,2,Ym,YM_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \ HAND_STENCIL_LEG_EXT(TM_PROJ,0,Tm,TM_RECON_ACCUM);
HAND_STENCIL_LEG_EXT(ZM_PROJ,1,Zm,ZM_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \ HAND_RESULT_EXT(ss);
HAND_STENCIL_LEG_EXT(TM_PROJ,0,Tm,TM_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
HAND_RESULT_EXT(ss,F)
HAND_DOP_SITE_DAG_EXT(, LOAD_CHI,LOAD_CHIMU,MULT_2SPIN);
} }
////////////////////////////////////////////////
// Specialise Gparity to simple implementation
////////////////////////////////////////////////
#define HAND_SPECIALISE_EMPTY(IMPL) \
template<> void \
WilsonKernels<IMPL>::HandDhopSite(StencilImpl &st, \
LebesgueOrder &lo, \
DoubledGaugeField &U, \
SiteHalfSpinor *buf, \
int sF,int sU, \
const FermionField &in, \
FermionField &out){ assert(0); } \
template<> void \
WilsonKernels<IMPL>::HandDhopSiteDag(StencilImpl &st, \
LebesgueOrder &lo, \
DoubledGaugeField &U, \
SiteHalfSpinor *buf, \
int sF,int sU, \
const FermionField &in, \
FermionField &out){ assert(0); } \
template<> void \
WilsonKernels<IMPL>::HandDhopSiteInt(StencilImpl &st, \
LebesgueOrder &lo, \
DoubledGaugeField &U, \
SiteHalfSpinor *buf, \
int sF,int sU, \
const FermionField &in, \
FermionField &out){ assert(0); } \
template<> void \
WilsonKernels<IMPL>::HandDhopSiteExt(StencilImpl &st, \
LebesgueOrder &lo, \
DoubledGaugeField &U, \
SiteHalfSpinor *buf, \
int sF,int sU, \
const FermionField &in, \
FermionField &out){ assert(0); } \
template<> void \
WilsonKernels<IMPL>::HandDhopSiteDagInt(StencilImpl &st, \
LebesgueOrder &lo, \
DoubledGaugeField &U, \
SiteHalfSpinor *buf, \
int sF,int sU, \
const FermionField &in, \
FermionField &out){ assert(0); } \
template<> void \
WilsonKernels<IMPL>::HandDhopSiteDagExt(StencilImpl &st, \
LebesgueOrder &lo, \
DoubledGaugeField &U, \
SiteHalfSpinor *buf, \
int sF,int sU, \
const FermionField &in, \
FermionField &out){ assert(0); } \
#define HAND_SPECIALISE_GPARITY(IMPL) \
template<> void \
WilsonKernels<IMPL>::HandDhopSite(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,SiteHalfSpinor *buf, \
int ss,int sU,const FermionField &in, FermionField &out) \
{ \
typedef IMPL Impl; \
typedef typename Simd::scalar_type S; \
typedef typename Simd::vector_type V; \
\
HAND_DECLARATIONS(ignore); \
\
int offset,local,perm, ptype, g, direction, distance, sl, inplace_twist; \
StencilEntry *SE; \
HAND_DOP_SITE(0, LOAD_CHI_GPARITY,LOAD_CHIMU_GPARITY,MULT_2SPIN_GPARITY); \
HAND_DOP_SITE(1, LOAD_CHI_GPARITY,LOAD_CHIMU_GPARITY,MULT_2SPIN_GPARITY); \
} \
\
template<> \
void WilsonKernels<IMPL>::HandDhopSiteDag(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,SiteHalfSpinor *buf, \
int ss,int sU,const FermionField &in, FermionField &out) \
{ \
typedef IMPL Impl; \
typedef typename Simd::scalar_type S; \
typedef typename Simd::vector_type V; \
\
HAND_DECLARATIONS(ignore); \
\
StencilEntry *SE; \
int offset,local,perm, ptype, g, direction, distance, sl, inplace_twist; \
HAND_DOP_SITE_DAG(0, LOAD_CHI_GPARITY,LOAD_CHIMU_GPARITY,MULT_2SPIN_GPARITY); \
HAND_DOP_SITE_DAG(1, LOAD_CHI_GPARITY,LOAD_CHIMU_GPARITY,MULT_2SPIN_GPARITY); \
} \
\
template<> void \
WilsonKernels<IMPL>::HandDhopSiteInt(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,SiteHalfSpinor *buf, \
int ss,int sU,const FermionField &in, FermionField &out) \
{ \
typedef IMPL Impl; \
typedef typename Simd::scalar_type S; \
typedef typename Simd::vector_type V; \
\
HAND_DECLARATIONS(ignore); \
\
int offset,local,perm, ptype, g, direction, distance, sl, inplace_twist; \
StencilEntry *SE; \
HAND_DOP_SITE_INT(0, LOAD_CHI_GPARITY,LOAD_CHIMU_GPARITY,MULT_2SPIN_GPARITY); \
HAND_DOP_SITE_INT(1, LOAD_CHI_GPARITY,LOAD_CHIMU_GPARITY,MULT_2SPIN_GPARITY); \
} \
\
template<> \
void WilsonKernels<IMPL>::HandDhopSiteDagInt(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,SiteHalfSpinor *buf, \
int ss,int sU,const FermionField &in, FermionField &out) \
{ \
typedef IMPL Impl; \
typedef typename Simd::scalar_type S; \
typedef typename Simd::vector_type V; \
\
HAND_DECLARATIONS(ignore); \
\
StencilEntry *SE; \
int offset,local,perm, ptype, g, direction, distance, sl, inplace_twist; \
HAND_DOP_SITE_DAG_INT(0, LOAD_CHI_GPARITY,LOAD_CHIMU_GPARITY,MULT_2SPIN_GPARITY); \
HAND_DOP_SITE_DAG_INT(1, LOAD_CHI_GPARITY,LOAD_CHIMU_GPARITY,MULT_2SPIN_GPARITY); \
} \
\
template<> void \
WilsonKernels<IMPL>::HandDhopSiteExt(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,SiteHalfSpinor *buf, \
int ss,int sU,const FermionField &in, FermionField &out) \
{ \
typedef IMPL Impl; \
typedef typename Simd::scalar_type S; \
typedef typename Simd::vector_type V; \
\
HAND_DECLARATIONS(ignore); \
\
int offset,local,perm, ptype, g, direction, distance, sl, inplace_twist; \
StencilEntry *SE; \
int nmu=0; \
HAND_DOP_SITE_EXT(0, LOAD_CHI_GPARITY,LOAD_CHIMU_GPARITY,MULT_2SPIN_GPARITY); \
nmu = 0; \
HAND_DOP_SITE_EXT(1, LOAD_CHI_GPARITY,LOAD_CHIMU_GPARITY,MULT_2SPIN_GPARITY); \
} \
template<> \
void WilsonKernels<IMPL>::HandDhopSiteDagExt(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,SiteHalfSpinor *buf, \
int ss,int sU,const FermionField &in, FermionField &out) \
{ \
typedef IMPL Impl; \
typedef typename Simd::scalar_type S; \
typedef typename Simd::vector_type V; \
\
HAND_DECLARATIONS(ignore); \
\
StencilEntry *SE; \
int offset,local,perm, ptype, g, direction, distance, sl, inplace_twist; \
int nmu=0; \
HAND_DOP_SITE_DAG_EXT(0, LOAD_CHI_GPARITY,LOAD_CHIMU_GPARITY,MULT_2SPIN_GPARITY); \
nmu = 0; \
HAND_DOP_SITE_DAG_EXT(1, LOAD_CHI_GPARITY,LOAD_CHIMU_GPARITY,MULT_2SPIN_GPARITY); \
}
HAND_SPECIALISE_GPARITY(GparityWilsonImplF);
HAND_SPECIALISE_GPARITY(GparityWilsonImplD);
HAND_SPECIALISE_GPARITY(GparityWilsonImplFH);
HAND_SPECIALISE_GPARITY(GparityWilsonImplDF);
////////////// Wilson ; uses this implementation ///////////////////// ////////////// Wilson ; uses this implementation /////////////////////
#define INSTANTIATE_THEM(A) \ #define INSTANTIATE_THEM(A) \
@ -930,8 +613,6 @@ INSTANTIATE_THEM(WilsonImplF);
INSTANTIATE_THEM(WilsonImplD); INSTANTIATE_THEM(WilsonImplD);
INSTANTIATE_THEM(ZWilsonImplF); INSTANTIATE_THEM(ZWilsonImplF);
INSTANTIATE_THEM(ZWilsonImplD); INSTANTIATE_THEM(ZWilsonImplD);
INSTANTIATE_THEM(GparityWilsonImplF);
INSTANTIATE_THEM(GparityWilsonImplD);
INSTANTIATE_THEM(DomainWallVec5dImplF); INSTANTIATE_THEM(DomainWallVec5dImplF);
INSTANTIATE_THEM(DomainWallVec5dImplD); INSTANTIATE_THEM(DomainWallVec5dImplD);
INSTANTIATE_THEM(ZDomainWallVec5dImplF); INSTANTIATE_THEM(ZDomainWallVec5dImplF);
@ -940,12 +621,11 @@ INSTANTIATE_THEM(WilsonImplFH);
INSTANTIATE_THEM(WilsonImplDF); INSTANTIATE_THEM(WilsonImplDF);
INSTANTIATE_THEM(ZWilsonImplFH); INSTANTIATE_THEM(ZWilsonImplFH);
INSTANTIATE_THEM(ZWilsonImplDF); INSTANTIATE_THEM(ZWilsonImplDF);
INSTANTIATE_THEM(GparityWilsonImplFH);
INSTANTIATE_THEM(GparityWilsonImplDF);
INSTANTIATE_THEM(DomainWallVec5dImplFH); INSTANTIATE_THEM(DomainWallVec5dImplFH);
INSTANTIATE_THEM(DomainWallVec5dImplDF); INSTANTIATE_THEM(DomainWallVec5dImplDF);
INSTANTIATE_THEM(ZDomainWallVec5dImplFH); INSTANTIATE_THEM(ZDomainWallVec5dImplFH);
INSTANTIATE_THEM(ZDomainWallVec5dImplDF); INSTANTIATE_THEM(ZDomainWallVec5dImplDF);
INSTANTIATE_THEM(WilsonTwoIndexAntiSymmetricImplF); INSTANTIATE_THEM(WilsonTwoIndexAntiSymmetricImplF);
INSTANTIATE_THEM(WilsonTwoIndexAntiSymmetricImplD); INSTANTIATE_THEM(WilsonTwoIndexAntiSymmetricImplD);
}} }}

View File

@ -0,0 +1,878 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/qcd/action/fermion/WilsonKernelsHand.cc
Copyright (C) 2015
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
Author: paboyle <paboyle@ph.ed.ac.uk>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#include <Grid/qcd/action/fermion/FermionCore.h>
#define REGISTER
#define LOAD_CHIMU_BODY(F) \
Chimu_00=ref(F)(0)(0); \
Chimu_01=ref(F)(0)(1); \
Chimu_02=ref(F)(0)(2); \
Chimu_10=ref(F)(1)(0); \
Chimu_11=ref(F)(1)(1); \
Chimu_12=ref(F)(1)(2); \
Chimu_20=ref(F)(2)(0); \
Chimu_21=ref(F)(2)(1); \
Chimu_22=ref(F)(2)(2); \
Chimu_30=ref(F)(3)(0); \
Chimu_31=ref(F)(3)(1); \
Chimu_32=ref(F)(3)(2)
#define LOAD_CHIMU(DIR,F,PERM) \
{ const SiteSpinor & ref (in._odata[offset]); LOAD_CHIMU_BODY(F); }
#define LOAD_CHI_BODY(F) \
Chi_00 = ref(F)(0)(0);\
Chi_01 = ref(F)(0)(1);\
Chi_02 = ref(F)(0)(2);\
Chi_10 = ref(F)(1)(0);\
Chi_11 = ref(F)(1)(1);\
Chi_12 = ref(F)(1)(2)
#define LOAD_CHI(DIR,F,PERM) \
{const SiteHalfSpinor &ref(buf[offset]); LOAD_CHI_BODY(F); }
//G-parity implementations using in-place intrinsic ops
//1l 1h -> 1h 1l
//0l 0h , 1h 1l -> 0l 1h 0h,1l
//0h,1l -> 1l,0h
//if( (distance == 1 && !perm_will_occur) || (distance == -1 && perm_will_occur) )
//Pulled fermion through forwards face, GPBC on upper component
//Need 0= 0l 1h 1= 1l 0h
//else if( (distance == -1 && !perm) || (distance == 1 && perm) )
//Pulled fermion through backwards face, GPBC on lower component
//Need 0= 1l 0h 1= 0l 1h
//1l 1h -> 1h 1l
//0l 0h , 1h 1l -> 0l 1h 0h,1l
#define DO_TWIST_0L_1H(INTO,S,C,F, PERM, tmp1, tmp2, tmp3) \
permute##PERM(tmp1, ref(1)(S)(C)); \
exchange##PERM(tmp2,tmp3, ref(0)(S)(C), tmp1); \
INTO = tmp2;
//0l 0h -> 0h 0l
//1l 1h, 0h 0l -> 1l 0h, 1h 0l
#define DO_TWIST_1L_0H(INTO,S,C,F, PERM, tmp1, tmp2, tmp3) \
permute##PERM(tmp1, ref(0)(S)(C)); \
exchange##PERM(tmp2,tmp3, ref(1)(S)(C), tmp1); \
INTO = tmp2;
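The permute/exchange choreography in the two macros above is easiest to check on a scalar model. The sketch below assumes two SIMD lanes per value (lo/hi) and Grid-like semantics: permute swaps the halves, while exchange gathers the two low halves into its first output and the two high halves into its second. The lane encoding is purely illustrative.

#include <cassert>

struct V2 { int lo, hi; };                      // a "vector" of two lanes

V2 permute(V2 a) { return {a.hi, a.lo}; }       // swap the halves
void exchange(V2 &o0, V2 &o1, V2 a, V2 b) {     // pair up low and high halves
  o0 = {a.lo, b.lo};
  o1 = {a.hi, b.hi};
}

int main() {
  V2 f0{ /*0l*/1, /*0h*/2 };                    // flavour-0 lanes
  V2 f1{ /*1l*/3, /*1h*/4 };                    // flavour-1 lanes
  V2 t = permute(f1);                           // 1l 1h -> 1h 1l
  V2 keep, drop;
  exchange(keep, drop, f0, t);                  // -> {0l,1h} and {0h,1l}
  assert(keep.lo == 1 && keep.hi == 4);         // DO_TWIST_0L_1H keeps this one
  assert(drop.lo == 2 && drop.hi == 3);
  return 0;
}

Running DO_TWIST_0L_1H thus leaves {0l,1h} in INTO, exactly the flavour mix the comments above call for when the fermion is pulled through the forward face.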
#define LOAD_CHI_SETUP(DIR,F) \
g = F; \
direction = st._directions[DIR]; \
distance = st._distances[DIR]; \
sl = st._grid->_simd_layout[direction]; \
inplace_twist = 0; \
if(SE->_around_the_world && this->Params.twists[DIR % 4]){ \
if(sl == 1){ \
g = (F+1) % 2; \
}else{ \
inplace_twist = 1; \
} \
}
#define LOAD_CHIMU_GPARITY_INPLACE_TWIST(DIR,F,PERM) \
{ const SiteSpinor &ref(in._odata[offset]); \
LOAD_CHI_SETUP(DIR,F); \
if(!inplace_twist){ \
LOAD_CHIMU_BODY(g); \
}else{ \
if( ( F==0 && ((distance == 1 && !perm) || (distance == -1 && perm)) ) || \
( F==1 && ((distance == -1 && !perm) || (distance == 1 && perm)) ) ){ \
DO_TWIST_0L_1H(Chimu_00,0,0,F,PERM, U_00,U_01,U_10); \
DO_TWIST_0L_1H(Chimu_01,0,1,F,PERM, U_11,U_20,U_21); \
DO_TWIST_0L_1H(Chimu_02,0,2,F,PERM, U_00,U_01,U_10); \
DO_TWIST_0L_1H(Chimu_10,1,0,F,PERM, U_11,U_20,U_21); \
DO_TWIST_0L_1H(Chimu_11,1,1,F,PERM, U_00,U_01,U_10); \
DO_TWIST_0L_1H(Chimu_12,1,2,F,PERM, U_11,U_20,U_21); \
DO_TWIST_0L_1H(Chimu_20,2,0,F,PERM, U_00,U_01,U_10); \
DO_TWIST_0L_1H(Chimu_21,2,1,F,PERM, U_11,U_20,U_21); \
DO_TWIST_0L_1H(Chimu_22,2,2,F,PERM, U_00,U_01,U_10); \
DO_TWIST_0L_1H(Chimu_30,3,0,F,PERM, U_11,U_20,U_21); \
DO_TWIST_0L_1H(Chimu_31,3,1,F,PERM, U_00,U_01,U_10); \
DO_TWIST_0L_1H(Chimu_32,3,2,F,PERM, U_11,U_20,U_21); \
}else{ \
DO_TWIST_1L_0H(Chimu_00,0,0,F,PERM, U_00,U_01,U_10); \
DO_TWIST_1L_0H(Chimu_01,0,1,F,PERM, U_11,U_20,U_21); \
DO_TWIST_1L_0H(Chimu_02,0,2,F,PERM, U_00,U_01,U_10); \
DO_TWIST_1L_0H(Chimu_10,1,0,F,PERM, U_11,U_20,U_21); \
DO_TWIST_1L_0H(Chimu_11,1,1,F,PERM, U_00,U_01,U_10); \
DO_TWIST_1L_0H(Chimu_12,1,2,F,PERM, U_11,U_20,U_21); \
DO_TWIST_1L_0H(Chimu_20,2,0,F,PERM, U_00,U_01,U_10); \
DO_TWIST_1L_0H(Chimu_21,2,1,F,PERM, U_11,U_20,U_21); \
DO_TWIST_1L_0H(Chimu_22,2,2,F,PERM, U_00,U_01,U_10); \
DO_TWIST_1L_0H(Chimu_30,3,0,F,PERM, U_11,U_20,U_21); \
DO_TWIST_1L_0H(Chimu_31,3,1,F,PERM, U_00,U_01,U_10); \
DO_TWIST_1L_0H(Chimu_32,3,2,F,PERM, U_11,U_20,U_21); \
} \
} \
}
#define LOAD_CHI_GPARITY_INPLACE_TWIST(DIR,F,PERM) \
{ const SiteHalfSpinor &ref(buf[offset]); \
LOAD_CHI_SETUP(DIR,F); \
if(!inplace_twist){ \
LOAD_CHI_BODY(g); \
}else{ \
if( ( F==0 && ((distance == 1 && !perm) || (distance == -1 && perm)) ) || \
( F==1 && ((distance == -1 && !perm) || (distance == 1 && perm)) ) ){ \
DO_TWIST_0L_1H(Chi_00,0,0,F,PERM, U_00,U_01,U_10); \
DO_TWIST_0L_1H(Chi_01,0,1,F,PERM, U_11,U_20,U_21); \
DO_TWIST_0L_1H(Chi_02,0,2,F,PERM, UChi_00,UChi_01,UChi_02); \
DO_TWIST_0L_1H(Chi_10,1,0,F,PERM, UChi_10,UChi_11,UChi_12); \
DO_TWIST_0L_1H(Chi_11,1,1,F,PERM, U_00,U_01,U_10); \
DO_TWIST_0L_1H(Chi_12,1,2,F,PERM, U_11,U_20,U_21); \
}else{ \
DO_TWIST_1L_0H(Chi_00,0,0,F,PERM, U_00,U_01,U_10); \
DO_TWIST_1L_0H(Chi_01,0,1,F,PERM, U_11,U_20,U_21); \
DO_TWIST_1L_0H(Chi_02,0,2,F,PERM, UChi_00,UChi_01,UChi_02); \
DO_TWIST_1L_0H(Chi_10,1,0,F,PERM, UChi_10,UChi_11,UChi_12); \
DO_TWIST_1L_0H(Chi_11,1,1,F,PERM, U_00,U_01,U_10); \
DO_TWIST_1L_0H(Chi_12,1,2,F,PERM, U_11,U_20,U_21); \
} \
} \
}
#define LOAD_CHI_GPARITY(DIR,F,PERM) LOAD_CHI_GPARITY_INPLACE_TWIST(DIR,F,PERM)
#define LOAD_CHIMU_GPARITY(DIR,F,PERM) LOAD_CHIMU_GPARITY_INPLACE_TWIST(DIR,F,PERM)
// To splat or not to splat depends on the implementation
#define MULT_2SPIN_BODY \
Impl::loadLinkElement(U_00,ref()(0,0)); \
Impl::loadLinkElement(U_10,ref()(1,0)); \
Impl::loadLinkElement(U_20,ref()(2,0)); \
Impl::loadLinkElement(U_01,ref()(0,1)); \
Impl::loadLinkElement(U_11,ref()(1,1)); \
Impl::loadLinkElement(U_21,ref()(2,1)); \
UChi_00 = U_00*Chi_00; \
UChi_10 = U_00*Chi_10; \
UChi_01 = U_10*Chi_00; \
UChi_11 = U_10*Chi_10; \
UChi_02 = U_20*Chi_00; \
UChi_12 = U_20*Chi_10; \
UChi_00+= U_01*Chi_01; \
UChi_10+= U_01*Chi_11; \
UChi_01+= U_11*Chi_01; \
UChi_11+= U_11*Chi_11; \
UChi_02+= U_21*Chi_01; \
UChi_12+= U_21*Chi_11; \
Impl::loadLinkElement(U_00,ref()(0,2)); \
Impl::loadLinkElement(U_10,ref()(1,2)); \
Impl::loadLinkElement(U_20,ref()(2,2)); \
UChi_00+= U_00*Chi_02; \
UChi_10+= U_00*Chi_12; \
UChi_01+= U_10*Chi_02; \
UChi_11+= U_10*Chi_12; \
UChi_02+= U_20*Chi_02; \
UChi_12+= U_20*Chi_12
#define MULT_2SPIN(A,F) \
{auto & ref(U._odata[sU](A)); MULT_2SPIN_BODY; }
#define MULT_2SPIN_GPARITY(A,F) \
{auto & ref(U._odata[sU](F)(A)); MULT_2SPIN_BODY; }
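MULT_2SPIN_BODY is a straight SU(3) matrix times half-spinor multiply, organised column by column so that only six link elements are live at any moment: the first two columns are loaded up front and the third reuses the U_?0 registers. In scalar form, with std::complex standing in for the SIMD type, the computation reduces to the sketch below (illustrative only):

#include <complex>

using C = std::complex<double>;

// Scalar sketch of MULT_2SPIN: UChi_{s,c} = sum_k U(c,k) * Chi_{s,k}
// for the two surviving spin components s.
void mult2spin(C UChi[2][3], const C U[3][3], const C Chi[2][3]) {
  for (int s = 0; s < 2; s++)
    for (int c = 0; c < 3; c++) {
      UChi[s][c]  = U[c][0] * Chi[s][0];  // column 0 initialises
      UChi[s][c] += U[c][1] * Chi[s][1];  // columns 1 and 2 accumulate
      UChi[s][c] += U[c][2] * Chi[s][2];
    }
}

int main() {
  C U[3][3] = {}, Chi[2][3] = {}, UChi[2][3];
  mult2spin(UChi, U, Chi);
  return 0;
}

The G-parity variant differs only in where the link comes from: U._odata[sU](F)(A) indexes the doubled gauge field by flavour F before direction A.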
#define PERMUTE_DIR(dir) \
permute##dir(Chi_00,Chi_00);\
permute##dir(Chi_01,Chi_01);\
permute##dir(Chi_02,Chi_02);\
permute##dir(Chi_10,Chi_10);\
permute##dir(Chi_11,Chi_11);\
permute##dir(Chi_12,Chi_12);
// hspin(0)=fspin(0)+timesI(fspin(3));
// hspin(1)=fspin(1)+timesI(fspin(2));
#define XP_PROJ \
Chi_00 = Chimu_00+timesI(Chimu_30);\
Chi_01 = Chimu_01+timesI(Chimu_31);\
Chi_02 = Chimu_02+timesI(Chimu_32);\
Chi_10 = Chimu_10+timesI(Chimu_20);\
Chi_11 = Chimu_11+timesI(Chimu_21);\
Chi_12 = Chimu_12+timesI(Chimu_22);
#define YP_PROJ \
Chi_00 = Chimu_00-Chimu_30;\
Chi_01 = Chimu_01-Chimu_31;\
Chi_02 = Chimu_02-Chimu_32;\
Chi_10 = Chimu_10+Chimu_20;\
Chi_11 = Chimu_11+Chimu_21;\
Chi_12 = Chimu_12+Chimu_22;
#define ZP_PROJ \
Chi_00 = Chimu_00+timesI(Chimu_20); \
Chi_01 = Chimu_01+timesI(Chimu_21); \
Chi_02 = Chimu_02+timesI(Chimu_22); \
Chi_10 = Chimu_10-timesI(Chimu_30); \
Chi_11 = Chimu_11-timesI(Chimu_31); \
Chi_12 = Chimu_12-timesI(Chimu_32);
#define TP_PROJ \
Chi_00 = Chimu_00+Chimu_20; \
Chi_01 = Chimu_01+Chimu_21; \
Chi_02 = Chimu_02+Chimu_22; \
Chi_10 = Chimu_10+Chimu_30; \
Chi_11 = Chimu_11+Chimu_31; \
Chi_12 = Chimu_12+Chimu_32;
// hspin(0)=fspin(0)-timesI(fspin(3));
// hspin(1)=fspin(1)-timesI(fspin(2));
#define XM_PROJ \
Chi_00 = Chimu_00-timesI(Chimu_30);\
Chi_01 = Chimu_01-timesI(Chimu_31);\
Chi_02 = Chimu_02-timesI(Chimu_32);\
Chi_10 = Chimu_10-timesI(Chimu_20);\
Chi_11 = Chimu_11-timesI(Chimu_21);\
Chi_12 = Chimu_12-timesI(Chimu_22);
#define YM_PROJ \
Chi_00 = Chimu_00+Chimu_30;\
Chi_01 = Chimu_01+Chimu_31;\
Chi_02 = Chimu_02+Chimu_32;\
Chi_10 = Chimu_10-Chimu_20;\
Chi_11 = Chimu_11-Chimu_21;\
Chi_12 = Chimu_12-Chimu_22;
#define ZM_PROJ \
Chi_00 = Chimu_00-timesI(Chimu_20); \
Chi_01 = Chimu_01-timesI(Chimu_21); \
Chi_02 = Chimu_02-timesI(Chimu_22); \
Chi_10 = Chimu_10+timesI(Chimu_30); \
Chi_11 = Chimu_11+timesI(Chimu_31); \
Chi_12 = Chimu_12+timesI(Chimu_32);
#define TM_PROJ \
Chi_00 = Chimu_00-Chimu_20; \
Chi_01 = Chimu_01-Chimu_21; \
Chi_02 = Chimu_02-Chimu_22; \
Chi_10 = Chimu_10-Chimu_30; \
Chi_11 = Chimu_11-Chimu_31; \
Chi_12 = Chimu_12-Chimu_32;
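All eight PROJ macros above are the spin projections of the Wilson hopping term. For a hop in direction ±mu the spinor is contracted with a rank-two matrix (the conventional factor of 1/2 is absorbed elsewhere in the kernel normalisation), so only two spin components survive to be multiplied by the link:

  \chi = (1 \mp \gamma_\mu)\,\psi, \qquad \text{e.g. for } +x: \quad \chi_0 = \psi_0 + i\,\psi_3, \quad \chi_1 = \psi_1 + i\,\psi_2

The RECON macros below invert this, restoring the lower components as fixed multiples of the transported upper ones; for the +x leg, \psi_2 = -i\,U\chi_1 and \psi_3 = -i\,U\chi_0, which is exactly XP_RECON. This is a hedged gloss on the macros in the chiral basis used here, not an independent derivation.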
// fspin(0)=hspin(0);
// fspin(1)=hspin(1);
// fspin(2)=timesMinusI(hspin(1));
// fspin(3)=timesMinusI(hspin(0));
#define XP_RECON\
result_00 = UChi_00;\
result_01 = UChi_01;\
result_02 = UChi_02;\
result_10 = UChi_10;\
result_11 = UChi_11;\
result_12 = UChi_12;\
result_20 = timesMinusI(UChi_10);\
result_21 = timesMinusI(UChi_11);\
result_22 = timesMinusI(UChi_12);\
result_30 = timesMinusI(UChi_00);\
result_31 = timesMinusI(UChi_01);\
result_32 = timesMinusI(UChi_02);
#define XP_RECON_ACCUM\
result_00+=UChi_00;\
result_01+=UChi_01;\
result_02+=UChi_02;\
result_10+=UChi_10;\
result_11+=UChi_11;\
result_12+=UChi_12;\
result_20-=timesI(UChi_10);\
result_21-=timesI(UChi_11);\
result_22-=timesI(UChi_12);\
result_30-=timesI(UChi_00);\
result_31-=timesI(UChi_01);\
result_32-=timesI(UChi_02);
#define XM_RECON\
result_00 = UChi_00;\
result_01 = UChi_01;\
result_02 = UChi_02;\
result_10 = UChi_10;\
result_11 = UChi_11;\
result_12 = UChi_12;\
result_20 = timesI(UChi_10);\
result_21 = timesI(UChi_11);\
result_22 = timesI(UChi_12);\
result_30 = timesI(UChi_00);\
result_31 = timesI(UChi_01);\
result_32 = timesI(UChi_02);
#define XM_RECON_ACCUM\
result_00+= UChi_00;\
result_01+= UChi_01;\
result_02+= UChi_02;\
result_10+= UChi_10;\
result_11+= UChi_11;\
result_12+= UChi_12;\
result_20+= timesI(UChi_10);\
result_21+= timesI(UChi_11);\
result_22+= timesI(UChi_12);\
result_30+= timesI(UChi_00);\
result_31+= timesI(UChi_01);\
result_32+= timesI(UChi_02);
#define YP_RECON_ACCUM\
result_00+= UChi_00;\
result_01+= UChi_01;\
result_02+= UChi_02;\
result_10+= UChi_10;\
result_11+= UChi_11;\
result_12+= UChi_12;\
result_20+= UChi_10;\
result_21+= UChi_11;\
result_22+= UChi_12;\
result_30-= UChi_00;\
result_31-= UChi_01;\
result_32-= UChi_02;
#define YM_RECON_ACCUM\
result_00+= UChi_00;\
result_01+= UChi_01;\
result_02+= UChi_02;\
result_10+= UChi_10;\
result_11+= UChi_11;\
result_12+= UChi_12;\
result_20-= UChi_10;\
result_21-= UChi_11;\
result_22-= UChi_12;\
result_30+= UChi_00;\
result_31+= UChi_01;\
result_32+= UChi_02;
#define ZP_RECON_ACCUM\
result_00+= UChi_00;\
result_01+= UChi_01;\
result_02+= UChi_02;\
result_10+= UChi_10;\
result_11+= UChi_11;\
result_12+= UChi_12;\
result_20-= timesI(UChi_00); \
result_21-= timesI(UChi_01); \
result_22-= timesI(UChi_02); \
result_30+= timesI(UChi_10); \
result_31+= timesI(UChi_11); \
result_32+= timesI(UChi_12);
#define ZM_RECON_ACCUM\
result_00+= UChi_00;\
result_01+= UChi_01;\
result_02+= UChi_02;\
result_10+= UChi_10;\
result_11+= UChi_11;\
result_12+= UChi_12;\
result_20+= timesI(UChi_00); \
result_21+= timesI(UChi_01); \
result_22+= timesI(UChi_02); \
result_30-= timesI(UChi_10); \
result_31-= timesI(UChi_11); \
result_32-= timesI(UChi_12);
#define TP_RECON_ACCUM\
result_00+= UChi_00;\
result_01+= UChi_01;\
result_02+= UChi_02;\
result_10+= UChi_10;\
result_11+= UChi_11;\
result_12+= UChi_12;\
result_20+= UChi_00; \
result_21+= UChi_01; \
result_22+= UChi_02; \
result_30+= UChi_10; \
result_31+= UChi_11; \
result_32+= UChi_12;
#define TM_RECON_ACCUM\
result_00+= UChi_00;\
result_01+= UChi_01;\
result_02+= UChi_02;\
result_10+= UChi_10;\
result_11+= UChi_11;\
result_12+= UChi_12;\
result_20-= UChi_00; \
result_21-= UChi_01; \
result_22-= UChi_02; \
result_30-= UChi_10; \
result_31-= UChi_11; \
result_32-= UChi_12;
#define HAND_STENCIL_LEG(PROJ,PERM,DIR,RECON,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL) \
SE=st.GetEntry(ptype,DIR,ss); \
offset = SE->_offset; \
local = SE->_is_local; \
perm = SE->_permute; \
if ( local ) { \
LOAD_CHIMU_IMPL(DIR,F,PERM); \
PROJ; \
if ( perm) { \
PERMUTE_DIR(PERM); \
} \
} else { \
LOAD_CHI_IMPL(DIR,F,PERM); \
} \
MULT_2SPIN_IMPL(DIR,F); \
RECON;
#define HAND_STENCIL_LEG_INT(PROJ,PERM,DIR,RECON,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL) \
SE=st.GetEntry(ptype,DIR,ss); \
offset = SE->_offset; \
local = SE->_is_local; \
perm = SE->_permute; \
if ( local ) { \
LOAD_CHIMU_IMPL(DIR,F,PERM); \
PROJ; \
if ( perm) { \
PERMUTE_DIR(PERM); \
} \
} else if ( st.same_node[DIR] ) { \
LOAD_CHI_IMPL(DIR,F,PERM); \
} \
if (local || st.same_node[DIR] ) { \
MULT_2SPIN_IMPL(DIR,F); \
RECON; \
}
#define HAND_STENCIL_LEG_EXT(PROJ,PERM,DIR,RECON,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL) \
SE=st.GetEntry(ptype,DIR,ss); \
offset = SE->_offset; \
local = SE->_is_local; \
perm = SE->_permute; \
if((!SE->_is_local)&&(!st.same_node[DIR]) ) { \
LOAD_CHI_IMPL(DIR,F,PERM); \
MULT_2SPIN_IMPL(DIR,F); \
RECON; \
nmu++; \
}
#define HAND_RESULT(ss,F) \
{ \
SiteSpinor & ref (out._odata[ss]); \
vstream(ref(F)(0)(0),result_00); \
vstream(ref(F)(0)(1),result_01); \
vstream(ref(F)(0)(2),result_02); \
vstream(ref(F)(1)(0),result_10); \
vstream(ref(F)(1)(1),result_11); \
vstream(ref(F)(1)(2),result_12); \
vstream(ref(F)(2)(0),result_20); \
vstream(ref(F)(2)(1),result_21); \
vstream(ref(F)(2)(2),result_22); \
vstream(ref(F)(3)(0),result_30); \
vstream(ref(F)(3)(1),result_31); \
vstream(ref(F)(3)(2),result_32); \
}
#define HAND_RESULT_EXT(ss,F) \
if (nmu){ \
SiteSpinor & ref (out._odata[ss]); \
ref(F)(0)(0)+=result_00; \
ref(F)(0)(1)+=result_01; \
ref(F)(0)(2)+=result_02; \
ref(F)(1)(0)+=result_10; \
ref(F)(1)(1)+=result_11; \
ref(F)(1)(2)+=result_12; \
ref(F)(2)(0)+=result_20; \
ref(F)(2)(1)+=result_21; \
ref(F)(2)(2)+=result_22; \
ref(F)(3)(0)+=result_30; \
ref(F)(3)(1)+=result_31; \
ref(F)(3)(2)+=result_32; \
}
#define HAND_DECLARATIONS(a) \
Simd result_00; \
Simd result_01; \
Simd result_02; \
Simd result_10; \
Simd result_11; \
Simd result_12; \
Simd result_20; \
Simd result_21; \
Simd result_22; \
Simd result_30; \
Simd result_31; \
Simd result_32; \
Simd Chi_00; \
Simd Chi_01; \
Simd Chi_02; \
Simd Chi_10; \
Simd Chi_11; \
Simd Chi_12; \
Simd UChi_00; \
Simd UChi_01; \
Simd UChi_02; \
Simd UChi_10; \
Simd UChi_11; \
Simd UChi_12; \
Simd U_00; \
Simd U_10; \
Simd U_20; \
Simd U_01; \
Simd U_11; \
Simd U_21;
#define ZERO_RESULT \
result_00=zero; \
result_01=zero; \
result_02=zero; \
result_10=zero; \
result_11=zero; \
result_12=zero; \
result_20=zero; \
result_21=zero; \
result_22=zero; \
result_30=zero; \
result_31=zero; \
result_32=zero;
#define Chimu_00 Chi_00
#define Chimu_01 Chi_01
#define Chimu_02 Chi_02
#define Chimu_10 Chi_10
#define Chimu_11 Chi_11
#define Chimu_12 Chi_12
#define Chimu_20 UChi_00
#define Chimu_21 UChi_01
#define Chimu_22 UChi_02
#define Chimu_30 UChi_10
#define Chimu_31 UChi_11
#define Chimu_32 UChi_12
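These aliases are a register-pressure trick rather than a semantic statement: the full twelve-component spinor is only live before the projection, and UChi is not written until MULT_2SPIN, so Chimu_20..Chimu_32 can safely borrow the six UChi registers while Chimu_00..Chimu_12 share storage with Chi. The HAND_DECLARATIONS block above therefore never has to hold spinor, half spinor, link and result simultaneously.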
namespace Grid {
namespace QCD {
template<class Impl> void
WilsonKernels<Impl>::HandDhopSite(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,SiteHalfSpinor *buf,
int ss,int sU,const FermionField &in, FermionField &out)
{
// T==0, Z==1, Y==2, X==3 expect 1,2,2,2 simd layout etc...
typedef typename Simd::scalar_type S;
typedef typename Simd::vector_type V;
HAND_DECLARATIONS(ignore);
int offset,local,perm, ptype;
StencilEntry *SE;
#define HAND_DOP_SITE(F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL) \
HAND_STENCIL_LEG(XM_PROJ,3,Xp,XM_RECON,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
HAND_STENCIL_LEG(YM_PROJ,2,Yp,YM_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
HAND_STENCIL_LEG(ZM_PROJ,1,Zp,ZM_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
HAND_STENCIL_LEG(TM_PROJ,0,Tp,TM_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
HAND_STENCIL_LEG(XP_PROJ,3,Xm,XP_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
HAND_STENCIL_LEG(YP_PROJ,2,Ym,YP_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
HAND_STENCIL_LEG(ZP_PROJ,1,Zm,ZP_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
HAND_STENCIL_LEG(TP_PROJ,0,Tm,TP_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
HAND_RESULT(ss,F)
HAND_DOP_SITE(, LOAD_CHI,LOAD_CHIMU,MULT_2SPIN);
}
template<class Impl>
void WilsonKernels<Impl>::HandDhopSiteDag(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,SiteHalfSpinor *buf,
int ss,int sU,const FermionField &in, FermionField &out)
{
typedef typename Simd::scalar_type S;
typedef typename Simd::vector_type V;
HAND_DECLARATIONS(ignore);
StencilEntry *SE;
int offset,local,perm, ptype;
#define HAND_DOP_SITE_DAG(F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL) \
HAND_STENCIL_LEG(XP_PROJ,3,Xp,XP_RECON,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
HAND_STENCIL_LEG(YP_PROJ,2,Yp,YP_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
HAND_STENCIL_LEG(ZP_PROJ,1,Zp,ZP_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
HAND_STENCIL_LEG(TP_PROJ,0,Tp,TP_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
HAND_STENCIL_LEG(XM_PROJ,3,Xm,XM_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
HAND_STENCIL_LEG(YM_PROJ,2,Ym,YM_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
HAND_STENCIL_LEG(ZM_PROJ,1,Zm,ZM_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
HAND_STENCIL_LEG(TM_PROJ,0,Tm,TM_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
HAND_RESULT(ss,F)
HAND_DOP_SITE_DAG(, LOAD_CHI,LOAD_CHIMU,MULT_2SPIN);
}
template<class Impl> void
WilsonKernels<Impl>::HandDhopSiteInt(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,SiteHalfSpinor *buf,
int ss,int sU,const FermionField &in, FermionField &out)
{
// T==0, Z==1, Y==2, X==3 expect 1,2,2,2 simd layout etc...
typedef typename Simd::scalar_type S;
typedef typename Simd::vector_type V;
HAND_DECLARATIONS(ignore);
int offset,local,perm, ptype;
StencilEntry *SE;
#define HAND_DOP_SITE_INT(F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL) \
ZERO_RESULT; \
HAND_STENCIL_LEG_INT(XM_PROJ,3,Xp,XM_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
HAND_STENCIL_LEG_INT(YM_PROJ,2,Yp,YM_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
HAND_STENCIL_LEG_INT(ZM_PROJ,1,Zp,ZM_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
HAND_STENCIL_LEG_INT(TM_PROJ,0,Tp,TM_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
HAND_STENCIL_LEG_INT(XP_PROJ,3,Xm,XP_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
HAND_STENCIL_LEG_INT(YP_PROJ,2,Ym,YP_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
HAND_STENCIL_LEG_INT(ZP_PROJ,1,Zm,ZP_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
HAND_STENCIL_LEG_INT(TP_PROJ,0,Tm,TP_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
HAND_RESULT(ss,F)
HAND_DOP_SITE_INT(, LOAD_CHI,LOAD_CHIMU,MULT_2SPIN);
}
template<class Impl>
void WilsonKernels<Impl>::HandDhopSiteDagInt(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,SiteHalfSpinor *buf,
int ss,int sU,const FermionField &in, FermionField &out)
{
typedef typename Simd::scalar_type S;
typedef typename Simd::vector_type V;
HAND_DECLARATIONS(ignore);
StencilEntry *SE;
int offset,local,perm, ptype;
#define HAND_DOP_SITE_DAG_INT(F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL) \
ZERO_RESULT; \
HAND_STENCIL_LEG_INT(XP_PROJ,3,Xp,XP_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
HAND_STENCIL_LEG_INT(YP_PROJ,2,Yp,YP_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
HAND_STENCIL_LEG_INT(ZP_PROJ,1,Zp,ZP_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
HAND_STENCIL_LEG_INT(TP_PROJ,0,Tp,TP_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
HAND_STENCIL_LEG_INT(XM_PROJ,3,Xm,XM_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
HAND_STENCIL_LEG_INT(YM_PROJ,2,Ym,YM_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
HAND_STENCIL_LEG_INT(ZM_PROJ,1,Zm,ZM_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
HAND_STENCIL_LEG_INT(TM_PROJ,0,Tm,TM_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
HAND_RESULT(ss,F)
HAND_DOP_SITE_DAG_INT(, LOAD_CHI,LOAD_CHIMU,MULT_2SPIN);
}
template<class Impl> void
WilsonKernels<Impl>::HandDhopSiteExt(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,SiteHalfSpinor *buf,
int ss,int sU,const FermionField &in, FermionField &out)
{
// T==0, Z==1, Y==2, X==3 expect 1,2,2,2 simd layout etc...
typedef typename Simd::scalar_type S;
typedef typename Simd::vector_type V;
HAND_DECLARATIONS(ignore);
int offset,local,perm, ptype;
StencilEntry *SE;
int nmu=0;
#define HAND_DOP_SITE_EXT(F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL) \
ZERO_RESULT; \
HAND_STENCIL_LEG_EXT(XM_PROJ,3,Xp,XM_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
HAND_STENCIL_LEG_EXT(YM_PROJ,2,Yp,YM_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
HAND_STENCIL_LEG_EXT(ZM_PROJ,1,Zp,ZM_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
HAND_STENCIL_LEG_EXT(TM_PROJ,0,Tp,TM_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
HAND_STENCIL_LEG_EXT(XP_PROJ,3,Xm,XP_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
HAND_STENCIL_LEG_EXT(YP_PROJ,2,Ym,YP_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
HAND_STENCIL_LEG_EXT(ZP_PROJ,1,Zm,ZP_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
HAND_STENCIL_LEG_EXT(TP_PROJ,0,Tm,TP_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
HAND_RESULT_EXT(ss,F)
HAND_DOP_SITE_EXT(, LOAD_CHI,LOAD_CHIMU,MULT_2SPIN);
}
template<class Impl>
void WilsonKernels<Impl>::HandDhopSiteDagExt(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,SiteHalfSpinor *buf,
int ss,int sU,const FermionField &in, FermionField &out)
{
typedef typename Simd::scalar_type S;
typedef typename Simd::vector_type V;
HAND_DECLARATIONS(ignore);
StencilEntry *SE;
int offset,local,perm, ptype;
int nmu=0;
#define HAND_DOP_SITE_DAG_EXT(F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL) \
ZERO_RESULT; \
HAND_STENCIL_LEG_EXT(XP_PROJ,3,Xp,XP_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
HAND_STENCIL_LEG_EXT(YP_PROJ,2,Yp,YP_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
HAND_STENCIL_LEG_EXT(ZP_PROJ,1,Zp,ZP_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
HAND_STENCIL_LEG_EXT(TP_PROJ,0,Tp,TP_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
HAND_STENCIL_LEG_EXT(XM_PROJ,3,Xm,XM_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
HAND_STENCIL_LEG_EXT(YM_PROJ,2,Ym,YM_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
HAND_STENCIL_LEG_EXT(ZM_PROJ,1,Zm,ZM_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
HAND_STENCIL_LEG_EXT(TM_PROJ,0,Tm,TM_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
HAND_RESULT_EXT(ss,F)
HAND_DOP_SITE_DAG_EXT(, LOAD_CHI,LOAD_CHIMU,MULT_2SPIN);
}
#define HAND_SPECIALISE_GPARITY(IMPL) \
template<> void \
WilsonKernels<IMPL>::HandDhopSite(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,SiteHalfSpinor *buf, \
int ss,int sU,const FermionField &in, FermionField &out) \
{ \
typedef IMPL Impl; \
typedef typename Simd::scalar_type S; \
typedef typename Simd::vector_type V; \
\
HAND_DECLARATIONS(ignore); \
\
int offset,local,perm, ptype, g, direction, distance, sl, inplace_twist; \
StencilEntry *SE; \
HAND_DOP_SITE(0, LOAD_CHI_GPARITY,LOAD_CHIMU_GPARITY,MULT_2SPIN_GPARITY); \
HAND_DOP_SITE(1, LOAD_CHI_GPARITY,LOAD_CHIMU_GPARITY,MULT_2SPIN_GPARITY); \
} \
\
template<> \
void WilsonKernels<IMPL>::HandDhopSiteDag(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,SiteHalfSpinor *buf, \
int ss,int sU,const FermionField &in, FermionField &out) \
{ \
typedef IMPL Impl; \
typedef typename Simd::scalar_type S; \
typedef typename Simd::vector_type V; \
\
HAND_DECLARATIONS(ignore); \
\
StencilEntry *SE; \
int offset,local,perm, ptype, g, direction, distance, sl, inplace_twist; \
HAND_DOP_SITE_DAG(0, LOAD_CHI_GPARITY,LOAD_CHIMU_GPARITY,MULT_2SPIN_GPARITY); \
HAND_DOP_SITE_DAG(1, LOAD_CHI_GPARITY,LOAD_CHIMU_GPARITY,MULT_2SPIN_GPARITY); \
} \
\
template<> void \
WilsonKernels<IMPL>::HandDhopSiteInt(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,SiteHalfSpinor *buf, \
int ss,int sU,const FermionField &in, FermionField &out) \
{ \
typedef IMPL Impl; \
typedef typename Simd::scalar_type S; \
typedef typename Simd::vector_type V; \
\
HAND_DECLARATIONS(ignore); \
\
int offset,local,perm, ptype, g, direction, distance, sl, inplace_twist; \
StencilEntry *SE; \
HAND_DOP_SITE_INT(0, LOAD_CHI_GPARITY,LOAD_CHIMU_GPARITY,MULT_2SPIN_GPARITY); \
HAND_DOP_SITE_INT(1, LOAD_CHI_GPARITY,LOAD_CHIMU_GPARITY,MULT_2SPIN_GPARITY); \
} \
\
template<> \
void WilsonKernels<IMPL>::HandDhopSiteDagInt(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,SiteHalfSpinor *buf, \
int ss,int sU,const FermionField &in, FermionField &out) \
{ \
typedef IMPL Impl; \
typedef typename Simd::scalar_type S; \
typedef typename Simd::vector_type V; \
\
HAND_DECLARATIONS(ignore); \
\
StencilEntry *SE; \
int offset,local,perm, ptype, g, direction, distance, sl, inplace_twist; \
HAND_DOP_SITE_DAG_INT(0, LOAD_CHI_GPARITY,LOAD_CHIMU_GPARITY,MULT_2SPIN_GPARITY); \
HAND_DOP_SITE_DAG_INT(1, LOAD_CHI_GPARITY,LOAD_CHIMU_GPARITY,MULT_2SPIN_GPARITY); \
} \
\
template<> void \
WilsonKernels<IMPL>::HandDhopSiteExt(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,SiteHalfSpinor *buf, \
int ss,int sU,const FermionField &in, FermionField &out) \
{ \
typedef IMPL Impl; \
typedef typename Simd::scalar_type S; \
typedef typename Simd::vector_type V; \
\
HAND_DECLARATIONS(ignore); \
\
int offset,local,perm, ptype, g, direction, distance, sl, inplace_twist; \
StencilEntry *SE; \
int nmu=0; \
HAND_DOP_SITE_EXT(0, LOAD_CHI_GPARITY,LOAD_CHIMU_GPARITY,MULT_2SPIN_GPARITY); \
nmu = 0; \
HAND_DOP_SITE_EXT(1, LOAD_CHI_GPARITY,LOAD_CHIMU_GPARITY,MULT_2SPIN_GPARITY); \
} \
template<> \
void WilsonKernels<IMPL>::HandDhopSiteDagExt(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,SiteHalfSpinor *buf, \
int ss,int sU,const FermionField &in, FermionField &out) \
{ \
typedef IMPL Impl; \
typedef typename Simd::scalar_type S; \
typedef typename Simd::vector_type V; \
\
HAND_DECLARATIONS(ignore); \
\
StencilEntry *SE; \
int offset,local,perm, ptype, g, direction, distance, sl, inplace_twist; \
int nmu=0; \
HAND_DOP_SITE_DAG_EXT(0, LOAD_CHI_GPARITY,LOAD_CHIMU_GPARITY,MULT_2SPIN_GPARITY); \
nmu = 0; \
HAND_DOP_SITE_DAG_EXT(1, LOAD_CHI_GPARITY,LOAD_CHIMU_GPARITY,MULT_2SPIN_GPARITY); \
}
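The G-parity specialisations expand the same eight-leg site loop twice, once per flavour index F = 0, 1, with the _GPARITY load/multiply implementations threading F through to the doubled gauge and fermion fields. Note that nmu is reset between the two EXT passes, so each flavour's exterior contribution is gated on its own leg count in HAND_RESULT_EXT.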
HAND_SPECIALISE_GPARITY(GparityWilsonImplF);
HAND_SPECIALISE_GPARITY(GparityWilsonImplD);
HAND_SPECIALISE_GPARITY(GparityWilsonImplFH);
HAND_SPECIALISE_GPARITY(GparityWilsonImplDF);
////////////// Wilson ; uses this implementation /////////////////////
#define INSTANTIATE_THEM(A) \
template void WilsonKernels<A>::HandDhopSite(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,SiteHalfSpinor *buf,\
int ss,int sU,const FermionField &in, FermionField &out); \
template void WilsonKernels<A>::HandDhopSiteDag(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,SiteHalfSpinor *buf, \
int ss,int sU,const FermionField &in, FermionField &out);\
template void WilsonKernels<A>::HandDhopSiteInt(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,SiteHalfSpinor *buf,\
int ss,int sU,const FermionField &in, FermionField &out); \
template void WilsonKernels<A>::HandDhopSiteDagInt(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,SiteHalfSpinor *buf, \
int ss,int sU,const FermionField &in, FermionField &out); \
template void WilsonKernels<A>::HandDhopSiteExt(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,SiteHalfSpinor *buf,\
int ss,int sU,const FermionField &in, FermionField &out); \
template void WilsonKernels<A>::HandDhopSiteDagExt(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,SiteHalfSpinor *buf, \
int ss,int sU,const FermionField &in, FermionField &out);
INSTANTIATE_THEM(GparityWilsonImplF);
INSTANTIATE_THEM(GparityWilsonImplD);
INSTANTIATE_THEM(GparityWilsonImplFH);
INSTANTIATE_THEM(GparityWilsonImplDF);
}}

View File

@ -48,6 +48,22 @@ with this program; if not, write to the Free Software Foundation, Inc.,
} \ } \
} }
#define RegisterLoadCheckPointerMetadataFunction(NAME) \
template < class Metadata > \
void Load##NAME##Checkpointer(const CheckpointerParameters& Params_, const Metadata& M_) { \
if (!have_CheckPointer) { \
std::cout << GridLogDebug << "Loading Metadata Checkpointer " << #NAME \
<< std::endl; \
CP = std::unique_ptr<CheckpointerBaseModule>( \
new NAME##CPModule<ImplementationPolicy, Metadata >(Params_, M_)); \
have_CheckPointer = true; \
} else { \
std::cout << GridLogError << "Checkpointer already loaded " \
<< std::endl; \
exit(1); \
} \
}
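The macro stamps out one Load<NAME>Checkpointer member per checkpointer type that carries user metadata; together with the RegisterLoadCheckPointerMetadataFunction(Scidac) line further down, it gives HMC drivers a LoadScidacCheckpointer(params, metadata) entry point. A hedged, non-compilable fragment of how a driver might call it; the TheHMC handle and the MyMetadata type follow Grid's usual HMC examples but are assumptions here, not code from this change:

// Hypothetical usage; MyMetadata is any user-defined Serializable struct.
CheckpointerParameters CPparams;
CPparams.config_prefix = "ckpoint_lat";
CPparams.rng_prefix    = "ckpoint_rng";
CPparams.saveInterval  = 10;
CPparams.format        = "IEEE64BIG";

MyMetadata MD;
TheHMC.Resources.LoadScidacCheckpointer(CPparams, MD);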
namespace Grid { namespace Grid {
namespace QCD { namespace QCD {
@ -77,7 +93,7 @@ class HMCResourceManager {
bool have_CheckPointer; bool have_CheckPointer;
// NOTE: operator << is not overloaded for std::vector<string> // NOTE: operator << is not overloaded for std::vector<string>
// so thsi function is necessary // so this function is necessary
void output_vector_string(const std::vector<std::string> &vs){ void output_vector_string(const std::vector<std::string> &vs){
for (auto &i: vs) for (auto &i: vs)
std::cout << i << " "; std::cout << i << " ";
@ -254,6 +270,7 @@ class HMCResourceManager {
RegisterLoadCheckPointerFunction(Nersc); RegisterLoadCheckPointerFunction(Nersc);
#ifdef HAVE_LIME #ifdef HAVE_LIME
RegisterLoadCheckPointerFunction(ILDG); RegisterLoadCheckPointerFunction(ILDG);
RegisterLoadCheckPointerMetadataFunction(Scidac);
#endif #endif
//////////////////////////////////////////////////////// ////////////////////////////////////////////////////////

View File

@ -76,6 +76,14 @@ class BaseHmcCheckpointer : public HmcObservable<typename Impl::Field> {
} }
} }
void check_filename(const std::string &filename){
std::ifstream f(filename.c_str());
if(!f.good()){
std::cout << GridLogError << "Filename " << filename << " not found. Aborting. " << std::endl;
abort();
};
}
virtual void initialize(const CheckpointerParameters &Params) = 0; virtual void initialize(const CheckpointerParameters &Params) = 0;
virtual void CheckpointRestore(int traj, typename Impl::Field &U, virtual void CheckpointRestore(int traj, typename Impl::Field &U,

View File

@ -93,6 +93,9 @@ class BinaryHmcCheckpointer : public BaseHmcCheckpointer<Impl> {
void CheckpointRestore(int traj, Field &U, GridSerialRNG &sRNG, GridParallelRNG &pRNG) { void CheckpointRestore(int traj, Field &U, GridSerialRNG &sRNG, GridParallelRNG &pRNG) {
std::string config, rng; std::string config, rng;
this->build_filenames(traj, Params, config, rng); this->build_filenames(traj, Params, config, rng);
this->check_filename(rng);
this->check_filename(config);
BinarySimpleMunger<sobj_double, sobj> munge; BinarySimpleMunger<sobj_double, sobj> munge;

View File

@ -136,6 +136,20 @@ class ILDGCPModule: public CheckPointerModule< ImplementationPolicy> {
}; };
template<class ImplementationPolicy, class Metadata>
class ScidacCPModule: public CheckPointerModule< ImplementationPolicy> {
typedef CheckPointerModule< ImplementationPolicy> CPBase;
Metadata M;
// acquire resource
virtual void initialize(){
this->CheckPointPtr.reset(new ScidacHmcCheckpointer<ImplementationPolicy, Metadata>(this->Par_, M));
}
public:
ScidacCPModule(typename CPBase::APar Par, Metadata M_):M(M_), CPBase(Par) {}
template <class ReaderClass>
ScidacCPModule(Reader<ReaderClass>& Reader) : Parametrized<typename CPBase::APar>(Reader), M(Reader){};
};
#endif #endif

View File

@ -34,6 +34,7 @@ directory
#include <Grid/qcd/hmc/checkpointers/NerscCheckpointer.h> #include <Grid/qcd/hmc/checkpointers/NerscCheckpointer.h>
#include <Grid/qcd/hmc/checkpointers/BinaryCheckpointer.h> #include <Grid/qcd/hmc/checkpointers/BinaryCheckpointer.h>
#include <Grid/qcd/hmc/checkpointers/ILDGCheckpointer.h> #include <Grid/qcd/hmc/checkpointers/ILDGCheckpointer.h>
#include <Grid/qcd/hmc/checkpointers/ScidacCheckpointer.h>
//#include <Grid/qcd/hmc/checkpointers/CheckPointerModules.h> //#include <Grid/qcd/hmc/checkpointers/CheckPointerModules.h>

View File

@ -74,10 +74,10 @@ class ILDGHmcCheckpointer : public BaseHmcCheckpointer<Implementation> {
if ((traj % Params.saveInterval) == 0) { if ((traj % Params.saveInterval) == 0) {
std::string config, rng; std::string config, rng;
this->build_filenames(traj, Params, config, rng); this->build_filenames(traj, Params, config, rng);
GridBase *grid = U._grid;
uint32_t nersc_csum,scidac_csuma,scidac_csumb; uint32_t nersc_csum,scidac_csuma,scidac_csumb;
BinaryIO::writeRNG(sRNG, pRNG, rng, 0,nersc_csum,scidac_csuma,scidac_csumb); BinaryIO::writeRNG(sRNG, pRNG, rng, 0,nersc_csum,scidac_csuma,scidac_csumb);
IldgWriter _IldgWriter; IldgWriter _IldgWriter(grid->IsBoss());
_IldgWriter.open(config); _IldgWriter.open(config);
_IldgWriter.writeConfiguration(U, traj, config, config); _IldgWriter.writeConfiguration(U, traj, config, config);
_IldgWriter.close(); _IldgWriter.close();
@ -95,6 +95,10 @@ class ILDGHmcCheckpointer : public BaseHmcCheckpointer<Implementation> {
GridParallelRNG &pRNG) { GridParallelRNG &pRNG) {
std::string config, rng; std::string config, rng;
this->build_filenames(traj, Params, config, rng); this->build_filenames(traj, Params, config, rng);
this->check_filename(rng);
this->check_filename(config);
uint32_t nersc_csum,scidac_csuma,scidac_csumb; uint32_t nersc_csum,scidac_csuma,scidac_csumb;
BinaryIO::readRNG(sRNG, pRNG, rng, 0,nersc_csum,scidac_csuma,scidac_csumb); BinaryIO::readRNG(sRNG, pRNG, rng, 0,nersc_csum,scidac_csuma,scidac_csumb);

View File

@ -69,6 +69,9 @@ class NerscHmcCheckpointer : public BaseHmcCheckpointer<Gimpl> {
GridParallelRNG &pRNG) { GridParallelRNG &pRNG) {
std::string config, rng; std::string config, rng;
this->build_filenames(traj, Params, config, rng); this->build_filenames(traj, Params, config, rng);
this->check_filename(rng);
this->check_filename(config);
FieldMetaData header; FieldMetaData header;
NerscIO::readRNGState(sRNG, pRNG, header, rng); NerscIO::readRNGState(sRNG, pRNG, header, rng);

View File

@ -0,0 +1,125 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/qcd/hmc/ScidacCheckpointer.h
Copyright (C) 2018
Author: Guido Cossu <guido.cossu@ed.ac.uk>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution
directory
*************************************************************************************/
/* END LEGAL */
#ifndef SCIDAC_CHECKPOINTER
#define SCIDAC_CHECKPOINTER
#ifdef HAVE_LIME
#include <iostream>
#include <sstream>
#include <string>
namespace Grid {
namespace QCD {
// For generic fields
template <class Implementation, class Metadata>
class ScidacHmcCheckpointer : public BaseHmcCheckpointer<Implementation> {
private:
CheckpointerParameters Params;
Metadata MData;
typedef typename Implementation::Field Field;
public:
//INHERIT_GIMPL_TYPES(Implementation);
ScidacHmcCheckpointer(const CheckpointerParameters &Params_) { initialize(Params_); }
ScidacHmcCheckpointer(const CheckpointerParameters &Params_, const Metadata& M_):MData(M_) { initialize(Params_); }
void initialize(const CheckpointerParameters &Params_) {
Params = Params_;
// check here that the format is valid
int ieee32big = (Params.format == std::string("IEEE32BIG"));
int ieee32 = (Params.format == std::string("IEEE32"));
int ieee64big = (Params.format == std::string("IEEE64BIG"));
int ieee64 = (Params.format == std::string("IEEE64"));
if (!(ieee64big || ieee32 || ieee32big || ieee64)) {
std::cout << GridLogError << "Unrecognized file format " << Params.format
<< std::endl;
std::cout << GridLogError
<< "Allowed: IEEE32BIG | IEEE32 | IEEE64BIG | IEEE64"
<< std::endl;
exit(1);
}
}
void TrajectoryComplete(int traj, Field &U, GridSerialRNG &sRNG,
GridParallelRNG &pRNG) {
if ((traj % Params.saveInterval) == 0) {
std::string config, rng;
this->build_filenames(traj, Params, config, rng);
GridBase *grid = U._grid;
uint32_t nersc_csum,scidac_csuma,scidac_csumb;
BinaryIO::writeRNG(sRNG, pRNG, rng, 0,nersc_csum,scidac_csuma,scidac_csumb);
ScidacWriter _ScidacWriter(grid->IsBoss());
_ScidacWriter.open(config);
_ScidacWriter.writeScidacFieldRecord(U, MData);
_ScidacWriter.close();
std::cout << GridLogMessage << "Written Scidac Configuration on " << config
<< " checksum " << std::hex << nersc_csum<<"/"
<< scidac_csuma<<"/" << scidac_csumb
<< std::dec << std::endl;
}
};
void CheckpointRestore(int traj, Field &U, GridSerialRNG &sRNG,
GridParallelRNG &pRNG) {
std::string config, rng;
this->build_filenames(traj, Params, config, rng);
this->check_filename(rng);
this->check_filename(config);
uint32_t nersc_csum,scidac_csuma,scidac_csumb;
BinaryIO::readRNG(sRNG, pRNG, rng, 0,nersc_csum,scidac_csuma,scidac_csumb);
Metadata md_content;
ScidacReader _ScidacReader;
_ScidacReader.open(config);
_ScidacReader.readScidacFieldRecord(U,md_content); // format from the header
_ScidacReader.close();
std::cout << GridLogMessage << "Read Scidac Configuration from " << config
<< " checksum " << std::hex
<< nersc_csum<<"/"
<< scidac_csuma<<"/"
<< scidac_csumb
<< std::dec << std::endl;
};
};
}
}
#endif // HAVE_LIME
#endif // SCIDAC_CHECKPOINTER
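The intended route to this class is via the resource manager's LoadScidacCheckpointer; a minimal direct-construction sketch, assuming a hypothetical Serializable metadata type MyMeta, would look like:
CheckpointerParameters CPparams;
CPparams.format = "IEEE64BIG"; // must be one of the four formats checked in initialize()
MyMeta md; // hypothetical user metadata, stored in the Scidac records
ScidacHmcCheckpointer<PeriodicGimplR, MyMeta> CP(CPparams, md);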

View File

@ -115,17 +115,25 @@ class Integrator {
// Fundamental updates, include smearing // Fundamental updates, include smearing
for (int a = 0; a < as[level].actions.size(); ++a) { for (int a = 0; a < as[level].actions.size(); ++a) {
double start_full = usecond();
Field force(U._grid); Field force(U._grid);
conformable(U._grid, Mom._grid); conformable(U._grid, Mom._grid);
Field& Us = Smearer.get_U(as[level].actions.at(a)->is_smeared); Field& Us = Smearer.get_U(as[level].actions.at(a)->is_smeared);
double start_force = usecond();
as[level].actions.at(a)->deriv(Us, force); // deriv should NOT include Ta as[level].actions.at(a)->deriv(Us, force); // deriv should NOT include Ta
std::cout << GridLogIntegrator << "Smearing (on/off): " << as[level].actions.at(a)->is_smeared << std::endl; std::cout << GridLogIntegrator << "Smearing (on/off): " << as[level].actions.at(a)->is_smeared << std::endl;
if (as[level].actions.at(a)->is_smeared) Smearer.smeared_force(force); if (as[level].actions.at(a)->is_smeared) Smearer.smeared_force(force);
force = FieldImplementation::projectForce(force); // Ta for gauge fields force = FieldImplementation::projectForce(force); // Ta for gauge fields
double end_force = usecond();
Real force_abs = std::sqrt(norm2(force)/U._grid->gSites()); Real force_abs = std::sqrt(norm2(force)/U._grid->gSites());
std::cout << GridLogIntegrator << "Force average: " << force_abs << std::endl; std::cout << GridLogIntegrator << "["<<level<<"]["<<a<<"] Force average: " << force_abs << std::endl;
Mom -= force * ep; Mom -= force * ep;
double end_full = usecond();
double time_full = (end_full - start_full) / 1e3;
double time_force = (end_force - start_force) / 1e3;
std::cout << GridLogIntegrator << "["<<level<<"]["<<a<<"] P update elapsed time: " << time_full << " ms (force: " << time_force << " ms)" << std::endl;
} }
// Force from the other representations // Force from the other representations

View File

@ -6,13 +6,16 @@
#ifndef GAUGE_CONFIG_ #ifndef GAUGE_CONFIG_
#define GAUGE_CONFIG_ #define GAUGE_CONFIG_
namespace Grid { namespace Grid
{
namespace QCD { namespace QCD
{
//trivial class for no smearing //trivial class for no smearing
template <class Impl> template <class Impl>
class NoSmearing { class NoSmearing
{
public: public:
INHERIT_FIELD_TYPES(Impl); INHERIT_FIELD_TYPES(Impl);
@ -26,10 +29,10 @@ public:
Field &get_SmearedU() { return *ThinField; } Field &get_SmearedU() { return *ThinField; }
Field& get_U(bool smeared = false) { Field &get_U(bool smeared = false)
{
return *ThinField; return *ThinField;
} }
}; };
/*! /*!
@ -44,7 +47,8 @@ public:
It stores a list of smeared configurations. It stores a list of smeared configurations.
*/ */
template <class Gimpl> template <class Gimpl>
class SmearedConfiguration { class SmearedConfiguration
{
public: public:
INHERIT_GIMPL_TYPES(Gimpl); INHERIT_GIMPL_TYPES(Gimpl);
@ -55,7 +59,8 @@ class SmearedConfiguration {
// Member functions // Member functions
//==================================================================== //====================================================================
void fill_smearedSet(GaugeField& U) { void fill_smearedSet(GaugeField &U)
{
ThinLinks = &U; // attach the smearing routine to the field U ThinLinks = &U; // attach the smearing routine to the field U
// check the pointer is not null // check the pointer is not null
@ -63,13 +68,15 @@ class SmearedConfiguration {
std::cout << GridLogError std::cout << GridLogError
<< "[SmearedConfiguration] Error in ThinLinks pointer\n"; << "[SmearedConfiguration] Error in ThinLinks pointer\n";
if (smearingLevels > 0) { if (smearingLevels > 0)
{
std::cout << GridLogDebug std::cout << GridLogDebug
<< "[SmearedConfiguration] Filling SmearedSet\n"; << "[SmearedConfiguration] Filling SmearedSet\n";
GaugeField previous_u(ThinLinks->_grid); GaugeField previous_u(ThinLinks->_grid);
previous_u = *ThinLinks; previous_u = *ThinLinks;
for (int smearLvl = 0; smearLvl < smearingLevels; ++smearLvl) { for (int smearLvl = 0; smearLvl < smearingLevels; ++smearLvl)
{
StoutSmearing.smear(SmearedSet[smearLvl], previous_u); StoutSmearing.smear(SmearedSet[smearLvl], previous_u);
previous_u = SmearedSet[smearLvl]; previous_u = SmearedSet[smearLvl];
@ -82,7 +89,8 @@ class SmearedConfiguration {
} }
//==================================================================== //====================================================================
GaugeField AnalyticSmearedForce(const GaugeField &SigmaKPrime, GaugeField AnalyticSmearedForce(const GaugeField &SigmaKPrime,
const GaugeField& GaugeK) const { const GaugeField &GaugeK) const
{
GridBase *grid = GaugeK._grid; GridBase *grid = GaugeK._grid;
GaugeField C(grid), SigmaK(grid), iLambda(grid); GaugeField C(grid), SigmaK(grid), iLambda(grid);
GaugeLinkField iLambda_mu(grid); GaugeLinkField iLambda_mu(grid);
@ -94,7 +102,8 @@ class SmearedConfiguration {
SigmaK = zero; SigmaK = zero;
iLambda = zero; iLambda = zero;
for (int mu = 0; mu < Nd; mu++) { for (int mu = 0; mu < Nd; mu++)
{
Cmu = peekLorentz(C, mu); Cmu = peekLorentz(C, mu);
GaugeKmu = peekLorentz(GaugeK, mu); GaugeKmu = peekLorentz(GaugeK, mu);
SigmaKPrime_mu = peekLorentz(SigmaKPrime, mu); SigmaKPrime_mu = peekLorentz(SigmaKPrime, mu);
@ -109,14 +118,16 @@ class SmearedConfiguration {
} }
/*! @brief Returns smeared configuration at level 'Level' */ /*! @brief Returns smeared configuration at level 'Level' */
const GaugeField& get_smeared_conf(int Level) const { const GaugeField &get_smeared_conf(int Level) const
{
return SmearedSet[Level]; return SmearedSet[Level];
} }
//==================================================================== //====================================================================
void set_iLambda(GaugeLinkField &iLambda, GaugeLinkField &e_iQ, void set_iLambda(GaugeLinkField &iLambda, GaugeLinkField &e_iQ,
const GaugeLinkField &iQ, const GaugeLinkField &Sigmap, const GaugeLinkField &iQ, const GaugeLinkField &Sigmap,
const GaugeLinkField& GaugeK) const { const GaugeLinkField &GaugeK) const
{
GridBase *grid = iQ._grid; GridBase *grid = iQ._grid;
GaugeLinkField iQ2(grid), iQ3(grid), B1(grid), B2(grid), USigmap(grid); GaugeLinkField iQ2(grid), iQ3(grid), B1(grid), B2(grid), USigmap(grid);
GaugeLinkField unity(grid); GaugeLinkField unity(grid);
@ -208,13 +219,13 @@ class SmearedConfiguration {
//==================================================================== //====================================================================
public: public:
GaugeField * GaugeField *
ThinLinks; /*!< @brief Pointer to the thin ThinLinks; /* Pointer to the thin links configuration */
links configuration */
/*! @brief Standard constructor */ /* Standard constructor */
SmearedConfiguration(GridCartesian *UGrid, unsigned int Nsmear, SmearedConfiguration(GridCartesian *UGrid, unsigned int Nsmear,
Smear_Stout<Gimpl> &Stout) Smear_Stout<Gimpl> &Stout)
: smearingLevels(Nsmear), StoutSmearing(Stout), ThinLinks(NULL) { : smearingLevels(Nsmear), StoutSmearing(Stout), ThinLinks(NULL)
{
for (unsigned int i = 0; i < smearingLevels; ++i) for (unsigned int i = 0; i < smearingLevels; ++i)
SmearedSet.push_back(*(new GaugeField(UGrid))); SmearedSet.push_back(*(new GaugeField(UGrid)));
} }
@ -223,21 +234,29 @@ class SmearedConfiguration {
SmearedConfiguration() SmearedConfiguration()
: smearingLevels(0), StoutSmearing(), SmearedSet(), ThinLinks(NULL) {} : smearingLevels(0), StoutSmearing(), SmearedSet(), ThinLinks(NULL) {}
// attach the smeared routines to the thin links U and fill the smeared set // attach the smeared routines to the thin links U and fill the smeared set
void set_Field(GaugeField& U) { fill_smearedSet(U); } void set_Field(GaugeField &U)
{
double start = usecond();
fill_smearedSet(U);
double end = usecond();
double time = (end - start)/ 1e3;
std::cout << GridLogMessage << "Smearing in " << time << " ms" << std::endl;
}
//==================================================================== //====================================================================
void smeared_force(GaugeField& SigmaTilde) const { void smeared_force(GaugeField &SigmaTilde) const
if (smearingLevels > 0) { {
if (smearingLevels > 0)
{
double start = usecond();
GaugeField force = SigmaTilde; // actually = U*SigmaTilde GaugeField force = SigmaTilde; // actually = U*SigmaTilde
GaugeLinkField tmp_mu(SigmaTilde._grid); GaugeLinkField tmp_mu(SigmaTilde._grid);
for (int mu = 0; mu < Nd; mu++) { for (int mu = 0; mu < Nd; mu++)
{
// to get just SigmaTilde // to get just SigmaTilde
tmp_mu = adj(peekLorentz(SmearedSet[smearingLevels - 1], mu)) * tmp_mu = adj(peekLorentz(SmearedSet[smearingLevels - 1], mu)) * peekLorentz(force, mu);
peekLorentz(force, mu);
pokeLorentz(force, tmp_mu, mu); pokeLorentz(force, tmp_mu, mu);
} }
@ -246,33 +265,43 @@ class SmearedConfiguration {
force = AnalyticSmearedForce(force, *ThinLinks); force = AnalyticSmearedForce(force, *ThinLinks);
for (int mu = 0; mu < Nd; mu++) { for (int mu = 0; mu < Nd; mu++)
{
tmp_mu = peekLorentz(*ThinLinks, mu) * peekLorentz(force, mu); tmp_mu = peekLorentz(*ThinLinks, mu) * peekLorentz(force, mu);
pokeLorentz(SigmaTilde, tmp_mu, mu); pokeLorentz(SigmaTilde, tmp_mu, mu);
} }
double end = usecond();
double time = (end - start)/ 1e3;
std::cout << GridLogMessage << "Smearing force in " << time << " ms" << std::endl;
} // if smearingLevels = 0 do nothing } // if smearingLevels = 0 do nothing
} }
//==================================================================== //====================================================================
GaugeField &get_SmearedU() { return SmearedSet[smearingLevels - 1]; } GaugeField &get_SmearedU() { return SmearedSet[smearingLevels - 1]; }
GaugeField& get_U(bool smeared = false) { GaugeField &get_U(bool smeared = false)
{
// get the config, thin links by default // get the config, thin links by default
if (smeared) { if (smeared)
if (smearingLevels) { {
if (smearingLevels)
{
RealD impl_plaq = RealD impl_plaq =
WilsonLoops<Gimpl>::avgPlaquette(SmearedSet[smearingLevels - 1]); WilsonLoops<Gimpl>::avgPlaquette(SmearedSet[smearingLevels - 1]);
std::cout << GridLogDebug << "getting Usmr Plaq: " << impl_plaq std::cout << GridLogDebug << "getting Usmr Plaq: " << impl_plaq
<< std::endl; << std::endl;
return get_SmearedU(); return get_SmearedU();
}
} else { else
{
RealD impl_plaq = WilsonLoops<Gimpl>::avgPlaquette(*ThinLinks); RealD impl_plaq = WilsonLoops<Gimpl>::avgPlaquette(*ThinLinks);
std::cout << GridLogDebug << "getting Thin Plaq: " << impl_plaq std::cout << GridLogDebug << "getting Thin Plaq: " << impl_plaq
<< std::endl; << std::endl;
return *ThinLinks; return *ThinLinks;
} }
} else { }
else
{
RealD impl_plaq = WilsonLoops<Gimpl>::avgPlaquette(*ThinLinks); RealD impl_plaq = WilsonLoops<Gimpl>::avgPlaquette(*ThinLinks);
std::cout << GridLogDebug << "getting Thin Plaq: " << impl_plaq std::cout << GridLogDebug << "getting Thin Plaq: " << impl_plaq
<< std::endl; << std::endl;

View File

@ -173,7 +173,7 @@ void WilsonFlow<Gimpl>::smear(GaugeField& out, const GaugeField& in) const {
std::cout << "Time to evolve " << diff.count() << " s\n"; std::cout << "Time to evolve " << diff.count() << " s\n";
#endif #endif
std::cout << GridLogMessage << "[WilsonFlow] Energy density (plaq) : " std::cout << GridLogMessage << "[WilsonFlow] Energy density (plaq) : "
<< step << " " << step << " " << tau(step) << " "
<< energyDensityPlaquette(step,out) << std::endl; << energyDensityPlaquette(step,out) << std::endl;
if( step % measure_interval == 0){ if( step % measure_interval == 0){
std::cout << GridLogMessage << "[WilsonFlow] Top. charge : " std::cout << GridLogMessage << "[WilsonFlow] Top. charge : "
@ -193,7 +193,7 @@ void WilsonFlow<Gimpl>::smear_adaptive(GaugeField& out, const GaugeField& in, Re
//std::cout << GridLogMessage << "Evolution time :"<< taus << std::endl; //std::cout << GridLogMessage << "Evolution time :"<< taus << std::endl;
evolve_step_adaptive(out, maxTau); evolve_step_adaptive(out, maxTau);
std::cout << GridLogMessage << "[WilsonFlow] Energy density (plaq) : " std::cout << GridLogMessage << "[WilsonFlow] Energy density (plaq) : "
<< step << " " << step << " " << taus << " "
<< energyDensityPlaquette(out) << std::endl; << energyDensityPlaquette(out) << std::endl;
if( step % measure_interval == 0){ if( step % measure_interval == 0){
std::cout << GridLogMessage << "[WilsonFlow] Top. charge : " std::cout << GridLogMessage << "[WilsonFlow] Top. charge : "

View File

@ -55,6 +55,11 @@ void Hdf5Writer::writeDefault(const std::string &s, const char *x)
writeDefault(s, sx); writeDefault(s, sx);
} }
Group & Hdf5Writer::getGroup(void)
{
return group_;
}
// Reader implementation /////////////////////////////////////////////////////// // Reader implementation ///////////////////////////////////////////////////////
Hdf5Reader::Hdf5Reader(const std::string &fileName) Hdf5Reader::Hdf5Reader(const std::string &fileName)
: fileName_(fileName) : fileName_(fileName)
@ -103,3 +108,8 @@ void Hdf5Reader::readDefault(const std::string &s, std::string &x)
x.resize(strType.getSize()); x.resize(strType.getSize());
attribute.read(strType, &(x[0])); attribute.read(strType, &(x[0]));
} }
Group & Hdf5Reader::getGroup(void)
{
return group_;
}

View File

@ -38,6 +38,7 @@ namespace Grid
template <typename U> template <typename U>
typename std::enable_if<!element<std::vector<U>>::is_number, void>::type typename std::enable_if<!element<std::vector<U>>::is_number, void>::type
writeDefault(const std::string &s, const std::vector<U> &x); writeDefault(const std::string &s, const std::vector<U> &x);
H5NS::Group & getGroup(void);
private: private:
template <typename U> template <typename U>
void writeSingleAttribute(const U &x, const std::string &name, void writeSingleAttribute(const U &x, const std::string &name,
@ -65,6 +66,7 @@ namespace Grid
template <typename U> template <typename U>
typename std::enable_if<!element<std::vector<U>>::is_number, void>::type typename std::enable_if<!element<std::vector<U>>::is_number, void>::type
readDefault(const std::string &s, std::vector<U> &x); readDefault(const std::string &s, std::vector<U> &x);
H5NS::Group & getGroup(void);
private: private:
template <typename U> template <typename U>
void readSingleAttribute(U &x, const std::string &name, void readSingleAttribute(U &x, const std::string &name,

View File

@ -30,6 +30,48 @@ namespace Grid {
typedef typename std::vector<std::vector<typename TensorToVec<T>::type>> type; typedef typename std::vector<std::vector<typename TensorToVec<T>::type>> type;
}; };
template <typename T>
void tensorDim(std::vector<size_t> &dim, const T &t, const bool wipe = true)
{
if (wipe)
{
dim.clear();
}
}
template <typename T>
void tensorDim(std::vector<size_t> &dim, const iScalar<T> &t, const bool wipe = true)
{
if (wipe)
{
dim.clear();
}
tensorDim(dim, t._internal, false);
}
template <typename T, int N>
void tensorDim(std::vector<size_t> &dim, const iVector<T, N> &t, const bool wipe = true)
{
if (wipe)
{
dim.clear();
}
dim.push_back(N);
tensorDim(dim, t._internal[0], false);
}
template <typename T, int N>
void tensorDim(std::vector<size_t> &dim, const iMatrix<T, N> &t, const bool wipe = true)
{
if (wipe)
{
dim.clear();
}
dim.push_back(N);
dim.push_back(N);
tensorDim(dim, t._internal[0][0], false);
}
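A sketch of the recursion above, using the Grid tensor types it is written for: dimensions are appended outermost first, and the generic leaf overload terminates on plain scalars.
std::vector<size_t> dim;
iMatrix<iVector<iScalar<double>, 4>, 3> t;
tensorDim(dim, t); // dim == {3, 3, 4}: N x N from iMatrix, 4 from iVector; iScalar and double add nothing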
template <typename T> template <typename T>
typename TensorToVec<T>::type tensorToVec(const T &t) typename TensorToVec<T>::type tensorToVec(const T &t)
{ {

View File

@ -31,6 +31,17 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
using namespace Grid; using namespace Grid;
using namespace std; using namespace std;
void Grid::xmlCheckParse(const pugi::xml_parse_result &result, const std::string name)
{
if (!result)
{
std::cerr << "XML parsing error for " << name << std::endl;
std::cerr << "XML error description: " << result.description() << std::endl;
std::cerr << "XML error offset : " << result.offset << std::endl;
abort();
}
}
// Writer implementation /////////////////////////////////////////////////////// // Writer implementation ///////////////////////////////////////////////////////
XmlWriter::XmlWriter(const string &fileName, string toplev) : fileName_(fileName) XmlWriter::XmlWriter(const string &fileName, string toplev) : fileName_(fileName)
{ {
@ -54,6 +65,19 @@ void XmlWriter::push(const string &s)
node_ = node_.append_child(s.c_str()); node_ = node_.append_child(s.c_str());
} }
void XmlWriter::pushXmlString(const std::string &s)
{
pugi::xml_document doc;
auto result = doc.load_buffer(s.c_str(), s.size());
xmlCheckParse(result, "fragment\n'" + s +"'");
for (pugi::xml_node child = doc.first_child(); child; child = child.next_sibling())
{
node_ = node_.append_copy(child);
}
pop();
}
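A minimal usage sketch (file name assumed). Note that, as written above, the call splices the fragment's top-level nodes under the current node and then pops one level, so it naturally ends the current depth:
XmlWriter wr("out.xml");
wr.push("wrapper");
wr.pushXmlString("<meta><beta>5.6</beta></meta>"); // appends <meta> under <wrapper>, then pops back out of <wrapper>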
void XmlWriter::pop(void) void XmlWriter::pop(void)
{ {
node_ = node_.parent(); node_ = node_.parent();
@ -65,31 +89,23 @@ std::string XmlWriter::XmlString(void)
return oss.str(); return oss.str();
} }
XmlReader::XmlReader(const char *xmlstring,string toplev) : fileName_("")
{
pugi::xml_parse_result result;
result = doc_.load_string(xmlstring);
if ( !result ) {
cerr << "XML error description (from char *): " << result.description() << "\nXML\n"<< xmlstring << "\n";
cerr << "XML error offset (from char *) " << result.offset << "\nXML\n"<< xmlstring <<"\n";
abort();
}
if ( toplev == std::string("") ) {
node_ = doc_;
} else {
node_ = doc_.child(toplev.c_str());
}
}
// Reader implementation /////////////////////////////////////////////////////// // Reader implementation ///////////////////////////////////////////////////////
XmlReader::XmlReader(const string &fileName,string toplev) : fileName_(fileName) XmlReader::XmlReader(const std::string &s, const bool isBuffer,
std::string toplev)
{ {
pugi::xml_parse_result result; pugi::xml_parse_result result;
result = doc_.load_file(fileName_.c_str());
if ( !result ) { if (isBuffer)
cerr << "XML error description: " << result.description() <<" "<< fileName_ <<"\n"; {
cerr << "XML error offset : " << result.offset <<" "<< fileName_ <<"\n"; fileName_ = "<string>";
abort(); result = doc_.load_string(s.c_str());
xmlCheckParse(result, "string\n'" + s + "'");
}
else
{
fileName_ = s;
result = doc_.load_file(s.c_str());
xmlCheckParse(result, "file '" + fileName_ + "'");
} }
if ( toplev == std::string("") ) { if ( toplev == std::string("") ) {
node_ = doc_; node_ = doc_;
@ -98,7 +114,7 @@ XmlReader::XmlReader(const string &fileName,string toplev) : fileName_(fileName)
} }
} }
bool XmlReader::push(const string &s) bool XmlReader::push(const std::string &s)
{ {
if (node_.child(s.c_str())) if (node_.child(s.c_str()))
{ {
@ -129,7 +145,6 @@ bool XmlReader::nextElement(const std::string &s)
{ {
return false; return false;
} }
} }
template <> template <>
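Both call modes of the unified constructor, as a sketch (file name assumed):
XmlReader fromFile("params.xml"); // default isBuffer = false: parse the named file
XmlReader fromString("<grid><x>1</x></grid>", true); // isBuffer = true: parse the string itself; fileName_ reports "<string>"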

View File

@ -43,6 +43,7 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
namespace Grid namespace Grid
{ {
void xmlCheckParse(const pugi::xml_parse_result &result, const std::string name);
class XmlWriter: public Writer<XmlWriter> class XmlWriter: public Writer<XmlWriter>
{ {
@ -50,6 +51,7 @@ namespace Grid
XmlWriter(const std::string &fileName, std::string toplev = std::string("grid") ); XmlWriter(const std::string &fileName, std::string toplev = std::string("grid") );
virtual ~XmlWriter(void); virtual ~XmlWriter(void);
void push(const std::string &s); void push(const std::string &s);
void pushXmlString(const std::string &s);
void pop(void); void pop(void);
template <typename U> template <typename U>
void writeDefault(const std::string &s, const U &x); void writeDefault(const std::string &s, const U &x);
@ -65,8 +67,8 @@ namespace Grid
class XmlReader: public Reader<XmlReader> class XmlReader: public Reader<XmlReader>
{ {
public: public:
XmlReader(const char *xmlstring,std::string toplev = std::string("grid") ); XmlReader(const std::string &fileName, const bool isBuffer = false,
XmlReader(const std::string &fileName,std::string toplev = std::string("grid") ); std::string toplev = std::string("grid") );
virtual ~XmlReader(void) = default; virtual ~XmlReader(void) = default;
bool push(const std::string &s); bool push(const std::string &s);
void pop(void); void pop(void);
@ -75,6 +77,8 @@ namespace Grid
void readDefault(const std::string &s, U &output); void readDefault(const std::string &s, U &output);
template <typename U> template <typename U>
void readDefault(const std::string &s, std::vector<U> &output); void readDefault(const std::string &s, std::vector<U> &output);
private:
void checkParse(const pugi::xml_parse_result &result, const std::string name);
private: private:
pugi::xml_document doc_; pugi::xml_document doc_;
pugi::xml_node node_; pugi::xml_node node_;

View File

@ -66,6 +66,8 @@ void Gather_plane_simple_table (std::vector<std::pair<int,int> >& table,const La
parallel_for(int i=0;i<num;i++){ parallel_for(int i=0;i<num;i++){
compress.Compress(&buffer[off],table[i].first,rhs._odata[so+table[i].second]); compress.Compress(&buffer[off],table[i].first,rhs._odata[so+table[i].second]);
} }
// Further optimisation: i) streaming store the result
// ii) software prefetch the first element of the next table entry
} }
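One possible shape for option (ii), assuming the GCC/Clang __builtin_prefetch intrinsic; this is a sketch of the suggested optimisation, not part of the change above:
parallel_for(int i=0;i<num;i++){
if (i+1 < num) __builtin_prefetch(&rhs._odata[so+table[i+1].second]); // hint-load the next table entry
compress.Compress(&buffer[off],table[i].first,rhs._odata[so+table[i].second]);
}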
/////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////
@ -505,25 +507,24 @@ class CartesianStencil { // Stencil runs along coordinate axes only; NO diagonal
template<class decompressor> template<class decompressor>
void CommsMerge(decompressor decompress,std::vector<Merge> &mm,std::vector<Decompress> &dd) { void CommsMerge(decompressor decompress,std::vector<Merge> &mm,std::vector<Decompress> &dd) {
for(int i=0;i<mm.size();i++){
mergetime-=usecond(); mergetime-=usecond();
for(int i=0;i<mm.size();i++){
parallel_for(int o=0;o<mm[i].buffer_size/2;o++){ parallel_for(int o=0;o<mm[i].buffer_size/2;o++){
decompress.Exchange(mm[i].mpointer, decompress.Exchange(mm[i].mpointer,
mm[i].vpointers[0], mm[i].vpointers[0],
mm[i].vpointers[1], mm[i].vpointers[1],
mm[i].type,o); mm[i].type,o);
} }
mergetime+=usecond();
} }
mergetime+=usecond();
for(int i=0;i<dd.size();i++){
decompresstime-=usecond(); decompresstime-=usecond();
for(int i=0;i<dd.size();i++){
parallel_for(int o=0;o<dd[i].buffer_size;o++){ parallel_for(int o=0;o<dd[i].buffer_size;o++){
decompress.Decompress(dd[i].kernel_p,dd[i].mpi_p,o); decompress.Decompress(dd[i].kernel_p,dd[i].mpi_p,o);
} }
decompresstime+=usecond();
} }
decompresstime+=usecond();
} }
//////////////////////////////////////// ////////////////////////////////////////
// Set up routines // Set up routines

View File

@ -297,12 +297,7 @@ void Grid_init(int *argc,char ***argv)
std::cout << "but WITHOUT ANY WARRANTY; without even the implied warranty of"<<std::endl; std::cout << "but WITHOUT ANY WARRANTY; without even the implied warranty of"<<std::endl;
std::cout << "MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the"<<std::endl; std::cout << "MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the"<<std::endl;
std::cout << "GNU General Public License for more details."<<std::endl; std::cout << "GNU General Public License for more details."<<std::endl;
#ifdef GITHASH printHash();
std::cout << "Current Grid git commit hash=" << GITHASH << std::endl;
#else
std::cout << "Current Grid git commit hash is undefined. Check makefile." << std::endl;
#endif
#undef GITHASH
std::cout << std::endl; std::cout << std::endl;
} }
@ -398,6 +393,7 @@ void Grid_init(int *argc,char ***argv)
if( GridCmdOptionExists(*argv,*argv+*argc,"--comms-threads") ){ if( GridCmdOptionExists(*argv,*argv+*argc,"--comms-threads") ){
arg= GridCmdOptionPayload(*argv,*argv+*argc,"--comms-threads"); arg= GridCmdOptionPayload(*argv,*argv+*argc,"--comms-threads");
GridCmdOptionInt(arg,CartesianCommunicator::nCommThreads); GridCmdOptionInt(arg,CartesianCommunicator::nCommThreads);
assert(CartesianCommunicator::nCommThreads > 0);
} }
if( GridCmdOptionExists(*argv,*argv+*argc,"--cacheblocking") ){ if( GridCmdOptionExists(*argv,*argv+*argc,"--cacheblocking") ){
arg= GridCmdOptionPayload(*argv,*argv+*argc,"--cacheblocking"); arg= GridCmdOptionPayload(*argv,*argv+*argc,"--cacheblocking");

View File

@ -64,6 +64,7 @@ namespace Grid {
std::vector<int> &simd, std::vector<int> &simd,
std::vector<int> &mpi); std::vector<int> &mpi);
void printHash(void);
}; };
#endif #endif

12
lib/util/version.cc Normal file
View File

@ -0,0 +1,12 @@
#include <iostream>
#include <version.h>
namespace Grid {
void printHash(){
#ifdef GITHASH
std::cout << "Current Grid git commit hash=" << GITHASH << std::endl;
#else
std::cout << "Current Grid git commit hash is undefined. Check makefile." << std::endl;
#endif
#undef GITHASH
}
}

View File

@ -79,7 +79,7 @@ int main (int argc, char ** argv)
std::cout <<GridLogMessage<<"** Writing out ILDG conf *********"<<std::endl; std::cout <<GridLogMessage<<"** Writing out ILDG conf *********"<<std::endl;
std::cout <<GridLogMessage<<"**************************************"<<std::endl; std::cout <<GridLogMessage<<"**************************************"<<std::endl;
std::string file("./ckpoint_ildg.4000"); std::string file("./ckpoint_ildg.4000");
IldgWriter _IldgWriter; IldgWriter _IldgWriter(Fine.IsBoss());
_IldgWriter.open(file); _IldgWriter.open(file);
_IldgWriter.writeConfiguration(Umu,4000,std::string("dummy_ildg_LFN"),std::string("dummy_config")); _IldgWriter.writeConfiguration(Umu,4000,std::string("dummy_ildg_LFN"),std::string("dummy_config"));
_IldgWriter.close(); _IldgWriter.close();

View File

@ -0,0 +1,259 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./tests/Test_dwf_compressed_lanczos_reorg.cc
Copyright (C) 2017
Author: Leans heavily on Christoph Lehner's code
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
/*
* Reimplement the badly named "multigrid" lanczos as compressed Lanczos using the features
* in Grid that were intended to be used to support blocked Aggregates, from
*/
#include <Grid/Grid.h>
#include <Grid/algorithms/iterative/ImplicitlyRestartedLanczos.h>
#include <Grid/algorithms/iterative/LocalCoherenceLanczos.h>
using namespace std;
using namespace Grid;
using namespace Grid::QCD;
template<class Fobj,class CComplex,int nbasis>
class LocalCoherenceLanczosScidac : public LocalCoherenceLanczos<Fobj,CComplex,nbasis>
{
public:
typedef iVector<CComplex,nbasis > CoarseSiteVector;
typedef Lattice<CoarseSiteVector> CoarseField;
typedef Lattice<CComplex> CoarseScalar; // used for inner products on fine field
typedef Lattice<Fobj> FineField;
LocalCoherenceLanczosScidac(GridBase *FineGrid,GridBase *CoarseGrid,
LinearOperatorBase<FineField> &FineOp,
int checkerboard)
// Base constructor
: LocalCoherenceLanczos<Fobj,CComplex,nbasis>(FineGrid,CoarseGrid,FineOp,checkerboard)
{};
void checkpointFine(std::string evecs_file,std::string evals_file)
{
#ifdef HAVE_LIME
assert(this->subspace.size()==nbasis);
emptyUserRecord record;
Grid::QCD::ScidacWriter WR(this->_FineGrid->IsBoss());
WR.open(evecs_file);
for(int k=0;k<nbasis;k++) {
WR.writeScidacFieldRecord(this->subspace[k],record);
}
WR.close();
XmlWriter WRx(evals_file);
write(WRx,"evals",this->evals_fine);
#else
assert(0);
#endif
}
void checkpointFineRestore(std::string evecs_file,std::string evals_file)
{
#ifdef HAVE_LIME
this->evals_fine.resize(nbasis);
this->subspace.resize(nbasis,this->_FineGrid);
std::cout << GridLogIRL<< "checkpointFineRestore: Reading evals from "<<evals_file<<std::endl;
XmlReader RDx(evals_file);
read(RDx,"evals",this->evals_fine);
assert(this->evals_fine.size()==nbasis);
std::cout << GridLogIRL<< "checkpointFineRestore: Reading evecs from "<<evecs_file<<std::endl;
emptyUserRecord record;
Grid::QCD::ScidacReader RD ;
RD.open(evecs_file);
for(int k=0;k<nbasis;k++) {
this->subspace[k].checkerboard=this->_checkerboard;
RD.readScidacFieldRecord(this->subspace[k],record);
}
RD.close();
#else
assert(0);
#endif
}
void checkpointCoarse(std::string evecs_file,std::string evals_file)
{
#ifdef HAVE_LIME
int n = this->evec_coarse.size();
emptyUserRecord record;
Grid::QCD::ScidacWriter WR(this->_CoarseGrid->IsBoss());
WR.open(evecs_file);
for(int k=0;k<n;k++) {
WR.writeScidacFieldRecord(this->evec_coarse[k],record);
}
WR.close();
XmlWriter WRx(evals_file);
write(WRx,"evals",this->evals_coarse);
#else
assert(0);
#endif
}
void checkpointCoarseRestore(std::string evecs_file,std::string evals_file,int nvec)
{
#ifdef HAVE_LIME
std::cout << "resizing coarse vecs to " << nvec<< std::endl;
this->evals_coarse.resize(nvec);
this->evec_coarse.resize(nvec,this->_CoarseGrid);
std::cout << GridLogIRL<< "checkpointCoarseRestore: Reading evals from "<<evals_file<<std::endl;
XmlReader RDx(evals_file);
read(RDx,"evals",this->evals_coarse);
assert(this->evals_coarse.size()==nvec);
emptyUserRecord record;
std::cout << GridLogIRL<< "checkpointCoarseRestore: Reading evecs from "<<evecs_file<<std::endl;
Grid::QCD::ScidacReader RD ;
RD.open(evecs_file);
for(int k=0;k<nvec;k++) {
RD.readScidacFieldRecord(this->evec_coarse[k],record);
}
RD.close();
#else
assert(0);
#endif
}
};
int main (int argc, char ** argv) {
Grid_init(&argc,&argv);
GridLogIRL.TimingMode(1);
LocalCoherenceLanczosParams Params;
{
Params.omega.resize(10);
Params.blockSize.resize(5);
XmlWriter writer("Params_template.xml");
write(writer,"Params",Params);
std::cout << GridLogMessage << " Written Params_template.xml" <<std::endl;
}
{
XmlReader reader(std::string("./Params.xml"));
read(reader, "Params", Params);
}
int Ls = (int)Params.omega.size();
RealD mass = Params.mass;
RealD M5 = Params.M5;
std::vector<int> blockSize = Params.blockSize;
std::vector<int> latt({32,32,32,32});
uint64_t vol = Ls*latt[0]*latt[1]*latt[2]*latt[3];
double mat_flop= 2.0*1320.0*vol;
// Grids
GridCartesian * UGrid = SpaceTimeGrid::makeFourDimGrid(latt,
GridDefaultSimd(Nd,vComplex::Nsimd()),
GridDefaultMpi());
GridRedBlackCartesian * UrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid);
GridCartesian * FGrid = SpaceTimeGrid::makeFiveDimGrid(Ls,UGrid);
GridRedBlackCartesian * FrbGrid = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,UGrid);
std::vector<int> fineLatt = latt;
int dims=fineLatt.size();
assert(blockSize.size()==dims+1);
std::vector<int> coarseLatt(dims);
std::vector<int> coarseLatt5d ;
for (int d=0;d<coarseLatt.size();d++){
coarseLatt[d] = fineLatt[d]/blockSize[d]; assert(coarseLatt[d]*blockSize[d]==fineLatt[d]);
}
std::cout << GridLogMessage<< " 5d coarse lattice is ";
for (int i=0;i<coarseLatt.size();i++){
std::cout << coarseLatt[i]<<"x";
}
int cLs = Ls/blockSize[dims]; assert(cLs*blockSize[dims]==Ls);
std::cout << cLs<<std::endl;
GridCartesian * CoarseGrid4 = SpaceTimeGrid::makeFourDimGrid(coarseLatt, GridDefaultSimd(Nd,vComplex::Nsimd()),GridDefaultMpi());
GridRedBlackCartesian * CoarseGrid4rb = SpaceTimeGrid::makeFourDimRedBlackGrid(CoarseGrid4);
GridCartesian * CoarseGrid5 = SpaceTimeGrid::makeFiveDimGrid(cLs,CoarseGrid4);
// Gauge field
std::vector<int> seeds4({1,2,3,4});
GridParallelRNG RNG4(UGrid); RNG4.SeedFixedIntegers(seeds4);
LatticeGaugeField Umu(UGrid);
SU3::HotConfiguration(RNG4,Umu);
// FieldMetaData header;
// NerscIO::readConfiguration(Umu,header,Params.config);
std::cout << GridLogMessage << "Lattice dimensions: " << latt << " Ls: " << Ls << std::endl;
// ZMobius EO Operator
ZMobiusFermionR Ddwf(Umu, *FGrid, *FrbGrid, *UGrid, *UrbGrid, mass, M5, Params.omega,1.,0.);
SchurDiagTwoOperator<ZMobiusFermionR,LatticeFermion> HermOp(Ddwf);
// Eigenvector storage
LanczosParams fine =Params.FineParams;
LanczosParams coarse=Params.CoarseParams;
const int Ns1 = fine.Nstop; const int Ns2 = coarse.Nstop;
const int Nk1 = fine.Nk; const int Nk2 = coarse.Nk;
const int Nm1 = fine.Nm; const int Nm2 = coarse.Nm;
std::cout << GridLogMessage << "Keep " << fine.Nstop << " fine vectors" << std::endl;
std::cout << GridLogMessage << "Keep " << coarse.Nstop << " coarse vectors" << std::endl;
assert(Nm2 >= Nm1);
const int nbasis= 60;
assert(nbasis==Ns1);
LocalCoherenceLanczosScidac<vSpinColourVector,vTComplex,nbasis> _LocalCoherenceLanczos(FrbGrid,CoarseGrid5,HermOp,Odd);
std::cout << GridLogMessage << "Constructed LocalCoherenceLanczos" << std::endl;
assert( (Params.doFine)||(Params.doFineRead));
if ( Params.doFine ) {
std::cout << GridLogMessage << "Performing fine grid IRL Nstop "<< Ns1 << " Nk "<<Nk1<<" Nm "<<Nm1<< std::endl;
double t0=-usecond();
_LocalCoherenceLanczos.calcFine(fine.Cheby,
fine.Nstop,fine.Nk,fine.Nm,
fine.resid,fine.MaxIt,
fine.betastp,fine.MinRes);
t0+=usecond();
double t1=-usecond();
if ( Params.saveEvecs ) {
std::cout << GridLogIRL<<"Checkpointing Fine evecs"<<std::endl;
_LocalCoherenceLanczos.checkpointFine(std::string("evecs.scidac"),std::string("evals.xml"));
}
t1+=usecond();
std::cout << GridLogMessage << "Computation time is " << (t0)/1.0e6 <<" seconds"<<std::endl;
if ( Params.saveEvecs ) std::cout << GridLogMessage << "I/O time is " << (t1)/1.0e6 <<" seconds"<<std::endl;
std::cout << GridLogMessage << "Time to solution is " << (t1+t0)/1.0e6 <<" seconds"<<std::endl;
std::cout << GridLogMessage << "Done"<<std::endl;
}
Grid_finalize();
}

View File

@ -49,6 +49,8 @@ int main (int argc, char ** argv)
const int Ls=8; const int Ls=8;
std::cout << GridLogMessage << "::::: NB: to enable a quick bit reproducibility check use the --checksums flag. " << std::endl;
GridCartesian * UGrid = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(), GridDefaultSimd(Nd,vComplexD::Nsimd()),GridDefaultMpi()); GridCartesian * UGrid = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(), GridDefaultSimd(Nd,vComplexD::Nsimd()),GridDefaultMpi());
GridRedBlackCartesian * UrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid); GridRedBlackCartesian * UrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid);
GridCartesian * FGrid = SpaceTimeGrid::makeFiveDimGrid(Ls,UGrid); GridCartesian * FGrid = SpaceTimeGrid::makeFiveDimGrid(Ls,UGrid);
@ -90,18 +92,44 @@ int main (int argc, char ** argv)
SchurDiagMooeeOperator<DomainWallFermionD,LatticeFermionD> HermOpEO(Ddwf); SchurDiagMooeeOperator<DomainWallFermionD,LatticeFermionD> HermOpEO(Ddwf);
SchurDiagMooeeOperator<DomainWallFermionF,LatticeFermionF> HermOpEO_f(Ddwf_f); SchurDiagMooeeOperator<DomainWallFermionF,LatticeFermionF> HermOpEO_f(Ddwf_f);
std::cout << "Starting mixed CG" << std::endl; std::cout << GridLogMessage << "::::::::::::: Starting mixed CG" << std::endl;
MixedPrecisionConjugateGradient<LatticeFermionD,LatticeFermionF> mCG(1.0e-8, 10000, 50, FrbGrid_f, HermOpEO_f, HermOpEO); MixedPrecisionConjugateGradient<LatticeFermionD,LatticeFermionF> mCG(1.0e-8, 10000, 50, FrbGrid_f, HermOpEO_f, HermOpEO);
mCG(src_o,result_o); mCG(src_o,result_o);
std::cout << "Starting regular CG" << std::endl; std::cout << GridLogMessage << "::::::::::::: Starting regular CG" << std::endl;
ConjugateGradient<LatticeFermionD> CG(1.0e-8,10000); ConjugateGradient<LatticeFermionD> CG(1.0e-8,10000);
CG(HermOpEO,src_o,result_o_2); CG(HermOpEO,src_o,result_o_2);
LatticeFermionD diff_o(FrbGrid); LatticeFermionD diff_o(FrbGrid);
RealD diff = axpy_norm(diff_o, -1.0, result_o, result_o_2); RealD diff = axpy_norm(diff_o, -1.0, result_o, result_o_2);
std::cout << "Diff between mixed and regular CG: " << diff << std::endl; std::cout << GridLogMessage << "::::::::::::: Diff between mixed and regular CG: " << diff << std::endl;
#ifdef HAVE_LIME
if( GridCmdOptionExists(argv,argv+argc,"--checksums") ){
std::string file1("./Propagator1");
emptyUserRecord record;
uint32_t nersc_csum;
uint32_t scidac_csuma;
uint32_t scidac_csumb;
typedef SpinColourVectorD FermionD;
typedef vSpinColourVectorD vFermionD;
BinarySimpleMunger<FermionD,FermionD> munge;
std::string format = getFormatString<vFermionD>();
BinaryIO::writeLatticeObject<vFermionD,FermionD>(result_o,file1,munge, 0, format,
nersc_csum,scidac_csuma,scidac_csumb);
std::cout << GridLogMessage << " Mixed checksums "<<std::hex << scidac_csuma << " "<<scidac_csumb<<std::endl;
BinaryIO::writeLatticeObject<vFermionD,FermionD>(result_o_2,file1,munge, 0, format,
nersc_csum,scidac_csuma,scidac_csumb);
std::cout << GridLogMessage << " CG checksums "<<std::hex << scidac_csuma << " "<<scidac_csumb<<std::endl;
}
#endif
Grid_finalize(); Grid_finalize();

View File

@ -393,7 +393,6 @@ int main(int argc, char **argv) {
} }
random(Foo); random(Foo);
*/ */
lex_sites(Foo);
Integer mm[4]; Integer mm[4];
mm[0] = 1; mm[0] = 1;

View File

@ -0,0 +1,123 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./tests/Test_gp_rect_force.cc
Copyright (C) 2015
Author: paboyle <paboyle@ph.ed.ac.uk>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#include <Grid/Grid.h>
using namespace std;
using namespace Grid;
using namespace Grid::QCD;
int main (int argc, char ** argv)
{
Grid_init(&argc,&argv);
std::vector<int> latt_size = GridDefaultLatt();
std::vector<int> simd_layout = GridDefaultSimd(Nd,vComplex::Nsimd());
std::vector<int> mpi_layout = GridDefaultMpi();
GridCartesian Grid(latt_size,simd_layout,mpi_layout);
GridRedBlackCartesian RBGrid(&Grid);
int threads = GridThread::GetThreads();
std::cout<<GridLogMessage << "Grid is setup to use "<<threads<<" threads"<<std::endl;
std::vector<int> seeds({1,2,3,4});
GridParallelRNG pRNG(&Grid);
pRNG.SeedFixedIntegers(std::vector<int>({45,12,81,9}));
LatticeGaugeField U(&Grid);
SU3::HotConfiguration(pRNG,U);
double beta = 1.0;
double c1 = 0.331;
//ConjugatePlaqPlusRectangleActionR Action(beta,c1);
ConjugateWilsonGaugeActionR Action(beta);
//WilsonGaugeActionR Action(beta);
ComplexD S = Action.S(U);
// get the deriv of phidag MdagM phi with respect to "U"
LatticeGaugeField UdSdU(&Grid);
Action.deriv(U,UdSdU);
////////////////////////////////////
// Modify the gauge field a little
////////////////////////////////////
RealD dt = 0.0001;
LatticeColourMatrix mommu(&Grid);
LatticeColourMatrix forcemu(&Grid);
LatticeGaugeField mom(&Grid);
LatticeGaugeField Uprime(&Grid);
for(int mu=0;mu<Nd;mu++){
SU3::GaussianFundamentalLieAlgebraMatrix(pRNG, mommu); // Traceless antihermitian momentum; gaussian in lie alg
PokeIndex<LorentzIndex>(mom,mommu,mu);
// fourth order exponential approx
parallel_for(auto i=mom.begin();i<mom.end();i++){ // exp(pmu dt) * Umu
Uprime[i](mu) = U[i](mu) + mom[i](mu)*U[i](mu)*dt ;
}
}
ComplexD Sprime = Action.S(Uprime);
//////////////////////////////////////////////
// Use derivative to estimate dS
//////////////////////////////////////////////
LatticeComplex dS(&Grid); dS = zero;
for(int mu=0;mu<Nd;mu++){
auto UdSdUmu = PeekIndex<LorentzIndex>(UdSdU,mu);
mommu = PeekIndex<LorentzIndex>(mom,mu);
// Update gauge action density
// U = exp(p dt) U
// dU/dt = p U
// so dSdt = trace( dUdt dSdU) = trace( p UdSdUmu )
dS = dS - trace(mommu*UdSdUmu)*dt*2.0;
}
ComplexD dSpred = sum(dS);
std::cout << GridLogMessage << " S "<<S<<std::endl;
std::cout << GridLogMessage << " Sprime "<<Sprime<<std::endl;
std::cout << GridLogMessage << "dS "<<Sprime-S<<std::endl;
std::cout << GridLogMessage << "pred dS "<< dSpred <<std::endl;
assert( fabs(real(Sprime-S-dSpred)) < 1.0e-2 ) ;
std::cout<< GridLogMessage << "Done" <<std::endl;
Grid_finalize();
}
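Reading off the code above: with U'_mu = U_mu + p_mu U_mu dt, the predicted change of the action is
dS_pred = -2 dt sum_{x,mu} tr( p_mu(x) (U dS/dU)_mu(x) )
and the final assert requires |Re(S' - S - dS_pred)| < 1e-2 for the finite step dt = 1e-4, i.e. the analytic force contraction must match the measured action difference.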

View File

@ -34,6 +34,8 @@ class ScalarActionParameters : Serializable {
double, lambda, double, lambda,
double, g); double, g);
ScalarActionParameters() = default;
template <class ReaderClass > template <class ReaderClass >
ScalarActionParameters(Reader<ReaderClass>& Reader){ ScalarActionParameters(Reader<ReaderClass>& Reader){
read(Reader, "ScalarAction", *this); read(Reader, "ScalarAction", *this);
@ -125,9 +127,12 @@ int main(int argc, char **argv) {
TheHMC.Resources.AddGrid("scalar", ScalarGrid); TheHMC.Resources.AddGrid("scalar", ScalarGrid);
std::cout << "Lattice size : " << GridDefaultLatt() << std::endl; std::cout << "Lattice size : " << GridDefaultLatt() << std::endl;
ScalarActionParameters SPar(Reader);
// Checkpointer definition // Checkpointer definition
CheckpointerParameters CPparams(Reader); CheckpointerParameters CPparams(Reader);
TheHMC.Resources.LoadBinaryCheckpointer(CPparams); //TheHMC.Resources.LoadBinaryCheckpointer(CPparams);
TheHMC.Resources.LoadScidacCheckpointer(CPparams, SPar);
RNGModuleParameters RNGpar(Reader); RNGModuleParameters RNGpar(Reader);
TheHMC.Resources.SetRNGSeeds(RNGpar); TheHMC.Resources.SetRNGSeeds(RNGpar);
@ -140,7 +145,6 @@ int main(int argc, char **argv) {
// Collect actions, here use more encapsulation // Collect actions, here use more encapsulation
// Scalar action in adjoint representation // Scalar action in adjoint representation
ScalarActionParameters SPar(Reader);
ScalarAction Saction(SPar.mass_squared, SPar.lambda, SPar.g); ScalarAction Saction(SPar.mass_squared, SPar.lambda, SPar.g);
// Collect actions // Collect actions
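Note the reordering in this file: the action parameters must now be read before the checkpointer is created, because they are handed to the Scidac checkpointer as metadata. In sketch form:
ScalarActionParameters SPar(Reader); // read first
CheckpointerParameters CPparams(Reader);
TheHMC.Resources.LoadScidacCheckpointer(CPparams, SPar); // SPar embedded as record metadata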

View File

@ -33,6 +33,7 @@ namespace Grid{
GRID_SERIALIZABLE_CLASS_MEMBERS(ActionParameters, GRID_SERIALIZABLE_CLASS_MEMBERS(ActionParameters,
double, beta) double, beta)
ActionParameters() = default;
template <class ReaderClass > template <class ReaderClass >
ActionParameters(Reader<ReaderClass>& Reader){ ActionParameters(Reader<ReaderClass>& Reader){
@ -68,11 +69,15 @@ int main(int argc, char **argv) {
} }
Serialiser Reader(TheHMC.ParameterFile); Serialiser Reader(TheHMC.ParameterFile);
// Read parameters from input file
ActionParameters WilsonPar(Reader);
// Checkpointer definition // Checkpointer definition
CheckpointerParameters CPparams(Reader); CheckpointerParameters CPparams(Reader);
TheHMC.Resources.LoadNerscCheckpointer(CPparams); //TheHMC.Resources.LoadNerscCheckpointer(CPparams);
// Store metadata in the Scidac checkpointer
TheHMC.Resources.LoadScidacCheckpointer(CPparams, WilsonPar);
RNGModuleParameters RNGpar(Reader); RNGModuleParameters RNGpar(Reader);
TheHMC.Resources.SetRNGSeeds(RNGpar); TheHMC.Resources.SetRNGSeeds(RNGpar);
@ -91,8 +96,6 @@ int main(int argc, char **argv) {
// need wrappers of the fermionic classes // need wrappers of the fermionic classes
// that have a complex construction // that have a complex construction
// standard // standard
ActionParameters WilsonPar(Reader);
//RealD beta = 6.4 ;
WilsonGaugeActionR Waction(WilsonPar.beta); WilsonGaugeActionR Waction(WilsonPar.beta);
ActionLevel<HMCWrapper::Field> Level1(1); ActionLevel<HMCWrapper::Field> Level1(1);

View File

@ -0,0 +1,253 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./tests/Test_dwf_compressed_lanczos_reorg.cc
Copyright (C) 2017
Author: Leans heavily on Christoph Lehner's code
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
/*
* Reimplement the badly named "multigrid" lanczos as compressed Lanczos using the features
* in Grid that were intended to be used to support blocked Aggregates, from
*/
#include <Grid/Grid.h>
#include <Grid/algorithms/iterative/ImplicitlyRestartedLanczos.h>
#include <Grid/algorithms/iterative/LocalCoherenceLanczos.h>
using namespace std;
using namespace Grid;
using namespace Grid::QCD;
template<class Fobj,class CComplex,int nbasis>
class LocalCoherenceLanczosScidac : public LocalCoherenceLanczos<Fobj,CComplex,nbasis>
{
public:
typedef iVector<CComplex,nbasis > CoarseSiteVector;
typedef Lattice<CoarseSiteVector> CoarseField;
typedef Lattice<CComplex> CoarseScalar; // used for inner products on fine field
typedef Lattice<Fobj> FineField;
LocalCoherenceLanczosScidac(GridBase *FineGrid,GridBase *CoarseGrid,
LinearOperatorBase<FineField> &FineOp,
int checkerboard)
// Base constructor
: LocalCoherenceLanczos<Fobj,CComplex,nbasis>(FineGrid,CoarseGrid,FineOp,checkerboard)
{};
void checkpointFine(std::string evecs_file,std::string evals_file)
{
assert(this->subspace.size()==nbasis);
emptyUserRecord record;
Grid::QCD::ScidacWriter WR(this->_FineGrid->IsBoss());
WR.open(evecs_file);
for(int k=0;k<nbasis;k++) {
WR.writeScidacFieldRecord(this->subspace[k],record);
}
WR.close();
XmlWriter WRx(evals_file);
write(WRx,"evals",this->evals_fine);
}
void checkpointFineRestore(std::string evecs_file,std::string evals_file)
{
this->evals_fine.resize(nbasis);
this->subspace.resize(nbasis,this->_FineGrid);
std::cout << GridLogIRL<< "checkpointFineRestore: Reading evals from "<<evals_file<<std::endl;
XmlReader RDx(evals_file);
read(RDx,"evals",this->evals_fine);
assert(this->evals_fine.size()==nbasis);
std::cout << GridLogIRL<< "checkpointFineRestore: Reading evecs from "<<evecs_file<<std::endl;
emptyUserRecord record;
Grid::QCD::ScidacReader RD ;
RD.open(evecs_file);
for(int k=0;k<nbasis;k++) {
this->subspace[k].checkerboard=this->_checkerboard;
RD.readScidacFieldRecord(this->subspace[k],record);
}
RD.close();
}
void checkpointCoarse(std::string evecs_file,std::string evals_file)
{
int n = this->evec_coarse.size();
emptyUserRecord record;
Grid::QCD::ScidacWriter WR(this->_CoarseGrid->IsBoss());
WR.open(evecs_file);
for(int k=0;k<n;k++) {
WR.writeScidacFieldRecord(this->evec_coarse[k],record);
}
WR.close();
XmlWriter WRx(evals_file);
write(WRx,"evals",this->evals_coarse);
}
void checkpointCoarseRestore(std::string evecs_file,std::string evals_file,int nvec)
{
std::cout << "resizing coarse vecs to " << nvec<< std::endl;
this->evals_coarse.resize(nvec);
this->evec_coarse.resize(nvec,this->_CoarseGrid);
std::cout << GridLogIRL<< "checkpointCoarseRestore: Reading evals from "<<evals_file<<std::endl;
XmlReader RDx(evals_file);
read(RDx,"evals",this->evals_coarse);
assert(this->evals_coarse.size()==nvec);
emptyUserRecord record;
std::cout << GridLogIRL<< "checkpointCoarseRestore: Reading evecs from "<<evecs_file<<std::endl;
Grid::QCD::ScidacReader RD ;
RD.open(evecs_file);
for(int k=0;k<nvec;k++) {
RD.readScidacFieldRecord(this->evec_coarse[k],record);
}
RD.close();
}
};
int main (int argc, char ** argv) {
Grid_init(&argc,&argv);
GridLogIRL.TimingMode(1);
LocalCoherenceLanczosParams Params;
{
Params.omega.resize(10);
Params.blockSize.resize(5);
XmlWriter writer("Params_template.xml");
write(writer,"Params",Params);
std::cout << GridLogMessage << " Written Params_template.xml" <<std::endl;
}
{
XmlReader reader(std::string("./Params.xml"));
read(reader, "Params", Params);
}
int Ls = (int)Params.omega.size();
RealD mass = Params.mass;
RealD M5 = Params.M5;
std::vector<int> blockSize = Params.blockSize;
// Grids
GridCartesian * UGrid = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(),
GridDefaultSimd(Nd,vComplex::Nsimd()),
GridDefaultMpi());
GridRedBlackCartesian * UrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid);
GridCartesian * FGrid = SpaceTimeGrid::makeFiveDimGrid(Ls,UGrid);
GridRedBlackCartesian * FrbGrid = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,UGrid);
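// blockSize carries one entry per 4d dimension plus one for Ls; every fine
// extent must divide exactly by its block (asserted below).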
std::vector<int> fineLatt = GridDefaultLatt();
int dims=fineLatt.size();
assert(blockSize.size()==dims+1);
std::vector<int> coarseLatt(dims);
std::vector<int> coarseLatt5d ;
for (int d=0;d<coarseLatt.size();d++){
coarseLatt[d] = fineLatt[d]/blockSize[d]; assert(coarseLatt[d]*blockSize[d]==fineLatt[d]);
}
std::cout << GridLogMessage<< " 5d coarse lattice is ";
for (int i=0;i<coarseLatt.size();i++){
std::cout << coarseLatt[i]<<"x";
}
int cLs = Ls/blockSize[dims]; assert(cLs*blockSize[dims]==Ls);
std::cout << cLs<<std::endl;
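// Worked example (illustrative values): a 16^3 x 32 fine lattice with Ls=12
// and blockSize {4,4,4,4,4} gives a 4x4x4x8 coarse lattice with cLs = 12/4 = 3.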
GridCartesian * CoarseGrid4 = SpaceTimeGrid::makeFourDimGrid(coarseLatt, GridDefaultSimd(Nd,vComplex::Nsimd()),GridDefaultMpi());
GridRedBlackCartesian * CoarseGrid4rb = SpaceTimeGrid::makeFourDimRedBlackGrid(CoarseGrid4);
GridCartesian * CoarseGrid5 = SpaceTimeGrid::makeFiveDimGrid(cLs,CoarseGrid4);
// Gauge field
LatticeGaugeField Umu(UGrid);
FieldMetaData header;
NerscIO::readConfiguration(Umu,header,Params.config);
std::cout << GridLogMessage << "Lattice dimensions: " << GridDefaultLatt() << " Ls: " << Ls << std::endl;
// ZMobius EO Operator
ZMobiusFermionR Ddwf(Umu, *FGrid, *FrbGrid, *UGrid, *UrbGrid, mass, M5, Params.omega,1.,0.);
SchurDiagTwoOperator<ZMobiusFermionR,LatticeFermion> HermOp(Ddwf);
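// Red-black preconditioned Hermitian operator; the fine Lanczos therefore
// runs on the odd checkerboard of FrbGrid, matching the Odd flag passed below.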
// Eigenvector storage
LanczosParams fine =Params.FineParams;
LanczosParams coarse=Params.CoarseParams;
const int Ns1 = fine.Nstop; const int Ns2 = coarse.Nstop;
const int Nk1 = fine.Nk; const int Nk2 = coarse.Nk;
const int Nm1 = fine.Nm; const int Nm2 = coarse.Nm;
std::cout << GridLogMessage << "Keep " << fine.Nstop << " fine vectors" << std::endl;
std::cout << GridLogMessage << "Keep " << coarse.Nstop << " coarse vectors" << std::endl;
assert(Nm2 >= Nm1);
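// nbasis is fixed at compile time and must agree with the number of fine
// vectors kept (fine.Nstop); change both together.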
const int nbasis= 60;
assert(nbasis==Ns1);
LocalCoherenceLanczosScidac<vSpinColourVector,vTComplex,nbasis> _LocalCoherenceLanczos(FrbGrid,CoarseGrid5,HermOp,Odd);
std::cout << GridLogMessage << "Constructed LocalCoherenceLanczos" << std::endl;
assert( (Params.doFine)||(Params.doFineRead));
if ( Params.doFine ) {
std::cout << GridLogMessage << "Performing fine grid IRL Nstop "<< Ns1 << " Nk "<<Nk1<<" Nm "<<Nm1<< std::endl;
_LocalCoherenceLanczos.calcFine(fine.Cheby,
fine.Nstop,fine.Nk,fine.Nm,
fine.resid,fine.MaxIt,
fine.betastp,fine.MinRes);
std::cout << GridLogIRL<<"Checkpointing Fine evecs"<<std::endl;
_LocalCoherenceLanczos.checkpointFine(std::string("evecs.scidac"),std::string("evals.xml"));
_LocalCoherenceLanczos.testFine(fine.resid*100.0); // Check fine evecs at a relaxed (100x) tolerance
std::cout << GridLogIRL<<"Orthogonalising"<<std::endl;
_LocalCoherenceLanczos.Orthogonalise();
std::cout << GridLogIRL<<"Orthogonaled"<<std::endl;
}
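// Alternative to doFine: restore a previously checkpointed fine subspace
// instead of recomputing it, then re-test and re-orthogonalise.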
if ( Params.doFineRead ) {
_LocalCoherenceLanczos.checkpointFineRestore(std::string("evecs.scidac"),std::string("evals.xml"));
_LocalCoherenceLanczos.testFine(fine.resid*100.0); // Check fine evecs at a relaxed (100x) tolerance
_LocalCoherenceLanczos.Orthogonalise();
}
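// Second-level IRL on the blocked coarse space; residuals are checked through
// the smoother with the relaxed coarse tolerance.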
if ( Params.doCoarse ) {
std::cout << GridLogMessage << "Performing coarse grid IRL Nstop "<< Ns2<< " Nk "<<Nk2<<" Nm "<<Nm2<< std::endl;
_LocalCoherenceLanczos.calcCoarse(coarse.Cheby,Params.Smoother,Params.coarse_relax_tol,
coarse.Nstop, coarse.Nk,coarse.Nm,
coarse.resid, coarse.MaxIt,
coarse.betastp,coarse.MinRes);
std::cout << GridLogIRL<<"Checkpointing coarse evecs"<<std::endl;
_LocalCoherenceLanczos.checkpointCoarse(std::string("evecs.coarse.scidac"),std::string("evals.coarse.xml"));
}
if ( Params.doCoarseRead ) {
// Verify the checkpointed coarse eigenpack can be re-read and still passes the residual check
_LocalCoherenceLanczos.checkpointCoarseRestore(std::string("evecs.coarse.scidac"),std::string("evals.coarse.xml"),coarse.Nstop);
_LocalCoherenceLanczos.testCoarse(coarse.resid*100.0,Params.Smoother,Params.coarse_relax_tol); // Coarse check
}
Grid_finalize();
}

View File

@@ -60,7 +60,7 @@ public:
 {
 assert(this->subspace.size()==nbasis);
 Grid::emptyUserRecord record;
-Grid::QCD::ScidacWriter WR;
+Grid::QCD::ScidacWriter WR(this->_FineGrid->IsBoss());
 WR.open(evecs_file);
 for(int k=0;k<nbasis;k++) {
 WR.writeScidacFieldRecord(this->subspace[k],record);
@@ -98,7 +98,7 @@ public:
 {
 int n = this->evec_coarse.size();
 emptyUserRecord record;
-Grid::QCD::ScidacWriter WR;
+Grid::QCD::ScidacWriter WR(this->_CoarseGrid->IsBoss());
 WR.open(evecs_file);
 for(int k=0;k<n;k++) {
 WR.writeScidacFieldRecord(this->evec_coarse[k],record);
@@ -182,7 +182,6 @@ int main (int argc, char ** argv) {
 GridCartesian * CoarseGrid4 = SpaceTimeGrid::makeFourDimGrid(coarseLatt, GridDefaultSimd(Nd,vComplex::Nsimd()),GridDefaultMpi());
 GridRedBlackCartesian * CoarseGrid4rb = SpaceTimeGrid::makeFourDimRedBlackGrid(CoarseGrid4);
 GridCartesian * CoarseGrid5 = SpaceTimeGrid::makeFiveDimGrid(cLs,CoarseGrid4);
-GridRedBlackCartesian * CoarseGrid5rb = SpaceTimeGrid::makeFourDimRedBlackGrid(CoarseGrid5);
 // Gauge field
 LatticeGaugeField Umu(UGrid);
@@ -208,7 +207,7 @@ int main (int argc, char ** argv) {
 const int nbasis= 60;
 assert(nbasis==Ns1);
-LocalCoherenceLanczosScidac<vSpinColourVector,vTComplex,nbasis> _LocalCoherenceLanczos(FrbGrid,CoarseGrid5rb,HermOp,Odd);
+LocalCoherenceLanczosScidac<vSpinColourVector,vTComplex,nbasis> _LocalCoherenceLanczos(FrbGrid,CoarseGrid5,HermOp,Odd);
 std::cout << GridLogMessage << "Constructed LocalCoherenceLanczos" << std::endl;
 assert( (Params.doFine)||(Params.doFineRead));

View File

@@ -115,7 +115,7 @@ int main (int argc, char ** argv)
 {
 FGrid->Barrier();
-// ScidacWriter _ScidacWriter;
+ScidacWriter _ScidacWriter(FGrid->IsBoss());
 // _ScidacWriter.open(file);
 std::cout << GridLogMessage << "****************************************************************** "<<std::endl;
 std::cout << GridLogMessage << " Writing out gauge field "<<std::endl;
@@ -145,7 +145,7 @@ int main (int argc, char ** argv)
 std::cout << GridLogMessage << "****************************************************************** "<<std::endl;
 std::stringstream filefn; filefn << filef << "."<< n;
-// ScidacWriter _ScidacWriter;
+ScidacWriter _ScidacWriter(FGrid->IsBoss());
 // _ScidacWriter.open(filefn.str());
 // _ScidacWriter.writeScidacFieldRecord(src[n],record);
 // _ScidacWriter.close();