mirror of
https://github.com/paboyle/Grid.git
synced 2025-04-05 03:35:55 +01:00
Merge remote-tracking branch 'upstream/develop' into feature/wilsonmg
This commit is contained in:
commit
61812ab7f1
17
.travis.yml
17
.travis.yml
@ -19,6 +19,8 @@ before_install:
|
||||
- if [[ "$TRAVIS_OS_NAME" == "osx" ]]; then brew install libmpc; fi
|
||||
|
||||
install:
|
||||
- export CWD=`pwd`
|
||||
- echo $CWD
|
||||
- export CC=$CC$VERSION
|
||||
- export CXX=$CXX$VERSION
|
||||
- echo $PATH
|
||||
@ -36,11 +38,22 @@ script:
|
||||
- ./bootstrap.sh
|
||||
- mkdir build
|
||||
- cd build
|
||||
- ../configure --enable-precision=single --enable-simd=SSE4 --enable-comms=none
|
||||
- mkdir lime
|
||||
- cd lime
|
||||
- mkdir build
|
||||
- cd build
|
||||
- wget http://usqcd-software.github.io/downloads/c-lime/lime-1.3.2.tar.gz
|
||||
- tar xf lime-1.3.2.tar.gz
|
||||
- cd lime-1.3.2
|
||||
- ./configure --prefix=$CWD/build/lime/install
|
||||
- make -j4
|
||||
- make install
|
||||
- cd $CWD/build
|
||||
- ../configure --enable-precision=single --enable-simd=SSE4 --enable-comms=none --with-lime=$CWD/build/lime/install
|
||||
- make -j4
|
||||
- ./benchmarks/Benchmark_dwf --threads 1 --debug-signals
|
||||
- echo make clean
|
||||
- ../configure --enable-precision=double --enable-simd=SSE4 --enable-comms=none
|
||||
- ../configure --enable-precision=double --enable-simd=SSE4 --enable-comms=none --with-lime=$CWD/build/lime/install
|
||||
- make -j4
|
||||
- ./benchmarks/Benchmark_dwf --threads 1 --debug-signals
|
||||
- make check
|
||||
|
2
VERSION
2
VERSION
@ -1,4 +1,4 @@
|
||||
Version : 0.7.0
|
||||
Version : 0.8.0
|
||||
|
||||
- Clang 3.5 and above, ICPC v16 and above, GCC 6.3 and above recommended
|
||||
- MPI and MPI3 comms optimisations for KNL and OPA finished
|
||||
|
@ -158,8 +158,10 @@ public:
|
||||
|
||||
dbytes=0;
|
||||
ncomm=0;
|
||||
|
||||
parallel_for(int dir=0;dir<8;dir++){
|
||||
#ifdef GRID_OMP
|
||||
#pragma omp parallel for num_threads(Grid::CartesianCommunicator::nCommThreads)
|
||||
#endif
|
||||
for(int dir=0;dir<8;dir++){
|
||||
|
||||
double tbytes;
|
||||
int mu =dir % 4;
|
||||
@ -175,9 +177,14 @@ public:
|
||||
int comm_proc = mpi_layout[mu]-1;
|
||||
Grid.ShiftedRanks(mu,comm_proc,xmit_to_rank,recv_from_rank);
|
||||
}
|
||||
#ifdef GRID_OMP
|
||||
int tid = omp_get_thread_num();
|
||||
#else
|
||||
int tid = dir;
|
||||
#endif
|
||||
tbytes= Grid.StencilSendToRecvFrom((void *)&xbuf[dir][0], xmit_to_rank,
|
||||
(void *)&rbuf[dir][0], recv_from_rank,
|
||||
bytes,dir);
|
||||
bytes,tid);
|
||||
|
||||
#ifdef GRID_OMP
|
||||
#pragma omp atomic
|
||||
|
@ -169,7 +169,11 @@ int main (int argc, char ** argv)
|
||||
for(int lat=4;lat<=maxlat;lat+=4){
|
||||
for(int Ls=8;Ls<=8;Ls*=2){
|
||||
|
||||
std::vector<int> latt_size ({lat,lat,lat,lat});
|
||||
std::vector<int> latt_size ({lat*mpi_layout[0],
|
||||
lat*mpi_layout[1],
|
||||
lat*mpi_layout[2],
|
||||
lat*mpi_layout[3]});
|
||||
|
||||
|
||||
GridCartesian Grid(latt_size,simd_layout,mpi_layout);
|
||||
RealD Nrank = Grid._Nprocessors;
|
||||
@ -446,7 +450,7 @@ int main (int argc, char ** argv)
|
||||
}
|
||||
|
||||
|
||||
|
||||
#ifdef GRID_OMP
|
||||
std::cout<<GridLogMessage << "===================================================================================================="<<std::endl;
|
||||
std::cout<<GridLogMessage << "= Benchmarking threaded STENCIL halo exchange in "<<nmu<<" dimensions"<<std::endl;
|
||||
std::cout<<GridLogMessage << "===================================================================================================="<<std::endl;
|
||||
@ -485,7 +489,8 @@ int main (int argc, char ** argv)
|
||||
dbytes=0;
|
||||
ncomm=0;
|
||||
|
||||
parallel_for(int dir=0;dir<8;dir++){
|
||||
#pragma omp parallel for num_threads(Grid::CartesianCommunicator::nCommThreads)
|
||||
for(int dir=0;dir<8;dir++){
|
||||
|
||||
double tbytes;
|
||||
int mu =dir % 4;
|
||||
@ -502,9 +507,9 @@ int main (int argc, char ** argv)
|
||||
int comm_proc = mpi_layout[mu]-1;
|
||||
Grid.ShiftedRanks(mu,comm_proc,xmit_to_rank,recv_from_rank);
|
||||
}
|
||||
|
||||
int tid = omp_get_thread_num();
|
||||
tbytes= Grid.StencilSendToRecvFrom((void *)&xbuf[dir][0], xmit_to_rank,
|
||||
(void *)&rbuf[dir][0], recv_from_rank, bytes,dir);
|
||||
(void *)&rbuf[dir][0], recv_from_rank, bytes,tid);
|
||||
|
||||
#pragma omp atomic
|
||||
dbytes+=tbytes;
|
||||
@ -532,7 +537,7 @@ int main (int argc, char ** argv)
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
#endif
|
||||
std::cout<<GridLogMessage << "===================================================================================================="<<std::endl;
|
||||
std::cout<<GridLogMessage << "= All done; Bye Bye"<<std::endl;
|
||||
std::cout<<GridLogMessage << "===================================================================================================="<<std::endl;
|
||||
|
@ -55,7 +55,7 @@ int main (int argc, char ** argv)
|
||||
std::cout<<GridLogMessage << "===================================================================================================="<<std::endl;
|
||||
std::cout<<GridLogMessage << " L "<<"\t\t"<<"bytes"<<"\t\t\t"<<"GB/s"<<"\t\t"<<"Gflop/s"<<"\t\t seconds"<<std::endl;
|
||||
std::cout<<GridLogMessage << "----------------------------------------------------------"<<std::endl;
|
||||
uint64_t lmax=96;
|
||||
uint64_t lmax=64;
|
||||
#define NLOOP (10*lmax*lmax*lmax*lmax/vol)
|
||||
for(int lat=8;lat<=lmax;lat+=8){
|
||||
|
||||
|
@ -35,9 +35,11 @@ using namespace Grid::QCD;
|
||||
int main (int argc, char ** argv)
|
||||
{
|
||||
Grid_init(&argc,&argv);
|
||||
#define LMAX (64)
|
||||
#define LMAX (32)
|
||||
#define LMIN (16)
|
||||
#define LINC (4)
|
||||
|
||||
int64_t Nloop=20;
|
||||
int64_t Nloop=2000;
|
||||
|
||||
std::vector<int> simd_layout = GridDefaultSimd(Nd,vComplex::Nsimd());
|
||||
std::vector<int> mpi_layout = GridDefaultMpi();
|
||||
@ -51,7 +53,7 @@ int main (int argc, char ** argv)
|
||||
std::cout<<GridLogMessage << " L "<<"\t\t"<<"bytes"<<"\t\t\t"<<"GB/s\t\t GFlop/s"<<std::endl;
|
||||
std::cout<<GridLogMessage << "----------------------------------------------------------"<<std::endl;
|
||||
|
||||
for(int lat=2;lat<=LMAX;lat+=2){
|
||||
for(int lat=LMIN;lat<=LMAX;lat+=LINC){
|
||||
|
||||
std::vector<int> latt_size ({lat*mpi_layout[0],lat*mpi_layout[1],lat*mpi_layout[2],lat*mpi_layout[3]});
|
||||
int64_t vol = latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3];
|
||||
@ -83,7 +85,7 @@ int main (int argc, char ** argv)
|
||||
std::cout<<GridLogMessage << " L "<<"\t\t"<<"bytes"<<"\t\t\t"<<"GB/s\t\t GFlop/s"<<std::endl;
|
||||
std::cout<<GridLogMessage << "----------------------------------------------------------"<<std::endl;
|
||||
|
||||
for(int lat=2;lat<=LMAX;lat+=2){
|
||||
for(int lat=LMIN;lat<=LMAX;lat+=LINC){
|
||||
|
||||
std::vector<int> latt_size ({lat*mpi_layout[0],lat*mpi_layout[1],lat*mpi_layout[2],lat*mpi_layout[3]});
|
||||
int64_t vol = latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3];
|
||||
@ -114,7 +116,7 @@ int main (int argc, char ** argv)
|
||||
std::cout<<GridLogMessage << " L "<<"\t\t"<<"bytes"<<"\t\t\t"<<"GB/s\t\t GFlop/s"<<std::endl;
|
||||
std::cout<<GridLogMessage << "----------------------------------------------------------"<<std::endl;
|
||||
|
||||
for(int lat=2;lat<=LMAX;lat+=2){
|
||||
for(int lat=LMIN;lat<=LMAX;lat+=LINC){
|
||||
|
||||
std::vector<int> latt_size ({lat*mpi_layout[0],lat*mpi_layout[1],lat*mpi_layout[2],lat*mpi_layout[3]});
|
||||
int64_t vol = latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3];
|
||||
@ -145,7 +147,38 @@ int main (int argc, char ** argv)
|
||||
std::cout<<GridLogMessage << " L "<<"\t\t"<<"bytes"<<"\t\t\t"<<"GB/s\t\t GFlop/s"<<std::endl;
|
||||
std::cout<<GridLogMessage << "----------------------------------------------------------"<<std::endl;
|
||||
|
||||
for(int lat=2;lat<=LMAX;lat+=2){
|
||||
for(int lat=LMIN;lat<=LMAX;lat+=LINC){
|
||||
|
||||
std::vector<int> latt_size ({lat*mpi_layout[0],lat*mpi_layout[1],lat*mpi_layout[2],lat*mpi_layout[3]});
|
||||
int64_t vol = latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3];
|
||||
|
||||
GridCartesian Grid(latt_size,simd_layout,mpi_layout);
|
||||
GridParallelRNG pRNG(&Grid); pRNG.SeedFixedIntegers(std::vector<int>({45,12,81,9}));
|
||||
|
||||
LatticeColourMatrix z(&Grid); random(pRNG,z);
|
||||
LatticeColourMatrix x(&Grid); random(pRNG,x);
|
||||
LatticeColourMatrix y(&Grid); random(pRNG,y);
|
||||
|
||||
double start=usecond();
|
||||
for(int64_t i=0;i<Nloop;i++){
|
||||
mac(z,x,y);
|
||||
}
|
||||
double stop=usecond();
|
||||
double time = (stop-start)/Nloop*1000.0;
|
||||
|
||||
double bytes=3*vol*Nc*Nc*sizeof(Complex);
|
||||
double flops=Nc*Nc*(6+8+8)*vol;
|
||||
std::cout<<GridLogMessage<<std::setprecision(3) << lat<<"\t\t"<<bytes<<" \t\t"<<bytes/time<<"\t\t" << flops/time<<std::endl;
|
||||
|
||||
}
|
||||
|
||||
std::cout<<GridLogMessage << "===================================================================================================="<<std::endl;
|
||||
std::cout<<GridLogMessage << "= Benchmarking SU3xSU3 CovShiftForward(z,x,y)"<<std::endl;
|
||||
std::cout<<GridLogMessage << "===================================================================================================="<<std::endl;
|
||||
std::cout<<GridLogMessage << " L "<<"\t\t"<<"bytes"<<"\t\t\t"<<"GB/s\t\t GFlop/s"<<std::endl;
|
||||
std::cout<<GridLogMessage << "----------------------------------------------------------"<<std::endl;
|
||||
|
||||
for(int lat=LMIN;lat<=LMAX;lat+=LINC){
|
||||
|
||||
std::vector<int> latt_size ({lat*mpi_layout[0],lat*mpi_layout[1],lat*mpi_layout[2],lat*mpi_layout[3]});
|
||||
int64_t vol = latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3];
|
||||
@ -157,18 +190,64 @@ int main (int argc, char ** argv)
|
||||
LatticeColourMatrix x(&Grid); random(pRNG,x);
|
||||
LatticeColourMatrix y(&Grid); random(pRNG,y);
|
||||
|
||||
double start=usecond();
|
||||
for(int64_t i=0;i<Nloop;i++){
|
||||
mac(z,x,y);
|
||||
for(int mu=0;mu<4;mu++){
|
||||
double start=usecond();
|
||||
for(int64_t i=0;i<Nloop;i++){
|
||||
z = PeriodicBC::CovShiftForward(x,mu,y);
|
||||
}
|
||||
double stop=usecond();
|
||||
double time = (stop-start)/Nloop*1000.0;
|
||||
|
||||
|
||||
double bytes=3*vol*Nc*Nc*sizeof(Complex);
|
||||
double flops=Nc*Nc*(6+8+8)*vol;
|
||||
std::cout<<GridLogMessage<<std::setprecision(3) << lat<<"\t\t"<<bytes<<" \t\t"<<bytes/time<<"\t\t" << flops/time<<std::endl;
|
||||
}
|
||||
double stop=usecond();
|
||||
double time = (stop-start)/Nloop*1000.0;
|
||||
|
||||
double bytes=3*vol*Nc*Nc*sizeof(Complex);
|
||||
double flops=Nc*Nc*(8+8+8)*vol;
|
||||
std::cout<<GridLogMessage<<std::setprecision(3) << lat<<"\t\t"<<bytes<<" \t\t"<<bytes/time<<"\t\t" << flops/time<<std::endl;
|
||||
}
|
||||
#if 1
|
||||
std::cout<<GridLogMessage << "===================================================================================================="<<std::endl;
|
||||
std::cout<<GridLogMessage << "= Benchmarking SU3xSU3 z= x * Cshift(y)"<<std::endl;
|
||||
std::cout<<GridLogMessage << "===================================================================================================="<<std::endl;
|
||||
std::cout<<GridLogMessage << " L "<<"\t\t"<<"bytes"<<"\t\t\t"<<"GB/s\t\t GFlop/s"<<std::endl;
|
||||
std::cout<<GridLogMessage << "----------------------------------------------------------"<<std::endl;
|
||||
|
||||
for(int lat=LMIN;lat<=LMAX;lat+=LINC){
|
||||
std::vector<int> latt_size ({lat*mpi_layout[0],lat*mpi_layout[1],lat*mpi_layout[2],lat*mpi_layout[3]});
|
||||
int64_t vol = latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3];
|
||||
|
||||
GridCartesian Grid(latt_size,simd_layout,mpi_layout);
|
||||
GridParallelRNG pRNG(&Grid); pRNG.SeedFixedIntegers(std::vector<int>({45,12,81,9}));
|
||||
|
||||
LatticeColourMatrix z(&Grid); random(pRNG,z);
|
||||
LatticeColourMatrix x(&Grid); random(pRNG,x);
|
||||
LatticeColourMatrix y(&Grid); random(pRNG,y);
|
||||
LatticeColourMatrix tmp(&Grid);
|
||||
|
||||
for(int mu=0;mu<4;mu++){
|
||||
double tshift=0;
|
||||
double tmult =0;
|
||||
|
||||
double start=usecond();
|
||||
for(int64_t i=0;i<Nloop;i++){
|
||||
tshift-=usecond();
|
||||
tmp = Cshift(y,mu,-1);
|
||||
tshift+=usecond();
|
||||
tmult-=usecond();
|
||||
z = x*tmp;
|
||||
tmult+=usecond();
|
||||
}
|
||||
double stop=usecond();
|
||||
double time = (stop-start)/Nloop;
|
||||
tshift = tshift/Nloop;
|
||||
tmult = tmult /Nloop;
|
||||
|
||||
double bytes=3*vol*Nc*Nc*sizeof(Complex);
|
||||
double flops=Nc*Nc*(6+8+8)*vol;
|
||||
std::cout<<GridLogMessage<<std::setprecision(3) << "total us "<<time<<" shift "<<tshift <<" mult "<<tmult<<std::endl;
|
||||
time = time * 1000; // convert to NS for GB/s
|
||||
std::cout<<GridLogMessage<<std::setprecision(3) << lat<<"\t\t"<<bytes<<" \t\t"<<bytes/time<<"\t\t" << flops/time<<std::endl;
|
||||
}
|
||||
}
|
||||
|
||||
#endif
|
||||
Grid_finalize();
|
||||
}
|
||||
|
@ -340,7 +340,7 @@ case ${ac_PRECISION} in
|
||||
esac
|
||||
|
||||
###################### Shared memory allocation technique under MPI3
|
||||
AC_ARG_ENABLE([shm],[AC_HELP_STRING([--enable-shm=shmopen|hugetlbfs|shmnone],
|
||||
AC_ARG_ENABLE([shm],[AC_HELP_STRING([--enable-shm=shmopen|shmget|hugetlbfs|shmnone],
|
||||
[Select SHM allocation technique])],[ac_SHM=${enable_shm}],[ac_SHM=shmopen])
|
||||
|
||||
case ${ac_SHM} in
|
||||
@ -349,6 +349,10 @@ case ${ac_SHM} in
|
||||
AC_DEFINE([GRID_MPI3_SHMOPEN],[1],[GRID_MPI3_SHMOPEN] )
|
||||
;;
|
||||
|
||||
shmget)
|
||||
AC_DEFINE([GRID_MPI3_SHMGET],[1],[GRID_MPI3_SHMGET] )
|
||||
;;
|
||||
|
||||
shmnone)
|
||||
AC_DEFINE([GRID_MPI3_SHM_NONE],[1],[GRID_MPI3_SHM_NONE] )
|
||||
;;
|
||||
@ -366,7 +370,7 @@ esac
|
||||
AC_ARG_ENABLE([shmpath],[AC_HELP_STRING([--enable-shmpath=path],
|
||||
[Select SHM mmap base path for hugetlbfs])],
|
||||
[ac_SHMPATH=${enable_shmpath}],
|
||||
[ac_SHMPATH=/var/lib/hugetlbfs/pagesize-2MB/])
|
||||
[ac_SHMPATH=/var/lib/hugetlbfs/global/pagesize-2MB/])
|
||||
AC_DEFINE_UNQUOTED([GRID_SHM_PATH],["$ac_SHMPATH"],[Path to a hugetlbfs filesystem for MMAPing])
|
||||
|
||||
############### communication type selection
|
||||
|
@ -479,15 +479,13 @@ until convergence
|
||||
Field B(grid); B.checkerboard = evec[0].checkerboard;
|
||||
|
||||
// power of two search pattern; not every evalue in eval2 is assessed.
|
||||
int allconv =1;
|
||||
for(int jj = 1; jj<=Nstop; jj*=2){
|
||||
int j = Nstop-jj;
|
||||
RealD e = eval2_copy[j]; // Discard the evalue
|
||||
basisRotateJ(B,evec,Qt,j,0,Nk,Nm);
|
||||
if( _Tester.TestConvergence(j,eresid,B,e,evalMaxApprox) ) {
|
||||
if ( j > Nconv ) {
|
||||
Nconv=j+1;
|
||||
jj=Nstop; // Terminate the scan
|
||||
}
|
||||
if( !_Tester.TestConvergence(j,eresid,B,e,evalMaxApprox) ) {
|
||||
allconv=0;
|
||||
}
|
||||
}
|
||||
// Do evec[0] for good measure
|
||||
@ -495,8 +493,10 @@ until convergence
|
||||
int j=0;
|
||||
RealD e = eval2_copy[0];
|
||||
basisRotateJ(B,evec,Qt,j,0,Nk,Nm);
|
||||
_Tester.TestConvergence(j,eresid,B,e,evalMaxApprox);
|
||||
if( !_Tester.TestConvergence(j,eresid,B,e,evalMaxApprox) ) allconv=0;
|
||||
}
|
||||
if ( allconv ) Nconv = Nstop;
|
||||
|
||||
// test if we converged, if so, terminate
|
||||
std::cout<<GridLogIRL<<" #modes converged: >= "<<Nconv<<"/"<<Nstop<<std::endl;
|
||||
// if( Nconv>=Nstop || beta_k < betastp){
|
||||
|
@ -48,6 +48,7 @@ struct LanczosParams : Serializable {
|
||||
struct LocalCoherenceLanczosParams : Serializable {
|
||||
public:
|
||||
GRID_SERIALIZABLE_CLASS_MEMBERS(LocalCoherenceLanczosParams,
|
||||
bool, saveEvecs,
|
||||
bool, doFine,
|
||||
bool, doFineRead,
|
||||
bool, doCoarse,
|
||||
|
@ -277,7 +277,9 @@ public:
|
||||
uint8_t *cp = (uint8_t *)ptr;
|
||||
if ( ptr ) {
|
||||
// One touch per 4k page, static OMP loop to catch same loop order
|
||||
#ifdef GRID_OMP
|
||||
#pragma omp parallel for schedule(static)
|
||||
#endif
|
||||
for(size_type n=0;n<bytes;n+=4096){
|
||||
cp[n]=0;
|
||||
}
|
||||
|
@ -114,19 +114,151 @@ void GlobalSharedMemory::Init(Grid_MPI_Comm comm)
|
||||
assert(WorldNode!=-1);
|
||||
_ShmSetup=1;
|
||||
}
|
||||
|
||||
void GlobalSharedMemory::OptimalCommunicator(const std::vector<int> &processors,Grid_MPI_Comm & optimal_comm)
|
||||
// Gray encode support
|
||||
// Convert a binary-coded integer to its reflected Gray code, g = b ^ (b >> 1).
// Consecutive inputs map to outputs that differ in exactly one bit.
// The shift is applied to the unsigned bit pattern so the result does not
// depend on how the platform right-shifts negative signed values; for
// non-negative inputs the result is unchanged.
int BinaryToGray (int binary) {
  const unsigned int bits = static_cast<unsigned int>(binary);
  return static_cast<int>((bits >> 1) ^ bits);
}
|
||||
// Return the exponent e in [0, MAXLOG2] such that 2^e == TwoToPower,
// or -1 when TwoToPower is not a power of two within that range.
// This function only reports; callers that require a power of two
// assert on the result themselves.
int Log2Size(int TwoToPower,int MAXLOG2)
{
  // Track the power in a wider type and stop as soon as it exceeds the
  // target, so no shift ever reaches the width of int even when MAXLOG2
  // is larger than the number of bits in an int.
  long long power = 1;
  for(int i=0;i<=MAXLOG2;i++){
    if ( power == TwoToPower ) {
      return i;
    }
    if ( power > TwoToPower ) {
      break; // Larger powers cannot match either
    }
    power *= 2;
  }
  return -1;
}
|
||||
void GlobalSharedMemory::OptimalCommunicator(const std::vector<int> &processors,Grid_MPI_Comm & optimal_comm)
|
||||
{
|
||||
#undef HYPERCUBE
|
||||
#ifdef HYPERCUBE
|
||||
////////////////////////////////////////////////////////////////
|
||||
// Assert power of two shm_size.
|
||||
////////////////////////////////////////////////////////////////
|
||||
int log2size = Log2Size(WorldShmSize,MAXLOG2RANKSPERNODE);
|
||||
assert(log2size != -1);
|
||||
|
||||
////////////////////////////////////////////////////////////////
|
||||
// Identify the hypercube coordinate of this node using hostname
|
||||
////////////////////////////////////////////////////////////////
|
||||
// n runs 0...7 9...16 18...25 27...34 (8*4) 5 bits
|
||||
// i runs 0..7 3 bits
|
||||
// r runs 0..3 2 bits
|
||||
// 2^10 = 1024 nodes
|
||||
const int maxhdim = 10;
|
||||
std::vector<int> HyperCubeCoords(maxhdim,0);
|
||||
std::vector<int> RootHyperCubeCoords(maxhdim,0);
|
||||
int R;
|
||||
int I;
|
||||
int N;
|
||||
const int namelen = _POSIX_HOST_NAME_MAX;
|
||||
char name[namelen];
|
||||
|
||||
// Parse ICE-XA hostname to get hypercube location
|
||||
gethostname(name,namelen);
|
||||
int nscan = sscanf(name,"r%di%dn%d",&R,&I,&N) ;
|
||||
assert(nscan==3);
|
||||
|
||||
int nlo = N%9;
|
||||
int nhi = N/9;
|
||||
uint32_t hypercoor = (R<<8)|(I<<5)|(nhi<<3)|nlo ;
|
||||
uint32_t rootcoor = hypercoor;
|
||||
|
||||
//////////////////////////////////////////////////////////////////
|
||||
// Print debug info
|
||||
//////////////////////////////////////////////////////////////////
|
||||
for(int d=0;d<maxhdim;d++){
|
||||
HyperCubeCoords[d] = (hypercoor>>d)&0x1;
|
||||
}
|
||||
|
||||
std::string hname(name);
|
||||
std::cout << "hostname "<<hname<<std::endl;
|
||||
std::cout << "R " << R << " I " << I << " N "<< N<<
|
||||
<< " hypercoor 0x"<<std::hex<<hypercoor<<std::dec<<std::endl;
|
||||
|
||||
//////////////////////////////////////////////////////////////////
|
||||
// broadcast node 0's base coordinate for this partition.
|
||||
//////////////////////////////////////////////////////////////////
|
||||
MPI_Bcast(&rootcoor, sizeof(rootcoor), MPI_BYTE, 0, WorldComm);
|
||||
hypercoor=hypercoor-rootcoor;
|
||||
assert(hypercoor<WorldSize);
|
||||
assert(hypercoor>=0);
|
||||
|
||||
//////////////////////////////////////
|
||||
// Printing
|
||||
//////////////////////////////////////
|
||||
for(int d=0;d<maxhdim;d++){
|
||||
HyperCubeCoords[d] = (hypercoor>>d)&0x1;
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////
|
||||
// Identify subblock of ranks on node spreading across dims
|
||||
// in a maximally symmetrical way
|
||||
////////////////////////////////////////////////////////////////
|
||||
int ndimension = processors.size();
|
||||
std::vector<int> processor_coor(ndimension);
|
||||
std::vector<int> WorldDims = processors; std::vector<int> ShmDims (ndimension,1); std::vector<int> NodeDims (ndimension);
|
||||
std::vector<int> ShmCoor (ndimension); std::vector<int> NodeCoor (ndimension); std::vector<int> WorldCoor(ndimension);
|
||||
std::vector<int> HyperCoor(ndimension);
|
||||
int dim = 0;
|
||||
for(int l2=0;l2<log2size;l2++){
|
||||
while ( (WorldDims[dim] / ShmDims[dim]) <= 1 ) dim=(dim+1)%ndimension;
|
||||
ShmDims[dim]*=2;
|
||||
dim=(dim+1)%ndimension;
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////
|
||||
// Establish torus of processes and nodes with sub-blockings
|
||||
////////////////////////////////////////////////////////////////
|
||||
for(int d=0;d<ndimension;d++){
|
||||
NodeDims[d] = WorldDims[d]/ShmDims[d];
|
||||
}
|
||||
////////////////////////////////////////////////////////////////
|
||||
// Map Hcube according to physical lattice
|
||||
// must partition. Loop over dims and find out who would join.
|
||||
////////////////////////////////////////////////////////////////
|
||||
int hcoor = hypercoor;
|
||||
for(int d=0;d<ndimension;d++){
|
||||
int bits = Log2Size(NodeDims[d],MAXLOG2RANKSPERNODE);
|
||||
int msk = (0x1<<bits)-1;
|
||||
HyperCoor[d]=hcoor & msk;
|
||||
HyperCoor[d]=BinaryToGray(HyperCoor[d]); // Space filling curve magic
|
||||
hcoor = hcoor >> bits;
|
||||
}
|
||||
////////////////////////////////////////////////////////////////
|
||||
// Check processor counts match
|
||||
////////////////////////////////////////////////////////////////
|
||||
int Nprocessors=1;
|
||||
for(int i=0;i<ndimension;i++){
|
||||
Nprocessors*=processors[i];
|
||||
}
|
||||
assert(WorldSize==Nprocessors);
|
||||
|
||||
////////////////////////////////////////////////////////////////
|
||||
// Establish mapping between lexico physics coord and WorldRank
|
||||
////////////////////////////////////////////////////////////////
|
||||
int rank;
|
||||
|
||||
Lexicographic::CoorFromIndexReversed(NodeCoor,WorldNode ,NodeDims);
|
||||
|
||||
for(int d=0;d<ndimension;d++) NodeCoor[d]=HyperCoor[d];
|
||||
|
||||
Lexicographic::CoorFromIndexReversed(ShmCoor ,WorldShmRank,ShmDims);
|
||||
for(int d=0;d<ndimension;d++) WorldCoor[d] = NodeCoor[d]*ShmDims[d]+ShmCoor[d];
|
||||
Lexicographic::IndexFromCoorReversed(WorldCoor,rank,WorldDims);
|
||||
|
||||
/////////////////////////////////////////////////////////////////
|
||||
// Build the new communicator
|
||||
/////////////////////////////////////////////////////////////////
|
||||
int ierr= MPI_Comm_split(WorldComm,0,rank,&optimal_comm);
|
||||
assert(ierr==0);
|
||||
#else
|
||||
////////////////////////////////////////////////////////////////
|
||||
// Assert power of two shm_size.
|
||||
////////////////////////////////////////////////////////////////
|
||||
int log2size = Log2Size(WorldShmSize,MAXLOG2RANKSPERNODE);
|
||||
assert(log2size != -1);
|
||||
|
||||
////////////////////////////////////////////////////////////////
|
||||
@ -175,15 +307,77 @@ void GlobalSharedMemory::OptimalCommunicator(const std::vector<int> &processors,
|
||||
/////////////////////////////////////////////////////////////////
|
||||
int ierr= MPI_Comm_split(WorldComm,0,rank,&optimal_comm);
|
||||
assert(ierr==0);
|
||||
#endif
|
||||
}
|
||||
////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// SHMGET
|
||||
////////////////////////////////////////////////////////////////////////////////////////////
|
||||
#ifdef GRID_MPI3_SHMGET
|
||||
void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags)
|
||||
{
|
||||
std::cout << "SharedMemoryAllocate "<< bytes<< " shmget implementation "<<std::endl;
|
||||
assert(_ShmSetup==1);
|
||||
assert(_ShmAlloc==0);
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// allocate the shared windows for our group
|
||||
//////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
MPI_Barrier(WorldShmComm);
|
||||
WorldShmCommBufs.resize(WorldShmSize);
|
||||
std::vector<int> shmids(WorldShmSize);
|
||||
|
||||
if ( WorldShmRank == 0 ) {
|
||||
for(int r=0;r<WorldShmSize;r++){
|
||||
size_t size = bytes;
|
||||
key_t key = IPC_PRIVATE;
|
||||
int flags = IPC_CREAT | SHM_R | SHM_W;
|
||||
#ifdef SHM_HUGETLB
|
||||
if (Hugepages) flags|=SHM_HUGETLB;
|
||||
#endif
|
||||
if ((shmids[r]= shmget(key,size, flags)) ==-1) {
|
||||
int errsv = errno;
|
||||
printf("Errno %d\n",errsv);
|
||||
printf("key %d\n",key);
|
||||
printf("size %lld\n",size);
|
||||
printf("flags %d\n",flags);
|
||||
perror("shmget");
|
||||
exit(1);
|
||||
}
|
||||
}
|
||||
}
|
||||
MPI_Barrier(WorldShmComm);
|
||||
MPI_Bcast(&shmids[0],WorldShmSize*sizeof(int),MPI_BYTE,0,WorldShmComm);
|
||||
MPI_Barrier(WorldShmComm);
|
||||
|
||||
for(int r=0;r<WorldShmSize;r++){
|
||||
WorldShmCommBufs[r] = (uint64_t *)shmat(shmids[r], NULL,0);
|
||||
if (WorldShmCommBufs[r] == (uint64_t *)-1) {
|
||||
perror("Shared memory attach failure");
|
||||
shmctl(shmids[r], IPC_RMID, NULL);
|
||||
exit(2);
|
||||
}
|
||||
}
|
||||
MPI_Barrier(WorldShmComm);
|
||||
///////////////////////////////////
|
||||
// Mark for clean up
|
||||
///////////////////////////////////
|
||||
for(int r=0;r<WorldShmSize;r++){
|
||||
shmctl(shmids[r], IPC_RMID,(struct shmid_ds *)NULL);
|
||||
}
|
||||
MPI_Barrier(WorldShmComm);
|
||||
|
||||
_ShmAlloc=1;
|
||||
_ShmAllocBytes = bytes;
|
||||
}
|
||||
#endif
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// Hugetlbfs mapping intended
|
||||
////////////////////////////////////////////////////////////////////////////////////////////
|
||||
#ifdef GRID_MPI3_SHMMMAP
|
||||
void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags)
|
||||
{
|
||||
std::cout << "SharedMemoryAllocate "<< bytes<< " MMAP implementation "<<std::endl;
|
||||
std::cout << "SharedMemoryAllocate "<< bytes<< " MMAP implementation "<< GRID_SHM_PATH <<std::endl;
|
||||
assert(_ShmSetup==1);
|
||||
assert(_ShmAlloc==0);
|
||||
//////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
@ -193,7 +387,7 @@ void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags)
|
||||
WorldShmCommBufs.resize(WorldShmSize);
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// Hugetlbf and others map filesystems as mappable huge pages
|
||||
// Hugetlbfs and others map filesystems as mappable huge pages
|
||||
////////////////////////////////////////////////////////////////////////////////////////////
|
||||
char shm_name [NAME_MAX];
|
||||
for(int r=0;r<WorldShmSize;r++){
|
||||
@ -344,6 +538,9 @@ void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags)
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
|
||||
|
||||
////////////////////////////////////////////////////////
|
||||
// Global shared functionality finished
|
||||
// Now move to per communicator functionality
|
||||
|
@ -45,31 +45,33 @@ Gather_plane_simple (const Lattice<vobj> &rhs,commVector<vobj> &buffer,int dimen
|
||||
int so=plane*rhs._grid->_ostride[dimension]; // base offset for start of plane
|
||||
int e1=rhs._grid->_slice_nblock[dimension];
|
||||
int e2=rhs._grid->_slice_block[dimension];
|
||||
int ent = 0;
|
||||
|
||||
static std::vector<std::pair<int,int> > table; table.resize(e1*e2);
|
||||
|
||||
int stride=rhs._grid->_slice_stride[dimension];
|
||||
if ( cbmask == 0x3 ) {
|
||||
parallel_for_nest2(int n=0;n<e1;n++){
|
||||
for(int n=0;n<e1;n++){
|
||||
for(int b=0;b<e2;b++){
|
||||
int o = n*stride;
|
||||
int bo = n*e2;
|
||||
buffer[off+bo+b]=rhs._odata[so+o+b];
|
||||
table[ent++] = std::pair<int,int>(off+bo+b,so+o+b);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
int bo=0;
|
||||
std::vector<std::pair<int,int> > table;
|
||||
for(int n=0;n<e1;n++){
|
||||
for(int b=0;b<e2;b++){
|
||||
int o = n*stride;
|
||||
int ocb=1<<rhs._grid->CheckerBoardFromOindex(o+b);
|
||||
if ( ocb &cbmask ) {
|
||||
table.push_back(std::pair<int,int> (bo++,o+b));
|
||||
table[ent++]=std::pair<int,int> (off+bo++,so+o+b);
|
||||
}
|
||||
}
|
||||
}
|
||||
parallel_for(int i=0;i<table.size();i++){
|
||||
buffer[off+table[i].first]=rhs._odata[so+table[i].second];
|
||||
}
|
||||
}
|
||||
parallel_for(int i=0;i<ent;i++){
|
||||
buffer[table[i].first]=rhs._odata[table[i].second];
|
||||
}
|
||||
}
|
||||
|
||||
@ -140,31 +142,35 @@ template<class vobj> void Scatter_plane_simple (Lattice<vobj> &rhs,commVector<vo
|
||||
int e1=rhs._grid->_slice_nblock[dimension];
|
||||
int e2=rhs._grid->_slice_block[dimension];
|
||||
int stride=rhs._grid->_slice_stride[dimension];
|
||||
|
||||
|
||||
static std::vector<std::pair<int,int> > table; table.resize(e1*e2);
|
||||
int ent =0;
|
||||
|
||||
if ( cbmask ==0x3 ) {
|
||||
parallel_for_nest2(int n=0;n<e1;n++){
|
||||
|
||||
for(int n=0;n<e1;n++){
|
||||
for(int b=0;b<e2;b++){
|
||||
int o =n*rhs._grid->_slice_stride[dimension];
|
||||
int bo =n*rhs._grid->_slice_block[dimension];
|
||||
rhs._odata[so+o+b]=buffer[bo+b];
|
||||
table[ent++] = std::pair<int,int>(so+o+b,bo+b);
|
||||
}
|
||||
}
|
||||
|
||||
} else {
|
||||
std::vector<std::pair<int,int> > table;
|
||||
int bo=0;
|
||||
for(int n=0;n<e1;n++){
|
||||
for(int b=0;b<e2;b++){
|
||||
int o =n*rhs._grid->_slice_stride[dimension];
|
||||
int ocb=1<<rhs._grid->CheckerBoardFromOindex(o+b);// Could easily be a table lookup
|
||||
if ( ocb & cbmask ) {
|
||||
table.push_back(std::pair<int,int> (so+o+b,bo++));
|
||||
table[ent++]=std::pair<int,int> (so+o+b,bo++);
|
||||
}
|
||||
}
|
||||
}
|
||||
parallel_for(int i=0;i<table.size();i++){
|
||||
// std::cout << "Rcv"<< table[i].first << " " << table[i].second << " " <<buffer[table[i].second]<<std::endl;
|
||||
rhs._odata[table[i].first]=buffer[table[i].second];
|
||||
}
|
||||
}
|
||||
|
||||
parallel_for(int i=0;i<ent;i++){
|
||||
rhs._odata[table[i].first]=buffer[table[i].second];
|
||||
}
|
||||
}
|
||||
|
||||
@ -228,29 +234,32 @@ template<class vobj> void Copy_plane(Lattice<vobj>& lhs,const Lattice<vobj> &rhs
|
||||
int e1=rhs._grid->_slice_nblock[dimension]; // clearly loop invariant for icpc
|
||||
int e2=rhs._grid->_slice_block[dimension];
|
||||
int stride = rhs._grid->_slice_stride[dimension];
|
||||
static std::vector<std::pair<int,int> > table; table.resize(e1*e2);
|
||||
int ent=0;
|
||||
|
||||
if(cbmask == 0x3 ){
|
||||
parallel_for_nest2(int n=0;n<e1;n++){
|
||||
for(int n=0;n<e1;n++){
|
||||
for(int b=0;b<e2;b++){
|
||||
|
||||
int o =n*stride+b;
|
||||
//lhs._odata[lo+o]=rhs._odata[ro+o];
|
||||
vstream(lhs._odata[lo+o],rhs._odata[ro+o]);
|
||||
table[ent++] = std::pair<int,int>(lo+o,ro+o);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
parallel_for_nest2(int n=0;n<e1;n++){
|
||||
for(int n=0;n<e1;n++){
|
||||
for(int b=0;b<e2;b++){
|
||||
|
||||
int o =n*stride+b;
|
||||
int ocb=1<<lhs._grid->CheckerBoardFromOindex(o);
|
||||
if ( ocb&cbmask ) {
|
||||
//lhs._odata[lo+o]=rhs._odata[ro+o];
|
||||
vstream(lhs._odata[lo+o],rhs._odata[ro+o]);
|
||||
table[ent++] = std::pair<int,int>(lo+o,ro+o);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
parallel_for(int i=0;i<ent;i++){
|
||||
lhs._odata[table[i].first]=rhs._odata[table[i].second];
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
template<class vobj> void Copy_plane_permute(Lattice<vobj>& lhs,const Lattice<vobj> &rhs, int dimension,int lplane,int rplane,int cbmask,int permute_type)
|
||||
@ -269,16 +278,28 @@ template<class vobj> void Copy_plane_permute(Lattice<vobj>& lhs,const Lattice<vo
|
||||
int e2=rhs._grid->_slice_block [dimension];
|
||||
int stride = rhs._grid->_slice_stride[dimension];
|
||||
|
||||
parallel_for_nest2(int n=0;n<e1;n++){
|
||||
for(int b=0;b<e2;b++){
|
||||
static std::vector<std::pair<int,int> > table; table.resize(e1*e2);
|
||||
int ent=0;
|
||||
|
||||
double t_tab,t_perm;
|
||||
if ( cbmask == 0x3 ) {
|
||||
for(int n=0;n<e1;n++){
|
||||
for(int b=0;b<e2;b++){
|
||||
int o =n*stride;
|
||||
table[ent++] = std::pair<int,int>(lo+o+b,ro+o+b);
|
||||
}}
|
||||
} else {
|
||||
for(int n=0;n<e1;n++){
|
||||
for(int b=0;b<e2;b++){
|
||||
int o =n*stride;
|
||||
int ocb=1<<lhs._grid->CheckerBoardFromOindex(o+b);
|
||||
if ( ocb&cbmask ) {
|
||||
permute(lhs._odata[lo+o+b],rhs._odata[ro+o+b],permute_type);
|
||||
}
|
||||
if ( ocb&cbmask ) table[ent++] = std::pair<int,int>(lo+o+b,ro+o+b);
|
||||
}}
|
||||
}
|
||||
|
||||
}}
|
||||
parallel_for(int i=0;i<ent;i++){
|
||||
permute(lhs._odata[table[i].first],rhs._odata[table[i].second],permute_type);
|
||||
}
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////
|
||||
@ -291,6 +312,8 @@ template<class vobj> void Cshift_local(Lattice<vobj>& ret,const Lattice<vobj> &r
|
||||
sshift[0] = rhs._grid->CheckerBoardShiftForCB(rhs.checkerboard,dimension,shift,Even);
|
||||
sshift[1] = rhs._grid->CheckerBoardShiftForCB(rhs.checkerboard,dimension,shift,Odd);
|
||||
|
||||
double t_local;
|
||||
|
||||
if ( sshift[0] == sshift[1] ) {
|
||||
Cshift_local(ret,rhs,dimension,shift,0x3);
|
||||
} else {
|
||||
@ -299,7 +322,7 @@ template<class vobj> void Cshift_local(Lattice<vobj>& ret,const Lattice<vobj> &r
|
||||
}
|
||||
}
|
||||
|
||||
template<class vobj> Lattice<vobj> Cshift_local(Lattice<vobj> &ret,const Lattice<vobj> &rhs,int dimension,int shift,int cbmask)
|
||||
template<class vobj> void Cshift_local(Lattice<vobj> &ret,const Lattice<vobj> &rhs,int dimension,int shift,int cbmask)
|
||||
{
|
||||
GridBase *grid = rhs._grid;
|
||||
int fd = grid->_fdimensions[dimension];
|
||||
@ -325,11 +348,7 @@ template<class vobj> Lattice<vobj> Cshift_local(Lattice<vobj> &ret,const Lattice
|
||||
|
||||
int sshift = grid->CheckerBoardShiftForCB(rhs.checkerboard,dimension,shift,cb);
|
||||
int sx = (x+sshift)%rd;
|
||||
|
||||
// FIXME : This must change where we have a
|
||||
// Rotate slice.
|
||||
|
||||
// Document how this works ; why didn't I do this when I first wrote it...
|
||||
// wrap is whether sshift > rd.
|
||||
// num is sshift mod rd.
|
||||
//
|
||||
@ -365,10 +384,8 @@ template<class vobj> Lattice<vobj> Cshift_local(Lattice<vobj> &ret,const Lattice
|
||||
|
||||
if ( permute_slice ) Copy_plane_permute(ret,rhs,dimension,x,sx,cbmask,permute_type_dist);
|
||||
else Copy_plane(ret,rhs,dimension,x,sx,cbmask);
|
||||
|
||||
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
@ -54,13 +54,13 @@ template<class vobj> Lattice<vobj> Cshift(const Lattice<vobj> &rhs,int dimension
|
||||
|
||||
|
||||
if ( !comm_dim ) {
|
||||
// std::cout << "Cshift_local" <<std::endl;
|
||||
//std::cout << "CSHIFT: Cshift_local" <<std::endl;
|
||||
Cshift_local(ret,rhs,dimension,shift); // Handles checkerboarding
|
||||
} else if ( splice_dim ) {
|
||||
// std::cout << "Cshift_comms_simd" <<std::endl;
|
||||
//std::cout << "CSHIFT: Cshift_comms_simd call - splice_dim = " << splice_dim << " shift " << shift << " dimension = " << dimension << std::endl;
|
||||
Cshift_comms_simd(ret,rhs,dimension,shift);
|
||||
} else {
|
||||
// std::cout << "Cshift_comms" <<std::endl;
|
||||
//std::cout << "CSHIFT: Cshift_comms" <<std::endl;
|
||||
Cshift_comms(ret,rhs,dimension,shift);
|
||||
}
|
||||
return ret;
|
||||
@ -91,9 +91,12 @@ template<class vobj> void Cshift_comms_simd(Lattice<vobj>& ret,const Lattice<vob
|
||||
sshift[0] = rhs._grid->CheckerBoardShiftForCB(rhs.checkerboard,dimension,shift,Even);
|
||||
sshift[1] = rhs._grid->CheckerBoardShiftForCB(rhs.checkerboard,dimension,shift,Odd);
|
||||
|
||||
//std::cout << "Cshift_comms_simd dim "<<dimension<<"cb "<<rhs.checkerboard<<"shift "<<shift<<" sshift " << sshift[0]<<" "<<sshift[1]<<std::endl;
|
||||
if ( sshift[0] == sshift[1] ) {
|
||||
//std::cout << "Single pass Cshift_comms" <<std::endl;
|
||||
Cshift_comms_simd(ret,rhs,dimension,shift,0x3);
|
||||
} else {
|
||||
//std::cout << "Two pass Cshift_comms" <<std::endl;
|
||||
Cshift_comms_simd(ret,rhs,dimension,shift,0x1);// if checkerboard is unfavourable take two passes
|
||||
Cshift_comms_simd(ret,rhs,dimension,shift,0x2);// both with block stride loop iteration
|
||||
}
|
||||
@ -175,6 +178,10 @@ template<class vobj> void Cshift_comms_simd(Lattice<vobj> &ret,const Lattice<vo
|
||||
int simd_layout = grid->_simd_layout[dimension];
|
||||
int comm_dim = grid->_processors[dimension] >1 ;
|
||||
|
||||
//std::cout << "Cshift_comms_simd dim "<< dimension << " fd "<<fd<<" rd "<<rd
|
||||
// << " ld "<<ld<<" pd " << pd<<" simd_layout "<<simd_layout
|
||||
// << " comm_dim " << comm_dim << " cbmask " << cbmask <<std::endl;
|
||||
|
||||
assert(comm_dim==1);
|
||||
assert(simd_layout==2);
|
||||
assert(shift>=0);
|
||||
|
@ -256,9 +256,42 @@ public:
|
||||
_odata[ss]=r._odata[ss];
|
||||
}
|
||||
}
|
||||
|
||||
// Move constructor: copies r's grid pointer and checkerboard value, then
// move-assigns r._odata into this lattice rather than copying it element
// by element.
Lattice(Lattice&& r){
  this->_grid        = r._grid;
  this->checkerboard = r.checkerboard;
  this->_odata       = std::move(r._odata);
}
|
||||
|
||||
|
||||
|
||||
// Move assignment: takes over r's grid pointer, checkerboard value and
// _odata storage, leaving r._odata in a moved-from state.
// Returns a reference to this lattice.
inline Lattice<vobj> & operator = (Lattice<vobj> && r)
{
  // Guard against self-move (x = std::move(x)): move-assigning _odata onto
  // itself would leave the container in an unspecified state.
  if ( this != &r ) {
    _grid        = r._grid;
    checkerboard = r.checkerboard;
    _odata       = std::move(r._odata);
  }
  return *this;
}
|
||||
|
||||
// Copy assignment: adopts r's grid pointer and checkerboard value, sizes
// _odata to the grid's oSites() and copies r._odata element by element.
// Returns a reference to this lattice.
inline Lattice<vobj> & operator = (const Lattice<vobj> & r){
  this->_grid        = r._grid;
  this->checkerboard = r.checkerboard;

  // essential: the loop below writes by index, so the destination must
  // already hold oSites() elements.
  const auto nsites = this->_grid->oSites();
  this->_odata.resize(nsites);

  parallel_for(int site=0;site<nsites;site++){
    this->_odata[site] = r._odata[site];
  }
  return *this;
}
|
||||
|
||||
// Converting assignment from a lattice with a different site type: takes
// r's checkerboard value, runs the conformable() check between the two
// lattices, then assigns every outer site from r. _grid and the size of
// _odata are left untouched. Returns a reference to this lattice.
template<class robj> strong_inline Lattice<vobj> & operator = (const Lattice<robj> & r){
  checkerboard = r.checkerboard;
  conformable(*this,r);

  const auto nsites = _grid->oSites();
  parallel_for(int site=0;site<nsites;site++){
    _odata[site] = r._odata[site];
  }
  return *this;
}
|
||||
|
||||
virtual ~Lattice(void) = default;
|
||||
|
||||
void reset(GridBase* grid) {
|
||||
@ -277,15 +310,6 @@ public:
|
||||
return *this;
|
||||
}
|
||||
|
||||
template<class robj> strong_inline Lattice<vobj> & operator = (const Lattice<robj> & r){
|
||||
this->checkerboard = r.checkerboard;
|
||||
conformable(*this,r);
|
||||
|
||||
parallel_for(int ss=0;ss<_grid->oSites();ss++){
|
||||
this->_odata[ss]=r._odata[ss];
|
||||
}
|
||||
return *this;
|
||||
}
|
||||
|
||||
// *=,+=,-= operators inherit behaviour from the corresponding */+/- operation
|
||||
template<class T> strong_inline Lattice<vobj> &operator *=(const T &r) {
|
||||
|
@ -179,7 +179,7 @@ namespace Grid {
|
||||
return ret;
|
||||
}
|
||||
|
||||
#define DECLARE_RELATIONAL(op,functor) \
|
||||
#define DECLARE_RELATIONAL_EQ(op,functor) \
|
||||
template<class vsimd,IfSimd<vsimd> = 0>\
|
||||
inline vInteger operator op (const vsimd & lhs, const vsimd & rhs)\
|
||||
{\
|
||||
@ -198,11 +198,6 @@ namespace Grid {
|
||||
typedef typename vsimd::scalar_type scalar;\
|
||||
return Comparison(functor<scalar,scalar>(),lhs,rhs);\
|
||||
}\
|
||||
template<class vsimd,IfSimd<vsimd> = 0>\
|
||||
inline vInteger operator op(const iScalar<vsimd> &lhs,const iScalar<vsimd> &rhs)\
|
||||
{ \
|
||||
return lhs._internal op rhs._internal; \
|
||||
} \
|
||||
template<class vsimd>\
|
||||
inline vInteger operator op(const iScalar<vsimd> &lhs,const typename vsimd::scalar_type &rhs) \
|
||||
{ \
|
||||
@ -212,14 +207,21 @@ namespace Grid {
|
||||
inline vInteger operator op(const typename vsimd::scalar_type &lhs,const iScalar<vsimd> &rhs) \
|
||||
{ \
|
||||
return lhs op rhs._internal; \
|
||||
}
|
||||
} \
|
||||
|
||||
#define DECLARE_RELATIONAL(op,functor) \
|
||||
DECLARE_RELATIONAL_EQ(op,functor) \
|
||||
template<class vsimd>\
|
||||
inline vInteger operator op(const iScalar<vsimd> &lhs,const iScalar<vsimd> &rhs)\
|
||||
{ \
|
||||
return lhs._internal op rhs._internal; \
|
||||
}
|
||||
|
||||
DECLARE_RELATIONAL(<,slt);
|
||||
DECLARE_RELATIONAL(<=,sle);
|
||||
DECLARE_RELATIONAL(>,sgt);
|
||||
DECLARE_RELATIONAL(>=,sge);
|
||||
DECLARE_RELATIONAL(==,seq);
|
||||
DECLARE_RELATIONAL_EQ(==,seq);
|
||||
DECLARE_RELATIONAL(!=,sne);
|
||||
|
||||
#undef DECLARE_RELATIONAL
|
||||
|
@ -606,6 +606,51 @@ unvectorizeToLexOrdArray(std::vector<sobj> &out, const Lattice<vobj> &in)
|
||||
extract1(in_vobj, out_ptrs, 0);
|
||||
}
|
||||
}
|
||||
|
||||
template<typename vobj, typename sobj>
|
||||
typename std::enable_if<isSIMDvectorized<vobj>::value && !isSIMDvectorized<sobj>::value, void>::type
|
||||
unvectorizeToRevLexOrdArray(std::vector<sobj> &out, const Lattice<vobj> &in)
|
||||
{
|
||||
|
||||
typedef typename vobj::vector_type vtype;
|
||||
|
||||
GridBase* in_grid = in._grid;
|
||||
out.resize(in_grid->lSites());
|
||||
|
||||
int ndim = in_grid->Nd();
|
||||
int in_nsimd = vtype::Nsimd();
|
||||
|
||||
std::vector<std::vector<int> > in_icoor(in_nsimd);
|
||||
|
||||
for(int lane=0; lane < in_nsimd; lane++){
|
||||
in_icoor[lane].resize(ndim);
|
||||
in_grid->iCoorFromIindex(in_icoor[lane], lane);
|
||||
}
|
||||
|
||||
parallel_for(int in_oidx = 0; in_oidx < in_grid->oSites(); in_oidx++){ //loop over outer index
|
||||
//Assemble vector of pointers to output elements
|
||||
std::vector<sobj*> out_ptrs(in_nsimd);
|
||||
|
||||
std::vector<int> in_ocoor(ndim);
|
||||
in_grid->oCoorFromOindex(in_ocoor, in_oidx);
|
||||
|
||||
std::vector<int> lcoor(in_grid->Nd());
|
||||
|
||||
for(int lane=0; lane < in_nsimd; lane++){
|
||||
for(int mu=0;mu<ndim;mu++)
|
||||
lcoor[mu] = in_ocoor[mu] + in_grid->_rdimensions[mu]*in_icoor[lane][mu];
|
||||
|
||||
int lex;
|
||||
Lexicographic::IndexFromCoorReversed(lcoor, lex, in_grid->_ldimensions);
|
||||
out_ptrs[lane] = &out[lex];
|
||||
}
|
||||
|
||||
//Unpack into those ptrs
|
||||
const vobj & in_vobj = in._odata[in_oidx];
|
||||
extract1(in_vobj, out_ptrs, 0);
|
||||
}
|
||||
}
|
||||
|
||||
//Copy SIMD-vectorized lattice to array of scalar objects in lexicographic order
|
||||
template<typename vobj, typename sobj>
|
||||
typename std::enable_if<isSIMDvectorized<vobj>::value
|
||||
@ -655,6 +700,54 @@ vectorizeFromLexOrdArray( std::vector<sobj> &in, Lattice<vobj> &out)
|
||||
}
|
||||
}
|
||||
|
||||
template<typename vobj, typename sobj>
|
||||
typename std::enable_if<isSIMDvectorized<vobj>::value
|
||||
&& !isSIMDvectorized<sobj>::value, void>::type
|
||||
vectorizeFromRevLexOrdArray( std::vector<sobj> &in, Lattice<vobj> &out)
|
||||
{
|
||||
|
||||
typedef typename vobj::vector_type vtype;
|
||||
|
||||
GridBase* grid = out._grid;
|
||||
assert(in.size()==grid->lSites());
|
||||
|
||||
int ndim = grid->Nd();
|
||||
int nsimd = vtype::Nsimd();
|
||||
|
||||
std::vector<std::vector<int> > icoor(nsimd);
|
||||
|
||||
for(int lane=0; lane < nsimd; lane++){
|
||||
icoor[lane].resize(ndim);
|
||||
grid->iCoorFromIindex(icoor[lane],lane);
|
||||
}
|
||||
|
||||
parallel_for(uint64_t oidx = 0; oidx < grid->oSites(); oidx++){ //loop over outer index
|
||||
//Assemble vector of pointers to output elements
|
||||
std::vector<sobj*> ptrs(nsimd);
|
||||
|
||||
std::vector<int> ocoor(ndim);
|
||||
grid->oCoorFromOindex(ocoor, oidx);
|
||||
|
||||
std::vector<int> lcoor(grid->Nd());
|
||||
|
||||
for(int lane=0; lane < nsimd; lane++){
|
||||
|
||||
for(int mu=0;mu<ndim;mu++){
|
||||
lcoor[mu] = ocoor[mu] + grid->_rdimensions[mu]*icoor[lane][mu];
|
||||
}
|
||||
|
||||
int lex;
|
||||
Lexicographic::IndexFromCoorReversed(lcoor, lex, grid->_ldimensions);
|
||||
ptrs[lane] = &in[lex];
|
||||
}
|
||||
|
||||
//pack from those ptrs
|
||||
vobj vecobj;
|
||||
merge1(vecobj, ptrs, 0);
|
||||
out._odata[oidx] = vecobj;
|
||||
}
|
||||
}
|
||||
|
||||
//Convert a Lattice from one precision to another
|
||||
template<class VobjOut, class VobjIn>
|
||||
void precisionChange(Lattice<VobjOut> &out, const Lattice<VobjIn> &in){
|
||||
|
@ -110,11 +110,11 @@ class BinaryIO {
|
||||
lsites = 1;
|
||||
}
|
||||
|
||||
#pragma omp parallel
|
||||
PARALLEL_REGION
|
||||
{
|
||||
uint32_t nersc_csum_thr = 0;
|
||||
|
||||
#pragma omp for
|
||||
PARALLEL_FOR_LOOP_INTERN
|
||||
for (uint64_t local_site = 0; local_site < lsites; local_site++)
|
||||
{
|
||||
uint32_t *site_buf = (uint32_t *)&fbuf[local_site];
|
||||
@ -124,7 +124,7 @@ class BinaryIO {
|
||||
}
|
||||
}
|
||||
|
||||
#pragma omp critical
|
||||
PARALLEL_CRITICAL
|
||||
{
|
||||
nersc_csum += nersc_csum_thr;
|
||||
}
|
||||
@ -146,14 +146,14 @@ class BinaryIO {
|
||||
std::vector<int> local_start =grid->LocalStarts();
|
||||
std::vector<int> global_vol =grid->FullDimensions();
|
||||
|
||||
#pragma omp parallel
|
||||
PARALLEL_REGION
|
||||
{
|
||||
std::vector<int> coor(nd);
|
||||
uint32_t scidac_csuma_thr=0;
|
||||
uint32_t scidac_csumb_thr=0;
|
||||
uint32_t site_crc=0;
|
||||
|
||||
#pragma omp for
|
||||
PARALLEL_FOR_LOOP_INTERN
|
||||
for(uint64_t local_site=0;local_site<lsites;local_site++){
|
||||
|
||||
uint32_t * site_buf = (uint32_t *)&fbuf[local_site];
|
||||
@ -183,7 +183,7 @@ class BinaryIO {
|
||||
scidac_csumb_thr ^= site_crc<<gsite31 | site_crc>>(32-gsite31);
|
||||
}
|
||||
|
||||
#pragma omp critical
|
||||
PARALLEL_CRITICAL
|
||||
{
|
||||
scidac_csuma^= scidac_csuma_thr;
|
||||
scidac_csumb^= scidac_csumb_thr;
|
||||
@ -263,7 +263,7 @@ class BinaryIO {
|
||||
GridBase *grid,
|
||||
std::vector<fobj> &iodata,
|
||||
std::string file,
|
||||
uint64_t offset,
|
||||
uint64_t& offset,
|
||||
const std::string &format, int control,
|
||||
uint32_t &nersc_csum,
|
||||
uint32_t &scidac_csuma,
|
||||
@ -431,14 +431,20 @@ class BinaryIO {
|
||||
MPI_Abort(MPI_COMM_WORLD, 1); //assert(ierr == 0);
|
||||
}
|
||||
|
||||
std::cout << GridLogDebug << "MPI read I/O set view " << file << std::endl;
|
||||
std::cout << GridLogDebug << "MPI write I/O set view " << file << std::endl;
|
||||
ierr = MPI_File_set_view(fh, disp, mpiObject, fileArray, "native", MPI_INFO_NULL);
|
||||
assert(ierr == 0);
|
||||
|
||||
std::cout << GridLogDebug << "MPI read I/O write all " << file << std::endl;
|
||||
std::cout << GridLogDebug << "MPI write I/O write all " << file << std::endl;
|
||||
ierr = MPI_File_write_all(fh, &iodata[0], 1, localArray, &status);
|
||||
assert(ierr == 0);
|
||||
|
||||
MPI_Offset os;
|
||||
MPI_File_get_position(fh, &os);
|
||||
MPI_File_get_byte_offset(fh, os, &disp);
|
||||
offset = disp;
|
||||
|
||||
|
||||
MPI_File_close(&fh);
|
||||
MPI_Type_free(&fileArray);
|
||||
MPI_Type_free(&localArray);
|
||||
@ -448,7 +454,7 @@ class BinaryIO {
|
||||
} else {
|
||||
|
||||
std::cout << GridLogMessage << "IOobject: C++ write I/O " << file << " : "
|
||||
<< iodata.size() * sizeof(fobj) << " bytes" << std::endl;
|
||||
<< iodata.size() * sizeof(fobj) << " bytes and offset " << offset << std::endl;
|
||||
|
||||
std::ofstream fout;
|
||||
fout.exceptions ( std::fstream::failbit | std::fstream::badbit );
|
||||
@ -495,6 +501,7 @@ class BinaryIO {
|
||||
exit(1);
|
||||
#endif
|
||||
}
|
||||
offset = fout.tellp();
|
||||
fout.close();
|
||||
}
|
||||
timer.Stop();
|
||||
@ -699,7 +706,6 @@ class BinaryIO {
|
||||
|
||||
IOobject(w,grid,iodata,file,offset,format,BINARYIO_WRITE|BINARYIO_LEXICOGRAPHIC,
|
||||
nersc_csum,scidac_csuma,scidac_csumb);
|
||||
|
||||
iodata.resize(1);
|
||||
{
|
||||
std::vector<RngStateType> tmp(RngStateCount);
|
||||
|
@ -182,6 +182,11 @@ class GridLimeReader : public BinaryIO {
|
||||
{
|
||||
filename= _filename;
|
||||
File = fopen(filename.c_str(), "r");
|
||||
if (File == nullptr)
|
||||
{
|
||||
std::cerr << "cannot open file '" << filename << "'" << std::endl;
|
||||
abort();
|
||||
}
|
||||
LimeR = limeCreateReader(File);
|
||||
}
|
||||
/////////////////////////////////////////////
|
||||
@ -248,7 +253,6 @@ class GridLimeReader : public BinaryIO {
|
||||
template<class serialisable_object>
|
||||
void readLimeObject(serialisable_object &object,std::string object_name,std::string record_name)
|
||||
{
|
||||
std::string xmlstring;
|
||||
// should this be a do while; can we miss a first record??
|
||||
while ( limeReaderNextRecord(LimeR) == LIME_SUCCESS ) {
|
||||
|
||||
@ -262,7 +266,8 @@ class GridLimeReader : public BinaryIO {
|
||||
limeReaderReadData((void *)&xmlc[0], &nbytes, LimeR);
|
||||
// std::cout << GridLogMessage<< " readLimeObject matches XML " << &xmlc[0] <<std::endl;
|
||||
|
||||
XmlReader RD(&xmlc[0],"");
|
||||
std::string xmlstring(&xmlc[0]);
|
||||
XmlReader RD(xmlstring, true, "");
|
||||
read(RD,object_name,object);
|
||||
return;
|
||||
}
|
||||
@ -698,9 +703,11 @@ class IldgReader : public GridLimeReader {
|
||||
|
||||
//////////////////////////////////
|
||||
// ILDG format record
|
||||
|
||||
std::string xmlstring(&xmlc[0]);
|
||||
if ( !strncmp(limeReaderType(LimeR), ILDG_FORMAT,strlen(ILDG_FORMAT)) ) {
|
||||
|
||||
XmlReader RD(&xmlc[0],"");
|
||||
XmlReader RD(xmlstring, true, "");
|
||||
read(RD,"ildgFormat",ildgFormat_);
|
||||
|
||||
if ( ildgFormat_.precision == 64 ) format = std::string("IEEE64BIG");
|
||||
@ -715,13 +722,13 @@ class IldgReader : public GridLimeReader {
|
||||
}
|
||||
|
||||
if ( !strncmp(limeReaderType(LimeR), ILDG_DATA_LFN,strlen(ILDG_DATA_LFN)) ) {
|
||||
FieldMetaData_.ildg_lfn = std::string(&xmlc[0]);
|
||||
FieldMetaData_.ildg_lfn = xmlstring;
|
||||
found_ildgLFN = 1;
|
||||
}
|
||||
|
||||
if ( !strncmp(limeReaderType(LimeR), GRID_FORMAT,strlen(ILDG_FORMAT)) ) {
|
||||
|
||||
XmlReader RD(&xmlc[0],"");
|
||||
XmlReader RD(xmlstring, true, "");
|
||||
read(RD,"FieldMetaData",FieldMetaData_);
|
||||
|
||||
format = FieldMetaData_.floating_point;
|
||||
@ -735,18 +742,17 @@ class IldgReader : public GridLimeReader {
|
||||
}
|
||||
|
||||
if ( !strncmp(limeReaderType(LimeR), SCIDAC_RECORD_XML,strlen(SCIDAC_RECORD_XML)) ) {
|
||||
std::string xmls(&xmlc[0]);
|
||||
// is it a USQCD info field
|
||||
if ( xmls.find(std::string("usqcdInfo")) != std::string::npos ) {
|
||||
if ( xmlstring.find(std::string("usqcdInfo")) != std::string::npos ) {
|
||||
// std::cout << GridLogMessage<<"...found a usqcdInfo field"<<std::endl;
|
||||
XmlReader RD(&xmlc[0],"");
|
||||
XmlReader RD(xmlstring, true, "");
|
||||
read(RD,"usqcdInfo",usqcdInfo_);
|
||||
found_usqcdInfo = 1;
|
||||
}
|
||||
}
|
||||
|
||||
if ( !strncmp(limeReaderType(LimeR), SCIDAC_CHECKSUM,strlen(SCIDAC_CHECKSUM)) ) {
|
||||
XmlReader RD(&xmlc[0],"");
|
||||
XmlReader RD(xmlstring, true, "");
|
||||
read(RD,"scidacChecksum",scidacChecksum_);
|
||||
found_scidacChecksum = 1;
|
||||
}
|
||||
|
@ -237,20 +237,24 @@ namespace Grid {
|
||||
MachineCharacteristics(header);
|
||||
|
||||
uint64_t offset;
|
||||
|
||||
truncate(file);
|
||||
|
||||
// Sod it -- always write 3x3 double
|
||||
header.floating_point = std::string("IEEE64BIG");
|
||||
header.data_type = std::string("4D_SU3_GAUGE_3x3");
|
||||
GaugeSimpleUnmunger<fobj3D,sobj> munge;
|
||||
offset = writeHeader(header,file);
|
||||
if ( grid->IsBoss() ) {
|
||||
truncate(file);
|
||||
offset = writeHeader(header,file);
|
||||
}
|
||||
grid->Broadcast(0,(void *)&offset,sizeof(offset));
|
||||
|
||||
uint32_t nersc_csum,scidac_csuma,scidac_csumb;
|
||||
BinaryIO::writeLatticeObject<vobj,fobj3D>(Umu,file,munge,offset,header.floating_point,
|
||||
nersc_csum,scidac_csuma,scidac_csumb);
|
||||
header.checksum = nersc_csum;
|
||||
writeHeader(header,file);
|
||||
if ( grid->IsBoss() ) {
|
||||
writeHeader(header,file);
|
||||
}
|
||||
|
||||
std::cout<<GridLogMessage <<"Written NERSC Configuration on "<< file << " checksum "
|
||||
<<std::hex<<header.checksum
|
||||
@ -293,12 +297,18 @@ namespace Grid {
|
||||
header.data_type = std::string("SITMO");
|
||||
#endif
|
||||
|
||||
truncate(file);
|
||||
offset = writeHeader(header,file);
|
||||
if ( grid->IsBoss() ) {
|
||||
truncate(file);
|
||||
offset = writeHeader(header,file);
|
||||
}
|
||||
grid->Broadcast(0,(void *)&offset,sizeof(offset));
|
||||
|
||||
uint32_t nersc_csum,scidac_csuma,scidac_csumb;
|
||||
BinaryIO::writeRNG(serial,parallel,file,offset,nersc_csum,scidac_csuma,scidac_csumb);
|
||||
header.checksum = nersc_csum;
|
||||
offset = writeHeader(header,file);
|
||||
if ( grid->IsBoss() ) {
|
||||
offset = writeHeader(header,file);
|
||||
}
|
||||
|
||||
std::cout<<GridLogMessage
|
||||
<<"Written NERSC RNG STATE "<<file<< " checksum "
|
||||
|
@ -1,44 +0,0 @@
|
||||
pugixml [Travis CI build status](https://travis-ci.org/zeux/pugixml) [AppVeyor build status](https://ci.appveyor.com/project/zeux/pugixml)
|
||||
=======
|
||||
|
||||
pugixml is a C++ XML processing library, which consists of a DOM-like interface with rich traversal/modification
|
||||
capabilities, an extremely fast XML parser which constructs the DOM tree from an XML file/buffer, and an XPath 1.0
|
||||
implementation for complex data-driven tree queries. Full Unicode support is also available, with Unicode interface
|
||||
variants and conversions between different Unicode encodings (which happen automatically during parsing/saving).
|
||||
|
||||
pugixml is used by a lot of projects, both open-source and proprietary, for performance and easy-to-use interface.
|
||||
|
||||
## Documentation
|
||||
|
||||
Documentation for the current release of pugixml is available on-line as two separate documents:
|
||||
|
||||
* [Quick-start guide](http://pugixml.org/docs/quickstart.html), that aims to provide enough information to start using the library;
|
||||
* [Complete reference manual](http://pugixml.org/docs/manual.html), that describes all features of the library in detail.
|
||||
|
||||
You're advised to start with the quick-start guide; however, many important library features are either not described in it at all or only mentioned briefly; if you require more information you should read the complete manual.
|
||||
|
||||
## License
|
||||
This library is available to anybody free of charge, under the terms of MIT License:
|
||||
|
||||
Copyright (c) 2006-2015 Arseny Kapoulkine
|
||||
|
||||
Permission is hereby granted, free of charge, to any person
|
||||
obtaining a copy of this software and associated documentation
|
||||
files (the "Software"), to deal in the Software without
|
||||
restriction, including without limitation the rights to use,
|
||||
copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the
|
||||
Software is furnished to do so, subject to the following
|
||||
conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be
|
||||
included in all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
OTHER DEALINGS IN THE SOFTWARE.
|
@ -1,7 +1,7 @@
|
||||
/**
|
||||
* pugixml parser - version 1.6
|
||||
* pugixml parser - version 1.9
|
||||
* --------------------------------------------------------
|
||||
* Copyright (C) 2006-2015, by Arseny Kapoulkine (arseny.kapoulkine@gmail.com)
|
||||
* Copyright (C) 2006-2018, by Arseny Kapoulkine (arseny.kapoulkine@gmail.com)
|
||||
* Report bugs and download new versions at http://pugixml.org/
|
||||
*
|
||||
* This library is distributed under the MIT License. See notice at the end
|
||||
@ -17,6 +17,9 @@
|
||||
// Uncomment this to enable wchar_t mode
|
||||
// #define PUGIXML_WCHAR_MODE
|
||||
|
||||
// Uncomment this to enable compact mode
|
||||
// #define PUGIXML_COMPACT
|
||||
|
||||
// Uncomment this to disable XPath
|
||||
// #define PUGIXML_NO_XPATH
|
||||
|
||||
@ -46,7 +49,7 @@
|
||||
#endif
|
||||
|
||||
/**
|
||||
* Copyright (c) 2006-2015 Arseny Kapoulkine
|
||||
* Copyright (c) 2006-2018 Arseny Kapoulkine
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
@ -59,7 +62,7 @@
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -1,7 +1,7 @@
|
||||
/**
|
||||
* pugixml parser - version 1.6
|
||||
* pugixml parser - version 1.9
|
||||
* --------------------------------------------------------
|
||||
* Copyright (C) 2006-2015, by Arseny Kapoulkine (arseny.kapoulkine@gmail.com)
|
||||
* Copyright (C) 2006-2018, by Arseny Kapoulkine (arseny.kapoulkine@gmail.com)
|
||||
* Report bugs and download new versions at http://pugixml.org/
|
||||
*
|
||||
* This library is distributed under the MIT License. See notice at the end
|
||||
@ -13,7 +13,7 @@
|
||||
|
||||
#ifndef PUGIXML_VERSION
|
||||
// Define version macro; evaluates to major * 100 + minor so that it's safe to use in less-than comparisons
|
||||
# define PUGIXML_VERSION 160
|
||||
# define PUGIXML_VERSION 190
|
||||
#endif
|
||||
|
||||
// Include user configuration file (this can define various configuration macros)
|
||||
@ -72,6 +72,44 @@
|
||||
# endif
|
||||
#endif
|
||||
|
||||
// If the platform is known to have move semantics support, compile move ctor/operator implementation
|
||||
#ifndef PUGIXML_HAS_MOVE
|
||||
# if __cplusplus >= 201103
|
||||
# define PUGIXML_HAS_MOVE
|
||||
# elif defined(_MSC_VER) && _MSC_VER >= 1600
|
||||
# define PUGIXML_HAS_MOVE
|
||||
# endif
|
||||
#endif
|
||||
|
||||
// If C++ is 2011 or higher, add 'noexcept' specifiers
|
||||
#ifndef PUGIXML_NOEXCEPT
|
||||
# if __cplusplus >= 201103
|
||||
# define PUGIXML_NOEXCEPT noexcept
|
||||
# elif defined(_MSC_VER) && _MSC_VER >= 1900
|
||||
# define PUGIXML_NOEXCEPT noexcept
|
||||
# else
|
||||
# define PUGIXML_NOEXCEPT
|
||||
# endif
|
||||
#endif
|
||||
|
||||
// Some functions can not be noexcept in compact mode
|
||||
#ifdef PUGIXML_COMPACT
|
||||
# define PUGIXML_NOEXCEPT_IF_NOT_COMPACT
|
||||
#else
|
||||
# define PUGIXML_NOEXCEPT_IF_NOT_COMPACT PUGIXML_NOEXCEPT
|
||||
#endif
|
||||
|
||||
// If C++ is 2011 or higher, add 'override' qualifiers
|
||||
#ifndef PUGIXML_OVERRIDE
|
||||
# if __cplusplus >= 201103
|
||||
# define PUGIXML_OVERRIDE override
|
||||
# elif defined(_MSC_VER) && _MSC_VER >= 1700
|
||||
# define PUGIXML_OVERRIDE override
|
||||
# else
|
||||
# define PUGIXML_OVERRIDE
|
||||
# endif
|
||||
#endif
|
||||
|
||||
// Character interface macros
|
||||
#ifdef PUGIXML_WCHAR_MODE
|
||||
# define PUGIXML_TEXT(t) L ## t
|
||||
@ -133,13 +171,13 @@ namespace pugi
|
||||
|
||||
// This flag determines if EOL characters are normalized (converted to #xA) during parsing. This flag is on by default.
|
||||
const unsigned int parse_eol = 0x0020;
|
||||
|
||||
|
||||
// This flag determines if attribute values are normalized using CDATA normalization rules during parsing. This flag is on by default.
|
||||
const unsigned int parse_wconv_attribute = 0x0040;
|
||||
|
||||
// This flag determines if attribute values are normalized using NMTOKENS normalization rules during parsing. This flag is off by default.
|
||||
const unsigned int parse_wnorm_attribute = 0x0080;
|
||||
|
||||
|
||||
// This flag determines if document declaration (node_declaration) is added to the DOM tree. This flag is off by default.
|
||||
const unsigned int parse_declaration = 0x0100;
|
||||
|
||||
@ -158,6 +196,11 @@ namespace pugi
|
||||
// is a valid document. This flag is off by default.
|
||||
const unsigned int parse_fragment = 0x1000;
|
||||
|
||||
// This flag determines if plain character data is stored in the parent element's value. This significantly changes the structure of
|
||||
// the document; this flag is only recommended for parsing documents with many PCDATA nodes in memory-constrained environments.
|
||||
// This flag is off by default.
|
||||
const unsigned int parse_embed_pcdata = 0x2000;
|
||||
|
||||
// The default parsing mode.
|
||||
// Elements, PCDATA and CDATA sections are added to the DOM tree, character/reference entities are expanded,
|
||||
// End-of-Line characters are normalized, attribute values are normalized using CDATA normalization rules.
|
||||
@ -184,16 +227,16 @@ namespace pugi
|
||||
};
|
||||
|
||||
// Formatting flags
|
||||
|
||||
|
||||
// Indent the nodes that are written to output stream with as many indentation strings as deep the node is in DOM tree. This flag is on by default.
|
||||
const unsigned int format_indent = 0x01;
|
||||
|
||||
|
||||
// Write encoding-specific BOM to the output stream. This flag is off by default.
|
||||
const unsigned int format_write_bom = 0x02;
|
||||
|
||||
// Use raw output mode (no indentation and no line breaks are written). This flag is off by default.
|
||||
const unsigned int format_raw = 0x04;
|
||||
|
||||
|
||||
// Omit default XML declaration even if there is no declaration in the document. This flag is off by default.
|
||||
const unsigned int format_no_declaration = 0x08;
|
||||
|
||||
@ -206,6 +249,9 @@ namespace pugi
|
||||
// Write every attribute on a new line with appropriate indentation. This flag is off by default.
|
||||
const unsigned int format_indent_attributes = 0x40;
|
||||
|
||||
// Don't output empty element tags, instead writing an explicit start and end tag even if there are no children. This flag is off by default.
|
||||
const unsigned int format_no_empty_element_tags = 0x80;
|
||||
|
||||
// The default set of formatting flags.
|
||||
// Nodes are indented depending on their depth in DOM tree, a default declaration is output if document has none.
|
||||
const unsigned int format_default = format_indent;
|
||||
@ -225,7 +271,7 @@ namespace pugi
|
||||
class xml_node;
|
||||
|
||||
class xml_text;
|
||||
|
||||
|
||||
#ifndef PUGIXML_NO_XPATH
|
||||
class xpath_node;
|
||||
class xpath_node_set;
|
||||
@ -268,7 +314,7 @@ namespace pugi
|
||||
// Construct writer from a FILE* object; void* is used to avoid header dependencies on stdio
|
||||
xml_writer_file(void* file);
|
||||
|
||||
virtual void write(const void* data, size_t size);
|
||||
virtual void write(const void* data, size_t size) PUGIXML_OVERRIDE;
|
||||
|
||||
private:
|
||||
void* file;
|
||||
@ -283,7 +329,7 @@ namespace pugi
|
||||
xml_writer_stream(std::basic_ostream<char, std::char_traits<char> >& stream);
|
||||
xml_writer_stream(std::basic_ostream<wchar_t, std::char_traits<wchar_t> >& stream);
|
||||
|
||||
virtual void write(const void* data, size_t size);
|
||||
virtual void write(const void* data, size_t size) PUGIXML_OVERRIDE;
|
||||
|
||||
private:
|
||||
std::basic_ostream<char, std::char_traits<char> >* narrow_stream;
|
||||
@ -299,13 +345,13 @@ namespace pugi
|
||||
|
||||
private:
|
||||
xml_attribute_struct* _attr;
|
||||
|
||||
|
||||
typedef void (*unspecified_bool_type)(xml_attribute***);
|
||||
|
||||
public:
|
||||
// Default constructor. Constructs an empty attribute.
|
||||
xml_attribute();
|
||||
|
||||
|
||||
// Constructs attribute from internal pointer
|
||||
explicit xml_attribute(xml_attribute_struct* attr);
|
||||
|
||||
@ -354,6 +400,8 @@ namespace pugi
|
||||
// Set attribute value with type conversion (numbers are converted to strings, boolean is converted to "true"/"false")
|
||||
bool set_value(int rhs);
|
||||
bool set_value(unsigned int rhs);
|
||||
bool set_value(long rhs);
|
||||
bool set_value(unsigned long rhs);
|
||||
bool set_value(double rhs);
|
||||
bool set_value(float rhs);
|
||||
bool set_value(bool rhs);
|
||||
@ -367,6 +415,8 @@ namespace pugi
|
||||
xml_attribute& operator=(const char_t* rhs);
|
||||
xml_attribute& operator=(int rhs);
|
||||
xml_attribute& operator=(unsigned int rhs);
|
||||
xml_attribute& operator=(long rhs);
|
||||
xml_attribute& operator=(unsigned long rhs);
|
||||
xml_attribute& operator=(double rhs);
|
||||
xml_attribute& operator=(float rhs);
|
||||
xml_attribute& operator=(bool rhs);
|
||||
@ -417,7 +467,7 @@ namespace pugi
|
||||
|
||||
// Borland C++ workaround
|
||||
bool operator!() const;
|
||||
|
||||
|
||||
// Comparison operators (compares wrapped node pointers)
|
||||
bool operator==(const xml_node& r) const;
|
||||
bool operator!=(const xml_node& r) const;
|
||||
@ -438,7 +488,7 @@ namespace pugi
|
||||
// Get node value, or "" if node is empty or it has no value
|
||||
// Note: For <node>text</node> node.value() does not return "text"! Use child_value() or text() methods to access text inside nodes.
|
||||
const char_t* value() const;
|
||||
|
||||
|
||||
// Get attribute list
|
||||
xml_attribute first_attribute() const;
|
||||
xml_attribute last_attribute() const;
|
||||
@ -450,7 +500,7 @@ namespace pugi
|
||||
// Get next/previous sibling in the children list of the parent node
|
||||
xml_node next_sibling() const;
|
||||
xml_node previous_sibling() const;
|
||||
|
||||
|
||||
// Get parent node
|
||||
xml_node parent() const;
|
||||
|
||||
@ -478,7 +528,7 @@ namespace pugi
|
||||
// Set node name/value (returns false if node is empty, there is not enough memory, or node can not have name/value)
|
||||
bool set_name(const char_t* rhs);
|
||||
bool set_value(const char_t* rhs);
|
||||
|
||||
|
||||
// Add attribute with specified name. Returns added attribute, or empty attribute on errors.
|
||||
xml_attribute append_attribute(const char_t* name);
|
||||
xml_attribute prepend_attribute(const char_t* name);
|
||||
@ -532,11 +582,11 @@ namespace pugi
|
||||
// Find the first attribute for which the predicate returns true.
// Returns an empty (default-constructed) xml_attribute when this node is
// empty or when no attribute satisfies the predicate.
template <typename Predicate> xml_attribute find_attribute(Predicate pred) const
{
	if (_root)
	{
		// Walk the attribute list front to back, stopping at the first match.
		xml_attribute candidate = first_attribute();

		while (candidate)
		{
			if (pred(candidate)) return candidate;

			candidate = candidate.next_attribute();
		}
	}

	return xml_attribute();
}
|
||||
|
||||
@ -544,11 +594,11 @@ namespace pugi
|
||||
// Find the first direct child node for which the predicate returns true.
// Returns an empty (default-constructed) xml_node when this node is empty
// or when no child satisfies the predicate.
template <typename Predicate> xml_node find_child(Predicate pred) const
{
	if (_root)
	{
		// Walk the immediate children in sibling order, stopping at the first match.
		xml_node candidate = first_child();

		while (candidate)
		{
			if (pred(candidate)) return candidate;

			candidate = candidate.next_sibling();
		}
	}

	return xml_node();
}
|
||||
|
||||
@ -558,7 +608,7 @@ namespace pugi
|
||||
if (!_root) return xml_node();
|
||||
|
||||
xml_node cur = first_child();
|
||||
|
||||
|
||||
while (cur._root && cur._root != _root)
|
||||
{
|
||||
if (pred(cur)) return cur;
|
||||
@ -590,7 +640,7 @@ namespace pugi
|
||||
|
||||
// Recursively traverse subtree with xml_tree_walker
|
||||
bool traverse(xml_tree_walker& walker);
|
||||
|
||||
|
||||
#ifndef PUGIXML_NO_XPATH
|
||||
// Select single node by evaluating XPath query. Returns first node from the resulting node set.
|
||||
xpath_node select_node(const char_t* query, xpath_variable_set* variables = 0) const;
|
||||
@ -601,11 +651,11 @@ namespace pugi
|
||||
xpath_node_set select_nodes(const xpath_query& query) const;
|
||||
|
||||
// (deprecated: use select_node instead) Select single node by evaluating XPath query.
|
||||
xpath_node select_single_node(const char_t* query, xpath_variable_set* variables = 0) const;
|
||||
xpath_node select_single_node(const xpath_query& query) const;
|
||||
PUGIXML_DEPRECATED xpath_node select_single_node(const char_t* query, xpath_variable_set* variables = 0) const;
|
||||
PUGIXML_DEPRECATED xpath_node select_single_node(const xpath_query& query) const;
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
// Print subtree using a writer object
|
||||
void print(xml_writer& writer, const char_t* indent = PUGIXML_TEXT("\t"), unsigned int flags = format_default, xml_encoding encoding = encoding_auto, unsigned int depth = 0) const;
|
||||
|
||||
@ -701,6 +751,8 @@ namespace pugi
|
||||
// Set text with type conversion (numbers are converted to strings, boolean is converted to "true"/"false")
|
||||
bool set(int rhs);
|
||||
bool set(unsigned int rhs);
|
||||
bool set(long rhs);
|
||||
bool set(unsigned long rhs);
|
||||
bool set(double rhs);
|
||||
bool set(float rhs);
|
||||
bool set(bool rhs);
|
||||
@ -714,6 +766,8 @@ namespace pugi
|
||||
xml_text& operator=(const char_t* rhs);
|
||||
xml_text& operator=(int rhs);
|
||||
xml_text& operator=(unsigned int rhs);
|
||||
xml_text& operator=(long rhs);
|
||||
xml_text& operator=(unsigned long rhs);
|
||||
xml_text& operator=(double rhs);
|
||||
xml_text& operator=(float rhs);
|
||||
xml_text& operator=(bool rhs);
|
||||
@ -867,11 +921,11 @@ namespace pugi
|
||||
|
||||
private:
|
||||
int _depth;
|
||||
|
||||
|
||||
protected:
|
||||
// Get current traversal depth
|
||||
int depth() const;
|
||||
|
||||
|
||||
public:
|
||||
xml_tree_walker();
|
||||
virtual ~xml_tree_walker();
|
||||
@ -942,13 +996,14 @@ namespace pugi
|
||||
char_t* _buffer;
|
||||
|
||||
char _memory[192];
|
||||
|
||||
|
||||
// Non-copyable semantics
|
||||
xml_document(const xml_document&);
|
||||
const xml_document& operator=(const xml_document&);
|
||||
xml_document& operator=(const xml_document&);
|
||||
|
||||
void create();
|
||||
void destroy();
|
||||
void _create();
|
||||
void _destroy();
|
||||
void _move(xml_document& rhs) PUGIXML_NOEXCEPT_IF_NOT_COMPACT;
|
||||
|
||||
public:
|
||||
// Default constructor, makes empty document
|
||||
@ -957,6 +1012,12 @@ namespace pugi
|
||||
// Destructor, invalidates all node/attribute handles to this document
|
||||
~xml_document();
|
||||
|
||||
#ifdef PUGIXML_HAS_MOVE
|
||||
// Move semantics support
|
||||
xml_document(xml_document&& rhs) PUGIXML_NOEXCEPT_IF_NOT_COMPACT;
|
||||
xml_document& operator=(xml_document&& rhs) PUGIXML_NOEXCEPT_IF_NOT_COMPACT;
|
||||
#endif
|
||||
|
||||
// Removes all nodes, leaving the empty document
|
||||
void reset();
|
||||
|
||||
@ -970,7 +1031,7 @@ namespace pugi
|
||||
#endif
|
||||
|
||||
// (deprecated: use load_string instead) Load document from zero-terminated string. No encoding conversions are applied.
|
||||
xml_parse_result load(const char_t* contents, unsigned int options = parse_default);
|
||||
PUGIXML_DEPRECATED xml_parse_result load(const char_t* contents, unsigned int options = parse_default);
|
||||
|
||||
// Load document from zero-terminated string. No encoding conversions are applied.
|
||||
xml_parse_result load_string(const char_t* contents, unsigned int options = parse_default);
|
||||
@ -1051,7 +1112,7 @@ namespace pugi
|
||||
// Non-copyable semantics
|
||||
xpath_variable(const xpath_variable&);
|
||||
xpath_variable& operator=(const xpath_variable&);
|
||||
|
||||
|
||||
public:
|
||||
// Get variable name
|
||||
const char_t* name() const;
|
||||
@ -1095,10 +1156,10 @@ namespace pugi
|
||||
xpath_variable_set(const xpath_variable_set& rhs);
|
||||
xpath_variable_set& operator=(const xpath_variable_set& rhs);
|
||||
|
||||
#if __cplusplus >= 201103
|
||||
#ifdef PUGIXML_HAS_MOVE
|
||||
// Move semantics support
|
||||
xpath_variable_set(xpath_variable_set&& rhs);
|
||||
xpath_variable_set& operator=(xpath_variable_set&& rhs);
|
||||
xpath_variable_set(xpath_variable_set&& rhs) PUGIXML_NOEXCEPT;
|
||||
xpath_variable_set& operator=(xpath_variable_set&& rhs) PUGIXML_NOEXCEPT;
|
||||
#endif
|
||||
|
||||
// Add a new variable or get the existing one, if the types match
|
||||
@ -1139,29 +1200,29 @@ namespace pugi
|
||||
// Destructor
|
||||
~xpath_query();
|
||||
|
||||
#if __cplusplus >= 201103
|
||||
#ifdef PUGIXML_HAS_MOVE
|
||||
// Move semantics support
|
||||
xpath_query(xpath_query&& rhs);
|
||||
xpath_query& operator=(xpath_query&& rhs);
|
||||
xpath_query(xpath_query&& rhs) PUGIXML_NOEXCEPT;
|
||||
xpath_query& operator=(xpath_query&& rhs) PUGIXML_NOEXCEPT;
|
||||
#endif
|
||||
|
||||
// Get query expression return type
|
||||
xpath_value_type return_type() const;
|
||||
|
||||
|
||||
// Evaluate expression as boolean value in the specified context; performs type conversion if necessary.
|
||||
// If PUGIXML_NO_EXCEPTIONS is not defined, throws std::bad_alloc on out of memory errors.
|
||||
bool evaluate_boolean(const xpath_node& n) const;
|
||||
|
||||
|
||||
// Evaluate expression as double value in the specified context; performs type conversion if necessary.
|
||||
// If PUGIXML_NO_EXCEPTIONS is not defined, throws std::bad_alloc on out of memory errors.
|
||||
double evaluate_number(const xpath_node& n) const;
|
||||
|
||||
|
||||
#ifndef PUGIXML_NO_STL
|
||||
// Evaluate expression as string value in the specified context; performs type conversion if necessary.
|
||||
// If PUGIXML_NO_EXCEPTIONS is not defined, throws std::bad_alloc on out of memory errors.
|
||||
string_t evaluate_string(const xpath_node& n) const;
|
||||
#endif
|
||||
|
||||
|
||||
// Evaluate expression as string value in the specified context; performs type conversion if necessary.
|
||||
// At most capacity characters are written to the destination buffer, full result size is returned (includes terminating zero).
|
||||
// If PUGIXML_NO_EXCEPTIONS is not defined, throws std::bad_alloc on out of memory errors.
|
||||
@ -1188,7 +1249,7 @@ namespace pugi
|
||||
// Borland C++ workaround
|
||||
bool operator!() const;
|
||||
};
|
||||
|
||||
|
||||
#ifndef PUGIXML_NO_EXCEPTIONS
|
||||
// XPath exception class
|
||||
class PUGIXML_CLASS xpath_exception: public std::exception
|
||||
@ -1201,26 +1262,26 @@ namespace pugi
|
||||
explicit xpath_exception(const xpath_parse_result& result);
|
||||
|
||||
// Get error message
|
||||
virtual const char* what() const throw();
|
||||
virtual const char* what() const throw() PUGIXML_OVERRIDE;
|
||||
|
||||
// Get parse result
|
||||
const xpath_parse_result& result() const;
|
||||
};
|
||||
#endif
|
||||
|
||||
|
||||
// XPath node class (either xml_node or xml_attribute)
|
||||
class PUGIXML_CLASS xpath_node
|
||||
{
|
||||
private:
|
||||
xml_node _node;
|
||||
xml_attribute _attribute;
|
||||
|
||||
|
||||
typedef void (*unspecified_bool_type)(xpath_node***);
|
||||
|
||||
public:
|
||||
// Default constructor; constructs empty XPath node
|
||||
xpath_node();
|
||||
|
||||
|
||||
// Construct XPath node from XML node/attribute
|
||||
xpath_node(const xml_node& node);
|
||||
xpath_node(const xml_attribute& attribute, const xml_node& parent);
|
||||
@ -1228,13 +1289,13 @@ namespace pugi
|
||||
// Get node/attribute, if any
|
||||
xml_node node() const;
|
||||
xml_attribute attribute() const;
|
||||
|
||||
|
||||
// Get parent of contained node/attribute
|
||||
xml_node parent() const;
|
||||
|
||||
// Safe bool conversion operator
|
||||
operator unspecified_bool_type() const;
|
||||
|
||||
|
||||
// Borland C++ workaround
|
||||
bool operator!() const;
|
||||
|
||||
@ -1260,13 +1321,13 @@ namespace pugi
|
||||
type_sorted, // Sorted by document order (ascending)
|
||||
type_sorted_reverse // Sorted by document order (descending)
|
||||
};
|
||||
|
||||
|
||||
// Constant iterator type
|
||||
typedef const xpath_node* const_iterator;
|
||||
|
||||
// We define non-constant iterator to be the same as constant iterator so that various generic algorithms (i.e. boost foreach) work
|
||||
typedef const xpath_node* iterator;
|
||||
|
||||
|
||||
// Default constructor. Constructs empty set.
|
||||
xpath_node_set();
|
||||
|
||||
@ -1275,49 +1336,49 @@ namespace pugi
|
||||
|
||||
// Destructor
|
||||
~xpath_node_set();
|
||||
|
||||
|
||||
// Copy constructor/assignment operator
|
||||
xpath_node_set(const xpath_node_set& ns);
|
||||
xpath_node_set& operator=(const xpath_node_set& ns);
|
||||
|
||||
#if __cplusplus >= 201103
|
||||
#ifdef PUGIXML_HAS_MOVE
|
||||
// Move semantics support
|
||||
xpath_node_set(xpath_node_set&& rhs);
|
||||
xpath_node_set& operator=(xpath_node_set&& rhs);
|
||||
xpath_node_set(xpath_node_set&& rhs) PUGIXML_NOEXCEPT;
|
||||
xpath_node_set& operator=(xpath_node_set&& rhs) PUGIXML_NOEXCEPT;
|
||||
#endif
|
||||
|
||||
// Get collection type
|
||||
type_t type() const;
|
||||
|
||||
|
||||
// Get collection size
|
||||
size_t size() const;
|
||||
|
||||
// Indexing operator
|
||||
const xpath_node& operator[](size_t index) const;
|
||||
|
||||
|
||||
// Collection iterators
|
||||
const_iterator begin() const;
|
||||
const_iterator end() const;
|
||||
|
||||
// Sort the collection in ascending/descending order by document order
|
||||
void sort(bool reverse = false);
|
||||
|
||||
|
||||
// Get first node in the collection by document order
|
||||
xpath_node first() const;
|
||||
|
||||
|
||||
// Check if collection is empty
|
||||
bool empty() const;
|
||||
|
||||
|
||||
private:
|
||||
type_t _type;
|
||||
|
||||
|
||||
xpath_node _storage;
|
||||
|
||||
|
||||
xpath_node* _begin;
|
||||
xpath_node* _end;
|
||||
|
||||
void _assign(const_iterator begin, const_iterator end, type_t type);
|
||||
void _move(xpath_node_set& rhs);
|
||||
void _move(xpath_node_set& rhs) PUGIXML_NOEXCEPT;
|
||||
};
|
||||
#endif
|
||||
|
||||
@ -1325,7 +1386,7 @@ namespace pugi
|
||||
// Convert wide string to UTF8
|
||||
std::basic_string<char, std::char_traits<char>, std::allocator<char> > PUGIXML_FUNCTION as_utf8(const wchar_t* str);
|
||||
std::basic_string<char, std::char_traits<char>, std::allocator<char> > PUGIXML_FUNCTION as_utf8(const std::basic_string<wchar_t, std::char_traits<wchar_t>, std::allocator<wchar_t> >& str);
|
||||
|
||||
|
||||
// Convert UTF8 to wide string
|
||||
std::basic_string<wchar_t, std::char_traits<wchar_t>, std::allocator<wchar_t> > PUGIXML_FUNCTION as_wide(const char* str);
|
||||
std::basic_string<wchar_t, std::char_traits<wchar_t>, std::allocator<wchar_t> > PUGIXML_FUNCTION as_wide(const std::basic_string<char, std::char_traits<char>, std::allocator<char> >& str);
|
||||
@ -1333,13 +1394,13 @@ namespace pugi
|
||||
|
||||
// Memory allocation function interface; returns pointer to allocated memory or NULL on failure
|
||||
typedef void* (*allocation_function)(size_t size);
|
||||
|
||||
|
||||
// Memory deallocation function interface
|
||||
typedef void (*deallocation_function)(void* ptr);
|
||||
|
||||
// Override default memory management functions. All subsequent allocations/deallocations will be performed via supplied functions.
|
||||
void PUGIXML_FUNCTION set_memory_management_functions(allocation_function allocate, deallocation_function deallocate);
|
||||
|
||||
|
||||
// Get current memory management functions
|
||||
allocation_function PUGIXML_FUNCTION get_memory_allocation_function();
|
||||
deallocation_function PUGIXML_FUNCTION get_memory_deallocation_function();
|
||||
@ -1375,7 +1436,7 @@ namespace std
|
||||
#endif
|
||||
|
||||
/**
|
||||
* Copyright (c) 2006-2015 Arseny Kapoulkine
|
||||
* Copyright (c) 2006-2018 Arseny Kapoulkine
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
@ -1388,7 +1449,7 @@ namespace std
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
|
@ -1,6 +1,6 @@
|
||||
pugixml 1.6 - an XML processing library
|
||||
pugixml 1.9 - an XML processing library
|
||||
|
||||
Copyright (C) 2006-2015, by Arseny Kapoulkine (arseny.kapoulkine@gmail.com)
|
||||
Copyright (C) 2006-2018, by Arseny Kapoulkine (arseny.kapoulkine@gmail.com)
|
||||
Report bugs and download new versions at http://pugixml.org/
|
||||
|
||||
This is the distribution of pugixml, which is a C++ XML processing library,
|
||||
@ -28,7 +28,7 @@ The distribution contains the following folders:
|
||||
|
||||
This library is distributed under the MIT License:
|
||||
|
||||
Copyright (c) 2006-2015 Arseny Kapoulkine
|
||||
Copyright (c) 2006-2018 Arseny Kapoulkine
|
||||
|
||||
Permission is hereby granted, free of charge, to any person
|
||||
obtaining a copy of this software and associated documentation
|
||||
|
@ -52,6 +52,35 @@ namespace QCD {
|
||||
{
|
||||
}
|
||||
|
||||
///////////////////////////////////////////////////////////////
|
||||
// Physical surface field utilities
|
||||
///////////////////////////////////////////////////////////////
|
||||
// Builds the 4D field exported4d from the 5D field solution5d.
// solution5d must live on FermionGrid() and exported4d on GaugeGrid();
// both are checked with conformable().
template<class Impl>
|
||||
void CayleyFermion5D<Impl>::ExportPhysicalFermionSolution(const FermionField &solution5d,FermionField &exported4d)
|
||||
{
|
||||
int Ls = this->Ls;
|
||||
// 5D scratch field, initialised as a copy of the input solution.
FermionField tmp(this->FermionGrid());
|
||||
tmp = solution5d;
|
||||
conformable(solution5d._grid,this->FermionGrid());
|
||||
conformable(exported4d._grid,this->GaugeGrid());
|
||||
// NOTE(review): presumably these two calls leave, in slice 0 of tmp,
// P_- solution5d[s=0] + P_+ solution5d[s=Ls-1]; confirm against the
// definitions of axpby_ssp_pminus / axpby_ssp_pplus.
axpby_ssp_pminus(tmp, 0., solution5d, 1., solution5d, 0, 0);
|
||||
axpby_ssp_pplus (tmp, 1., tmp , 1., solution5d, 0, Ls-1);
|
||||
// Copy slice 0 of tmp into the 4D output (last argument 0; presumably the
// axis index of the fifth dimension; confirm against ExtractSlice).
ExtractSlice(exported4d, tmp, 0, 0);
|
||||
}
|
||||
// Builds the 5D field imported5d from the 4D field input4d.
// imported5d must live on FermionGrid() and input4d on GaugeGrid();
// both are checked with conformable().
template<class Impl>
|
||||
void CayleyFermion5D<Impl>::ImportPhysicalFermionSource(const FermionField &input4d,FermionField &imported5d)
|
||||
{
|
||||
int Ls = this->Ls;
|
||||
FermionField tmp(this->FermionGrid());
|
||||
conformable(imported5d._grid,this->FermionGrid());
|
||||
conformable(input4d._grid ,this->GaugeGrid());
|
||||
// Start from a zero 5D field and place the 4D input at slices 0 and Ls-1.
tmp = zero;
|
||||
InsertSlice(input4d, tmp, 0 , 0);
|
||||
InsertSlice(input4d, tmp, Ls-1, 0);
|
||||
// NOTE(review): presumably these project slice 0 with P_+ and slice Ls-1
// with P_- in place; confirm against axpby_ssp_pplus / axpby_ssp_pminus.
axpby_ssp_pplus (tmp, 0., tmp, 1., tmp, 0, 0);
|
||||
axpby_ssp_pminus(tmp, 0., tmp, 1., tmp, Ls-1, Ls-1);
|
||||
// Apply Dminus to the assembled 5D field, writing the result to imported5d.
Dminus(tmp,imported5d);
|
||||
}
|
||||
template<class Impl>
|
||||
void CayleyFermion5D<Impl>::Dminus(const FermionField &psi, FermionField &chi)
|
||||
{
|
||||
|
@ -83,8 +83,13 @@ namespace Grid {
|
||||
virtual void M5D (const FermionField &psi, FermionField &chi);
|
||||
virtual void M5Ddag(const FermionField &psi, FermionField &chi);
|
||||
|
||||
///////////////////////////////////////////////////////////////
|
||||
// Physical surface field utilities
|
||||
///////////////////////////////////////////////////////////////
|
||||
virtual void Dminus(const FermionField &psi, FermionField &chi);
|
||||
virtual void DminusDag(const FermionField &psi, FermionField &chi);
|
||||
virtual void ExportPhysicalFermionSolution(const FermionField &solution5d,FermionField &exported4d);
|
||||
virtual void ImportPhysicalFermionSource(const FermionField &input4d,FermionField &imported5d);
|
||||
|
||||
/////////////////////////////////////////////////////
|
||||
// Instantiate different versions depending on Impl
|
||||
|
@ -295,6 +295,27 @@ namespace Grid {
|
||||
assert((Ls&0x1)==1); // Odd Ls required
|
||||
}
|
||||
|
||||
template<class Impl>
|
||||
void ContinuedFractionFermion5D<Impl>::ExportPhysicalFermionSolution(const FermionField &solution5d,FermionField &exported4d)
|
||||
{
|
||||
int Ls = this->Ls;
|
||||
conformable(solution5d._grid,this->FermionGrid());
|
||||
conformable(exported4d._grid,this->GaugeGrid());
|
||||
ExtractSlice(exported4d, solution5d, Ls-1, Ls-1);
|
||||
}
|
||||
template<class Impl>
|
||||
void ContinuedFractionFermion5D<Impl>::ImportPhysicalFermionSource(const FermionField &input4d,FermionField &imported5d)
|
||||
{
|
||||
int Ls = this->Ls;
|
||||
conformable(imported5d._grid,this->FermionGrid());
|
||||
conformable(input4d._grid ,this->GaugeGrid());
|
||||
FermionField tmp(this->FermionGrid());
|
||||
tmp=zero;
|
||||
InsertSlice(input4d, tmp, Ls-1, Ls-1);
|
||||
tmp=Gamma(Gamma::Algebra::Gamma5)*tmp;
|
||||
this->Dminus(tmp,imported5d);
|
||||
}
|
||||
|
||||
FermOpTemplateInstantiate(ContinuedFractionFermion5D);
|
||||
|
||||
}
|
||||
|
@ -65,6 +65,14 @@ namespace Grid {
|
||||
// Efficient support for multigrid coarsening
|
||||
virtual void Mdir (const FermionField &in, FermionField &out,int dir,int disp);
|
||||
|
||||
///////////////////////////////////////////////////////////////
|
||||
// Physical surface field utilities
|
||||
///////////////////////////////////////////////////////////////
|
||||
// virtual void Dminus(const FermionField &psi, FermionField &chi); // Inherit trivial case
|
||||
// virtual void DminusDag(const FermionField &psi, FermionField &chi); // Inherit trivial case
|
||||
virtual void ExportPhysicalFermionSolution(const FermionField &solution5d,FermionField &exported4d);
|
||||
virtual void ImportPhysicalFermionSource (const FermionField &input4d,FermionField &imported5d);
|
||||
|
||||
// Constructors
|
||||
ContinuedFractionFermion5D(GaugeField &_Umu,
|
||||
GridCartesian &FiveDimGrid,
|
||||
|
@ -128,6 +128,19 @@ namespace Grid {
|
||||
std::vector<Real> mom,
|
||||
unsigned int tmin,
|
||||
unsigned int tmax)=0;
|
||||
///////////////////////////////////////////////
|
||||
// Physical field import/export
|
||||
///////////////////////////////////////////////
|
||||
// Default implementation: copies psi into chi unchanged. Virtual, so derived operators may override it.
virtual void Dminus(const FermionField &psi, FermionField &chi) { chi=psi; }
|
||||
// Default implementation: copies psi into chi unchanged. Virtual, so derived operators may override it.
virtual void DminusDag(const FermionField &psi, FermionField &chi) { chi=psi; }
|
||||
// Default source import: the imported field is a plain copy of the input.
// Virtual, so derived operators may override it.
virtual void ImportPhysicalFermionSource(const FermionField &input,FermionField &imported)
|
||||
{
|
||||
imported = input;
|
||||
};
|
||||
// Default solution export: the exported field is a plain copy of the solution.
// Virtual, so derived operators may override it.
virtual void ExportPhysicalFermionSolution(const FermionField &solution,FermionField &exported)
|
||||
{
|
||||
exported=solution;
|
||||
};
|
||||
};
|
||||
|
||||
}
|
||||
|
@ -396,6 +396,27 @@ namespace Grid {
|
||||
amax=zolo_hi;
|
||||
}
|
||||
|
||||
template<class Impl>
|
||||
void PartialFractionFermion5D<Impl>::ExportPhysicalFermionSolution(const FermionField &solution5d,FermionField &exported4d)
|
||||
{
|
||||
int Ls = this->Ls;
|
||||
conformable(solution5d._grid,this->FermionGrid());
|
||||
conformable(exported4d._grid,this->GaugeGrid());
|
||||
ExtractSlice(exported4d, solution5d, Ls-1, Ls-1);
|
||||
}
|
||||
template<class Impl>
|
||||
void PartialFractionFermion5D<Impl>::ImportPhysicalFermionSource(const FermionField &input4d,FermionField &imported5d)
|
||||
{
|
||||
int Ls = this->Ls;
|
||||
conformable(imported5d._grid,this->FermionGrid());
|
||||
conformable(input4d._grid ,this->GaugeGrid());
|
||||
FermionField tmp(this->FermionGrid());
|
||||
tmp=zero;
|
||||
InsertSlice(input4d, tmp, Ls-1, Ls-1);
|
||||
tmp=Gamma(Gamma::Algebra::Gamma5)*tmp;
|
||||
this->Dminus(tmp,imported5d);
|
||||
}
|
||||
|
||||
// Constructors
|
||||
template<class Impl>
|
||||
PartialFractionFermion5D<Impl>::PartialFractionFermion5D(GaugeField &_Umu,
|
||||
|
@ -70,6 +70,12 @@ namespace Grid {
|
||||
// Efficient support for multigrid coarsening
|
||||
virtual void Mdir (const FermionField &in, FermionField &out,int dir,int disp);
|
||||
|
||||
///////////////////////////////////////////////////////////////
|
||||
// Physical surface field utilities
|
||||
///////////////////////////////////////////////////////////////
|
||||
virtual void ExportPhysicalFermionSolution(const FermionField &solution5d,FermionField &exported4d);
|
||||
virtual void ImportPhysicalFermionSource (const FermionField &input4d,FermionField &imported5d);
|
||||
|
||||
// Constructors
|
||||
PartialFractionFermion5D(GaugeField &_Umu,
|
||||
GridCartesian &FiveDimGrid,
|
||||
|
@ -69,39 +69,47 @@ class WilsonCompressorTemplate< _HCspinor, _Hspinor, _Spinor, projector,
|
||||
/*****************************************************/
|
||||
/* Compress includes precision change if mpi data is not same */
|
||||
/*****************************************************/
|
||||
inline void Compress(SiteHalfSpinor *buf,Integer o,const SiteSpinor &in) {
|
||||
projector::Proj(buf[o],in,mu,dag);
|
||||
inline void Compress(SiteHalfSpinor * __restrict__ buf,Integer o,const SiteSpinor &in) {
|
||||
SiteHalfSpinor tmp;
|
||||
projector::Proj(tmp,in,mu,dag);
|
||||
vstream(buf[o],tmp);
|
||||
}
|
||||
|
||||
/*****************************************************/
|
||||
/* Exchange includes precision change if mpi data is not same */
|
||||
/*****************************************************/
|
||||
inline void Exchange(SiteHalfSpinor *mp,
|
||||
SiteHalfSpinor *vp0,
|
||||
SiteHalfSpinor *vp1,
|
||||
inline void Exchange(SiteHalfSpinor * __restrict__ mp,
|
||||
const SiteHalfSpinor * __restrict__ vp0,
|
||||
const SiteHalfSpinor * __restrict__ vp1,
|
||||
Integer type,Integer o){
|
||||
exchange(mp[2*o],mp[2*o+1],vp0[o],vp1[o],type);
|
||||
SiteHalfSpinor tmp1;
|
||||
SiteHalfSpinor tmp2;
|
||||
exchange(tmp1,tmp2,vp0[o],vp1[o],type);
|
||||
vstream(mp[2*o ],tmp1);
|
||||
vstream(mp[2*o+1],tmp2);
|
||||
}
|
||||
|
||||
/*****************************************************/
|
||||
/* Have a decompression step if mpi data is not same */
|
||||
/*****************************************************/
|
||||
inline void Decompress(SiteHalfSpinor *out,
|
||||
SiteHalfSpinor *in, Integer o) {
|
||||
inline void Decompress(SiteHalfSpinor * __restrict__ out,
|
||||
SiteHalfSpinor * __restrict__ in, Integer o) {
|
||||
assert(0);
|
||||
}
|
||||
|
||||
/*****************************************************/
|
||||
/* Compress Exchange */
|
||||
/*****************************************************/
|
||||
inline void CompressExchange(SiteHalfSpinor *out0,
|
||||
SiteHalfSpinor *out1,
|
||||
const SiteSpinor *in,
|
||||
inline void CompressExchange(SiteHalfSpinor * __restrict__ out0,
|
||||
SiteHalfSpinor * __restrict__ out1,
|
||||
const SiteSpinor * __restrict__ in,
|
||||
Integer j,Integer k, Integer m,Integer type){
|
||||
SiteHalfSpinor temp1, temp2,temp3,temp4;
|
||||
projector::Proj(temp1,in[k],mu,dag);
|
||||
projector::Proj(temp2,in[m],mu,dag);
|
||||
exchange(out0[j],out1[j],temp1,temp2,type);
|
||||
exchange(temp3,temp4,temp1,temp2,type);
|
||||
vstream(out0[j],temp3);
|
||||
vstream(out1[j],temp4);
|
||||
}
|
||||
|
||||
/*****************************************************/
|
||||
|
@ -30,181 +30,60 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
|
||||
|
||||
#define REGISTER
|
||||
|
||||
#define LOAD_CHIMU_BODY(F) \
|
||||
Chimu_00=ref(F)(0)(0); \
|
||||
Chimu_01=ref(F)(0)(1); \
|
||||
Chimu_02=ref(F)(0)(2); \
|
||||
Chimu_10=ref(F)(1)(0); \
|
||||
Chimu_11=ref(F)(1)(1); \
|
||||
Chimu_12=ref(F)(1)(2); \
|
||||
Chimu_20=ref(F)(2)(0); \
|
||||
Chimu_21=ref(F)(2)(1); \
|
||||
Chimu_22=ref(F)(2)(2); \
|
||||
Chimu_30=ref(F)(3)(0); \
|
||||
Chimu_31=ref(F)(3)(1); \
|
||||
Chimu_32=ref(F)(3)(2)
|
||||
#define LOAD_CHIMU \
|
||||
{const SiteSpinor & ref (in._odata[offset]); \
|
||||
Chimu_00=ref()(0)(0);\
|
||||
Chimu_01=ref()(0)(1);\
|
||||
Chimu_02=ref()(0)(2);\
|
||||
Chimu_10=ref()(1)(0);\
|
||||
Chimu_11=ref()(1)(1);\
|
||||
Chimu_12=ref()(1)(2);\
|
||||
Chimu_20=ref()(2)(0);\
|
||||
Chimu_21=ref()(2)(1);\
|
||||
Chimu_22=ref()(2)(2);\
|
||||
Chimu_30=ref()(3)(0);\
|
||||
Chimu_31=ref()(3)(1);\
|
||||
Chimu_32=ref()(3)(2);}
|
||||
|
||||
#define LOAD_CHIMU(DIR,F,PERM) \
|
||||
{ const SiteSpinor & ref (in._odata[offset]); LOAD_CHIMU_BODY(F); }
|
||||
|
||||
#define LOAD_CHI_BODY(F) \
|
||||
Chi_00 = ref(F)(0)(0);\
|
||||
Chi_01 = ref(F)(0)(1);\
|
||||
Chi_02 = ref(F)(0)(2);\
|
||||
Chi_10 = ref(F)(1)(0);\
|
||||
Chi_11 = ref(F)(1)(1);\
|
||||
Chi_12 = ref(F)(1)(2)
|
||||
|
||||
#define LOAD_CHI(DIR,F,PERM) \
|
||||
{const SiteHalfSpinor &ref(buf[offset]); LOAD_CHI_BODY(F); }
|
||||
|
||||
|
||||
//G-parity implementations using in-place intrinsic ops
|
||||
|
||||
//1l 1h -> 1h 1l
|
||||
//0l 0h , 1h 1l -> 0l 1h 0h,1l
|
||||
//0h,1l -> 1l,0h
|
||||
//if( (distance == 1 && !perm_will_occur) || (distance == -1 && perm_will_occur) )
|
||||
//Pulled fermion through forwards face, GPBC on upper component
|
||||
//Need 0= 0l 1h 1= 1l 0h
|
||||
//else if( (distance == -1 && !perm) || (distance == 1 && perm) )
|
||||
//Pulled fermion through backwards face, GPBC on lower component
|
||||
//Need 0= 1l 0h 1= 0l 1h
|
||||
|
||||
//1l 1h -> 1h 1l
|
||||
//0l 0h , 1h 1l -> 0l 1h 0h,1l
|
||||
#define DO_TWIST_0L_1H(INTO,S,C,F, PERM, tmp1, tmp2, tmp3) \
|
||||
permute##PERM(tmp1, ref(1)(S)(C)); \
|
||||
exchange##PERM(tmp2,tmp3, ref(0)(S)(C), tmp1); \
|
||||
INTO = tmp2;
|
||||
|
||||
//0l 0h -> 0h 0l
|
||||
//1l 1h, 0h 0l -> 1l 0h, 1h 0l
|
||||
#define DO_TWIST_1L_0H(INTO,S,C,F, PERM, tmp1, tmp2, tmp3) \
|
||||
permute##PERM(tmp1, ref(0)(S)(C)); \
|
||||
exchange##PERM(tmp2,tmp3, ref(1)(S)(C), tmp1); \
|
||||
INTO = tmp2;
|
||||
|
||||
|
||||
|
||||
|
||||
#define LOAD_CHI_SETUP(DIR,F) \
|
||||
g = F; \
|
||||
direction = st._directions[DIR]; \
|
||||
distance = st._distances[DIR]; \
|
||||
sl = st._grid->_simd_layout[direction]; \
|
||||
inplace_twist = 0; \
|
||||
if(SE->_around_the_world && this->Params.twists[DIR % 4]){ \
|
||||
if(sl == 1){ \
|
||||
g = (F+1) % 2; \
|
||||
}else{ \
|
||||
inplace_twist = 1; \
|
||||
} \
|
||||
}
|
||||
|
||||
#define LOAD_CHIMU_GPARITY_INPLACE_TWIST(DIR,F,PERM) \
|
||||
{ const SiteSpinor &ref(in._odata[offset]); \
|
||||
LOAD_CHI_SETUP(DIR,F); \
|
||||
if(!inplace_twist){ \
|
||||
LOAD_CHIMU_BODY(g); \
|
||||
}else{ \
|
||||
if( ( F==0 && ((distance == 1 && !perm) || (distance == -1 && perm)) ) || \
|
||||
( F==1 && ((distance == -1 && !perm) || (distance == 1 && perm)) ) ){ \
|
||||
DO_TWIST_0L_1H(Chimu_00,0,0,F,PERM, U_00,U_01,U_10); \
|
||||
DO_TWIST_0L_1H(Chimu_01,0,1,F,PERM, U_11,U_20,U_21); \
|
||||
DO_TWIST_0L_1H(Chimu_02,0,2,F,PERM, U_00,U_01,U_10); \
|
||||
DO_TWIST_0L_1H(Chimu_10,1,0,F,PERM, U_11,U_20,U_21); \
|
||||
DO_TWIST_0L_1H(Chimu_11,1,1,F,PERM, U_00,U_01,U_10); \
|
||||
DO_TWIST_0L_1H(Chimu_12,1,2,F,PERM, U_11,U_20,U_21); \
|
||||
DO_TWIST_0L_1H(Chimu_20,2,0,F,PERM, U_00,U_01,U_10); \
|
||||
DO_TWIST_0L_1H(Chimu_21,2,1,F,PERM, U_11,U_20,U_21); \
|
||||
DO_TWIST_0L_1H(Chimu_22,2,2,F,PERM, U_00,U_01,U_10); \
|
||||
DO_TWIST_0L_1H(Chimu_30,3,0,F,PERM, U_11,U_20,U_21); \
|
||||
DO_TWIST_0L_1H(Chimu_31,3,1,F,PERM, U_00,U_01,U_10); \
|
||||
DO_TWIST_0L_1H(Chimu_32,3,2,F,PERM, U_11,U_20,U_21); \
|
||||
}else{ \
|
||||
DO_TWIST_1L_0H(Chimu_00,0,0,F,PERM, U_00,U_01,U_10); \
|
||||
DO_TWIST_1L_0H(Chimu_01,0,1,F,PERM, U_11,U_20,U_21); \
|
||||
DO_TWIST_1L_0H(Chimu_02,0,2,F,PERM, U_00,U_01,U_10); \
|
||||
DO_TWIST_1L_0H(Chimu_10,1,0,F,PERM, U_11,U_20,U_21); \
|
||||
DO_TWIST_1L_0H(Chimu_11,1,1,F,PERM, U_00,U_01,U_10); \
|
||||
DO_TWIST_1L_0H(Chimu_12,1,2,F,PERM, U_11,U_20,U_21); \
|
||||
DO_TWIST_1L_0H(Chimu_20,2,0,F,PERM, U_00,U_01,U_10); \
|
||||
DO_TWIST_1L_0H(Chimu_21,2,1,F,PERM, U_11,U_20,U_21); \
|
||||
DO_TWIST_1L_0H(Chimu_22,2,2,F,PERM, U_00,U_01,U_10); \
|
||||
DO_TWIST_1L_0H(Chimu_30,3,0,F,PERM, U_11,U_20,U_21); \
|
||||
DO_TWIST_1L_0H(Chimu_31,3,1,F,PERM, U_00,U_01,U_10); \
|
||||
DO_TWIST_1L_0H(Chimu_32,3,2,F,PERM, U_11,U_20,U_21); \
|
||||
} \
|
||||
} \
|
||||
}
|
||||
|
||||
|
||||
#define LOAD_CHI_GPARITY_INPLACE_TWIST(DIR,F,PERM) \
|
||||
{ const SiteHalfSpinor &ref(buf[offset]); \
|
||||
LOAD_CHI_SETUP(DIR,F); \
|
||||
if(!inplace_twist){ \
|
||||
LOAD_CHI_BODY(g); \
|
||||
}else{ \
|
||||
if( ( F==0 && ((distance == 1 && !perm) || (distance == -1 && perm)) ) || \
|
||||
( F==1 && ((distance == -1 && !perm) || (distance == 1 && perm)) ) ){ \
|
||||
DO_TWIST_0L_1H(Chi_00,0,0,F,PERM, U_00,U_01,U_10); \
|
||||
DO_TWIST_0L_1H(Chi_01,0,1,F,PERM, U_11,U_20,U_21); \
|
||||
DO_TWIST_0L_1H(Chi_02,0,2,F,PERM, UChi_00,UChi_01,UChi_02); \
|
||||
DO_TWIST_0L_1H(Chi_10,1,0,F,PERM, UChi_10,UChi_11,UChi_12); \
|
||||
DO_TWIST_0L_1H(Chi_11,1,1,F,PERM, U_00,U_01,U_10); \
|
||||
DO_TWIST_0L_1H(Chi_12,1,2,F,PERM, U_11,U_20,U_21); \
|
||||
}else{ \
|
||||
DO_TWIST_1L_0H(Chi_00,0,0,F,PERM, U_00,U_01,U_10); \
|
||||
DO_TWIST_1L_0H(Chi_01,0,1,F,PERM, U_11,U_20,U_21); \
|
||||
DO_TWIST_1L_0H(Chi_02,0,2,F,PERM, UChi_00,UChi_01,UChi_02); \
|
||||
DO_TWIST_1L_0H(Chi_10,1,0,F,PERM, UChi_10,UChi_11,UChi_12); \
|
||||
DO_TWIST_1L_0H(Chi_11,1,1,F,PERM, U_00,U_01,U_10); \
|
||||
DO_TWIST_1L_0H(Chi_12,1,2,F,PERM, U_11,U_20,U_21); \
|
||||
} \
|
||||
} \
|
||||
}
|
||||
|
||||
|
||||
#define LOAD_CHI_GPARITY(DIR,F,PERM) LOAD_CHI_GPARITY_INPLACE_TWIST(DIR,F,PERM)
|
||||
#define LOAD_CHIMU_GPARITY(DIR,F,PERM) LOAD_CHIMU_GPARITY_INPLACE_TWIST(DIR,F,PERM)
|
||||
#define LOAD_CHI\
|
||||
{const SiteHalfSpinor &ref(buf[offset]); \
|
||||
Chi_00 = ref()(0)(0);\
|
||||
Chi_01 = ref()(0)(1);\
|
||||
Chi_02 = ref()(0)(2);\
|
||||
Chi_10 = ref()(1)(0);\
|
||||
Chi_11 = ref()(1)(1);\
|
||||
Chi_12 = ref()(1)(2);}
|
||||
|
||||
// To splat or not to splat depends on the implementation
|
||||
#define MULT_2SPIN_BODY \
|
||||
Impl::loadLinkElement(U_00,ref()(0,0)); \
|
||||
Impl::loadLinkElement(U_10,ref()(1,0)); \
|
||||
Impl::loadLinkElement(U_20,ref()(2,0)); \
|
||||
Impl::loadLinkElement(U_01,ref()(0,1)); \
|
||||
Impl::loadLinkElement(U_11,ref()(1,1)); \
|
||||
Impl::loadLinkElement(U_21,ref()(2,1)); \
|
||||
UChi_00 = U_00*Chi_00; \
|
||||
UChi_10 = U_00*Chi_10; \
|
||||
UChi_01 = U_10*Chi_00; \
|
||||
UChi_11 = U_10*Chi_10; \
|
||||
UChi_02 = U_20*Chi_00; \
|
||||
UChi_12 = U_20*Chi_10; \
|
||||
UChi_00+= U_01*Chi_01; \
|
||||
UChi_10+= U_01*Chi_11; \
|
||||
UChi_01+= U_11*Chi_01; \
|
||||
UChi_11+= U_11*Chi_11; \
|
||||
UChi_02+= U_21*Chi_01; \
|
||||
UChi_12+= U_21*Chi_11; \
|
||||
Impl::loadLinkElement(U_00,ref()(0,2)); \
|
||||
Impl::loadLinkElement(U_10,ref()(1,2)); \
|
||||
Impl::loadLinkElement(U_20,ref()(2,2)); \
|
||||
UChi_00+= U_00*Chi_02; \
|
||||
UChi_10+= U_00*Chi_12; \
|
||||
UChi_01+= U_10*Chi_02; \
|
||||
UChi_11+= U_10*Chi_12; \
|
||||
UChi_02+= U_20*Chi_02; \
|
||||
UChi_12+= U_20*Chi_12
|
||||
|
||||
|
||||
#define MULT_2SPIN(A,F) \
|
||||
{auto & ref(U._odata[sU](A)); MULT_2SPIN_BODY; }
|
||||
|
||||
#define MULT_2SPIN_GPARITY(A,F) \
|
||||
{auto & ref(U._odata[sU](F)(A)); MULT_2SPIN_BODY; }
|
||||
#define MULT_2SPIN(A)\
|
||||
{auto & ref(U._odata[sU](A)); \
|
||||
Impl::loadLinkElement(U_00,ref()(0,0)); \
|
||||
Impl::loadLinkElement(U_10,ref()(1,0)); \
|
||||
Impl::loadLinkElement(U_20,ref()(2,0)); \
|
||||
Impl::loadLinkElement(U_01,ref()(0,1)); \
|
||||
Impl::loadLinkElement(U_11,ref()(1,1)); \
|
||||
Impl::loadLinkElement(U_21,ref()(2,1)); \
|
||||
UChi_00 = U_00*Chi_00;\
|
||||
UChi_10 = U_00*Chi_10;\
|
||||
UChi_01 = U_10*Chi_00;\
|
||||
UChi_11 = U_10*Chi_10;\
|
||||
UChi_02 = U_20*Chi_00;\
|
||||
UChi_12 = U_20*Chi_10;\
|
||||
UChi_00+= U_01*Chi_01;\
|
||||
UChi_10+= U_01*Chi_11;\
|
||||
UChi_01+= U_11*Chi_01;\
|
||||
UChi_11+= U_11*Chi_11;\
|
||||
UChi_02+= U_21*Chi_01;\
|
||||
UChi_12+= U_21*Chi_11;\
|
||||
Impl::loadLinkElement(U_00,ref()(0,2)); \
|
||||
Impl::loadLinkElement(U_10,ref()(1,2)); \
|
||||
Impl::loadLinkElement(U_20,ref()(2,2)); \
|
||||
UChi_00+= U_00*Chi_02;\
|
||||
UChi_10+= U_00*Chi_12;\
|
||||
UChi_01+= U_10*Chi_02;\
|
||||
UChi_11+= U_10*Chi_12;\
|
||||
UChi_02+= U_20*Chi_02;\
|
||||
UChi_12+= U_20*Chi_12;}
|
||||
|
||||
|
||||
#define PERMUTE_DIR(dir) \
|
||||
@ -428,87 +307,84 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
|
||||
result_31-= UChi_11; \
|
||||
result_32-= UChi_12;
|
||||
|
||||
#define HAND_STENCIL_LEG(PROJ,PERM,DIR,RECON,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL) \
|
||||
#define HAND_STENCIL_LEG(PROJ,PERM,DIR,RECON) \
|
||||
SE=st.GetEntry(ptype,DIR,ss); \
|
||||
offset = SE->_offset; \
|
||||
local = SE->_is_local; \
|
||||
perm = SE->_permute; \
|
||||
if ( local ) { \
|
||||
LOAD_CHIMU_IMPL(DIR,F,PERM); \
|
||||
LOAD_CHIMU; \
|
||||
PROJ; \
|
||||
if ( perm) { \
|
||||
PERMUTE_DIR(PERM); \
|
||||
} \
|
||||
} else { \
|
||||
LOAD_CHI_IMPL(DIR,F,PERM); \
|
||||
LOAD_CHI; \
|
||||
} \
|
||||
MULT_2SPIN_IMPL(DIR,F); \
|
||||
MULT_2SPIN(DIR); \
|
||||
RECON;
|
||||
|
||||
|
||||
#define HAND_STENCIL_LEG_INT(PROJ,PERM,DIR,RECON,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL) \
|
||||
#define HAND_STENCIL_LEG_INT(PROJ,PERM,DIR,RECON) \
|
||||
SE=st.GetEntry(ptype,DIR,ss); \
|
||||
offset = SE->_offset; \
|
||||
local = SE->_is_local; \
|
||||
perm = SE->_permute; \
|
||||
if ( local ) { \
|
||||
LOAD_CHIMU_IMPL(DIR,F,PERM); \
|
||||
LOAD_CHIMU; \
|
||||
PROJ; \
|
||||
if ( perm) { \
|
||||
PERMUTE_DIR(PERM); \
|
||||
} \
|
||||
} else if ( st.same_node[DIR] ) { \
|
||||
LOAD_CHI_IMPL(DIR,F,PERM); \
|
||||
LOAD_CHI; \
|
||||
} \
|
||||
if (local || st.same_node[DIR] ) { \
|
||||
MULT_2SPIN_IMPL(DIR,F); \
|
||||
MULT_2SPIN(DIR); \
|
||||
RECON; \
|
||||
}
|
||||
|
||||
#define HAND_STENCIL_LEG_EXT(PROJ,PERM,DIR,RECON,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL) \
|
||||
#define HAND_STENCIL_LEG_EXT(PROJ,PERM,DIR,RECON) \
|
||||
SE=st.GetEntry(ptype,DIR,ss); \
|
||||
offset = SE->_offset; \
|
||||
local = SE->_is_local; \
|
||||
perm = SE->_permute; \
|
||||
if((!SE->_is_local)&&(!st.same_node[DIR]) ) { \
|
||||
LOAD_CHI_IMPL(DIR,F,PERM); \
|
||||
MULT_2SPIN_IMPL(DIR,F); \
|
||||
LOAD_CHI; \
|
||||
MULT_2SPIN(DIR); \
|
||||
RECON; \
|
||||
nmu++; \
|
||||
}
|
||||
|
||||
#define HAND_RESULT(ss,F) \
|
||||
#define HAND_RESULT(ss) \
|
||||
{ \
|
||||
SiteSpinor & ref (out._odata[ss]); \
|
||||
vstream(ref(F)(0)(0),result_00); \
|
||||
vstream(ref(F)(0)(1),result_01); \
|
||||
vstream(ref(F)(0)(2),result_02); \
|
||||
vstream(ref(F)(1)(0),result_10); \
|
||||
vstream(ref(F)(1)(1),result_11); \
|
||||
vstream(ref(F)(1)(2),result_12); \
|
||||
vstream(ref(F)(2)(0),result_20); \
|
||||
vstream(ref(F)(2)(1),result_21); \
|
||||
vstream(ref(F)(2)(2),result_22); \
|
||||
vstream(ref(F)(3)(0),result_30); \
|
||||
vstream(ref(F)(3)(1),result_31); \
|
||||
vstream(ref(F)(3)(2),result_32); \
|
||||
vstream(ref()(0)(0),result_00); \
|
||||
vstream(ref()(0)(1),result_01); \
|
||||
vstream(ref()(0)(2),result_02); \
|
||||
vstream(ref()(1)(0),result_10); \
|
||||
vstream(ref()(1)(1),result_11); \
|
||||
vstream(ref()(1)(2),result_12); \
|
||||
vstream(ref()(2)(0),result_20); \
|
||||
vstream(ref()(2)(1),result_21); \
|
||||
vstream(ref()(2)(2),result_22); \
|
||||
vstream(ref()(3)(0),result_30); \
|
||||
vstream(ref()(3)(1),result_31); \
|
||||
vstream(ref()(3)(2),result_32); \
|
||||
}
|
||||
|
||||
#define HAND_RESULT_EXT(ss,F) \
|
||||
#define HAND_RESULT_EXT(ss) \
|
||||
if (nmu){ \
|
||||
SiteSpinor & ref (out._odata[ss]); \
|
||||
ref(F)(0)(0)+=result_00; \
|
||||
ref(F)(0)(1)+=result_01; \
|
||||
ref(F)(0)(2)+=result_02; \
|
||||
ref(F)(1)(0)+=result_10; \
|
||||
ref(F)(1)(1)+=result_11; \
|
||||
ref(F)(1)(2)+=result_12; \
|
||||
ref(F)(2)(0)+=result_20; \
|
||||
ref(F)(2)(1)+=result_21; \
|
||||
ref(F)(2)(2)+=result_22; \
|
||||
ref(F)(3)(0)+=result_30; \
|
||||
ref(F)(3)(1)+=result_31; \
|
||||
ref(F)(3)(2)+=result_32; \
|
||||
ref()(0)(0)+=result_00; \
|
||||
ref()(0)(1)+=result_01; \
|
||||
ref()(0)(2)+=result_02; \
|
||||
ref()(1)(0)+=result_10; \
|
||||
ref()(1)(1)+=result_11; \
|
||||
ref()(1)(2)+=result_12; \
|
||||
ref()(2)(0)+=result_20; \
|
||||
ref()(2)(1)+=result_21; \
|
||||
ref()(2)(2)+=result_22; \
|
||||
ref()(3)(0)+=result_30; \
|
||||
ref()(3)(1)+=result_31; \
|
||||
ref()(3)(2)+=result_32; \
|
||||
}
|
||||
|
||||
|
||||
@ -587,18 +463,15 @@ WilsonKernels<Impl>::HandDhopSite(StencilImpl &st,LebesgueOrder &lo,DoubledGauge
|
||||
int offset,local,perm, ptype;
|
||||
StencilEntry *SE;
|
||||
|
||||
#define HAND_DOP_SITE(F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL) \
|
||||
HAND_STENCIL_LEG(XM_PROJ,3,Xp,XM_RECON,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
|
||||
HAND_STENCIL_LEG(YM_PROJ,2,Yp,YM_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
|
||||
HAND_STENCIL_LEG(ZM_PROJ,1,Zp,ZM_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
|
||||
HAND_STENCIL_LEG(TM_PROJ,0,Tp,TM_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
|
||||
HAND_STENCIL_LEG(XP_PROJ,3,Xm,XP_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
|
||||
HAND_STENCIL_LEG(YP_PROJ,2,Ym,YP_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
|
||||
HAND_STENCIL_LEG(ZP_PROJ,1,Zm,ZP_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
|
||||
HAND_STENCIL_LEG(TP_PROJ,0,Tm,TP_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
|
||||
HAND_RESULT(ss,F)
|
||||
|
||||
HAND_DOP_SITE(, LOAD_CHI,LOAD_CHIMU,MULT_2SPIN);
|
||||
HAND_STENCIL_LEG(XM_PROJ,3,Xp,XM_RECON);
|
||||
HAND_STENCIL_LEG(YM_PROJ,2,Yp,YM_RECON_ACCUM);
|
||||
HAND_STENCIL_LEG(ZM_PROJ,1,Zp,ZM_RECON_ACCUM);
|
||||
HAND_STENCIL_LEG(TM_PROJ,0,Tp,TM_RECON_ACCUM);
|
||||
HAND_STENCIL_LEG(XP_PROJ,3,Xm,XP_RECON_ACCUM);
|
||||
HAND_STENCIL_LEG(YP_PROJ,2,Ym,YP_RECON_ACCUM);
|
||||
HAND_STENCIL_LEG(ZP_PROJ,1,Zm,ZP_RECON_ACCUM);
|
||||
HAND_STENCIL_LEG(TP_PROJ,0,Tm,TP_RECON_ACCUM);
|
||||
HAND_RESULT(ss);
|
||||
}
|
||||
|
||||
template<class Impl>
|
||||
@ -612,19 +485,16 @@ void WilsonKernels<Impl>::HandDhopSiteDag(StencilImpl &st,LebesgueOrder &lo,Doub
|
||||
|
||||
StencilEntry *SE;
|
||||
int offset,local,perm, ptype;
|
||||
|
||||
#define HAND_DOP_SITE_DAG(F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL) \
|
||||
HAND_STENCIL_LEG(XP_PROJ,3,Xp,XP_RECON,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
|
||||
HAND_STENCIL_LEG(YP_PROJ,2,Yp,YP_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
|
||||
HAND_STENCIL_LEG(ZP_PROJ,1,Zp,ZP_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
|
||||
HAND_STENCIL_LEG(TP_PROJ,0,Tp,TP_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
|
||||
HAND_STENCIL_LEG(XM_PROJ,3,Xm,XM_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
|
||||
HAND_STENCIL_LEG(YM_PROJ,2,Ym,YM_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
|
||||
HAND_STENCIL_LEG(ZM_PROJ,1,Zm,ZM_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
|
||||
HAND_STENCIL_LEG(TM_PROJ,0,Tm,TM_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
|
||||
HAND_RESULT(ss,F)
|
||||
|
||||
HAND_DOP_SITE_DAG(, LOAD_CHI,LOAD_CHIMU,MULT_2SPIN);
|
||||
|
||||
HAND_STENCIL_LEG(XP_PROJ,3,Xp,XP_RECON);
|
||||
HAND_STENCIL_LEG(YP_PROJ,2,Yp,YP_RECON_ACCUM);
|
||||
HAND_STENCIL_LEG(ZP_PROJ,1,Zp,ZP_RECON_ACCUM);
|
||||
HAND_STENCIL_LEG(TP_PROJ,0,Tp,TP_RECON_ACCUM);
|
||||
HAND_STENCIL_LEG(XM_PROJ,3,Xm,XM_RECON_ACCUM);
|
||||
HAND_STENCIL_LEG(YM_PROJ,2,Ym,YM_RECON_ACCUM);
|
||||
HAND_STENCIL_LEG(ZM_PROJ,1,Zm,ZM_RECON_ACCUM);
|
||||
HAND_STENCIL_LEG(TM_PROJ,0,Tm,TM_RECON_ACCUM);
|
||||
HAND_RESULT(ss);
|
||||
}
|
||||
|
||||
template<class Impl> void
|
||||
@ -639,20 +509,16 @@ WilsonKernels<Impl>::HandDhopSiteInt(StencilImpl &st,LebesgueOrder &lo,DoubledGa
|
||||
|
||||
int offset,local,perm, ptype;
|
||||
StencilEntry *SE;
|
||||
|
||||
#define HAND_DOP_SITE_INT(F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL) \
|
||||
ZERO_RESULT; \
|
||||
HAND_STENCIL_LEG_INT(XM_PROJ,3,Xp,XM_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
|
||||
HAND_STENCIL_LEG_INT(YM_PROJ,2,Yp,YM_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
|
||||
HAND_STENCIL_LEG_INT(ZM_PROJ,1,Zp,ZM_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
|
||||
HAND_STENCIL_LEG_INT(TM_PROJ,0,Tp,TM_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
|
||||
HAND_STENCIL_LEG_INT(XP_PROJ,3,Xm,XP_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
|
||||
HAND_STENCIL_LEG_INT(YP_PROJ,2,Ym,YP_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
|
||||
HAND_STENCIL_LEG_INT(ZP_PROJ,1,Zm,ZP_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
|
||||
HAND_STENCIL_LEG_INT(TP_PROJ,0,Tm,TP_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
|
||||
HAND_RESULT(ss,F)
|
||||
|
||||
HAND_DOP_SITE_INT(, LOAD_CHI,LOAD_CHIMU,MULT_2SPIN);
|
||||
ZERO_RESULT;
|
||||
HAND_STENCIL_LEG_INT(XM_PROJ,3,Xp,XM_RECON_ACCUM);
|
||||
HAND_STENCIL_LEG_INT(YM_PROJ,2,Yp,YM_RECON_ACCUM);
|
||||
HAND_STENCIL_LEG_INT(ZM_PROJ,1,Zp,ZM_RECON_ACCUM);
|
||||
HAND_STENCIL_LEG_INT(TM_PROJ,0,Tp,TM_RECON_ACCUM);
|
||||
HAND_STENCIL_LEG_INT(XP_PROJ,3,Xm,XP_RECON_ACCUM);
|
||||
HAND_STENCIL_LEG_INT(YP_PROJ,2,Ym,YP_RECON_ACCUM);
|
||||
HAND_STENCIL_LEG_INT(ZP_PROJ,1,Zm,ZP_RECON_ACCUM);
|
||||
HAND_STENCIL_LEG_INT(TP_PROJ,0,Tm,TP_RECON_ACCUM);
|
||||
HAND_RESULT(ss);
|
||||
}
|
||||
|
||||
template<class Impl>
|
||||
@ -666,20 +532,16 @@ void WilsonKernels<Impl>::HandDhopSiteDagInt(StencilImpl &st,LebesgueOrder &lo,D
|
||||
|
||||
StencilEntry *SE;
|
||||
int offset,local,perm, ptype;
|
||||
|
||||
#define HAND_DOP_SITE_DAG_INT(F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL) \
|
||||
ZERO_RESULT; \
|
||||
HAND_STENCIL_LEG_INT(XP_PROJ,3,Xp,XP_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
|
||||
HAND_STENCIL_LEG_INT(YP_PROJ,2,Yp,YP_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
|
||||
HAND_STENCIL_LEG_INT(ZP_PROJ,1,Zp,ZP_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
|
||||
HAND_STENCIL_LEG_INT(TP_PROJ,0,Tp,TP_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
|
||||
HAND_STENCIL_LEG_INT(XM_PROJ,3,Xm,XM_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
|
||||
HAND_STENCIL_LEG_INT(YM_PROJ,2,Ym,YM_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
|
||||
HAND_STENCIL_LEG_INT(ZM_PROJ,1,Zm,ZM_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
|
||||
HAND_STENCIL_LEG_INT(TM_PROJ,0,Tm,TM_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
|
||||
HAND_RESULT(ss,F)
|
||||
|
||||
HAND_DOP_SITE_DAG_INT(, LOAD_CHI,LOAD_CHIMU,MULT_2SPIN);
|
||||
ZERO_RESULT;
|
||||
HAND_STENCIL_LEG_INT(XP_PROJ,3,Xp,XP_RECON_ACCUM);
|
||||
HAND_STENCIL_LEG_INT(YP_PROJ,2,Yp,YP_RECON_ACCUM);
|
||||
HAND_STENCIL_LEG_INT(ZP_PROJ,1,Zp,ZP_RECON_ACCUM);
|
||||
HAND_STENCIL_LEG_INT(TP_PROJ,0,Tp,TP_RECON_ACCUM);
|
||||
HAND_STENCIL_LEG_INT(XM_PROJ,3,Xm,XM_RECON_ACCUM);
|
||||
HAND_STENCIL_LEG_INT(YM_PROJ,2,Ym,YM_RECON_ACCUM);
|
||||
HAND_STENCIL_LEG_INT(ZM_PROJ,1,Zm,ZM_RECON_ACCUM);
|
||||
HAND_STENCIL_LEG_INT(TM_PROJ,0,Tm,TM_RECON_ACCUM);
|
||||
HAND_RESULT(ss);
|
||||
}
|
||||
|
||||
template<class Impl> void
|
||||
@ -695,20 +557,16 @@ WilsonKernels<Impl>::HandDhopSiteExt(StencilImpl &st,LebesgueOrder &lo,DoubledGa
|
||||
int offset,local,perm, ptype;
|
||||
StencilEntry *SE;
|
||||
int nmu=0;
|
||||
|
||||
#define HAND_DOP_SITE_EXT(F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL) \
|
||||
ZERO_RESULT; \
|
||||
HAND_STENCIL_LEG_EXT(XM_PROJ,3,Xp,XM_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
|
||||
HAND_STENCIL_LEG_EXT(YM_PROJ,2,Yp,YM_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
|
||||
HAND_STENCIL_LEG_EXT(ZM_PROJ,1,Zp,ZM_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
|
||||
HAND_STENCIL_LEG_EXT(TM_PROJ,0,Tp,TM_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
|
||||
HAND_STENCIL_LEG_EXT(XP_PROJ,3,Xm,XP_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
|
||||
HAND_STENCIL_LEG_EXT(YP_PROJ,2,Ym,YP_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
|
||||
HAND_STENCIL_LEG_EXT(ZP_PROJ,1,Zm,ZP_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
|
||||
HAND_STENCIL_LEG_EXT(TP_PROJ,0,Tm,TP_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
|
||||
HAND_RESULT_EXT(ss,F)
|
||||
|
||||
HAND_DOP_SITE_EXT(, LOAD_CHI,LOAD_CHIMU,MULT_2SPIN);
|
||||
ZERO_RESULT;
|
||||
HAND_STENCIL_LEG_EXT(XM_PROJ,3,Xp,XM_RECON_ACCUM);
|
||||
HAND_STENCIL_LEG_EXT(YM_PROJ,2,Yp,YM_RECON_ACCUM);
|
||||
HAND_STENCIL_LEG_EXT(ZM_PROJ,1,Zp,ZM_RECON_ACCUM);
|
||||
HAND_STENCIL_LEG_EXT(TM_PROJ,0,Tp,TM_RECON_ACCUM);
|
||||
HAND_STENCIL_LEG_EXT(XP_PROJ,3,Xm,XP_RECON_ACCUM);
|
||||
HAND_STENCIL_LEG_EXT(YP_PROJ,2,Ym,YP_RECON_ACCUM);
|
||||
HAND_STENCIL_LEG_EXT(ZP_PROJ,1,Zm,ZP_RECON_ACCUM);
|
||||
HAND_STENCIL_LEG_EXT(TP_PROJ,0,Tm,TP_RECON_ACCUM);
|
||||
HAND_RESULT_EXT(ss);
|
||||
}
|
||||
|
||||
template<class Impl>
|
||||
@ -723,193 +581,18 @@ void WilsonKernels<Impl>::HandDhopSiteDagExt(StencilImpl &st,LebesgueOrder &lo,D
|
||||
StencilEntry *SE;
|
||||
int offset,local,perm, ptype;
|
||||
int nmu=0;
|
||||
|
||||
#define HAND_DOP_SITE_DAG_EXT(F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL) \
|
||||
ZERO_RESULT; \
|
||||
HAND_STENCIL_LEG_EXT(XP_PROJ,3,Xp,XP_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
|
||||
HAND_STENCIL_LEG_EXT(YP_PROJ,2,Yp,YP_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
|
||||
HAND_STENCIL_LEG_EXT(ZP_PROJ,1,Zp,ZP_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
|
||||
HAND_STENCIL_LEG_EXT(TP_PROJ,0,Tp,TP_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
|
||||
HAND_STENCIL_LEG_EXT(XM_PROJ,3,Xm,XM_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
|
||||
HAND_STENCIL_LEG_EXT(YM_PROJ,2,Ym,YM_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
|
||||
HAND_STENCIL_LEG_EXT(ZM_PROJ,1,Zm,ZM_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
|
||||
HAND_STENCIL_LEG_EXT(TM_PROJ,0,Tm,TM_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
|
||||
HAND_RESULT_EXT(ss,F)
|
||||
|
||||
HAND_DOP_SITE_DAG_EXT(, LOAD_CHI,LOAD_CHIMU,MULT_2SPIN);
|
||||
ZERO_RESULT;
|
||||
HAND_STENCIL_LEG_EXT(XP_PROJ,3,Xp,XP_RECON_ACCUM);
|
||||
HAND_STENCIL_LEG_EXT(YP_PROJ,2,Yp,YP_RECON_ACCUM);
|
||||
HAND_STENCIL_LEG_EXT(ZP_PROJ,1,Zp,ZP_RECON_ACCUM);
|
||||
HAND_STENCIL_LEG_EXT(TP_PROJ,0,Tp,TP_RECON_ACCUM);
|
||||
HAND_STENCIL_LEG_EXT(XM_PROJ,3,Xm,XM_RECON_ACCUM);
|
||||
HAND_STENCIL_LEG_EXT(YM_PROJ,2,Ym,YM_RECON_ACCUM);
|
||||
HAND_STENCIL_LEG_EXT(ZM_PROJ,1,Zm,ZM_RECON_ACCUM);
|
||||
HAND_STENCIL_LEG_EXT(TM_PROJ,0,Tm,TM_RECON_ACCUM);
|
||||
HAND_RESULT_EXT(ss);
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////
|
||||
// Specialise Gparity to simple implementation
|
||||
////////////////////////////////////////////////
|
||||
#define HAND_SPECIALISE_EMPTY(IMPL) \
|
||||
template<> void \
|
||||
WilsonKernels<IMPL>::HandDhopSite(StencilImpl &st, \
|
||||
LebesgueOrder &lo, \
|
||||
DoubledGaugeField &U, \
|
||||
SiteHalfSpinor *buf, \
|
||||
int sF,int sU, \
|
||||
const FermionField &in, \
|
||||
FermionField &out){ assert(0); } \
|
||||
template<> void \
|
||||
WilsonKernels<IMPL>::HandDhopSiteDag(StencilImpl &st, \
|
||||
LebesgueOrder &lo, \
|
||||
DoubledGaugeField &U, \
|
||||
SiteHalfSpinor *buf, \
|
||||
int sF,int sU, \
|
||||
const FermionField &in, \
|
||||
FermionField &out){ assert(0); } \
|
||||
template<> void \
|
||||
WilsonKernels<IMPL>::HandDhopSiteInt(StencilImpl &st, \
|
||||
LebesgueOrder &lo, \
|
||||
DoubledGaugeField &U, \
|
||||
SiteHalfSpinor *buf, \
|
||||
int sF,int sU, \
|
||||
const FermionField &in, \
|
||||
FermionField &out){ assert(0); } \
|
||||
template<> void \
|
||||
WilsonKernels<IMPL>::HandDhopSiteExt(StencilImpl &st, \
|
||||
LebesgueOrder &lo, \
|
||||
DoubledGaugeField &U, \
|
||||
SiteHalfSpinor *buf, \
|
||||
int sF,int sU, \
|
||||
const FermionField &in, \
|
||||
FermionField &out){ assert(0); } \
|
||||
template<> void \
|
||||
WilsonKernels<IMPL>::HandDhopSiteDagInt(StencilImpl &st, \
|
||||
LebesgueOrder &lo, \
|
||||
DoubledGaugeField &U, \
|
||||
SiteHalfSpinor *buf, \
|
||||
int sF,int sU, \
|
||||
const FermionField &in, \
|
||||
FermionField &out){ assert(0); } \
|
||||
template<> void \
|
||||
WilsonKernels<IMPL>::HandDhopSiteDagExt(StencilImpl &st, \
|
||||
LebesgueOrder &lo, \
|
||||
DoubledGaugeField &U, \
|
||||
SiteHalfSpinor *buf, \
|
||||
int sF,int sU, \
|
||||
const FermionField &in, \
|
||||
FermionField &out){ assert(0); } \
|
||||
|
||||
|
||||
|
||||
#define HAND_SPECIALISE_GPARITY(IMPL) \
|
||||
template<> void \
|
||||
WilsonKernels<IMPL>::HandDhopSite(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,SiteHalfSpinor *buf, \
|
||||
int ss,int sU,const FermionField &in, FermionField &out) \
|
||||
{ \
|
||||
typedef IMPL Impl; \
|
||||
typedef typename Simd::scalar_type S; \
|
||||
typedef typename Simd::vector_type V; \
|
||||
\
|
||||
HAND_DECLARATIONS(ignore); \
|
||||
\
|
||||
int offset,local,perm, ptype, g, direction, distance, sl, inplace_twist; \
|
||||
StencilEntry *SE; \
|
||||
HAND_DOP_SITE(0, LOAD_CHI_GPARITY,LOAD_CHIMU_GPARITY,MULT_2SPIN_GPARITY); \
|
||||
HAND_DOP_SITE(1, LOAD_CHI_GPARITY,LOAD_CHIMU_GPARITY,MULT_2SPIN_GPARITY); \
|
||||
} \
|
||||
\
|
||||
template<> \
|
||||
void WilsonKernels<IMPL>::HandDhopSiteDag(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,SiteHalfSpinor *buf, \
|
||||
int ss,int sU,const FermionField &in, FermionField &out) \
|
||||
{ \
|
||||
typedef IMPL Impl; \
|
||||
typedef typename Simd::scalar_type S; \
|
||||
typedef typename Simd::vector_type V; \
|
||||
\
|
||||
HAND_DECLARATIONS(ignore); \
|
||||
\
|
||||
StencilEntry *SE; \
|
||||
int offset,local,perm, ptype, g, direction, distance, sl, inplace_twist; \
|
||||
HAND_DOP_SITE_DAG(0, LOAD_CHI_GPARITY,LOAD_CHIMU_GPARITY,MULT_2SPIN_GPARITY); \
|
||||
HAND_DOP_SITE_DAG(1, LOAD_CHI_GPARITY,LOAD_CHIMU_GPARITY,MULT_2SPIN_GPARITY); \
|
||||
} \
|
||||
\
|
||||
template<> void \
|
||||
WilsonKernels<IMPL>::HandDhopSiteInt(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,SiteHalfSpinor *buf, \
|
||||
int ss,int sU,const FermionField &in, FermionField &out) \
|
||||
{ \
|
||||
typedef IMPL Impl; \
|
||||
typedef typename Simd::scalar_type S; \
|
||||
typedef typename Simd::vector_type V; \
|
||||
\
|
||||
HAND_DECLARATIONS(ignore); \
|
||||
\
|
||||
int offset,local,perm, ptype, g, direction, distance, sl, inplace_twist; \
|
||||
StencilEntry *SE; \
|
||||
HAND_DOP_SITE_INT(0, LOAD_CHI_GPARITY,LOAD_CHIMU_GPARITY,MULT_2SPIN_GPARITY); \
|
||||
HAND_DOP_SITE_INT(1, LOAD_CHI_GPARITY,LOAD_CHIMU_GPARITY,MULT_2SPIN_GPARITY); \
|
||||
} \
|
||||
\
|
||||
template<> \
|
||||
void WilsonKernels<IMPL>::HandDhopSiteDagInt(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,SiteHalfSpinor *buf, \
|
||||
int ss,int sU,const FermionField &in, FermionField &out) \
|
||||
{ \
|
||||
typedef IMPL Impl; \
|
||||
typedef typename Simd::scalar_type S; \
|
||||
typedef typename Simd::vector_type V; \
|
||||
\
|
||||
HAND_DECLARATIONS(ignore); \
|
||||
\
|
||||
StencilEntry *SE; \
|
||||
int offset,local,perm, ptype, g, direction, distance, sl, inplace_twist; \
|
||||
HAND_DOP_SITE_DAG_INT(0, LOAD_CHI_GPARITY,LOAD_CHIMU_GPARITY,MULT_2SPIN_GPARITY); \
|
||||
HAND_DOP_SITE_DAG_INT(1, LOAD_CHI_GPARITY,LOAD_CHIMU_GPARITY,MULT_2SPIN_GPARITY); \
|
||||
} \
|
||||
\
|
||||
template<> void \
|
||||
WilsonKernels<IMPL>::HandDhopSiteExt(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,SiteHalfSpinor *buf, \
|
||||
int ss,int sU,const FermionField &in, FermionField &out) \
|
||||
{ \
|
||||
typedef IMPL Impl; \
|
||||
typedef typename Simd::scalar_type S; \
|
||||
typedef typename Simd::vector_type V; \
|
||||
\
|
||||
HAND_DECLARATIONS(ignore); \
|
||||
\
|
||||
int offset,local,perm, ptype, g, direction, distance, sl, inplace_twist; \
|
||||
StencilEntry *SE; \
|
||||
int nmu=0; \
|
||||
HAND_DOP_SITE_EXT(0, LOAD_CHI_GPARITY,LOAD_CHIMU_GPARITY,MULT_2SPIN_GPARITY); \
|
||||
nmu = 0; \
|
||||
HAND_DOP_SITE_EXT(1, LOAD_CHI_GPARITY,LOAD_CHIMU_GPARITY,MULT_2SPIN_GPARITY); \
|
||||
} \
|
||||
template<> \
|
||||
void WilsonKernels<IMPL>::HandDhopSiteDagExt(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,SiteHalfSpinor *buf, \
|
||||
int ss,int sU,const FermionField &in, FermionField &out) \
|
||||
{ \
|
||||
typedef IMPL Impl; \
|
||||
typedef typename Simd::scalar_type S; \
|
||||
typedef typename Simd::vector_type V; \
|
||||
\
|
||||
HAND_DECLARATIONS(ignore); \
|
||||
\
|
||||
StencilEntry *SE; \
|
||||
int offset,local,perm, ptype, g, direction, distance, sl, inplace_twist; \
|
||||
int nmu=0; \
|
||||
HAND_DOP_SITE_DAG_EXT(0, LOAD_CHI_GPARITY,LOAD_CHIMU_GPARITY,MULT_2SPIN_GPARITY); \
|
||||
nmu = 0; \
|
||||
HAND_DOP_SITE_DAG_EXT(1, LOAD_CHI_GPARITY,LOAD_CHIMU_GPARITY,MULT_2SPIN_GPARITY); \
|
||||
}
|
||||
|
||||
|
||||
HAND_SPECIALISE_GPARITY(GparityWilsonImplF);
|
||||
HAND_SPECIALISE_GPARITY(GparityWilsonImplD);
|
||||
HAND_SPECIALISE_GPARITY(GparityWilsonImplFH);
|
||||
HAND_SPECIALISE_GPARITY(GparityWilsonImplDF);
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
////////////// Wilson ; uses this implementation /////////////////////
|
||||
|
||||
#define INSTANTIATE_THEM(A) \
|
||||
@ -930,8 +613,6 @@ INSTANTIATE_THEM(WilsonImplF);
|
||||
INSTANTIATE_THEM(WilsonImplD);
|
||||
INSTANTIATE_THEM(ZWilsonImplF);
|
||||
INSTANTIATE_THEM(ZWilsonImplD);
|
||||
INSTANTIATE_THEM(GparityWilsonImplF);
|
||||
INSTANTIATE_THEM(GparityWilsonImplD);
|
||||
INSTANTIATE_THEM(DomainWallVec5dImplF);
|
||||
INSTANTIATE_THEM(DomainWallVec5dImplD);
|
||||
INSTANTIATE_THEM(ZDomainWallVec5dImplF);
|
||||
@ -940,12 +621,11 @@ INSTANTIATE_THEM(WilsonImplFH);
|
||||
INSTANTIATE_THEM(WilsonImplDF);
|
||||
INSTANTIATE_THEM(ZWilsonImplFH);
|
||||
INSTANTIATE_THEM(ZWilsonImplDF);
|
||||
INSTANTIATE_THEM(GparityWilsonImplFH);
|
||||
INSTANTIATE_THEM(GparityWilsonImplDF);
|
||||
INSTANTIATE_THEM(DomainWallVec5dImplFH);
|
||||
INSTANTIATE_THEM(DomainWallVec5dImplDF);
|
||||
INSTANTIATE_THEM(ZDomainWallVec5dImplFH);
|
||||
INSTANTIATE_THEM(ZDomainWallVec5dImplDF);
|
||||
INSTANTIATE_THEM(WilsonTwoIndexAntiSymmetricImplF);
|
||||
INSTANTIATE_THEM(WilsonTwoIndexAntiSymmetricImplD);
|
||||
|
||||
}}
|
||||
|
878
lib/qcd/action/fermion/WilsonKernelsHandGparity.cc
Normal file
878
lib/qcd/action/fermion/WilsonKernelsHandGparity.cc
Normal file
@ -0,0 +1,878 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./lib/qcd/action/fermion/WilsonKernelsHandGparity.cc
|
||||
|
||||
Copyright (C) 2015
|
||||
|
||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||
Author: paboyle <paboyle@ph.ed.ac.uk>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
#include <Grid/qcd/action/fermion/FermionCore.h>
|
||||
|
||||
// REGISTER is defined with an empty replacement list, so every later use of
// the token in this file expands to nothing.
#define REGISTER
|
||||
|
||||
// Copy the twelve components ref(F)(s)(c), with s = 0..3 and c = 0..2, into
// the working variables Chimu_sc.
//
// Requirements at the expansion site: a callable `ref` and all twelve
// Chimu_* variables must be in scope.
// F selects the first index of `ref` (callers pass a flavour number, 0 or 1,
// or leave it empty for non-G-parity fields -- TODO confirm against callers).
//
// The expansion has no trailing semicolon on purpose: write
// `LOAD_CHIMU_BODY(F);`.  Every line except the last must end in a backslash
// with nothing between continuation lines, otherwise the macro is cut short.
#define LOAD_CHIMU_BODY(F) \
  Chimu_00 = ref(F)(0)(0); \
  Chimu_01 = ref(F)(0)(1); \
  Chimu_02 = ref(F)(0)(2); \
  Chimu_10 = ref(F)(1)(0); \
  Chimu_11 = ref(F)(1)(1); \
  Chimu_12 = ref(F)(1)(2); \
  Chimu_20 = ref(F)(2)(0); \
  Chimu_21 = ref(F)(2)(1); \
  Chimu_22 = ref(F)(2)(2); \
  Chimu_30 = ref(F)(3)(0); \
  Chimu_31 = ref(F)(3)(1); \
  Chimu_32 = ref(F)(3)(2)
|
||||
|
||||
// Load a full spinor from the input field: binds `ref` to in._odata[offset]
// and fills the Chimu_* variables through LOAD_CHIMU_BODY(F).
//
// DIR and PERM are accepted but not referenced in the expansion; the argument
// list matches LOAD_CHIMU_GPARITY(DIR,F,PERM) (presumably so either macro can
// be used as the same load hook -- confirm against the stencil-leg macros).
// Expects `in`, `offset` and the SiteSpinor type to be in scope.
#define LOAD_CHIMU(DIR,F,PERM) \
  { const SiteSpinor & ref (in._odata[offset]); LOAD_CHIMU_BODY(F); }
|
||||
|
||||
// Copy the six components ref(F)(s)(c), with s = 0..1 and c = 0..2, into the
// working variables Chi_sc.
//
// Requirements at the expansion site: a callable `ref` and the six Chi_*
// variables must be in scope.  F selects the first index of `ref`.
//
// No trailing semicolon is emitted; write `LOAD_CHI_BODY(F);`.  The
// continuation lines must stay contiguous or the macro is cut short.
#define LOAD_CHI_BODY(F) \
  Chi_00 = ref(F)(0)(0); \
  Chi_01 = ref(F)(0)(1); \
  Chi_02 = ref(F)(0)(2); \
  Chi_10 = ref(F)(1)(0); \
  Chi_11 = ref(F)(1)(1); \
  Chi_12 = ref(F)(1)(2)
|
||||
|
||||
// Load a half spinor from the buffer: binds `ref` to buf[offset] and fills the
// Chi_* variables through LOAD_CHI_BODY(F).
//
// DIR and PERM are accepted but not referenced in the expansion; the argument
// list matches LOAD_CHI_GPARITY(DIR,F,PERM).
// Expects `buf`, `offset` and the SiteHalfSpinor type to be in scope.
#define LOAD_CHI(DIR,F,PERM) \
  { const SiteHalfSpinor &ref(buf[offset]); LOAD_CHI_BODY(F); }
|
||||
|
||||
|
||||
//G-parity implementations using in-place intrinsic ops
|
||||
|
||||
//1l 1h -> 1h 1l
|
||||
//0l 0h , 1h 1l -> 0l 1h 0h,1l
|
||||
//0h,1l -> 1l,0h
|
||||
//if( (distance == 1 && !perm_will_occur) || (distance == -1 && perm_will_occur) )
|
||||
//Pulled fermion through forwards face, GPBC on upper component
|
||||
//Need 0= 0l 1h 1= 1l 0h
|
||||
//else if( (distance == -1 && !perm) || (distance == 1 && perm) )
|
||||
//Pulled fermion through backwards face, GPBC on lower component
|
||||
//Need 0= 1l 0h 1= 0l 1h
|
||||
|
||||
//1l 1h -> 1h 1l
|
||||
//0l 0h , 1h 1l -> 0l 1h 0h,1l
|
||||
// In-place twist for component (S,C), producing the "0l 1h" combination
// described in the comment block above.
//
//   1. permute##PERM writes a permuted copy of ref(1)(S)(C) into tmp1;
//   2. exchange##PERM combines ref(0)(S)(C) with tmp1 into tmp2 and tmp3;
//   3. INTO receives tmp2 (tmp3 is written but not used afterwards).
//
// F is accepted for signature symmetry but is not referenced in the expansion.
// tmp1..tmp3 are caller-supplied scratch variables and are clobbered.
// The expansion already ends in a semicolon.
#define DO_TWIST_0L_1H(INTO,S,C,F, PERM, tmp1, tmp2, tmp3) \
  permute##PERM(tmp1, ref(1)(S)(C)); \
  exchange##PERM(tmp2,tmp3, ref(0)(S)(C), tmp1); \
  INTO = tmp2;
|
||||
|
||||
//0l 0h -> 0h 0l
|
||||
//1l 1h, 0h 0l -> 1l 0h, 1h 0l
|
||||
// In-place twist for component (S,C), producing the "1l 0h" combination
// described in the comment block above.  Mirror image of DO_TWIST_0L_1H with
// the roles of ref(0) and ref(1) swapped:
//
//   1. permute##PERM writes a permuted copy of ref(0)(S)(C) into tmp1;
//   2. exchange##PERM combines ref(1)(S)(C) with tmp1 into tmp2 and tmp3;
//   3. INTO receives tmp2 (tmp3 is written but not used afterwards).
//
// F is accepted for signature symmetry but is not referenced in the expansion.
// tmp1..tmp3 are caller-supplied scratch variables and are clobbered.
// The expansion already ends in a semicolon.
#define DO_TWIST_1L_0H(INTO,S,C,F, PERM, tmp1, tmp2, tmp3) \
  permute##PERM(tmp1, ref(0)(S)(C)); \
  exchange##PERM(tmp2,tmp3, ref(1)(S)(C), tmp1); \
  INTO = tmp2;
|
||||
|
||||
|
||||
|
||||
|
||||
// Decide how flavour F has to be fetched for stencil leg DIR.
//
// Writes the following variables, which must exist at the expansion site:
//   g             - flavour index to load; starts as F
//   direction     - st._directions[DIR]
//   distance      - st._distances[DIR]
//   sl            - st._grid->_simd_layout[direction]
//   inplace_twist - 0 unless an in-register twist is required
//
// When the stencil entry SE is flagged _around_the_world and
// this->Params.twists[DIR % 4] is set:
//   * sl == 1 : the other flavour is loaded instead (g = (F+1) % 2);
//   * otherwise: inplace_twist is set to 1 and g is left equal to F.
//
// Also reads `st`, `SE` and `this` from the enclosing scope, so the macro can
// only be expanded inside a non-static member function.
// DIR and F are parenthesised so expression arguments (e.g. `base + 1`) are
// evaluated as a unit before `% 4` / `+ 1` are applied.
#define LOAD_CHI_SETUP(DIR,F) \
  g = (F); \
  direction = st._directions[(DIR)]; \
  distance = st._distances[(DIR)]; \
  sl = st._grid->_simd_layout[direction]; \
  inplace_twist = 0; \
  if(SE->_around_the_world && this->Params.twists[(DIR) % 4]){ \
    if(sl == 1){ \
      g = ((F)+1) % 2; \
    }else{ \
      inplace_twist = 1; \
    } \
  }
|
||||
|
||||
// G-parity aware load of a full spinor (twelve Chimu_* components) for stencil
// leg DIR and flavour F.
//
// Steps:
//   1. bind `ref` to in._odata[offset];
//   2. LOAD_CHI_SETUP(DIR,F) sets g / distance / inplace_twist;
//   3. if no in-place twist is needed, plain LOAD_CHIMU_BODY(g);
//   4. otherwise every component is built with DO_TWIST_0L_1H when
//        F==0 and (distance==1 without perm, or distance==-1 with perm), or
//        F==1 and (distance==-1 without perm, or distance==1 with perm),
//      and with DO_TWIST_1L_0H in all remaining cases.
//
// The link registers U_00,U_01,U_10 and U_11,U_20,U_21 are used alternately as
// the three scratch temporaries of the twist macros and are clobbered.
// Expects `in`, `offset`, `perm` and everything LOAD_CHI_SETUP needs in scope.
#define LOAD_CHIMU_GPARITY_INPLACE_TWIST(DIR,F,PERM) \
  { const SiteSpinor &ref(in._odata[offset]); \
    LOAD_CHI_SETUP(DIR,F); \
    if(!inplace_twist){ \
      LOAD_CHIMU_BODY(g); \
    }else{ \
      if( ( (F)==0 && ((distance == 1 && !perm) || (distance == -1 && perm)) ) || \
          ( (F)==1 && ((distance == -1 && !perm) || (distance == 1 && perm)) ) ){ \
        DO_TWIST_0L_1H(Chimu_00,0,0,F,PERM, U_00,U_01,U_10); \
        DO_TWIST_0L_1H(Chimu_01,0,1,F,PERM, U_11,U_20,U_21); \
        DO_TWIST_0L_1H(Chimu_02,0,2,F,PERM, U_00,U_01,U_10); \
        DO_TWIST_0L_1H(Chimu_10,1,0,F,PERM, U_11,U_20,U_21); \
        DO_TWIST_0L_1H(Chimu_11,1,1,F,PERM, U_00,U_01,U_10); \
        DO_TWIST_0L_1H(Chimu_12,1,2,F,PERM, U_11,U_20,U_21); \
        DO_TWIST_0L_1H(Chimu_20,2,0,F,PERM, U_00,U_01,U_10); \
        DO_TWIST_0L_1H(Chimu_21,2,1,F,PERM, U_11,U_20,U_21); \
        DO_TWIST_0L_1H(Chimu_22,2,2,F,PERM, U_00,U_01,U_10); \
        DO_TWIST_0L_1H(Chimu_30,3,0,F,PERM, U_11,U_20,U_21); \
        DO_TWIST_0L_1H(Chimu_31,3,1,F,PERM, U_00,U_01,U_10); \
        DO_TWIST_0L_1H(Chimu_32,3,2,F,PERM, U_11,U_20,U_21); \
      }else{ \
        DO_TWIST_1L_0H(Chimu_00,0,0,F,PERM, U_00,U_01,U_10); \
        DO_TWIST_1L_0H(Chimu_01,0,1,F,PERM, U_11,U_20,U_21); \
        DO_TWIST_1L_0H(Chimu_02,0,2,F,PERM, U_00,U_01,U_10); \
        DO_TWIST_1L_0H(Chimu_10,1,0,F,PERM, U_11,U_20,U_21); \
        DO_TWIST_1L_0H(Chimu_11,1,1,F,PERM, U_00,U_01,U_10); \
        DO_TWIST_1L_0H(Chimu_12,1,2,F,PERM, U_11,U_20,U_21); \
        DO_TWIST_1L_0H(Chimu_20,2,0,F,PERM, U_00,U_01,U_10); \
        DO_TWIST_1L_0H(Chimu_21,2,1,F,PERM, U_11,U_20,U_21); \
        DO_TWIST_1L_0H(Chimu_22,2,2,F,PERM, U_00,U_01,U_10); \
        DO_TWIST_1L_0H(Chimu_30,3,0,F,PERM, U_11,U_20,U_21); \
        DO_TWIST_1L_0H(Chimu_31,3,1,F,PERM, U_00,U_01,U_10); \
        DO_TWIST_1L_0H(Chimu_32,3,2,F,PERM, U_11,U_20,U_21); \
      } \
    } \
  }
|
||||
|
||||
|
||||
// G-parity aware load of a half spinor (six Chi_* components) for stencil leg
// DIR and flavour F.
//
// Steps:
//   1. bind `ref` to buf[offset];
//   2. LOAD_CHI_SETUP(DIR,F) sets g / distance / inplace_twist;
//   3. if no in-place twist is needed, plain LOAD_CHI_BODY(g);
//   4. otherwise every component is built with DO_TWIST_0L_1H when
//        F==0 and (distance==1 without perm, or distance==-1 with perm), or
//        F==1 and (distance==-1 without perm, or distance==1 with perm),
//      and with DO_TWIST_1L_0H in all remaining cases.
//
// Scratch temporaries handed to the twist macros (all clobbered): the link
// registers U_00,U_01,U_10 / U_11,U_20,U_21 and the result registers
// UChi_00..UChi_02 / UChi_10..UChi_12.
// Expects `buf`, `offset`, `perm` and everything LOAD_CHI_SETUP needs in scope.
#define LOAD_CHI_GPARITY_INPLACE_TWIST(DIR,F,PERM) \
  { const SiteHalfSpinor &ref(buf[offset]); \
    LOAD_CHI_SETUP(DIR,F); \
    if(!inplace_twist){ \
      LOAD_CHI_BODY(g); \
    }else{ \
      if( ( (F)==0 && ((distance == 1 && !perm) || (distance == -1 && perm)) ) || \
          ( (F)==1 && ((distance == -1 && !perm) || (distance == 1 && perm)) ) ){ \
        DO_TWIST_0L_1H(Chi_00,0,0,F,PERM, U_00,U_01,U_10); \
        DO_TWIST_0L_1H(Chi_01,0,1,F,PERM, U_11,U_20,U_21); \
        DO_TWIST_0L_1H(Chi_02,0,2,F,PERM, UChi_00,UChi_01,UChi_02); \
        DO_TWIST_0L_1H(Chi_10,1,0,F,PERM, UChi_10,UChi_11,UChi_12); \
        DO_TWIST_0L_1H(Chi_11,1,1,F,PERM, U_00,U_01,U_10); \
        DO_TWIST_0L_1H(Chi_12,1,2,F,PERM, U_11,U_20,U_21); \
      }else{ \
        DO_TWIST_1L_0H(Chi_00,0,0,F,PERM, U_00,U_01,U_10); \
        DO_TWIST_1L_0H(Chi_01,0,1,F,PERM, U_11,U_20,U_21); \
        DO_TWIST_1L_0H(Chi_02,0,2,F,PERM, UChi_00,UChi_01,UChi_02); \
        DO_TWIST_1L_0H(Chi_10,1,0,F,PERM, UChi_10,UChi_11,UChi_12); \
        DO_TWIST_1L_0H(Chi_11,1,1,F,PERM, U_00,U_01,U_10); \
        DO_TWIST_1L_0H(Chi_12,1,2,F,PERM, U_11,U_20,U_21); \
      } \
    } \
  }
|
||||
|
||||
|
||||
// Half-spinor load used by the G-parity kernels; forwards unchanged to the
// in-place-twist implementation.
#define LOAD_CHI_GPARITY(DIR,F,PERM) LOAD_CHI_GPARITY_INPLACE_TWIST(DIR,F,PERM)
|
||||
#define LOAD_CHIMU_GPARITY(DIR,F,PERM) LOAD_CHIMU_GPARITY_INPLACE_TWIST(DIR,F,PERM)
|
||||
|
||||
// To splat or not to splat depends on the implementation
|
||||
#define MULT_2SPIN_BODY \
|
||||
Impl::loadLinkElement(U_00,ref()(0,0)); \
|
||||
Impl::loadLinkElement(U_10,ref()(1,0)); \
|
||||
Impl::loadLinkElement(U_20,ref()(2,0)); \
|
||||
Impl::loadLinkElement(U_01,ref()(0,1)); \
|
||||
Impl::loadLinkElement(U_11,ref()(1,1)); \
|
||||
Impl::loadLinkElement(U_21,ref()(2,1)); \
|
||||
UChi_00 = U_00*Chi_00; \
|
||||
UChi_10 = U_00*Chi_10; \
|
||||
UChi_01 = U_10*Chi_00; \
|
||||
UChi_11 = U_10*Chi_10; \
|
||||
UChi_02 = U_20*Chi_00; \
|
||||
UChi_12 = U_20*Chi_10; \
|
||||
UChi_00+= U_01*Chi_01; \
|
||||
UChi_10+= U_01*Chi_11; \
|
||||
UChi_01+= U_11*Chi_01; \
|
||||
UChi_11+= U_11*Chi_11; \
|
||||
UChi_02+= U_21*Chi_01; \
|
||||
UChi_12+= U_21*Chi_11; \
|
||||
Impl::loadLinkElement(U_00,ref()(0,2)); \
|
||||
Impl::loadLinkElement(U_10,ref()(1,2)); \
|
||||
Impl::loadLinkElement(U_20,ref()(2,2)); \
|
||||
UChi_00+= U_00*Chi_02; \
|
||||
UChi_10+= U_00*Chi_12; \
|
||||
UChi_01+= U_10*Chi_02; \
|
||||
UChi_11+= U_10*Chi_12; \
|
||||
UChi_02+= U_20*Chi_02; \
|
||||
UChi_12+= U_20*Chi_12
|
||||
|
||||
|
||||
// Multiply the two-spin projected spinor (Chi_*) by the gauge link stored at
// U._odata[sU](A); the products are left in UChi_* by MULT_2SPIN_BODY.
// The F argument is accepted for signature parity with MULT_2SPIN_GPARITY
// and is not used here.
#define MULT_2SPIN(A,F) \
  { auto &ref = U._odata[sU](A); MULT_2SPIN_BODY; }
|
||||
|
||||
// G-parity variant of MULT_2SPIN: the link is read from U._odata[sU](F)(A),
// i.e. with an additional leading index F, before MULT_2SPIN_BODY forms UChi_*.
#define MULT_2SPIN_GPARITY(A,F) \
  { auto &ref = U._odata[sU](F)(A); MULT_2SPIN_BODY; }
|
||||
|
||||
|
||||
#define PERMUTE_DIR(dir) \
|
||||
permute##dir(Chi_00,Chi_00);\
|
||||
permute##dir(Chi_01,Chi_01);\
|
||||
permute##dir(Chi_02,Chi_02);\
|
||||
permute##dir(Chi_10,Chi_10);\
|
||||
permute##dir(Chi_11,Chi_11);\
|
||||
permute##dir(Chi_12,Chi_12);
|
||||
|
||||
// hspin(0)=fspin(0)+timesI(fspin(3));
|
||||
// hspin(1)=fspin(1)+timesI(fspin(2));
|
||||
#define XP_PROJ \
|
||||
Chi_00 = Chimu_00+timesI(Chimu_30);\
|
||||
Chi_01 = Chimu_01+timesI(Chimu_31);\
|
||||
Chi_02 = Chimu_02+timesI(Chimu_32);\
|
||||
Chi_10 = Chimu_10+timesI(Chimu_20);\
|
||||
Chi_11 = Chimu_11+timesI(Chimu_21);\
|
||||
Chi_12 = Chimu_12+timesI(Chimu_22);
|
||||
|
||||
#define YP_PROJ \
|
||||
Chi_00 = Chimu_00-Chimu_30;\
|
||||
Chi_01 = Chimu_01-Chimu_31;\
|
||||
Chi_02 = Chimu_02-Chimu_32;\
|
||||
Chi_10 = Chimu_10+Chimu_20;\
|
||||
Chi_11 = Chimu_11+Chimu_21;\
|
||||
Chi_12 = Chimu_12+Chimu_22;
|
||||
|
||||
#define ZP_PROJ \
|
||||
Chi_00 = Chimu_00+timesI(Chimu_20); \
|
||||
Chi_01 = Chimu_01+timesI(Chimu_21); \
|
||||
Chi_02 = Chimu_02+timesI(Chimu_22); \
|
||||
Chi_10 = Chimu_10-timesI(Chimu_30); \
|
||||
Chi_11 = Chimu_11-timesI(Chimu_31); \
|
||||
Chi_12 = Chimu_12-timesI(Chimu_32);
|
||||
|
||||
#define TP_PROJ \
|
||||
Chi_00 = Chimu_00+Chimu_20; \
|
||||
Chi_01 = Chimu_01+Chimu_21; \
|
||||
Chi_02 = Chimu_02+Chimu_22; \
|
||||
Chi_10 = Chimu_10+Chimu_30; \
|
||||
Chi_11 = Chimu_11+Chimu_31; \
|
||||
Chi_12 = Chimu_12+Chimu_32;
|
||||
|
||||
|
||||
// hspin(0)=fspin(0)-timesI(fspin(3));
|
||||
// hspin(1)=fspin(1)-timesI(fspin(2));
|
||||
#define XM_PROJ \
|
||||
Chi_00 = Chimu_00-timesI(Chimu_30);\
|
||||
Chi_01 = Chimu_01-timesI(Chimu_31);\
|
||||
Chi_02 = Chimu_02-timesI(Chimu_32);\
|
||||
Chi_10 = Chimu_10-timesI(Chimu_20);\
|
||||
Chi_11 = Chimu_11-timesI(Chimu_21);\
|
||||
Chi_12 = Chimu_12-timesI(Chimu_22);
|
||||
|
||||
#define YM_PROJ \
|
||||
Chi_00 = Chimu_00+Chimu_30;\
|
||||
Chi_01 = Chimu_01+Chimu_31;\
|
||||
Chi_02 = Chimu_02+Chimu_32;\
|
||||
Chi_10 = Chimu_10-Chimu_20;\
|
||||
Chi_11 = Chimu_11-Chimu_21;\
|
||||
Chi_12 = Chimu_12-Chimu_22;
|
||||
|
||||
#define ZM_PROJ \
|
||||
Chi_00 = Chimu_00-timesI(Chimu_20); \
|
||||
Chi_01 = Chimu_01-timesI(Chimu_21); \
|
||||
Chi_02 = Chimu_02-timesI(Chimu_22); \
|
||||
Chi_10 = Chimu_10+timesI(Chimu_30); \
|
||||
Chi_11 = Chimu_11+timesI(Chimu_31); \
|
||||
Chi_12 = Chimu_12+timesI(Chimu_32);
|
||||
|
||||
#define TM_PROJ \
|
||||
Chi_00 = Chimu_00-Chimu_20; \
|
||||
Chi_01 = Chimu_01-Chimu_21; \
|
||||
Chi_02 = Chimu_02-Chimu_22; \
|
||||
Chi_10 = Chimu_10-Chimu_30; \
|
||||
Chi_11 = Chimu_11-Chimu_31; \
|
||||
Chi_12 = Chimu_12-Chimu_32;
|
||||
|
||||
// fspin(0)=hspin(0);
|
||||
// fspin(1)=hspin(1);
|
||||
// fspin(2)=timesMinusI(hspin(1));
|
||||
// fspin(3)=timesMinusI(hspin(0));
|
||||
#define XP_RECON\
|
||||
result_00 = UChi_00;\
|
||||
result_01 = UChi_01;\
|
||||
result_02 = UChi_02;\
|
||||
result_10 = UChi_10;\
|
||||
result_11 = UChi_11;\
|
||||
result_12 = UChi_12;\
|
||||
result_20 = timesMinusI(UChi_10);\
|
||||
result_21 = timesMinusI(UChi_11);\
|
||||
result_22 = timesMinusI(UChi_12);\
|
||||
result_30 = timesMinusI(UChi_00);\
|
||||
result_31 = timesMinusI(UChi_01);\
|
||||
result_32 = timesMinusI(UChi_02);
|
||||
|
||||
#define XP_RECON_ACCUM\
|
||||
result_00+=UChi_00;\
|
||||
result_01+=UChi_01;\
|
||||
result_02+=UChi_02;\
|
||||
result_10+=UChi_10;\
|
||||
result_11+=UChi_11;\
|
||||
result_12+=UChi_12;\
|
||||
result_20-=timesI(UChi_10);\
|
||||
result_21-=timesI(UChi_11);\
|
||||
result_22-=timesI(UChi_12);\
|
||||
result_30-=timesI(UChi_00);\
|
||||
result_31-=timesI(UChi_01);\
|
||||
result_32-=timesI(UChi_02);
|
||||
|
||||
#define XM_RECON\
|
||||
result_00 = UChi_00;\
|
||||
result_01 = UChi_01;\
|
||||
result_02 = UChi_02;\
|
||||
result_10 = UChi_10;\
|
||||
result_11 = UChi_11;\
|
||||
result_12 = UChi_12;\
|
||||
result_20 = timesI(UChi_10);\
|
||||
result_21 = timesI(UChi_11);\
|
||||
result_22 = timesI(UChi_12);\
|
||||
result_30 = timesI(UChi_00);\
|
||||
result_31 = timesI(UChi_01);\
|
||||
result_32 = timesI(UChi_02);
|
||||
|
||||
#define XM_RECON_ACCUM\
|
||||
result_00+= UChi_00;\
|
||||
result_01+= UChi_01;\
|
||||
result_02+= UChi_02;\
|
||||
result_10+= UChi_10;\
|
||||
result_11+= UChi_11;\
|
||||
result_12+= UChi_12;\
|
||||
result_20+= timesI(UChi_10);\
|
||||
result_21+= timesI(UChi_11);\
|
||||
result_22+= timesI(UChi_12);\
|
||||
result_30+= timesI(UChi_00);\
|
||||
result_31+= timesI(UChi_01);\
|
||||
result_32+= timesI(UChi_02);
|
||||
|
||||
#define YP_RECON_ACCUM\
|
||||
result_00+= UChi_00;\
|
||||
result_01+= UChi_01;\
|
||||
result_02+= UChi_02;\
|
||||
result_10+= UChi_10;\
|
||||
result_11+= UChi_11;\
|
||||
result_12+= UChi_12;\
|
||||
result_20+= UChi_10;\
|
||||
result_21+= UChi_11;\
|
||||
result_22+= UChi_12;\
|
||||
result_30-= UChi_00;\
|
||||
result_31-= UChi_01;\
|
||||
result_32-= UChi_02;
|
||||
|
||||
#define YM_RECON_ACCUM\
|
||||
result_00+= UChi_00;\
|
||||
result_01+= UChi_01;\
|
||||
result_02+= UChi_02;\
|
||||
result_10+= UChi_10;\
|
||||
result_11+= UChi_11;\
|
||||
result_12+= UChi_12;\
|
||||
result_20-= UChi_10;\
|
||||
result_21-= UChi_11;\
|
||||
result_22-= UChi_12;\
|
||||
result_30+= UChi_00;\
|
||||
result_31+= UChi_01;\
|
||||
result_32+= UChi_02;
|
||||
|
||||
#define ZP_RECON_ACCUM\
|
||||
result_00+= UChi_00;\
|
||||
result_01+= UChi_01;\
|
||||
result_02+= UChi_02;\
|
||||
result_10+= UChi_10;\
|
||||
result_11+= UChi_11;\
|
||||
result_12+= UChi_12;\
|
||||
result_20-= timesI(UChi_00); \
|
||||
result_21-= timesI(UChi_01); \
|
||||
result_22-= timesI(UChi_02); \
|
||||
result_30+= timesI(UChi_10); \
|
||||
result_31+= timesI(UChi_11); \
|
||||
result_32+= timesI(UChi_12);
|
||||
|
||||
#define ZM_RECON_ACCUM\
|
||||
result_00+= UChi_00;\
|
||||
result_01+= UChi_01;\
|
||||
result_02+= UChi_02;\
|
||||
result_10+= UChi_10;\
|
||||
result_11+= UChi_11;\
|
||||
result_12+= UChi_12;\
|
||||
result_20+= timesI(UChi_00); \
|
||||
result_21+= timesI(UChi_01); \
|
||||
result_22+= timesI(UChi_02); \
|
||||
result_30-= timesI(UChi_10); \
|
||||
result_31-= timesI(UChi_11); \
|
||||
result_32-= timesI(UChi_12);
|
||||
|
||||
#define TP_RECON_ACCUM\
|
||||
result_00+= UChi_00;\
|
||||
result_01+= UChi_01;\
|
||||
result_02+= UChi_02;\
|
||||
result_10+= UChi_10;\
|
||||
result_11+= UChi_11;\
|
||||
result_12+= UChi_12;\
|
||||
result_20+= UChi_00; \
|
||||
result_21+= UChi_01; \
|
||||
result_22+= UChi_02; \
|
||||
result_30+= UChi_10; \
|
||||
result_31+= UChi_11; \
|
||||
result_32+= UChi_12;
|
||||
|
||||
#define TM_RECON_ACCUM\
|
||||
result_00+= UChi_00;\
|
||||
result_01+= UChi_01;\
|
||||
result_02+= UChi_02;\
|
||||
result_10+= UChi_10;\
|
||||
result_11+= UChi_11;\
|
||||
result_12+= UChi_12;\
|
||||
result_20-= UChi_00; \
|
||||
result_21-= UChi_01; \
|
||||
result_22-= UChi_02; \
|
||||
result_30-= UChi_10; \
|
||||
result_31-= UChi_11; \
|
||||
result_32-= UChi_12;
|
||||
|
||||
// One complete stencil leg in direction DIR.
//   - Looks up the stencil entry and caches offset / local / perm.
//   - Non-local entries load the half spinor via LOAD_CHI_IMPL.
//   - Local entries load the full spinor via LOAD_CHIMU_IMPL, apply the spin
//     projection PROJ and, when the entry asks for it, PERMUTE_DIR(PERM).
//   - Finally multiplies by the link (MULT_2SPIN_IMPL) and runs RECON.
// Expects SE, offset, local, perm, ptype, st and ss to be in scope at the
// point of expansion.
#define HAND_STENCIL_LEG(PROJ,PERM,DIR,RECON,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL) \
  SE     = st.GetEntry(ptype,DIR,ss);        \
  offset = SE->_offset;                      \
  local  = SE->_is_local;                    \
  perm   = SE->_permute;                     \
  if ( !local ) {                            \
    LOAD_CHI_IMPL(DIR,F,PERM);               \
  } else {                                   \
    LOAD_CHIMU_IMPL(DIR,F,PERM);             \
    PROJ;                                    \
    if ( perm ) {                            \
      PERMUTE_DIR(PERM);                     \
    }                                        \
  }                                          \
  MULT_2SPIN_IMPL(DIR,F);                    \
  RECON;
|
||||
|
||||
|
||||
// "Interior" stencil leg in direction DIR: contributes only when the entry is
// local or st.same_node[DIR] is set; any other entry is left untouched here.
//   - local entry        : LOAD_CHIMU_IMPL, PROJ, optional PERMUTE_DIR(PERM)
//   - same-node entry    : LOAD_CHI_IMPL
//   - either of the above: MULT_2SPIN_IMPL followed by RECON
// Expects SE, offset, local, perm, ptype, st and ss to be in scope at the
// point of expansion.
#define HAND_STENCIL_LEG_INT(PROJ,PERM,DIR,RECON,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL) \
  SE     = st.GetEntry(ptype,DIR,ss);        \
  offset = SE->_offset;                      \
  local  = SE->_is_local;                    \
  perm   = SE->_permute;                     \
  if ( local ) {                             \
    LOAD_CHIMU_IMPL(DIR,F,PERM);             \
    PROJ;                                    \
    if ( perm ) {                            \
      PERMUTE_DIR(PERM);                     \
    }                                        \
  } else if ( st.same_node[DIR] ) {          \
    LOAD_CHI_IMPL(DIR,F,PERM);               \
  }                                          \
  if ( local || st.same_node[DIR] ) {        \
    MULT_2SPIN_IMPL(DIR,F);                  \
    RECON;                                   \
  }
|
||||
|
||||
// "Exterior" stencil leg in direction DIR: the complement of
// HAND_STENCIL_LEG_INT. It contributes only when the entry is neither local
// nor flagged in st.same_node[DIR]; in that case it loads the half spinor,
// multiplies by the link, runs RECON and bumps nmu so the caller knows that
// at least one leg contributed.
// PROJ and LOAD_CHIMU_IMPL are part of the common signature but unused here.
// Expects SE, offset, local, perm, ptype, nmu, st and ss to be in scope.
#define HAND_STENCIL_LEG_EXT(PROJ,PERM,DIR,RECON,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL) \
  SE     = st.GetEntry(ptype,DIR,ss);               \
  offset = SE->_offset;                             \
  local  = SE->_is_local;                           \
  perm   = SE->_permute;                            \
  if ( !( SE->_is_local || st.same_node[DIR] ) ) {  \
    LOAD_CHI_IMPL(DIR,F,PERM);                      \
    MULT_2SPIN_IMPL(DIR,F);                         \
    RECON;                                          \
    nmu++;                                          \
  }
|
||||
|
||||
#define HAND_RESULT(ss,F) \
|
||||
{ \
|
||||
SiteSpinor & ref (out._odata[ss]); \
|
||||
vstream(ref(F)(0)(0),result_00); \
|
||||
vstream(ref(F)(0)(1),result_01); \
|
||||
vstream(ref(F)(0)(2),result_02); \
|
||||
vstream(ref(F)(1)(0),result_10); \
|
||||
vstream(ref(F)(1)(1),result_11); \
|
||||
vstream(ref(F)(1)(2),result_12); \
|
||||
vstream(ref(F)(2)(0),result_20); \
|
||||
vstream(ref(F)(2)(1),result_21); \
|
||||
vstream(ref(F)(2)(2),result_22); \
|
||||
vstream(ref(F)(3)(0),result_30); \
|
||||
vstream(ref(F)(3)(1),result_31); \
|
||||
vstream(ref(F)(3)(2),result_32); \
|
||||
}
|
||||
|
||||
#define HAND_RESULT_EXT(ss,F) \
|
||||
if (nmu){ \
|
||||
SiteSpinor & ref (out._odata[ss]); \
|
||||
ref(F)(0)(0)+=result_00; \
|
||||
ref(F)(0)(1)+=result_01; \
|
||||
ref(F)(0)(2)+=result_02; \
|
||||
ref(F)(1)(0)+=result_10; \
|
||||
ref(F)(1)(1)+=result_11; \
|
||||
ref(F)(1)(2)+=result_12; \
|
||||
ref(F)(2)(0)+=result_20; \
|
||||
ref(F)(2)(1)+=result_21; \
|
||||
ref(F)(2)(2)+=result_22; \
|
||||
ref(F)(3)(0)+=result_30; \
|
||||
ref(F)(3)(1)+=result_31; \
|
||||
ref(F)(3)(2)+=result_32; \
|
||||
}
|
||||
|
||||
|
||||
#define HAND_DECLARATIONS(a) \
|
||||
Simd result_00; \
|
||||
Simd result_01; \
|
||||
Simd result_02; \
|
||||
Simd result_10; \
|
||||
Simd result_11; \
|
||||
Simd result_12; \
|
||||
Simd result_20; \
|
||||
Simd result_21; \
|
||||
Simd result_22; \
|
||||
Simd result_30; \
|
||||
Simd result_31; \
|
||||
Simd result_32; \
|
||||
Simd Chi_00; \
|
||||
Simd Chi_01; \
|
||||
Simd Chi_02; \
|
||||
Simd Chi_10; \
|
||||
Simd Chi_11; \
|
||||
Simd Chi_12; \
|
||||
Simd UChi_00; \
|
||||
Simd UChi_01; \
|
||||
Simd UChi_02; \
|
||||
Simd UChi_10; \
|
||||
Simd UChi_11; \
|
||||
Simd UChi_12; \
|
||||
Simd U_00; \
|
||||
Simd U_10; \
|
||||
Simd U_20; \
|
||||
Simd U_01; \
|
||||
Simd U_11; \
|
||||
Simd U_21;
|
||||
|
||||
#define ZERO_RESULT \
|
||||
result_00=zero; \
|
||||
result_01=zero; \
|
||||
result_02=zero; \
|
||||
result_10=zero; \
|
||||
result_11=zero; \
|
||||
result_12=zero; \
|
||||
result_20=zero; \
|
||||
result_21=zero; \
|
||||
result_22=zero; \
|
||||
result_30=zero; \
|
||||
result_31=zero; \
|
||||
result_32=zero;
|
||||
|
||||
#define Chimu_00 Chi_00
|
||||
#define Chimu_01 Chi_01
|
||||
#define Chimu_02 Chi_02
|
||||
#define Chimu_10 Chi_10
|
||||
#define Chimu_11 Chi_11
|
||||
#define Chimu_12 Chi_12
|
||||
#define Chimu_20 UChi_00
|
||||
#define Chimu_21 UChi_01
|
||||
#define Chimu_22 UChi_02
|
||||
#define Chimu_30 UChi_10
|
||||
#define Chimu_31 UChi_11
|
||||
#define Chimu_32 UChi_12
|
||||
|
||||
namespace Grid {
|
||||
namespace QCD {
|
||||
|
||||
template<class Impl> void
|
||||
WilsonKernels<Impl>::HandDhopSite(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,SiteHalfSpinor *buf,
|
||||
int ss,int sU,const FermionField &in, FermionField &out)
|
||||
{
|
||||
// T==0, Z==1, Y==2, X==3 expect 1,2,2,2 simd layout etc...
|
||||
typedef typename Simd::scalar_type S;
|
||||
typedef typename Simd::vector_type V;
|
||||
|
||||
HAND_DECLARATIONS(ignore);
|
||||
|
||||
int offset,local,perm, ptype;
|
||||
StencilEntry *SE;
|
||||
|
||||
#define HAND_DOP_SITE(F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL) \
|
||||
HAND_STENCIL_LEG(XM_PROJ,3,Xp,XM_RECON,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
|
||||
HAND_STENCIL_LEG(YM_PROJ,2,Yp,YM_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
|
||||
HAND_STENCIL_LEG(ZM_PROJ,1,Zp,ZM_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
|
||||
HAND_STENCIL_LEG(TM_PROJ,0,Tp,TM_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
|
||||
HAND_STENCIL_LEG(XP_PROJ,3,Xm,XP_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
|
||||
HAND_STENCIL_LEG(YP_PROJ,2,Ym,YP_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
|
||||
HAND_STENCIL_LEG(ZP_PROJ,1,Zm,ZP_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
|
||||
HAND_STENCIL_LEG(TP_PROJ,0,Tm,TP_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
|
||||
HAND_RESULT(ss,F)
|
||||
|
||||
HAND_DOP_SITE(, LOAD_CHI,LOAD_CHIMU,MULT_2SPIN);
|
||||
}
|
||||
|
||||
template<class Impl>
|
||||
void WilsonKernels<Impl>::HandDhopSiteDag(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,SiteHalfSpinor *buf,
|
||||
int ss,int sU,const FermionField &in, FermionField &out)
|
||||
{
|
||||
typedef typename Simd::scalar_type S;
|
||||
typedef typename Simd::vector_type V;
|
||||
|
||||
HAND_DECLARATIONS(ignore);
|
||||
|
||||
StencilEntry *SE;
|
||||
int offset,local,perm, ptype;
|
||||
|
||||
#define HAND_DOP_SITE_DAG(F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL) \
|
||||
HAND_STENCIL_LEG(XP_PROJ,3,Xp,XP_RECON,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
|
||||
HAND_STENCIL_LEG(YP_PROJ,2,Yp,YP_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
|
||||
HAND_STENCIL_LEG(ZP_PROJ,1,Zp,ZP_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
|
||||
HAND_STENCIL_LEG(TP_PROJ,0,Tp,TP_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
|
||||
HAND_STENCIL_LEG(XM_PROJ,3,Xm,XM_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
|
||||
HAND_STENCIL_LEG(YM_PROJ,2,Ym,YM_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
|
||||
HAND_STENCIL_LEG(ZM_PROJ,1,Zm,ZM_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
|
||||
HAND_STENCIL_LEG(TM_PROJ,0,Tm,TM_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
|
||||
HAND_RESULT(ss,F)
|
||||
|
||||
HAND_DOP_SITE_DAG(, LOAD_CHI,LOAD_CHIMU,MULT_2SPIN);
|
||||
}
|
||||
|
||||
template<class Impl> void
|
||||
WilsonKernels<Impl>::HandDhopSiteInt(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,SiteHalfSpinor *buf,
|
||||
int ss,int sU,const FermionField &in, FermionField &out)
|
||||
{
|
||||
// T==0, Z==1, Y==2, X==3 expect 1,2,2,2 simd layout etc...
|
||||
typedef typename Simd::scalar_type S;
|
||||
typedef typename Simd::vector_type V;
|
||||
|
||||
HAND_DECLARATIONS(ignore);
|
||||
|
||||
int offset,local,perm, ptype;
|
||||
StencilEntry *SE;
|
||||
|
||||
#define HAND_DOP_SITE_INT(F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL) \
|
||||
ZERO_RESULT; \
|
||||
HAND_STENCIL_LEG_INT(XM_PROJ,3,Xp,XM_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
|
||||
HAND_STENCIL_LEG_INT(YM_PROJ,2,Yp,YM_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
|
||||
HAND_STENCIL_LEG_INT(ZM_PROJ,1,Zp,ZM_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
|
||||
HAND_STENCIL_LEG_INT(TM_PROJ,0,Tp,TM_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
|
||||
HAND_STENCIL_LEG_INT(XP_PROJ,3,Xm,XP_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
|
||||
HAND_STENCIL_LEG_INT(YP_PROJ,2,Ym,YP_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
|
||||
HAND_STENCIL_LEG_INT(ZP_PROJ,1,Zm,ZP_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
|
||||
HAND_STENCIL_LEG_INT(TP_PROJ,0,Tm,TP_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
|
||||
HAND_RESULT(ss,F)
|
||||
|
||||
HAND_DOP_SITE_INT(, LOAD_CHI,LOAD_CHIMU,MULT_2SPIN);
|
||||
}
|
||||
|
||||
template<class Impl>
|
||||
void WilsonKernels<Impl>::HandDhopSiteDagInt(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,SiteHalfSpinor *buf,
|
||||
int ss,int sU,const FermionField &in, FermionField &out)
|
||||
{
|
||||
typedef typename Simd::scalar_type S;
|
||||
typedef typename Simd::vector_type V;
|
||||
|
||||
HAND_DECLARATIONS(ignore);
|
||||
|
||||
StencilEntry *SE;
|
||||
int offset,local,perm, ptype;
|
||||
|
||||
#define HAND_DOP_SITE_DAG_INT(F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL) \
|
||||
ZERO_RESULT; \
|
||||
HAND_STENCIL_LEG_INT(XP_PROJ,3,Xp,XP_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
|
||||
HAND_STENCIL_LEG_INT(YP_PROJ,2,Yp,YP_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
|
||||
HAND_STENCIL_LEG_INT(ZP_PROJ,1,Zp,ZP_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
|
||||
HAND_STENCIL_LEG_INT(TP_PROJ,0,Tp,TP_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
|
||||
HAND_STENCIL_LEG_INT(XM_PROJ,3,Xm,XM_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
|
||||
HAND_STENCIL_LEG_INT(YM_PROJ,2,Ym,YM_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
|
||||
HAND_STENCIL_LEG_INT(ZM_PROJ,1,Zm,ZM_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
|
||||
HAND_STENCIL_LEG_INT(TM_PROJ,0,Tm,TM_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
|
||||
HAND_RESULT(ss,F)
|
||||
|
||||
HAND_DOP_SITE_DAG_INT(, LOAD_CHI,LOAD_CHIMU,MULT_2SPIN);
|
||||
}
|
||||
|
||||
template<class Impl> void
|
||||
WilsonKernels<Impl>::HandDhopSiteExt(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,SiteHalfSpinor *buf,
|
||||
int ss,int sU,const FermionField &in, FermionField &out)
|
||||
{
|
||||
// T==0, Z==1, Y==2, X==3 expect 1,2,2,2 simd layout etc...
|
||||
typedef typename Simd::scalar_type S;
|
||||
typedef typename Simd::vector_type V;
|
||||
|
||||
HAND_DECLARATIONS(ignore);
|
||||
|
||||
int offset,local,perm, ptype;
|
||||
StencilEntry *SE;
|
||||
int nmu=0;
|
||||
|
||||
#define HAND_DOP_SITE_EXT(F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL) \
|
||||
ZERO_RESULT; \
|
||||
HAND_STENCIL_LEG_EXT(XM_PROJ,3,Xp,XM_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
|
||||
HAND_STENCIL_LEG_EXT(YM_PROJ,2,Yp,YM_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
|
||||
HAND_STENCIL_LEG_EXT(ZM_PROJ,1,Zp,ZM_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
|
||||
HAND_STENCIL_LEG_EXT(TM_PROJ,0,Tp,TM_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
|
||||
HAND_STENCIL_LEG_EXT(XP_PROJ,3,Xm,XP_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
|
||||
HAND_STENCIL_LEG_EXT(YP_PROJ,2,Ym,YP_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
|
||||
HAND_STENCIL_LEG_EXT(ZP_PROJ,1,Zm,ZP_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
|
||||
HAND_STENCIL_LEG_EXT(TP_PROJ,0,Tm,TP_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
|
||||
HAND_RESULT_EXT(ss,F)
|
||||
|
||||
HAND_DOP_SITE_EXT(, LOAD_CHI,LOAD_CHIMU,MULT_2SPIN);
|
||||
}
|
||||
|
||||
template<class Impl>
|
||||
void WilsonKernels<Impl>::HandDhopSiteDagExt(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,SiteHalfSpinor *buf,
|
||||
int ss,int sU,const FermionField &in, FermionField &out)
|
||||
{
|
||||
typedef typename Simd::scalar_type S;
|
||||
typedef typename Simd::vector_type V;
|
||||
|
||||
HAND_DECLARATIONS(ignore);
|
||||
|
||||
StencilEntry *SE;
|
||||
int offset,local,perm, ptype;
|
||||
int nmu=0;
|
||||
|
||||
#define HAND_DOP_SITE_DAG_EXT(F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL) \
|
||||
ZERO_RESULT; \
|
||||
HAND_STENCIL_LEG_EXT(XP_PROJ,3,Xp,XP_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
|
||||
HAND_STENCIL_LEG_EXT(YP_PROJ,2,Yp,YP_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
|
||||
HAND_STENCIL_LEG_EXT(ZP_PROJ,1,Zp,ZP_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
|
||||
HAND_STENCIL_LEG_EXT(TP_PROJ,0,Tp,TP_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
|
||||
HAND_STENCIL_LEG_EXT(XM_PROJ,3,Xm,XM_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
|
||||
HAND_STENCIL_LEG_EXT(YM_PROJ,2,Ym,YM_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
|
||||
HAND_STENCIL_LEG_EXT(ZM_PROJ,1,Zm,ZM_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
|
||||
HAND_STENCIL_LEG_EXT(TM_PROJ,0,Tm,TM_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
|
||||
HAND_RESULT_EXT(ss,F)
|
||||
|
||||
HAND_DOP_SITE_DAG_EXT(, LOAD_CHI,LOAD_CHIMU,MULT_2SPIN);
|
||||
}
|
||||
|
||||
#define HAND_SPECIALISE_GPARITY(IMPL) \
|
||||
template<> void \
|
||||
WilsonKernels<IMPL>::HandDhopSite(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,SiteHalfSpinor *buf, \
|
||||
int ss,int sU,const FermionField &in, FermionField &out) \
|
||||
{ \
|
||||
typedef IMPL Impl; \
|
||||
typedef typename Simd::scalar_type S; \
|
||||
typedef typename Simd::vector_type V; \
|
||||
\
|
||||
HAND_DECLARATIONS(ignore); \
|
||||
\
|
||||
int offset,local,perm, ptype, g, direction, distance, sl, inplace_twist; \
|
||||
StencilEntry *SE; \
|
||||
HAND_DOP_SITE(0, LOAD_CHI_GPARITY,LOAD_CHIMU_GPARITY,MULT_2SPIN_GPARITY); \
|
||||
HAND_DOP_SITE(1, LOAD_CHI_GPARITY,LOAD_CHIMU_GPARITY,MULT_2SPIN_GPARITY); \
|
||||
} \
|
||||
\
|
||||
template<> \
|
||||
void WilsonKernels<IMPL>::HandDhopSiteDag(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,SiteHalfSpinor *buf, \
|
||||
int ss,int sU,const FermionField &in, FermionField &out) \
|
||||
{ \
|
||||
typedef IMPL Impl; \
|
||||
typedef typename Simd::scalar_type S; \
|
||||
typedef typename Simd::vector_type V; \
|
||||
\
|
||||
HAND_DECLARATIONS(ignore); \
|
||||
\
|
||||
StencilEntry *SE; \
|
||||
int offset,local,perm, ptype, g, direction, distance, sl, inplace_twist; \
|
||||
HAND_DOP_SITE_DAG(0, LOAD_CHI_GPARITY,LOAD_CHIMU_GPARITY,MULT_2SPIN_GPARITY); \
|
||||
HAND_DOP_SITE_DAG(1, LOAD_CHI_GPARITY,LOAD_CHIMU_GPARITY,MULT_2SPIN_GPARITY); \
|
||||
} \
|
||||
\
|
||||
template<> void \
|
||||
WilsonKernels<IMPL>::HandDhopSiteInt(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,SiteHalfSpinor *buf, \
|
||||
int ss,int sU,const FermionField &in, FermionField &out) \
|
||||
{ \
|
||||
typedef IMPL Impl; \
|
||||
typedef typename Simd::scalar_type S; \
|
||||
typedef typename Simd::vector_type V; \
|
||||
\
|
||||
HAND_DECLARATIONS(ignore); \
|
||||
\
|
||||
int offset,local,perm, ptype, g, direction, distance, sl, inplace_twist; \
|
||||
StencilEntry *SE; \
|
||||
HAND_DOP_SITE_INT(0, LOAD_CHI_GPARITY,LOAD_CHIMU_GPARITY,MULT_2SPIN_GPARITY); \
|
||||
HAND_DOP_SITE_INT(1, LOAD_CHI_GPARITY,LOAD_CHIMU_GPARITY,MULT_2SPIN_GPARITY); \
|
||||
} \
|
||||
\
|
||||
template<> \
|
||||
void WilsonKernels<IMPL>::HandDhopSiteDagInt(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,SiteHalfSpinor *buf, \
|
||||
int ss,int sU,const FermionField &in, FermionField &out) \
|
||||
{ \
|
||||
typedef IMPL Impl; \
|
||||
typedef typename Simd::scalar_type S; \
|
||||
typedef typename Simd::vector_type V; \
|
||||
\
|
||||
HAND_DECLARATIONS(ignore); \
|
||||
\
|
||||
StencilEntry *SE; \
|
||||
int offset,local,perm, ptype, g, direction, distance, sl, inplace_twist; \
|
||||
HAND_DOP_SITE_DAG_INT(0, LOAD_CHI_GPARITY,LOAD_CHIMU_GPARITY,MULT_2SPIN_GPARITY); \
|
||||
HAND_DOP_SITE_DAG_INT(1, LOAD_CHI_GPARITY,LOAD_CHIMU_GPARITY,MULT_2SPIN_GPARITY); \
|
||||
} \
|
||||
\
|
||||
template<> void \
|
||||
WilsonKernels<IMPL>::HandDhopSiteExt(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,SiteHalfSpinor *buf, \
|
||||
int ss,int sU,const FermionField &in, FermionField &out) \
|
||||
{ \
|
||||
typedef IMPL Impl; \
|
||||
typedef typename Simd::scalar_type S; \
|
||||
typedef typename Simd::vector_type V; \
|
||||
\
|
||||
HAND_DECLARATIONS(ignore); \
|
||||
\
|
||||
int offset,local,perm, ptype, g, direction, distance, sl, inplace_twist; \
|
||||
StencilEntry *SE; \
|
||||
int nmu=0; \
|
||||
HAND_DOP_SITE_EXT(0, LOAD_CHI_GPARITY,LOAD_CHIMU_GPARITY,MULT_2SPIN_GPARITY); \
|
||||
nmu = 0; \
|
||||
HAND_DOP_SITE_EXT(1, LOAD_CHI_GPARITY,LOAD_CHIMU_GPARITY,MULT_2SPIN_GPARITY); \
|
||||
} \
|
||||
template<> \
|
||||
void WilsonKernels<IMPL>::HandDhopSiteDagExt(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,SiteHalfSpinor *buf, \
|
||||
int ss,int sU,const FermionField &in, FermionField &out) \
|
||||
{ \
|
||||
typedef IMPL Impl; \
|
||||
typedef typename Simd::scalar_type S; \
|
||||
typedef typename Simd::vector_type V; \
|
||||
\
|
||||
HAND_DECLARATIONS(ignore); \
|
||||
\
|
||||
StencilEntry *SE; \
|
||||
int offset,local,perm, ptype, g, direction, distance, sl, inplace_twist; \
|
||||
int nmu=0; \
|
||||
HAND_DOP_SITE_DAG_EXT(0, LOAD_CHI_GPARITY,LOAD_CHIMU_GPARITY,MULT_2SPIN_GPARITY); \
|
||||
nmu = 0; \
|
||||
HAND_DOP_SITE_DAG_EXT(1, LOAD_CHI_GPARITY,LOAD_CHIMU_GPARITY,MULT_2SPIN_GPARITY); \
|
||||
}
|
||||
|
||||
|
||||
HAND_SPECIALISE_GPARITY(GparityWilsonImplF);
|
||||
HAND_SPECIALISE_GPARITY(GparityWilsonImplD);
|
||||
HAND_SPECIALISE_GPARITY(GparityWilsonImplFH);
|
||||
HAND_SPECIALISE_GPARITY(GparityWilsonImplDF);
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
////////////// Wilson ; uses this implementation /////////////////////
|
||||
|
||||
#define INSTANTIATE_THEM(A) \
|
||||
template void WilsonKernels<A>::HandDhopSite(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,SiteHalfSpinor *buf,\
|
||||
int ss,int sU,const FermionField &in, FermionField &out); \
|
||||
template void WilsonKernels<A>::HandDhopSiteDag(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,SiteHalfSpinor *buf, \
|
||||
int ss,int sU,const FermionField &in, FermionField &out);\
|
||||
template void WilsonKernels<A>::HandDhopSiteInt(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,SiteHalfSpinor *buf,\
|
||||
int ss,int sU,const FermionField &in, FermionField &out); \
|
||||
template void WilsonKernels<A>::HandDhopSiteDagInt(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,SiteHalfSpinor *buf, \
|
||||
int ss,int sU,const FermionField &in, FermionField &out); \
|
||||
template void WilsonKernels<A>::HandDhopSiteExt(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,SiteHalfSpinor *buf,\
|
||||
int ss,int sU,const FermionField &in, FermionField &out); \
|
||||
template void WilsonKernels<A>::HandDhopSiteDagExt(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,SiteHalfSpinor *buf, \
|
||||
int ss,int sU,const FermionField &in, FermionField &out);
|
||||
|
||||
INSTANTIATE_THEM(GparityWilsonImplF);
|
||||
INSTANTIATE_THEM(GparityWilsonImplD);
|
||||
INSTANTIATE_THEM(GparityWilsonImplFH);
|
||||
INSTANTIATE_THEM(GparityWilsonImplDF);
|
||||
}}
|
@ -48,6 +48,22 @@ with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
} \
|
||||
}
|
||||
|
||||
// Generates a member template Load<NAME>Checkpointer(Params_, M_) for
// checkpointers that take a user metadata object in addition to their
// parameters. The generated function installs a NAME##CPModule in CP the
// first time it is called; if a checkpointer is already loaded it logs an
// error and terminates the program with exit(1).
#define RegisterLoadCheckPointerMetadataFunction(NAME)                              \
  template < class Metadata >                                                       \
  void Load##NAME##Checkpointer(const CheckpointerParameters& Params_,              \
                                const Metadata& M_) {                               \
    if (have_CheckPointer) {                                                        \
      /* only one checkpointer may be registered */                                 \
      std::cout << GridLogError << "Checkpointer already loaded " << std::endl;     \
      exit(1);                                                                      \
    }                                                                               \
    std::cout << GridLogDebug << "Loading Metadata Checkpointer " << #NAME          \
              << std::endl;                                                         \
    CP = std::unique_ptr<CheckpointerBaseModule>(                                   \
        new NAME##CPModule<ImplementationPolicy, Metadata >(Params_, M_));          \
    have_CheckPointer = true;                                                       \
  }
|
||||
|
||||
namespace Grid {
|
||||
namespace QCD {
|
||||
|
||||
@ -77,7 +93,7 @@ class HMCResourceManager {
|
||||
bool have_CheckPointer;
|
||||
|
||||
// NOTE: operator << is not overloaded for std::vector<string>
|
||||
// so thsi function is necessary
|
||||
// so this function is necessary
|
||||
void output_vector_string(const std::vector<std::string> &vs){
|
||||
for (auto &i: vs)
|
||||
std::cout << i << " ";
|
||||
@ -254,6 +270,7 @@ class HMCResourceManager {
|
||||
RegisterLoadCheckPointerFunction(Nersc);
|
||||
#ifdef HAVE_LIME
|
||||
RegisterLoadCheckPointerFunction(ILDG);
|
||||
RegisterLoadCheckPointerMetadataFunction(Scidac);
|
||||
#endif
|
||||
|
||||
////////////////////////////////////////////////////////
|
||||
|
@ -76,6 +76,14 @@ class BaseHmcCheckpointer : public HmcObservable<typename Impl::Field> {
|
||||
}
|
||||
}
|
||||
|
||||
void check_filename(const std::string &filename){
|
||||
std::ifstream f(filename.c_str());
|
||||
if(!f.good()){
|
||||
std::cout << GridLogError << "Filename " << filename << " not found. Aborting. " << std::endl;
|
||||
abort();
|
||||
};
|
||||
}
|
||||
|
||||
virtual void initialize(const CheckpointerParameters &Params) = 0;
|
||||
|
||||
virtual void CheckpointRestore(int traj, typename Impl::Field &U,
|
||||
|
@ -93,6 +93,9 @@ class BinaryHmcCheckpointer : public BaseHmcCheckpointer<Impl> {
|
||||
void CheckpointRestore(int traj, Field &U, GridSerialRNG &sRNG, GridParallelRNG &pRNG) {
|
||||
std::string config, rng;
|
||||
this->build_filenames(traj, Params, config, rng);
|
||||
this->check_filename(rng);
|
||||
this->check_filename(config);
|
||||
|
||||
|
||||
BinarySimpleMunger<sobj_double, sobj> munge;
|
||||
|
||||
|
@ -136,6 +136,20 @@ class ILDGCPModule: public CheckPointerModule< ImplementationPolicy> {
|
||||
|
||||
};
|
||||
|
||||
template<class ImplementationPolicy, class Metadata>
|
||||
class ScidacCPModule: public CheckPointerModule< ImplementationPolicy> {
|
||||
typedef CheckPointerModule< ImplementationPolicy> CPBase;
|
||||
Metadata M;
|
||||
|
||||
// acquire resource
|
||||
virtual void initialize(){
|
||||
this->CheckPointPtr.reset(new ScidacHmcCheckpointer<ImplementationPolicy, Metadata>(this->Par_, M));
|
||||
}
|
||||
public:
|
||||
ScidacCPModule(typename CPBase::APar Par, Metadata M_):M(M_), CPBase(Par) {}
|
||||
template <class ReaderClass>
|
||||
ScidacCPModule(Reader<ReaderClass>& Reader) : Parametrized<typename CPBase::APar>(Reader), M(Reader){};
|
||||
};
|
||||
#endif
|
||||
|
||||
|
||||
|
@ -34,6 +34,7 @@ directory
|
||||
#include <Grid/qcd/hmc/checkpointers/NerscCheckpointer.h>
|
||||
#include <Grid/qcd/hmc/checkpointers/BinaryCheckpointer.h>
|
||||
#include <Grid/qcd/hmc/checkpointers/ILDGCheckpointer.h>
|
||||
#include <Grid/qcd/hmc/checkpointers/ScidacCheckpointer.h>
|
||||
//#include <Grid/qcd/hmc/checkpointers/CheckPointerModules.h>
|
||||
|
||||
|
||||
|
@ -95,6 +95,10 @@ class ILDGHmcCheckpointer : public BaseHmcCheckpointer<Implementation> {
|
||||
GridParallelRNG &pRNG) {
|
||||
std::string config, rng;
|
||||
this->build_filenames(traj, Params, config, rng);
|
||||
this->check_filename(rng);
|
||||
this->check_filename(config);
|
||||
|
||||
|
||||
|
||||
uint32_t nersc_csum,scidac_csuma,scidac_csumb;
|
||||
BinaryIO::readRNG(sRNG, pRNG, rng, 0,nersc_csum,scidac_csuma,scidac_csumb);
|
||||
|
@ -69,6 +69,9 @@ class NerscHmcCheckpointer : public BaseHmcCheckpointer<Gimpl> {
|
||||
GridParallelRNG &pRNG) {
|
||||
std::string config, rng;
|
||||
this->build_filenames(traj, Params, config, rng);
|
||||
this->check_filename(rng);
|
||||
this->check_filename(config);
|
||||
|
||||
|
||||
FieldMetaData header;
|
||||
NerscIO::readRNGState(sRNG, pRNG, header, rng);
|
||||
|
125
lib/qcd/hmc/checkpointers/ScidacCheckpointer.h
Normal file
125
lib/qcd/hmc/checkpointers/ScidacCheckpointer.h
Normal file
@ -0,0 +1,125 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./lib/qcd/hmc/checkpointers/ScidacCheckpointer.h
|
||||
|
||||
Copyright (C) 2018
|
||||
|
||||
Author: Guido Cossu <guido.cossu@ed.ac.uk>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution
|
||||
directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
#ifndef SCIDAC_CHECKPOINTER
|
||||
#define SCIDAC_CHECKPOINTER
|
||||
|
||||
#ifdef HAVE_LIME
|
||||
|
||||
#include <iostream>
|
||||
#include <sstream>
|
||||
#include <string>
|
||||
|
||||
namespace Grid {
|
||||
namespace QCD {
|
||||
|
||||
// For generic fields
|
||||
template <class Implementation, class Metadata>
|
||||
class ScidacHmcCheckpointer : public BaseHmcCheckpointer<Implementation> {
|
||||
private:
|
||||
CheckpointerParameters Params;
|
||||
Metadata MData;
|
||||
|
||||
typedef typename Implementation::Field Field;
|
||||
|
||||
public:
|
||||
//INHERIT_GIMPL_TYPES(Implementation);
|
||||
|
||||
ScidacHmcCheckpointer(const CheckpointerParameters &Params_) { initialize(Params_); }
|
||||
ScidacHmcCheckpointer(const CheckpointerParameters &Params_, const Metadata& M_):MData(M_) { initialize(Params_); }
|
||||
|
||||
void initialize(const CheckpointerParameters &Params_) {
|
||||
Params = Params_;
|
||||
|
||||
// check here that the format is valid
|
||||
int ieee32big = (Params.format == std::string("IEEE32BIG"));
|
||||
int ieee32 = (Params.format == std::string("IEEE32"));
|
||||
int ieee64big = (Params.format == std::string("IEEE64BIG"));
|
||||
int ieee64 = (Params.format == std::string("IEEE64"));
|
||||
|
||||
if (!(ieee64big || ieee32 || ieee32big || ieee64)) {
|
||||
std::cout << GridLogError << "Unrecognized file format " << Params.format
|
||||
<< std::endl;
|
||||
std::cout << GridLogError
|
||||
<< "Allowed: IEEE32BIG | IEEE32 | IEEE64BIG | IEEE64"
|
||||
<< std::endl;
|
||||
|
||||
exit(1);
|
||||
}
|
||||
}
|
||||
|
||||
void TrajectoryComplete(int traj, Field &U, GridSerialRNG &sRNG,
|
||||
GridParallelRNG &pRNG) {
|
||||
if ((traj % Params.saveInterval) == 0) {
|
||||
std::string config, rng;
|
||||
this->build_filenames(traj, Params, config, rng);
|
||||
GridBase *grid = U._grid;
|
||||
uint32_t nersc_csum,scidac_csuma,scidac_csumb;
|
||||
BinaryIO::writeRNG(sRNG, pRNG, rng, 0,nersc_csum,scidac_csuma,scidac_csumb);
|
||||
ScidacWriter _ScidacWriter(grid->IsBoss());
|
||||
_ScidacWriter.open(config);
|
||||
_ScidacWriter.writeScidacFieldRecord(U, MData);
|
||||
_ScidacWriter.close();
|
||||
|
||||
std::cout << GridLogMessage << "Written Scidac Configuration on " << config
|
||||
<< " checksum " << std::hex << nersc_csum<<"/"
|
||||
<< scidac_csuma<<"/" << scidac_csumb
|
||||
<< std::dec << std::endl;
|
||||
}
|
||||
};
|
||||
|
||||
void CheckpointRestore(int traj, Field &U, GridSerialRNG &sRNG,
|
||||
GridParallelRNG &pRNG) {
|
||||
std::string config, rng;
|
||||
this->build_filenames(traj, Params, config, rng);
|
||||
this->check_filename(rng);
|
||||
this->check_filename(config);
|
||||
|
||||
|
||||
uint32_t nersc_csum,scidac_csuma,scidac_csumb;
|
||||
BinaryIO::readRNG(sRNG, pRNG, rng, 0,nersc_csum,scidac_csuma,scidac_csumb);
|
||||
|
||||
Metadata md_content;
|
||||
ScidacReader _ScidacReader;
|
||||
_ScidacReader.open(config);
|
||||
_ScidacReader.readScidacFieldRecord(U,md_content); // format from the header
|
||||
_ScidacReader.close();
|
||||
|
||||
std::cout << GridLogMessage << "Read Scidac Configuration from " << config
|
||||
<< " checksum " << std::hex
|
||||
<< nersc_csum<<"/"
|
||||
<< scidac_csuma<<"/"
|
||||
<< scidac_csumb
|
||||
<< std::dec << std::endl;
|
||||
};
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
#endif // HAVE_LIME
|
||||
#endif // SCIDAC_CHECKPOINTER
|
@ -114,18 +114,26 @@ class Integrator {
|
||||
// input U actually not used in the fundamental case
|
||||
// Fundamental updates, include smearing
|
||||
|
||||
for (int a = 0; a < as[level].actions.size(); ++a) {
|
||||
for (int a = 0; a < as[level].actions.size(); ++a) {
|
||||
double start_full = usecond();
|
||||
Field force(U._grid);
|
||||
conformable(U._grid, Mom._grid);
|
||||
|
||||
Field& Us = Smearer.get_U(as[level].actions.at(a)->is_smeared);
|
||||
double start_force = usecond();
|
||||
as[level].actions.at(a)->deriv(Us, force); // deriv should NOT include Ta
|
||||
|
||||
std::cout << GridLogIntegrator << "Smearing (on/off): " << as[level].actions.at(a)->is_smeared << std::endl;
|
||||
if (as[level].actions.at(a)->is_smeared) Smearer.smeared_force(force);
|
||||
force = FieldImplementation::projectForce(force); // Ta for gauge fields
|
||||
double end_force = usecond();
|
||||
Real force_abs = std::sqrt(norm2(force)/U._grid->gSites());
|
||||
std::cout << GridLogIntegrator << "Force average: " << force_abs << std::endl;
|
||||
std::cout << GridLogIntegrator << "["<<level<<"]["<<a<<"] Force average: " << force_abs << std::endl;
|
||||
Mom -= force * ep;
|
||||
double end_full = usecond();
|
||||
double time_full = (end_full - start_full) / 1e3;
|
||||
double time_force = (end_force - start_force) / 1e3;
|
||||
std::cout << GridLogIntegrator << "["<<level<<"]["<<a<<"] P update elapsed time: " << time_full << " ms (force: " << time_force << " ms)" << std::endl;
|
||||
}
|
||||
|
||||
// Force from the other representations
|
||||
|
@ -6,30 +6,33 @@
|
||||
#ifndef GAUGE_CONFIG_
|
||||
#define GAUGE_CONFIG_
|
||||
|
||||
namespace Grid {
|
||||
namespace Grid
|
||||
{
|
||||
|
||||
namespace QCD {
|
||||
namespace QCD
|
||||
{
|
||||
|
||||
//trivial class for no smearing
|
||||
template< class Impl >
|
||||
class NoSmearing {
|
||||
//trivial class for no smearing
|
||||
template <class Impl>
|
||||
class NoSmearing
|
||||
{
|
||||
public:
|
||||
INHERIT_FIELD_TYPES(Impl);
|
||||
|
||||
Field* ThinField;
|
||||
Field *ThinField;
|
||||
|
||||
NoSmearing(): ThinField(NULL) {}
|
||||
NoSmearing() : ThinField(NULL) {}
|
||||
|
||||
void set_Field(Field& U) { ThinField = &U; }
|
||||
void set_Field(Field &U) { ThinField = &U; }
|
||||
|
||||
void smeared_force(Field&) const {}
|
||||
void smeared_force(Field &) const {}
|
||||
|
||||
Field& get_SmearedU() { return *ThinField; }
|
||||
Field &get_SmearedU() { return *ThinField; }
|
||||
|
||||
Field& get_U(bool smeared = false) {
|
||||
Field &get_U(bool smeared = false)
|
||||
{
|
||||
return *ThinField;
|
||||
}
|
||||
|
||||
};
|
||||
|
||||
/*!
|
||||
@ -44,32 +47,36 @@ public:
|
||||
It stores a list of smeared configurations.
|
||||
*/
|
||||
template <class Gimpl>
|
||||
class SmearedConfiguration {
|
||||
public:
|
||||
class SmearedConfiguration
|
||||
{
|
||||
public:
|
||||
INHERIT_GIMPL_TYPES(Gimpl);
|
||||
|
||||
private:
|
||||
private:
|
||||
const unsigned int smearingLevels;
|
||||
Smear_Stout<Gimpl> StoutSmearing;
|
||||
std::vector<GaugeField> SmearedSet;
|
||||
|
||||
// Member functions
|
||||
//====================================================================
|
||||
void fill_smearedSet(GaugeField& U) {
|
||||
ThinLinks = &U; // attach the smearing routine to the field U
|
||||
void fill_smearedSet(GaugeField &U)
|
||||
{
|
||||
ThinLinks = &U; // attach the smearing routine to the field U
|
||||
|
||||
// check the pointer is not null
|
||||
if (ThinLinks == NULL)
|
||||
std::cout << GridLogError
|
||||
<< "[SmearedConfiguration] Error in ThinLinks pointer\n";
|
||||
|
||||
if (smearingLevels > 0) {
|
||||
if (smearingLevels > 0)
|
||||
{
|
||||
std::cout << GridLogDebug
|
||||
<< "[SmearedConfiguration] Filling SmearedSet\n";
|
||||
GaugeField previous_u(ThinLinks->_grid);
|
||||
|
||||
previous_u = *ThinLinks;
|
||||
for (int smearLvl = 0; smearLvl < smearingLevels; ++smearLvl) {
|
||||
for (int smearLvl = 0; smearLvl < smearingLevels; ++smearLvl)
|
||||
{
|
||||
StoutSmearing.smear(SmearedSet[smearLvl], previous_u);
|
||||
previous_u = SmearedSet[smearLvl];
|
||||
|
||||
@ -81,9 +88,10 @@ class SmearedConfiguration {
|
||||
}
|
||||
}
|
||||
//====================================================================
|
||||
GaugeField AnalyticSmearedForce(const GaugeField& SigmaKPrime,
|
||||
const GaugeField& GaugeK) const {
|
||||
GridBase* grid = GaugeK._grid;
|
||||
GaugeField AnalyticSmearedForce(const GaugeField &SigmaKPrime,
|
||||
const GaugeField &GaugeK) const
|
||||
{
|
||||
GridBase *grid = GaugeK._grid;
|
||||
GaugeField C(grid), SigmaK(grid), iLambda(grid);
|
||||
GaugeLinkField iLambda_mu(grid);
|
||||
GaugeLinkField iQ(grid), e_iQ(grid);
|
||||
@ -94,7 +102,8 @@ class SmearedConfiguration {
|
||||
SigmaK = zero;
|
||||
iLambda = zero;
|
||||
|
||||
for (int mu = 0; mu < Nd; mu++) {
|
||||
for (int mu = 0; mu < Nd; mu++)
|
||||
{
|
||||
Cmu = peekLorentz(C, mu);
|
||||
GaugeKmu = peekLorentz(GaugeK, mu);
|
||||
SigmaKPrime_mu = peekLorentz(SigmaKPrime, mu);
|
||||
@ -104,20 +113,22 @@ class SmearedConfiguration {
|
||||
pokeLorentz(iLambda, iLambda_mu, mu);
|
||||
}
|
||||
StoutSmearing.derivative(SigmaK, iLambda,
|
||||
GaugeK); // derivative of SmearBase
|
||||
GaugeK); // derivative of SmearBase
|
||||
return SigmaK;
|
||||
}
|
||||
|
||||
/*! @brief Returns smeared configuration at level 'Level' */
|
||||
const GaugeField& get_smeared_conf(int Level) const {
|
||||
const GaugeField &get_smeared_conf(int Level) const
|
||||
{
|
||||
return SmearedSet[Level];
|
||||
}
|
||||
|
||||
//====================================================================
|
||||
void set_iLambda(GaugeLinkField& iLambda, GaugeLinkField& e_iQ,
|
||||
const GaugeLinkField& iQ, const GaugeLinkField& Sigmap,
|
||||
const GaugeLinkField& GaugeK) const {
|
||||
GridBase* grid = iQ._grid;
|
||||
void set_iLambda(GaugeLinkField &iLambda, GaugeLinkField &e_iQ,
|
||||
const GaugeLinkField &iQ, const GaugeLinkField &Sigmap,
|
||||
const GaugeLinkField &GaugeK) const
|
||||
{
|
||||
GridBase *grid = iQ._grid;
|
||||
GaugeLinkField iQ2(grid), iQ3(grid), B1(grid), B2(grid), USigmap(grid);
|
||||
GaugeLinkField unity(grid);
|
||||
unity = 1.0;
|
||||
@ -206,15 +217,15 @@ class SmearedConfiguration {
|
||||
}
|
||||
|
||||
//====================================================================
|
||||
public:
|
||||
GaugeField*
|
||||
ThinLinks; /*!< @brief Pointer to the thin
|
||||
links configuration */
|
||||
public:
|
||||
GaugeField *
|
||||
ThinLinks; /* Pointer to the thin links configuration */
|
||||
|
||||
/*! @brief Standard constructor */
|
||||
SmearedConfiguration(GridCartesian* UGrid, unsigned int Nsmear,
|
||||
Smear_Stout<Gimpl>& Stout)
|
||||
: smearingLevels(Nsmear), StoutSmearing(Stout), ThinLinks(NULL) {
|
||||
/* Standard constructor */
|
||||
SmearedConfiguration(GridCartesian *UGrid, unsigned int Nsmear,
                     Smear_Stout<Gimpl> &Stout)
    : smearingLevels(Nsmear), StoutSmearing(Stout), ThinLinks(NULL)
{
  // One GaugeField per smearing level, all built on UGrid.
  // Each element is copied from a temporary: pushing `*(new GaugeField(UGrid))`
  // would copy the heap object into the vector and then lose the only pointer
  // to it, leaking one field per level.
  SmearedSet.reserve(smearingLevels);
  for (unsigned int i = 0; i < smearingLevels; ++i)
    SmearedSet.push_back(GaugeField(UGrid));
}
|
||||
@ -223,21 +234,29 @@ class SmearedConfiguration {
|
||||
SmearedConfiguration()
|
||||
: smearingLevels(0), StoutSmearing(), SmearedSet(), ThinLinks(NULL) {}
|
||||
|
||||
|
||||
|
||||
// attach the smeared routines to the thin links U and fill the smeared set
|
||||
void set_Field(GaugeField& U) { fill_smearedSet(U); }
|
||||
void set_Field(GaugeField &U)
|
||||
{
|
||||
double start = usecond();
|
||||
fill_smearedSet(U);
|
||||
double end = usecond();
|
||||
double time = (end - start)/ 1e3;
|
||||
std::cout << GridLogMessage << "Smearing in " << time << " ms" << std::endl;
|
||||
}
|
||||
|
||||
//====================================================================
|
||||
void smeared_force(GaugeField& SigmaTilde) const {
|
||||
if (smearingLevels > 0) {
|
||||
void smeared_force(GaugeField &SigmaTilde) const
|
||||
{
|
||||
if (smearingLevels > 0)
|
||||
{
|
||||
double start = usecond();
|
||||
GaugeField force = SigmaTilde; // actually = U*SigmaTilde
|
||||
GaugeLinkField tmp_mu(SigmaTilde._grid);
|
||||
|
||||
for (int mu = 0; mu < Nd; mu++) {
|
||||
for (int mu = 0; mu < Nd; mu++)
|
||||
{
|
||||
// to get just SigmaTilde
|
||||
tmp_mu = adj(peekLorentz(SmearedSet[smearingLevels - 1], mu)) *
|
||||
peekLorentz(force, mu);
|
||||
tmp_mu = adj(peekLorentz(SmearedSet[smearingLevels - 1], mu)) * peekLorentz(force, mu);
|
||||
pokeLorentz(force, tmp_mu, mu);
|
||||
}
|
||||
|
||||
@ -246,33 +265,43 @@ class SmearedConfiguration {
|
||||
|
||||
force = AnalyticSmearedForce(force, *ThinLinks);
|
||||
|
||||
for (int mu = 0; mu < Nd; mu++) {
|
||||
for (int mu = 0; mu < Nd; mu++)
|
||||
{
|
||||
tmp_mu = peekLorentz(*ThinLinks, mu) * peekLorentz(force, mu);
|
||||
pokeLorentz(SigmaTilde, tmp_mu, mu);
|
||||
}
|
||||
} // if smearingLevels = 0 do nothing
|
||||
double end = usecond();
|
||||
double time = (end - start)/ 1e3;
|
||||
std::cout << GridLogMessage << "Smearing force in " << time << " ms" << std::endl;
|
||||
} // if smearingLevels = 0 do nothing
|
||||
}
|
||||
//====================================================================
|
||||
|
||||
GaugeField& get_SmearedU() { return SmearedSet[smearingLevels - 1]; }
|
||||
GaugeField &get_SmearedU() { return SmearedSet[smearingLevels - 1]; }
|
||||
|
||||
GaugeField& get_U(bool smeared = false) {
|
||||
GaugeField &get_U(bool smeared = false)
|
||||
{
|
||||
// get the config, thin links by default
|
||||
if (smeared) {
|
||||
if (smearingLevels) {
|
||||
if (smeared)
|
||||
{
|
||||
if (smearingLevels)
|
||||
{
|
||||
RealD impl_plaq =
|
||||
WilsonLoops<Gimpl>::avgPlaquette(SmearedSet[smearingLevels - 1]);
|
||||
std::cout << GridLogDebug << "getting Usmr Plaq: " << impl_plaq
|
||||
<< std::endl;
|
||||
return get_SmearedU();
|
||||
|
||||
} else {
|
||||
}
|
||||
else
|
||||
{
|
||||
RealD impl_plaq = WilsonLoops<Gimpl>::avgPlaquette(*ThinLinks);
|
||||
std::cout << GridLogDebug << "getting Thin Plaq: " << impl_plaq
|
||||
<< std::endl;
|
||||
return *ThinLinks;
|
||||
}
|
||||
} else {
|
||||
}
|
||||
else
|
||||
{
|
||||
RealD impl_plaq = WilsonLoops<Gimpl>::avgPlaquette(*ThinLinks);
|
||||
std::cout << GridLogDebug << "getting Thin Plaq: " << impl_plaq
|
||||
<< std::endl;
|
||||
|
@ -173,8 +173,8 @@ void WilsonFlow<Gimpl>::smear(GaugeField& out, const GaugeField& in) const {
|
||||
std::cout << "Time to evolve " << diff.count() << " s\n";
|
||||
#endif
|
||||
std::cout << GridLogMessage << "[WilsonFlow] Energy density (plaq) : "
|
||||
<< step << " "
|
||||
<< energyDensityPlaquette(step,out) << std::endl;
|
||||
<< step << " " << tau(step) << " "
|
||||
<< energyDensityPlaquette(step,out) << std::endl;
|
||||
if( step % measure_interval == 0){
|
||||
std::cout << GridLogMessage << "[WilsonFlow] Top. charge : "
|
||||
<< step << " "
|
||||
@ -193,8 +193,8 @@ void WilsonFlow<Gimpl>::smear_adaptive(GaugeField& out, const GaugeField& in, Re
|
||||
//std::cout << GridLogMessage << "Evolution time :"<< taus << std::endl;
|
||||
evolve_step_adaptive(out, maxTau);
|
||||
std::cout << GridLogMessage << "[WilsonFlow] Energy density (plaq) : "
|
||||
<< step << " "
|
||||
<< energyDensityPlaquette(out) << std::endl;
|
||||
<< step << " " << taus << " "
|
||||
<< energyDensityPlaquette(out) << std::endl;
|
||||
if( step % measure_interval == 0){
|
||||
std::cout << GridLogMessage << "[WilsonFlow] Top. charge : "
|
||||
<< step << " "
|
||||
|
@ -55,6 +55,11 @@ void Hdf5Writer::writeDefault(const std::string &s, const char *x)
|
||||
writeDefault(s, sx);
|
||||
}
|
||||
|
||||
// Accessor: returns a mutable reference to the writer's group_ member.
Group &Hdf5Writer::getGroup()
{
  return group_;
}
|
||||
|
||||
// Reader implementation ///////////////////////////////////////////////////////
|
||||
Hdf5Reader::Hdf5Reader(const std::string &fileName)
|
||||
: fileName_(fileName)
|
||||
@ -103,3 +108,8 @@ void Hdf5Reader::readDefault(const std::string &s, std::string &x)
|
||||
x.resize(strType.getSize());
|
||||
attribute.read(strType, &(x[0]));
|
||||
}
|
||||
|
||||
// Accessor: returns a mutable reference to the reader's group_ member.
Group &Hdf5Reader::getGroup()
{
  return group_;
}
|
||||
|
@ -38,6 +38,7 @@ namespace Grid
|
||||
template <typename U>
|
||||
typename std::enable_if<!element<std::vector<U>>::is_number, void>::type
|
||||
writeDefault(const std::string &s, const std::vector<U> &x);
|
||||
H5NS::Group & getGroup(void);
|
||||
private:
|
||||
template <typename U>
|
||||
void writeSingleAttribute(const U &x, const std::string &name,
|
||||
@ -65,6 +66,7 @@ namespace Grid
|
||||
template <typename U>
|
||||
typename std::enable_if<!element<std::vector<U>>::is_number, void>::type
|
||||
readDefault(const std::string &s, std::vector<U> &x);
|
||||
H5NS::Group & getGroup(void);
|
||||
private:
|
||||
template <typename U>
|
||||
void readSingleAttribute(U &x, const std::string &name,
|
||||
|
@ -30,6 +30,48 @@ namespace Grid {
|
||||
typedef typename std::vector<std::vector<typename TensorToVec<T>::type>> type;
|
||||
};
|
||||
|
||||
// Generic overload: the argument contributes no dimension, so the only
// possible effect is emptying `dim` when `wipe` is set.
template <typename T>
void tensorDim(std::vector<size_t> &dim, const T &t, const bool wipe = true)
{
  if (!wipe)
  {
    return;
  }
  dim.clear();
}
|
||||
|
||||
template <typename T>
|
||||
void tensorDim(std::vector<size_t> &dim, const iScalar<T> &t, const bool wipe = true)
|
||||
{
|
||||
if (wipe)
|
||||
{
|
||||
dim.clear();
|
||||
}
|
||||
tensorDim(dim, t._internal, false);
|
||||
}
|
||||
|
||||
template <typename T, int N>
|
||||
void tensorDim(std::vector<size_t> &dim, const iVector<T, N> &t, const bool wipe = true)
|
||||
{
|
||||
if (wipe)
|
||||
{
|
||||
dim.clear();
|
||||
}
|
||||
dim.push_back(N);
|
||||
tensorDim(dim, t._internal[0], false);
|
||||
}
|
||||
|
||||
template <typename T, int N>
|
||||
void tensorDim(std::vector<size_t> &dim, const iMatrix<T, N> &t, const bool wipe = true)
|
||||
{
|
||||
if (wipe)
|
||||
{
|
||||
dim.clear();
|
||||
}
|
||||
dim.push_back(N);
|
||||
dim.push_back(N);
|
||||
tensorDim(dim, t._internal[0][0], false);
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
typename TensorToVec<T>::type tensorToVec(const T &t)
|
||||
{
|
||||
|
@ -31,6 +31,17 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
|
||||
using namespace Grid;
|
||||
using namespace std;
|
||||
|
||||
// Abort the program with a diagnostic on stderr when a pugixml parse failed.
//   result : parse result to test
//   name   : label identifying what was parsed, echoed in the first message
void Grid::xmlCheckParse(const pugi::xml_parse_result &result, const std::string name)
{
  // Successful parse: nothing to report.
  if (result)
  {
    return;
  }

  std::cerr << "XML parsing error for " << name << std::endl;
  std::cerr << "XML error description: " << result.description() << std::endl;
  std::cerr << "XML error offset     : " << result.offset << std::endl;
  abort();
}
|
||||
|
||||
// Writer implementation ///////////////////////////////////////////////////////
|
||||
XmlWriter::XmlWriter(const string &fileName, string toplev) : fileName_(fileName)
|
||||
{
|
||||
@ -54,6 +65,19 @@ void XmlWriter::push(const string &s)
|
||||
node_ = node_.append_child(s.c_str());
|
||||
}
|
||||
|
||||
void XmlWriter::pushXmlString(const std::string &s)
|
||||
{
|
||||
pugi::xml_document doc;
|
||||
auto result = doc.load_buffer(s.c_str(), s.size());
|
||||
|
||||
xmlCheckParse(result, "fragment\n'" + s +"'");
|
||||
for (pugi::xml_node child = doc.first_child(); child; child = child.next_sibling())
|
||||
{
|
||||
node_ = node_.append_copy(child);
|
||||
}
|
||||
pop();
|
||||
}
|
||||
|
||||
void XmlWriter::pop(void)
|
||||
{
|
||||
node_ = node_.parent();
|
||||
@ -65,40 +89,32 @@ std::string XmlWriter::XmlString(void)
|
||||
return oss.str();
|
||||
}
|
||||
|
||||
XmlReader::XmlReader(const char *xmlstring,string toplev) : fileName_("")
|
||||
{
|
||||
pugi::xml_parse_result result;
|
||||
result = doc_.load_string(xmlstring);
|
||||
if ( !result ) {
|
||||
cerr << "XML error description (from char *): " << result.description() << "\nXML\n"<< xmlstring << "\n";
|
||||
cerr << "XML error offset (from char *) " << result.offset << "\nXML\n"<< xmlstring <<"\n";
|
||||
abort();
|
||||
}
|
||||
if ( toplev == std::string("") ) {
|
||||
node_ = doc_;
|
||||
} else {
|
||||
node_ = doc_.child(toplev.c_str());
|
||||
}
|
||||
}
|
||||
|
||||
// Reader implementation ///////////////////////////////////////////////////////
|
||||
XmlReader::XmlReader(const string &fileName,string toplev) : fileName_(fileName)
|
||||
XmlReader::XmlReader(const std::string &s, const bool isBuffer,
|
||||
std::string toplev)
|
||||
{
|
||||
pugi::xml_parse_result result;
|
||||
result = doc_.load_file(fileName_.c_str());
|
||||
if ( !result ) {
|
||||
cerr << "XML error description: " << result.description() <<" "<< fileName_ <<"\n";
|
||||
cerr << "XML error offset : " << result.offset <<" "<< fileName_ <<"\n";
|
||||
abort();
|
||||
|
||||
if (isBuffer)
|
||||
{
|
||||
fileName_ = "<string>";
|
||||
result = doc_.load_string(s.c_str());
|
||||
xmlCheckParse(result, "string\n'" + s + "'");
|
||||
}
|
||||
else
|
||||
{
|
||||
fileName_ = s;
|
||||
result = doc_.load_file(s.c_str());
|
||||
xmlCheckParse(result, "file '" + fileName_ + "'");
|
||||
}
|
||||
if ( toplev == std::string("") ) {
|
||||
node_ = doc_;
|
||||
node_ = doc_;
|
||||
} else {
|
||||
node_ = doc_.child(toplev.c_str());
|
||||
}
|
||||
}
|
||||
|
||||
bool XmlReader::push(const string &s)
|
||||
bool XmlReader::push(const std::string &s)
|
||||
{
|
||||
if (node_.child(s.c_str()))
|
||||
{
|
||||
@ -129,7 +145,6 @@ bool XmlReader::nextElement(const std::string &s)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
template <>
|
||||
|
@ -43,13 +43,15 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
|
||||
|
||||
namespace Grid
|
||||
{
|
||||
void xmlCheckParse(const pugi::xml_parse_result &result, const std::string name);
|
||||
|
||||
class XmlWriter: public Writer<XmlWriter>
|
||||
{
|
||||
public:
|
||||
XmlWriter(const std::string &fileName,std::string toplev = std::string("grid") );
|
||||
XmlWriter(const std::string &fileName, std::string toplev = std::string("grid") );
|
||||
virtual ~XmlWriter(void);
|
||||
void push(const std::string &s);
|
||||
void pushXmlString(const std::string &s);
|
||||
void pop(void);
|
||||
template <typename U>
|
||||
void writeDefault(const std::string &s, const U &x);
|
||||
@ -65,8 +67,8 @@ namespace Grid
|
||||
class XmlReader: public Reader<XmlReader>
|
||||
{
|
||||
public:
|
||||
XmlReader(const char *xmlstring,std::string toplev = std::string("grid") );
|
||||
XmlReader(const std::string &fileName,std::string toplev = std::string("grid") );
|
||||
XmlReader(const std::string &fileName, const bool isBuffer = false,
|
||||
std::string toplev = std::string("grid") );
|
||||
virtual ~XmlReader(void) = default;
|
||||
bool push(const std::string &s);
|
||||
void pop(void);
|
||||
@ -75,6 +77,8 @@ namespace Grid
|
||||
void readDefault(const std::string &s, U &output);
|
||||
template <typename U>
|
||||
void readDefault(const std::string &s, std::vector<U> &output);
|
||||
private:
|
||||
void checkParse(const pugi::xml_parse_result &result, const std::string name);
|
||||
private:
|
||||
pugi::xml_document doc_;
|
||||
pugi::xml_node node_;
|
||||
|
@ -66,6 +66,8 @@ void Gather_plane_simple_table (std::vector<std::pair<int,int> >& table,const La
|
||||
parallel_for(int i=0;i<num;i++){
|
||||
compress.Compress(&buffer[off],table[i].first,rhs._odata[so+table[i].second]);
|
||||
}
|
||||
// Further optimisation: i) streaming store the result
|
||||
// ii) software prefetch the first element of the next table entry
|
||||
}
|
||||
|
||||
///////////////////////////////////////////////////////////////////
|
||||
@ -505,25 +507,24 @@ class CartesianStencil { // Stencil runs along coordinate axes only; NO diagonal
|
||||
template<class decompressor>
|
||||
void CommsMerge(decompressor decompress,std::vector<Merge> &mm,std::vector<Decompress> &dd) {
|
||||
|
||||
mergetime-=usecond();
|
||||
for(int i=0;i<mm.size();i++){
|
||||
mergetime-=usecond();
|
||||
parallel_for(int o=0;o<mm[i].buffer_size/2;o++){
|
||||
decompress.Exchange(mm[i].mpointer,
|
||||
mm[i].vpointers[0],
|
||||
mm[i].vpointers[1],
|
||||
mm[i].type,o);
|
||||
}
|
||||
mergetime+=usecond();
|
||||
}
|
||||
mergetime+=usecond();
|
||||
|
||||
decompresstime-=usecond();
|
||||
for(int i=0;i<dd.size();i++){
|
||||
decompresstime-=usecond();
|
||||
parallel_for(int o=0;o<dd[i].buffer_size;o++){
|
||||
decompress.Decompress(dd[i].kernel_p,dd[i].mpi_p,o);
|
||||
}
|
||||
decompresstime+=usecond();
|
||||
}
|
||||
|
||||
decompresstime+=usecond();
|
||||
}
|
||||
////////////////////////////////////////
|
||||
// Set up routines
|
||||
|
@ -385,6 +385,7 @@ void Grid_init(int *argc,char ***argv)
|
||||
if( GridCmdOptionExists(*argv,*argv+*argc,"--comms-threads") ){
|
||||
arg= GridCmdOptionPayload(*argv,*argv+*argc,"--comms-threads");
|
||||
GridCmdOptionInt(arg,CartesianCommunicator::nCommThreads);
|
||||
assert(CartesianCommunicator::nCommThreads > 0);
|
||||
}
|
||||
if( GridCmdOptionExists(*argv,*argv+*argc,"--cacheblocking") ){
|
||||
arg= GridCmdOptionPayload(*argv,*argv+*argc,"--cacheblocking");
|
||||
|
259
tests/Test_compressed_lanczos_hot_start.cc
Normal file
259
tests/Test_compressed_lanczos_hot_start.cc
Normal file
@ -0,0 +1,259 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./tests/Test_compressed_lanczos_hot_start.cc
|
||||
|
||||
Copyright (C) 2017
|
||||
|
||||
Author: Leans heavily on Christoph Lehner's code
|
||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
/*
|
||||
* Reimplement the badly named "multigrid" lanczos as compressed Lanczos using the features
|
||||
* in Grid that were intended to be used to support blocked Aggregates.
|
||||
*/
|
||||
#include <Grid/Grid.h>
|
||||
#include <Grid/algorithms/iterative/ImplicitlyRestartedLanczos.h>
|
||||
#include <Grid/algorithms/iterative/LocalCoherenceLanczos.h>
|
||||
|
||||
using namespace std;
|
||||
using namespace Grid;
|
||||
using namespace Grid::QCD;
|
||||
|
||||
template<class Fobj,class CComplex,int nbasis>
|
||||
class LocalCoherenceLanczosScidac : public LocalCoherenceLanczos<Fobj,CComplex,nbasis>
|
||||
{
|
||||
public:
|
||||
typedef iVector<CComplex,nbasis > CoarseSiteVector;
|
||||
typedef Lattice<CoarseSiteVector> CoarseField;
|
||||
typedef Lattice<CComplex> CoarseScalar; // used for inner products on fine field
|
||||
typedef Lattice<Fobj> FineField;
|
||||
|
||||
LocalCoherenceLanczosScidac(GridBase *FineGrid,GridBase *CoarseGrid,
|
||||
LinearOperatorBase<FineField> &FineOp,
|
||||
int checkerboard)
|
||||
// Base constructor
|
||||
: LocalCoherenceLanczos<Fobj,CComplex,nbasis>(FineGrid,CoarseGrid,FineOp,checkerboard)
|
||||
{};
|
||||
|
||||
void checkpointFine(std::string evecs_file,std::string evals_file)
|
||||
{
|
||||
#ifdef HAVE_LIME
|
||||
assert(this->subspace.size()==nbasis);
|
||||
emptyUserRecord record;
|
||||
Grid::QCD::ScidacWriter WR(this->_FineGrid->IsBoss());
|
||||
WR.open(evecs_file);
|
||||
for(int k=0;k<nbasis;k++) {
|
||||
WR.writeScidacFieldRecord(this->subspace[k],record);
|
||||
}
|
||||
WR.close();
|
||||
|
||||
XmlWriter WRx(evals_file);
|
||||
write(WRx,"evals",this->evals_fine);
|
||||
#else
|
||||
assert(0);
|
||||
#endif
|
||||
}
|
||||
|
||||
void checkpointFineRestore(std::string evecs_file,std::string evals_file)
|
||||
{
|
||||
#ifdef HAVE_LIME
|
||||
this->evals_fine.resize(nbasis);
|
||||
this->subspace.resize(nbasis,this->_FineGrid);
|
||||
|
||||
std::cout << GridLogIRL<< "checkpointFineRestore: Reading evals from "<<evals_file<<std::endl;
|
||||
XmlReader RDx(evals_file);
|
||||
read(RDx,"evals",this->evals_fine);
|
||||
|
||||
assert(this->evals_fine.size()==nbasis);
|
||||
|
||||
std::cout << GridLogIRL<< "checkpointFineRestore: Reading evecs from "<<evecs_file<<std::endl;
|
||||
emptyUserRecord record;
|
||||
Grid::QCD::ScidacReader RD ;
|
||||
RD.open(evecs_file);
|
||||
for(int k=0;k<nbasis;k++) {
|
||||
this->subspace[k].checkerboard=this->_checkerboard;
|
||||
RD.readScidacFieldRecord(this->subspace[k],record);
|
||||
|
||||
}
|
||||
RD.close();
|
||||
#else
|
||||
assert(0);
|
||||
#endif
|
||||
}
|
||||
|
||||
void checkpointCoarse(std::string evecs_file,std::string evals_file)
|
||||
{
|
||||
#ifdef HAVE_LIME
|
||||
int n = this->evec_coarse.size();
|
||||
emptyUserRecord record;
|
||||
Grid::QCD::ScidacWriter WR(this->_CoarseGrid->IsBoss());
|
||||
WR.open(evecs_file);
|
||||
for(int k=0;k<n;k++) {
|
||||
WR.writeScidacFieldRecord(this->evec_coarse[k],record);
|
||||
}
|
||||
WR.close();
|
||||
|
||||
XmlWriter WRx(evals_file);
|
||||
write(WRx,"evals",this->evals_coarse);
|
||||
#else
|
||||
assert(0);
|
||||
#endif
|
||||
}
|
||||
|
||||
void checkpointCoarseRestore(std::string evecs_file,std::string evals_file,int nvec)
|
||||
{
|
||||
#ifdef HAVE_LIME
|
||||
std::cout << "resizing coarse vecs to " << nvec<< std::endl;
|
||||
this->evals_coarse.resize(nvec);
|
||||
this->evec_coarse.resize(nvec,this->_CoarseGrid);
|
||||
std::cout << GridLogIRL<< "checkpointCoarseRestore: Reading evals from "<<evals_file<<std::endl;
|
||||
XmlReader RDx(evals_file);
|
||||
read(RDx,"evals",this->evals_coarse);
|
||||
|
||||
assert(this->evals_coarse.size()==nvec);
|
||||
emptyUserRecord record;
|
||||
std::cout << GridLogIRL<< "checkpointCoarseRestore: Reading evecs from "<<evecs_file<<std::endl;
|
||||
Grid::QCD::ScidacReader RD ;
|
||||
RD.open(evecs_file);
|
||||
for(int k=0;k<nvec;k++) {
|
||||
RD.readScidacFieldRecord(this->evec_coarse[k],record);
|
||||
}
|
||||
RD.close();
|
||||
#else
|
||||
assert(0);
|
||||
#endif
|
||||
}
|
||||
};
|
||||
|
||||
int main (int argc, char ** argv) {
|
||||
|
||||
Grid_init(&argc,&argv);
|
||||
GridLogIRL.TimingMode(1);
|
||||
|
||||
LocalCoherenceLanczosParams Params;
|
||||
{
|
||||
Params.omega.resize(10);
|
||||
Params.blockSize.resize(5);
|
||||
XmlWriter writer("Params_template.xml");
|
||||
write(writer,"Params",Params);
|
||||
std::cout << GridLogMessage << " Written Params_template.xml" <<std::endl;
|
||||
}
|
||||
|
||||
{
|
||||
XmlReader reader(std::string("./Params.xml"));
|
||||
read(reader, "Params", Params);
|
||||
}
|
||||
|
||||
int Ls = (int)Params.omega.size();
|
||||
RealD mass = Params.mass;
|
||||
RealD M5 = Params.M5;
|
||||
std::vector<int> blockSize = Params.blockSize;
|
||||
std::vector<int> latt({32,32,32,32});
|
||||
uint64_t vol = Ls*latt[0]*latt[1]*latt[2]*latt[3];
|
||||
double mat_flop= 2.0*1320.0*vol;
|
||||
// Grids
|
||||
GridCartesian * UGrid = SpaceTimeGrid::makeFourDimGrid(latt,
|
||||
GridDefaultSimd(Nd,vComplex::Nsimd()),
|
||||
GridDefaultMpi());
|
||||
GridRedBlackCartesian * UrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid);
|
||||
GridCartesian * FGrid = SpaceTimeGrid::makeFiveDimGrid(Ls,UGrid);
|
||||
GridRedBlackCartesian * FrbGrid = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,UGrid);
|
||||
|
||||
std::vector<int> fineLatt = latt;
|
||||
int dims=fineLatt.size();
|
||||
assert(blockSize.size()==dims+1);
|
||||
std::vector<int> coarseLatt(dims);
|
||||
std::vector<int> coarseLatt5d ;
|
||||
|
||||
for (int d=0;d<coarseLatt.size();d++){
|
||||
coarseLatt[d] = fineLatt[d]/blockSize[d]; assert(coarseLatt[d]*blockSize[d]==fineLatt[d]);
|
||||
}
|
||||
|
||||
std::cout << GridLogMessage<< " 5d coarse lattice is ";
|
||||
for (int i=0;i<coarseLatt.size();i++){
|
||||
std::cout << coarseLatt[i]<<"x";
|
||||
}
|
||||
int cLs = Ls/blockSize[dims]; assert(cLs*blockSize[dims]==Ls);
|
||||
std::cout << cLs<<std::endl;
|
||||
|
||||
GridCartesian * CoarseGrid4 = SpaceTimeGrid::makeFourDimGrid(coarseLatt, GridDefaultSimd(Nd,vComplex::Nsimd()),GridDefaultMpi());
|
||||
GridRedBlackCartesian * CoarseGrid4rb = SpaceTimeGrid::makeFourDimRedBlackGrid(CoarseGrid4);
|
||||
GridCartesian * CoarseGrid5 = SpaceTimeGrid::makeFiveDimGrid(cLs,CoarseGrid4);
|
||||
|
||||
// Gauge field
|
||||
std::vector<int> seeds4({1,2,3,4});
|
||||
GridParallelRNG RNG4(UGrid); RNG4.SeedFixedIntegers(seeds4);
|
||||
LatticeGaugeField Umu(UGrid);
|
||||
SU3::HotConfiguration(RNG4,Umu);
|
||||
// FieldMetaData header;
|
||||
// NerscIO::readConfiguration(Umu,header,Params.config);
|
||||
|
||||
std::cout << GridLogMessage << "Lattice dimensions: " << latt << " Ls: " << Ls << std::endl;
|
||||
|
||||
// ZMobius EO Operator
|
||||
ZMobiusFermionR Ddwf(Umu, *FGrid, *FrbGrid, *UGrid, *UrbGrid, mass, M5, Params.omega,1.,0.);
|
||||
SchurDiagTwoOperator<ZMobiusFermionR,LatticeFermion> HermOp(Ddwf);
|
||||
|
||||
// Eigenvector storage
|
||||
LanczosParams fine =Params.FineParams;
|
||||
LanczosParams coarse=Params.CoarseParams;
|
||||
|
||||
const int Ns1 = fine.Nstop; const int Ns2 = coarse.Nstop;
|
||||
const int Nk1 = fine.Nk; const int Nk2 = coarse.Nk;
|
||||
const int Nm1 = fine.Nm; const int Nm2 = coarse.Nm;
|
||||
|
||||
std::cout << GridLogMessage << "Keep " << fine.Nstop << " fine vectors" << std::endl;
|
||||
std::cout << GridLogMessage << "Keep " << coarse.Nstop << " coarse vectors" << std::endl;
|
||||
assert(Nm2 >= Nm1);
|
||||
|
||||
const int nbasis= 60;
|
||||
assert(nbasis==Ns1);
|
||||
LocalCoherenceLanczosScidac<vSpinColourVector,vTComplex,nbasis> _LocalCoherenceLanczos(FrbGrid,CoarseGrid5,HermOp,Odd);
|
||||
std::cout << GridLogMessage << "Constructed LocalCoherenceLanczos" << std::endl;
|
||||
|
||||
assert( (Params.doFine)||(Params.doFineRead));
|
||||
|
||||
if ( Params.doFine ) {
|
||||
std::cout << GridLogMessage << "Performing fine grid IRL Nstop "<< Ns1 << " Nk "<<Nk1<<" Nm "<<Nm1<< std::endl;
|
||||
double t0=-usecond();
|
||||
_LocalCoherenceLanczos.calcFine(fine.Cheby,
|
||||
fine.Nstop,fine.Nk,fine.Nm,
|
||||
fine.resid,fine.MaxIt,
|
||||
fine.betastp,fine.MinRes);
|
||||
t0+=usecond();
|
||||
|
||||
double t1=-usecond();
|
||||
if ( Params.saveEvecs ) {
|
||||
std::cout << GridLogIRL<<"Checkpointing Fine evecs"<<std::endl;
|
||||
_LocalCoherenceLanczos.checkpointFine(std::string("evecs.scidac"),std::string("evals.xml"));
|
||||
}
|
||||
t1+=usecond();
|
||||
|
||||
std::cout << GridLogMessage << "Computation time is " << (t0)/1.0e6 <<" seconds"<<std::endl;
|
||||
if ( Params.saveEvecs ) std::cout << GridLogMessage << "I/O time is " << (t1)/1.0e6 <<" seconds"<<std::endl;
|
||||
std::cout << GridLogMessage << "Time to solution is " << (t1+t0)/1.0e6 <<" seconds"<<std::endl;
|
||||
std::cout << GridLogMessage << "Done"<<std::endl;
|
||||
}
|
||||
|
||||
Grid_finalize();
|
||||
}
|
||||
|
@ -49,6 +49,8 @@ int main (int argc, char ** argv)
|
||||
|
||||
const int Ls=8;
|
||||
|
||||
std::cout << GridLogMessage << "::::: NB: to enable a quick bit reproducibility check use the --checksums flag. " << std::endl;
|
||||
|
||||
GridCartesian * UGrid = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(), GridDefaultSimd(Nd,vComplexD::Nsimd()),GridDefaultMpi());
|
||||
GridRedBlackCartesian * UrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid);
|
||||
GridCartesian * FGrid = SpaceTimeGrid::makeFiveDimGrid(Ls,UGrid);
|
||||
@ -90,24 +92,23 @@ int main (int argc, char ** argv)
|
||||
SchurDiagMooeeOperator<DomainWallFermionD,LatticeFermionD> HermOpEO(Ddwf);
|
||||
SchurDiagMooeeOperator<DomainWallFermionF,LatticeFermionF> HermOpEO_f(Ddwf_f);
|
||||
|
||||
std::cout << "Starting mixed CG" << std::endl;
|
||||
std::cout << GridLogMessage << "::::::::::::: Starting mixed CG" << std::endl;
|
||||
MixedPrecisionConjugateGradient<LatticeFermionD,LatticeFermionF> mCG(1.0e-8, 10000, 50, FrbGrid_f, HermOpEO_f, HermOpEO);
|
||||
mCG(src_o,result_o);
|
||||
|
||||
std::cout << "Starting regular CG" << std::endl;
|
||||
std::cout << GridLogMessage << "::::::::::::: Starting regular CG" << std::endl;
|
||||
ConjugateGradient<LatticeFermionD> CG(1.0e-8,10000);
|
||||
CG(HermOpEO,src_o,result_o_2);
|
||||
|
||||
LatticeFermionD diff_o(FrbGrid);
|
||||
RealD diff = axpy_norm(diff_o, -1.0, result_o, result_o_2);
|
||||
|
||||
std::cout << "Diff between mixed and regular CG: " << diff << std::endl;
|
||||
std::cout << GridLogMessage << "::::::::::::: Diff between mixed and regular CG: " << diff << std::endl;
|
||||
|
||||
#ifdef HAVE_LIME
|
||||
if( GridCmdOptionExists(argv,argv+argc,"--checksums") ){
|
||||
|
||||
std::string file1("./Propagator1");
|
||||
std::string file2("./Propagator2");
|
||||
emptyUserRecord record;
|
||||
uint32_t nersc_csum;
|
||||
uint32_t scidac_csuma;
|
||||
@ -121,12 +122,12 @@ int main (int argc, char ** argv)
|
||||
BinaryIO::writeLatticeObject<vFermionD,FermionD>(result_o,file1,munge, 0, format,
|
||||
nersc_csum,scidac_csuma,scidac_csumb);
|
||||
|
||||
std::cout << " Mixed checksums "<<std::hex << scidac_csuma << " "<<scidac_csumb<<std::endl;
|
||||
std::cout << GridLogMessage << " Mixed checksums "<<std::hex << scidac_csuma << " "<<scidac_csumb<<std::endl;
|
||||
|
||||
BinaryIO::writeLatticeObject<vFermionD,FermionD>(result_o_2,file1,munge, 0, format,
|
||||
nersc_csum,scidac_csuma,scidac_csumb);
|
||||
|
||||
std::cout << " CG checksums "<<std::hex << scidac_csuma << " "<<scidac_csumb<<std::endl;
|
||||
std::cout << GridLogMessage << " CG checksums "<<std::hex << scidac_csuma << " "<<scidac_csumb<<std::endl;
|
||||
}
|
||||
#endif
|
||||
|
||||
|
@ -34,6 +34,8 @@ class ScalarActionParameters : Serializable {
|
||||
double, lambda,
|
||||
double, g);
|
||||
|
||||
ScalarActionParameters() = default;
|
||||
|
||||
template <class ReaderClass >
|
||||
ScalarActionParameters(Reader<ReaderClass>& Reader){
|
||||
read(Reader, "ScalarAction", *this);
|
||||
@ -124,10 +126,13 @@ int main(int argc, char **argv) {
|
||||
ScalarGrid.set_rb(new GridRedBlackCartesian(ScalarGrid.get_full()));
|
||||
TheHMC.Resources.AddGrid("scalar", ScalarGrid);
|
||||
std::cout << "Lattice size : " << GridDefaultLatt() << std::endl;
|
||||
|
||||
ScalarActionParameters SPar(Reader);
|
||||
|
||||
// Checkpointer definition
|
||||
CheckpointerParameters CPparams(Reader);
|
||||
TheHMC.Resources.LoadBinaryCheckpointer(CPparams);
|
||||
//TheHMC.Resources.LoadBinaryCheckpointer(CPparams);
|
||||
TheHMC.Resources.LoadScidacCheckpointer(CPparams, SPar);
|
||||
|
||||
RNGModuleParameters RNGpar(Reader);
|
||||
TheHMC.Resources.SetRNGSeeds(RNGpar);
|
||||
@ -140,7 +145,6 @@ int main(int argc, char **argv) {
|
||||
// Collect actions, here use more encapsulation
|
||||
|
||||
// Scalar action in adjoint representation
|
||||
ScalarActionParameters SPar(Reader);
|
||||
ScalarAction Saction(SPar.mass_squared, SPar.lambda, SPar.g);
|
||||
|
||||
// Collect actions
|
||||
|
@ -33,6 +33,7 @@ namespace Grid{
|
||||
GRID_SERIALIZABLE_CLASS_MEMBERS(ActionParameters,
|
||||
double, beta)
|
||||
|
||||
ActionParameters() = default;
|
||||
|
||||
template <class ReaderClass >
|
||||
ActionParameters(Reader<ReaderClass>& Reader){
|
||||
@ -68,11 +69,15 @@ int main(int argc, char **argv) {
|
||||
}
|
||||
Serialiser Reader(TheHMC.ParameterFile);
|
||||
|
||||
|
||||
// Read parameters from input file
|
||||
ActionParameters WilsonPar(Reader);
|
||||
|
||||
// Checkpointer definition
|
||||
CheckpointerParameters CPparams(Reader);
|
||||
TheHMC.Resources.LoadNerscCheckpointer(CPparams);
|
||||
//TheHMC.Resources.LoadNerscCheckpointer(CPparams);
|
||||
|
||||
// Store metadata in the Scidac checkpointer
|
||||
TheHMC.Resources.LoadScidacCheckpointer(CPparams, WilsonPar);
|
||||
|
||||
RNGModuleParameters RNGpar(Reader);
|
||||
TheHMC.Resources.SetRNGSeeds(RNGpar);
|
||||
@ -91,8 +96,6 @@ int main(int argc, char **argv) {
|
||||
// need wrappers of the fermionic classes
|
||||
// that have a complex construction
|
||||
// standard
|
||||
ActionParameters WilsonPar(Reader);
|
||||
//RealD beta = 6.4 ;
|
||||
WilsonGaugeActionR Waction(WilsonPar.beta);
|
||||
|
||||
ActionLevel<HMCWrapper::Field> Level1(1);
|
||||
|
253
tests/lanczos/Test_compressed_lanczos.cc
Normal file
253
tests/lanczos/Test_compressed_lanczos.cc
Normal file
@ -0,0 +1,253 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./tests/lanczos/Test_compressed_lanczos.cc
|
||||
|
||||
Copyright (C) 2017
|
||||
|
||||
Author: Leans heavily on Christoph Lehner's code
|
||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
/*
|
||||
* Reimplement the badly named "multigrid" lanczos as compressed Lanczos using the features
|
||||
* in Grid that were intended to be used to support blocked Aggregates.
|
||||
*/
|
||||
#include <Grid/Grid.h>
|
||||
#include <Grid/algorithms/iterative/ImplicitlyRestartedLanczos.h>
|
||||
#include <Grid/algorithms/iterative/LocalCoherenceLanczos.h>
|
||||
|
||||
using namespace std;
|
||||
using namespace Grid;
|
||||
using namespace Grid::QCD;
|
||||
|
||||
template<class Fobj,class CComplex,int nbasis>
|
||||
class LocalCoherenceLanczosScidac : public LocalCoherenceLanczos<Fobj,CComplex,nbasis>
|
||||
{
|
||||
public:
|
||||
typedef iVector<CComplex,nbasis > CoarseSiteVector;
|
||||
typedef Lattice<CoarseSiteVector> CoarseField;
|
||||
typedef Lattice<CComplex> CoarseScalar; // used for inner products on fine field
|
||||
typedef Lattice<Fobj> FineField;
|
||||
|
||||
LocalCoherenceLanczosScidac(GridBase *FineGrid,GridBase *CoarseGrid,
|
||||
LinearOperatorBase<FineField> &FineOp,
|
||||
int checkerboard)
|
||||
// Base constructor
|
||||
: LocalCoherenceLanczos<Fobj,CComplex,nbasis>(FineGrid,CoarseGrid,FineOp,checkerboard)
|
||||
{};
|
||||
|
||||
void checkpointFine(std::string evecs_file,std::string evals_file)
|
||||
{
|
||||
assert(this->subspace.size()==nbasis);
|
||||
emptyUserRecord record;
|
||||
Grid::QCD::ScidacWriter WR(this->_FineGrid->IsBoss());
|
||||
WR.open(evecs_file);
|
||||
for(int k=0;k<nbasis;k++) {
|
||||
WR.writeScidacFieldRecord(this->subspace[k],record);
|
||||
}
|
||||
WR.close();
|
||||
|
||||
XmlWriter WRx(evals_file);
|
||||
write(WRx,"evals",this->evals_fine);
|
||||
}
|
||||
|
||||
void checkpointFineRestore(std::string evecs_file,std::string evals_file)
|
||||
{
|
||||
this->evals_fine.resize(nbasis);
|
||||
this->subspace.resize(nbasis,this->_FineGrid);
|
||||
|
||||
std::cout << GridLogIRL<< "checkpointFineRestore: Reading evals from "<<evals_file<<std::endl;
|
||||
XmlReader RDx(evals_file);
|
||||
read(RDx,"evals",this->evals_fine);
|
||||
|
||||
assert(this->evals_fine.size()==nbasis);
|
||||
|
||||
std::cout << GridLogIRL<< "checkpointFineRestore: Reading evecs from "<<evecs_file<<std::endl;
|
||||
emptyUserRecord record;
|
||||
Grid::QCD::ScidacReader RD ;
|
||||
RD.open(evecs_file);
|
||||
for(int k=0;k<nbasis;k++) {
|
||||
this->subspace[k].checkerboard=this->_checkerboard;
|
||||
RD.readScidacFieldRecord(this->subspace[k],record);
|
||||
|
||||
}
|
||||
RD.close();
|
||||
}
|
||||
|
||||
void checkpointCoarse(std::string evecs_file,std::string evals_file)
|
||||
{
|
||||
int n = this->evec_coarse.size();
|
||||
emptyUserRecord record;
|
||||
Grid::QCD::ScidacWriter WR(this->_CoarseGrid->IsBoss());
|
||||
WR.open(evecs_file);
|
||||
for(int k=0;k<n;k++) {
|
||||
WR.writeScidacFieldRecord(this->evec_coarse[k],record);
|
||||
}
|
||||
WR.close();
|
||||
|
||||
XmlWriter WRx(evals_file);
|
||||
write(WRx,"evals",this->evals_coarse);
|
||||
}
|
||||
|
||||
void checkpointCoarseRestore(std::string evecs_file,std::string evals_file,int nvec)
|
||||
{
|
||||
std::cout << "resizing coarse vecs to " << nvec<< std::endl;
|
||||
this->evals_coarse.resize(nvec);
|
||||
this->evec_coarse.resize(nvec,this->_CoarseGrid);
|
||||
std::cout << GridLogIRL<< "checkpointCoarseRestore: Reading evals from "<<evals_file<<std::endl;
|
||||
XmlReader RDx(evals_file);
|
||||
read(RDx,"evals",this->evals_coarse);
|
||||
|
||||
assert(this->evals_coarse.size()==nvec);
|
||||
emptyUserRecord record;
|
||||
std::cout << GridLogIRL<< "checkpointCoarseRestore: Reading evecs from "<<evecs_file<<std::endl;
|
||||
Grid::QCD::ScidacReader RD ;
|
||||
RD.open(evecs_file);
|
||||
for(int k=0;k<nvec;k++) {
|
||||
RD.readScidacFieldRecord(this->evec_coarse[k],record);
|
||||
}
|
||||
RD.close();
|
||||
}
|
||||
};
|
||||
|
||||
int main (int argc, char ** argv) {
|
||||
|
||||
Grid_init(&argc,&argv);
|
||||
GridLogIRL.TimingMode(1);
|
||||
|
||||
LocalCoherenceLanczosParams Params;
|
||||
{
|
||||
Params.omega.resize(10);
|
||||
Params.blockSize.resize(5);
|
||||
XmlWriter writer("Params_template.xml");
|
||||
write(writer,"Params",Params);
|
||||
std::cout << GridLogMessage << " Written Params_template.xml" <<std::endl;
|
||||
}
|
||||
|
||||
{
|
||||
XmlReader reader(std::string("./Params.xml"));
|
||||
read(reader, "Params", Params);
|
||||
}
|
||||
|
||||
int Ls = (int)Params.omega.size();
|
||||
RealD mass = Params.mass;
|
||||
RealD M5 = Params.M5;
|
||||
std::vector<int> blockSize = Params.blockSize;
|
||||
|
||||
// Grids
|
||||
GridCartesian * UGrid = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(),
|
||||
GridDefaultSimd(Nd,vComplex::Nsimd()),
|
||||
GridDefaultMpi());
|
||||
GridRedBlackCartesian * UrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid);
|
||||
GridCartesian * FGrid = SpaceTimeGrid::makeFiveDimGrid(Ls,UGrid);
|
||||
GridRedBlackCartesian * FrbGrid = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,UGrid);
|
||||
|
||||
std::vector<int> fineLatt = GridDefaultLatt();
|
||||
int dims=fineLatt.size();
|
||||
assert(blockSize.size()==dims+1);
|
||||
std::vector<int> coarseLatt(dims);
|
||||
std::vector<int> coarseLatt5d ;
|
||||
|
||||
for (int d=0;d<coarseLatt.size();d++){
|
||||
coarseLatt[d] = fineLatt[d]/blockSize[d]; assert(coarseLatt[d]*blockSize[d]==fineLatt[d]);
|
||||
}
|
||||
|
||||
std::cout << GridLogMessage<< " 5d coarse lattice is ";
|
||||
for (int i=0;i<coarseLatt.size();i++){
|
||||
std::cout << coarseLatt[i]<<"x";
|
||||
}
|
||||
int cLs = Ls/blockSize[dims]; assert(cLs*blockSize[dims]==Ls);
|
||||
std::cout << cLs<<std::endl;
|
||||
|
||||
GridCartesian * CoarseGrid4 = SpaceTimeGrid::makeFourDimGrid(coarseLatt, GridDefaultSimd(Nd,vComplex::Nsimd()),GridDefaultMpi());
|
||||
GridRedBlackCartesian * CoarseGrid4rb = SpaceTimeGrid::makeFourDimRedBlackGrid(CoarseGrid4);
|
||||
GridCartesian * CoarseGrid5 = SpaceTimeGrid::makeFiveDimGrid(cLs,CoarseGrid4);
|
||||
|
||||
// Gauge field
|
||||
LatticeGaugeField Umu(UGrid);
|
||||
FieldMetaData header;
|
||||
NerscIO::readConfiguration(Umu,header,Params.config);
|
||||
std::cout << GridLogMessage << "Lattice dimensions: " << GridDefaultLatt() << " Ls: " << Ls << std::endl;
|
||||
|
||||
// ZMobius EO Operator
|
||||
ZMobiusFermionR Ddwf(Umu, *FGrid, *FrbGrid, *UGrid, *UrbGrid, mass, M5, Params.omega,1.,0.);
|
||||
SchurDiagTwoOperator<ZMobiusFermionR,LatticeFermion> HermOp(Ddwf);
|
||||
|
||||
// Eigenvector storage
|
||||
LanczosParams fine =Params.FineParams;
|
||||
LanczosParams coarse=Params.CoarseParams;
|
||||
|
||||
const int Ns1 = fine.Nstop; const int Ns2 = coarse.Nstop;
|
||||
const int Nk1 = fine.Nk; const int Nk2 = coarse.Nk;
|
||||
const int Nm1 = fine.Nm; const int Nm2 = coarse.Nm;
|
||||
|
||||
std::cout << GridLogMessage << "Keep " << fine.Nstop << " fine vectors" << std::endl;
|
||||
std::cout << GridLogMessage << "Keep " << coarse.Nstop << " coarse vectors" << std::endl;
|
||||
assert(Nm2 >= Nm1);
|
||||
|
||||
const int nbasis= 60;
|
||||
assert(nbasis==Ns1);
|
||||
LocalCoherenceLanczosScidac<vSpinColourVector,vTComplex,nbasis> _LocalCoherenceLanczos(FrbGrid,CoarseGrid5,HermOp,Odd);
|
||||
std::cout << GridLogMessage << "Constructed LocalCoherenceLanczos" << std::endl;
|
||||
|
||||
assert( (Params.doFine)||(Params.doFineRead));
|
||||
|
||||
if ( Params.doFine ) {
|
||||
std::cout << GridLogMessage << "Performing fine grid IRL Nstop "<< Ns1 << " Nk "<<Nk1<<" Nm "<<Nm1<< std::endl;
|
||||
_LocalCoherenceLanczos.calcFine(fine.Cheby,
|
||||
fine.Nstop,fine.Nk,fine.Nm,
|
||||
fine.resid,fine.MaxIt,
|
||||
fine.betastp,fine.MinRes);
|
||||
|
||||
std::cout << GridLogIRL<<"Checkpointing Fine evecs"<<std::endl;
|
||||
_LocalCoherenceLanczos.checkpointFine(std::string("evecs.scidac"),std::string("evals.xml"));
|
||||
_LocalCoherenceLanczos.testFine(fine.resid*100.0); // Coarse check
|
||||
std::cout << GridLogIRL<<"Orthogonalising"<<std::endl;
|
||||
_LocalCoherenceLanczos.Orthogonalise();
|
||||
std::cout << GridLogIRL<<"Orthogonaled"<<std::endl;
|
||||
}
|
||||
|
||||
if ( Params.doFineRead ) {
|
||||
_LocalCoherenceLanczos.checkpointFineRestore(std::string("evecs.scidac"),std::string("evals.xml"));
|
||||
_LocalCoherenceLanczos.testFine(fine.resid*100.0); // Coarse check
|
||||
_LocalCoherenceLanczos.Orthogonalise();
|
||||
}
|
||||
|
||||
if ( Params.doCoarse ) {
|
||||
std::cout << GridLogMessage << "Performing coarse grid IRL Nstop "<< Ns2<< " Nk "<<Nk2<<" Nm "<<Nm2<< std::endl;
|
||||
_LocalCoherenceLanczos.calcCoarse(coarse.Cheby,Params.Smoother,Params.coarse_relax_tol,
|
||||
coarse.Nstop, coarse.Nk,coarse.Nm,
|
||||
coarse.resid, coarse.MaxIt,
|
||||
coarse.betastp,coarse.MinRes);
|
||||
|
||||
|
||||
std::cout << GridLogIRL<<"Checkpointing coarse evecs"<<std::endl;
|
||||
_LocalCoherenceLanczos.checkpointCoarse(std::string("evecs.coarse.scidac"),std::string("evals.coarse.xml"));
|
||||
}
|
||||
|
||||
if ( Params.doCoarseRead ) {
|
||||
// Verify we can reread ???
|
||||
_LocalCoherenceLanczos.checkpointCoarseRestore(std::string("evecs.coarse.scidac"),std::string("evals.coarse.xml"),coarse.Nstop);
|
||||
_LocalCoherenceLanczos.testCoarse(coarse.resid*100.0,Params.Smoother,Params.coarse_relax_tol); // Coarse check
|
||||
}
|
||||
Grid_finalize();
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user