mirror of
https://github.com/paboyle/Grid.git
synced 2024-11-10 07:55:35 +00:00
Merge remote-tracking branch 'upstream/master'
Conflicts: lib/math/Grid_math_tensors.h lib/simd/Grid_vector_types.h
This commit is contained in:
commit
9ad6d0c65f
@ -23,7 +23,7 @@ int main (int argc, char ** argv)
|
||||
|
||||
|
||||
|
||||
for(int lat=4;lat<=16;lat+=4){
|
||||
for(int lat=4;lat<=32;lat+=2){
|
||||
for(int Ls=1;Ls<=16;Ls*=2){
|
||||
|
||||
std::vector<int> latt_size ({lat,lat,lat,lat});
|
||||
@ -94,8 +94,7 @@ int main (int argc, char ** argv)
|
||||
std::cout << " L "<<"\t\t"<<" Ls "<<"\t\t"<<"bytes"<<"\t\t"<<"MB/s uni"<<"\t\t"<<"MB/s bidi"<<std::endl;
|
||||
|
||||
|
||||
|
||||
for(int lat=4;lat<=16;lat+=4){
|
||||
for(int lat=4;lat<=32;lat+=2){
|
||||
for(int Ls=1;Ls<=16;Ls*=2){
|
||||
|
||||
std::vector<int> latt_size ({lat,lat,lat,lat});
|
||||
|
11
benchmarks/Grid_su3_expr.cc
Normal file
11
benchmarks/Grid_su3_expr.cc
Normal file
@ -0,0 +1,11 @@
|
||||
#include <Grid.h>
|
||||
|
||||
using namespace std;
|
||||
using namespace Grid;
|
||||
using namespace Grid::QCD;
|
||||
|
||||
void su3_test_mult_expression(LatticeColourMatrix &z, LatticeColourMatrix &x,LatticeColourMatrix &y)
|
||||
{
|
||||
z=x*y;
|
||||
}
|
||||
|
11
benchmarks/Grid_su3_test.cc
Normal file
11
benchmarks/Grid_su3_test.cc
Normal file
@ -0,0 +1,11 @@
|
||||
#include <Grid.h>
|
||||
|
||||
using namespace std;
|
||||
using namespace Grid;
|
||||
using namespace Grid::QCD;
|
||||
|
||||
|
||||
void su3_test_mult_routine(LatticeColourMatrix &z, LatticeColourMatrix &x,LatticeColourMatrix &y)
|
||||
{
|
||||
mult(z,x,y);
|
||||
}
|
@ -24,7 +24,8 @@ int main (int argc, char ** argv)
|
||||
std::vector<int> latt_size = GridDefaultLatt();
|
||||
std::vector<int> simd_layout = GridDefaultSimd(Nd,vComplexF::Nsimd());
|
||||
std::vector<int> mpi_layout = GridDefaultMpi();
|
||||
GridCartesian Grid(latt_size,simd_layout,mpi_layout);
|
||||
GridCartesian Grid(latt_size,simd_layout,mpi_layout);
|
||||
GridRedBlackCartesian RBGrid(latt_size,simd_layout,mpi_layout);
|
||||
|
||||
int threads = GridThread::GetThreads();
|
||||
std::cout << "Grid is setup to use "<<threads<<" threads"<<std::endl;
|
||||
@ -36,11 +37,11 @@ int main (int argc, char ** argv)
|
||||
// pRNG.SeedFixedIntegers(seeds);
|
||||
pRNG.SeedRandomDevice();
|
||||
|
||||
LatticeFermion src(&Grid); random(pRNG,src);
|
||||
LatticeFermion src (&Grid); random(pRNG,src);
|
||||
LatticeFermion result(&Grid); result=zero;
|
||||
LatticeFermion ref(&Grid); ref=zero;
|
||||
LatticeFermion err(&Grid);
|
||||
LatticeFermion tmp(&Grid); tmp=zero;
|
||||
LatticeFermion err(&Grid); tmp=zero;
|
||||
LatticeGaugeField Umu(&Grid); random(pRNG,Umu);
|
||||
std::vector<LatticeColourMatrix> U(4,&Grid);
|
||||
|
||||
@ -51,8 +52,11 @@ int main (int argc, char ** argv)
|
||||
|
||||
// Only one non-zero (y)
|
||||
Umu=zero;
|
||||
Complex cone(1.0,0.0);
|
||||
for(int nn=0;nn<Nd;nn++){
|
||||
random(pRNG,U[nn]);
|
||||
if (nn!=0) U[nn]=zero;
|
||||
else U[nn] = cone;
|
||||
pokeIndex<LorentzIndex>(Umu,U[nn],nn);
|
||||
}
|
||||
|
||||
@ -78,7 +82,7 @@ int main (int argc, char ** argv)
|
||||
}
|
||||
|
||||
RealD mass=0.1;
|
||||
WilsonMatrix Dw(Umu,mass);
|
||||
WilsonMatrix Dw(Umu,Grid,RBGrid,mass);
|
||||
|
||||
std::cout << "Calling Dw"<<std::endl;
|
||||
int ncall=1000;
|
||||
@ -93,7 +97,7 @@ int main (int argc, char ** argv)
|
||||
std::cout << "norm result "<< norm2(result)<<std::endl;
|
||||
std::cout << "norm ref "<< norm2(ref)<<std::endl;
|
||||
std::cout << "mflop/s = "<< flops/(t1-t0)<<std::endl;
|
||||
err = ref -result;
|
||||
err = ref-result;
|
||||
std::cout << "norm diff "<< norm2(err)<<std::endl;
|
||||
|
||||
|
||||
@ -129,9 +133,8 @@ int main (int argc, char ** argv)
|
||||
std::cout << "Called DwDag"<<std::endl;
|
||||
std::cout << "norm result "<< norm2(result)<<std::endl;
|
||||
std::cout << "norm ref "<< norm2(ref)<<std::endl;
|
||||
err = ref -result;
|
||||
err = ref-result;
|
||||
std::cout << "norm diff "<< norm2(err)<<std::endl;
|
||||
|
||||
|
||||
Grid_finalize();
|
||||
}
|
||||
|
61
benchmarks/Grid_wilson_cg_prec.cc
Normal file
61
benchmarks/Grid_wilson_cg_prec.cc
Normal file
@ -0,0 +1,61 @@
|
||||
#include <Grid.h>
|
||||
|
||||
using namespace std;
|
||||
using namespace Grid;
|
||||
using namespace Grid::QCD;
|
||||
|
||||
template<class d>
|
||||
struct scal {
|
||||
d internal;
|
||||
};
|
||||
|
||||
Gamma::GammaMatrix Gmu [] = {
|
||||
Gamma::GammaX,
|
||||
Gamma::GammaY,
|
||||
Gamma::GammaZ,
|
||||
Gamma::GammaT
|
||||
};
|
||||
|
||||
int main (int argc, char ** argv)
|
||||
{
|
||||
Grid_init(&argc,&argv);
|
||||
|
||||
|
||||
std::vector<int> latt_size = GridDefaultLatt();
|
||||
std::vector<int> simd_layout = GridDefaultSimd(Nd,vComplexF::Nsimd());
|
||||
std::vector<int> mpi_layout = GridDefaultMpi();
|
||||
GridCartesian Grid(latt_size,simd_layout,mpi_layout);
|
||||
GridRedBlackCartesian RBGrid(latt_size,simd_layout,mpi_layout);
|
||||
|
||||
std::vector<int> seeds({1,2,3,4});
|
||||
GridParallelRNG pRNG(&Grid); pRNG.SeedFixedIntegers(seeds);
|
||||
|
||||
LatticeFermion src(&Grid); random(pRNG,src);
|
||||
RealD nrm = norm2(src);
|
||||
LatticeFermion result(&Grid); result=zero;
|
||||
LatticeGaugeField Umu(&Grid); random(pRNG,Umu);
|
||||
|
||||
std::vector<LatticeColourMatrix> U(4,&Grid);
|
||||
|
||||
for(int mu=0;mu<Nd;mu++){
|
||||
U[mu] = peekIndex<LorentzIndex>(Umu,mu);
|
||||
}
|
||||
|
||||
RealD mass=0.5;
|
||||
WilsonMatrix Dw(Umu,Grid,RBGrid,mass);
|
||||
|
||||
// HermitianOperator<WilsonMatrix,LatticeFermion> HermOp(Dw);
|
||||
// ConjugateGradient<LatticeFermion> CG(1.0e-8,10000);
|
||||
// CG(HermOp,src,result);
|
||||
|
||||
LatticeFermion src_o(&RBGrid);
|
||||
LatticeFermion result_o(&RBGrid);
|
||||
pickCheckerboard(Odd,src_o,src);
|
||||
result_o=zero;
|
||||
|
||||
HermitianCheckerBoardedOperator<WilsonMatrix,LatticeFermion> HermOpEO(Dw);
|
||||
ConjugateGradient<LatticeFermion> CG(1.0e-8,10000);
|
||||
CG(HermOpEO,src_o,result_o);
|
||||
|
||||
Grid_finalize();
|
||||
}
|
48
benchmarks/Grid_wilson_cg_schur.cc
Normal file
48
benchmarks/Grid_wilson_cg_schur.cc
Normal file
@ -0,0 +1,48 @@
|
||||
#include <Grid.h>
|
||||
|
||||
using namespace std;
|
||||
using namespace Grid;
|
||||
using namespace Grid::QCD;
|
||||
|
||||
template<class d>
|
||||
struct scal {
|
||||
d internal;
|
||||
};
|
||||
|
||||
Gamma::GammaMatrix Gmu [] = {
|
||||
Gamma::GammaX,
|
||||
Gamma::GammaY,
|
||||
Gamma::GammaZ,
|
||||
Gamma::GammaT
|
||||
};
|
||||
|
||||
int main (int argc, char ** argv)
|
||||
{
|
||||
Grid_init(&argc,&argv);
|
||||
|
||||
|
||||
std::vector<int> latt_size = GridDefaultLatt();
|
||||
std::vector<int> simd_layout = GridDefaultSimd(Nd,vComplexF::Nsimd());
|
||||
std::vector<int> mpi_layout = GridDefaultMpi();
|
||||
GridCartesian Grid(latt_size,simd_layout,mpi_layout);
|
||||
GridRedBlackCartesian RBGrid(latt_size,simd_layout,mpi_layout);
|
||||
|
||||
std::vector<int> seeds({1,2,3,4});
|
||||
GridParallelRNG pRNG(&Grid); pRNG.SeedFixedIntegers(seeds);
|
||||
|
||||
LatticeGaugeField Umu(&Grid); random(pRNG,Umu);
|
||||
|
||||
LatticeFermion src(&Grid); random(pRNG,src);
|
||||
LatticeFermion result(&Grid); result=zero;
|
||||
LatticeFermion resid(&Grid);
|
||||
|
||||
RealD mass=0.5;
|
||||
WilsonMatrix Dw(Umu,Grid,RBGrid,mass);
|
||||
|
||||
ConjugateGradient<LatticeFermion> CG(1.0e-8,10000);
|
||||
SchurRedBlackSolve<LatticeFermion> SchurSolver(CG);
|
||||
|
||||
SchurSolver(Dw,src,result);
|
||||
|
||||
Grid_finalize();
|
||||
}
|
@ -24,13 +24,14 @@ int main (int argc, char ** argv)
|
||||
std::vector<int> latt_size = GridDefaultLatt();
|
||||
std::vector<int> simd_layout = GridDefaultSimd(Nd,vComplexF::Nsimd());
|
||||
std::vector<int> mpi_layout = GridDefaultMpi();
|
||||
GridCartesian Grid(latt_size,simd_layout,mpi_layout);
|
||||
GridCartesian Grid(latt_size,simd_layout,mpi_layout);
|
||||
GridRedBlackCartesian RBGrid(latt_size,simd_layout,mpi_layout);
|
||||
|
||||
std::vector<int> seeds({1,2,3,4});
|
||||
GridParallelRNG pRNG(&Grid); pRNG.SeedFixedIntegers(seeds);
|
||||
|
||||
LatticeFermion src(&Grid); random(pRNG,src);
|
||||
std::cout << "src norm" << norm2(src)<<std::endl;
|
||||
RealD nrm = norm2(src);
|
||||
LatticeFermion result(&Grid); result=zero;
|
||||
LatticeGaugeField Umu(&Grid); random(pRNG,Umu);
|
||||
|
||||
@ -46,11 +47,11 @@ int main (int argc, char ** argv)
|
||||
}
|
||||
|
||||
RealD mass=0.5;
|
||||
WilsonMatrix Dw(Umu,mass);
|
||||
WilsonMatrix Dw(Umu,Grid,RBGrid,mass);
|
||||
|
||||
HermitianOperator<WilsonMatrix,LatticeFermion> HermOp(Dw);
|
||||
ConjugateGradient<LatticeFermion> CG(1.0e-8,10000);
|
||||
CG(HermOp,src,result);
|
||||
|
||||
|
||||
Grid_finalize();
|
||||
}
|
||||
|
201
benchmarks/Grid_wilson_evenodd.cc
Normal file
201
benchmarks/Grid_wilson_evenodd.cc
Normal file
@ -0,0 +1,201 @@
|
||||
#include <Grid.h>
|
||||
|
||||
using namespace std;
|
||||
using namespace Grid;
|
||||
using namespace Grid::QCD;
|
||||
|
||||
template<class d>
|
||||
struct scal {
|
||||
d internal;
|
||||
};
|
||||
|
||||
Gamma::GammaMatrix Gmu [] = {
|
||||
Gamma::GammaX,
|
||||
Gamma::GammaY,
|
||||
Gamma::GammaZ,
|
||||
Gamma::GammaT
|
||||
};
|
||||
|
||||
int main (int argc, char ** argv)
|
||||
{
|
||||
Grid_init(&argc,&argv);
|
||||
|
||||
std::vector<int> latt_size = GridDefaultLatt();
|
||||
std::vector<int> simd_layout = GridDefaultSimd(Nd,vComplexF::Nsimd());
|
||||
std::vector<int> mpi_layout = GridDefaultMpi();
|
||||
GridCartesian Grid(latt_size,simd_layout,mpi_layout);
|
||||
GridRedBlackCartesian RBGrid(latt_size,simd_layout,mpi_layout);
|
||||
|
||||
int threads = GridThread::GetThreads();
|
||||
std::cout << "Grid is setup to use "<<threads<<" threads"<<std::endl;
|
||||
|
||||
std::vector<int> seeds({1,2,3,4});
|
||||
|
||||
GridParallelRNG pRNG(&Grid);
|
||||
// std::vector<int> seeds({1,2,3,4});
|
||||
// pRNG.SeedFixedIntegers(seeds);
|
||||
pRNG.SeedRandomDevice();
|
||||
|
||||
LatticeFermion src (&Grid); random(pRNG,src);
|
||||
LatticeFermion phi (&Grid); random(pRNG,phi);
|
||||
LatticeFermion chi (&Grid); random(pRNG,chi);
|
||||
LatticeFermion result(&Grid); result=zero;
|
||||
LatticeFermion ref(&Grid); ref=zero;
|
||||
LatticeFermion tmp(&Grid); tmp=zero;
|
||||
LatticeFermion err(&Grid); tmp=zero;
|
||||
LatticeGaugeField Umu(&Grid); random(pRNG,Umu);
|
||||
std::vector<LatticeColourMatrix> U(4,&Grid);
|
||||
|
||||
double volume=1;
|
||||
for(int mu=0;mu<Nd;mu++){
|
||||
volume=volume*latt_size[mu];
|
||||
}
|
||||
|
||||
// Only one non-zero (y)
|
||||
Umu=zero;
|
||||
for(int nn=0;nn<Nd;nn++){
|
||||
random(pRNG,U[nn]);
|
||||
pokeIndex<LorentzIndex>(Umu,U[nn],nn);
|
||||
}
|
||||
|
||||
RealD mass=0.1;
|
||||
|
||||
WilsonMatrix Dw(Umu,Grid,RBGrid,mass);
|
||||
|
||||
LatticeFermion src_e (&RBGrid);
|
||||
LatticeFermion src_o (&RBGrid);
|
||||
LatticeFermion r_e (&RBGrid);
|
||||
LatticeFermion r_o (&RBGrid);
|
||||
LatticeFermion r_eo (&Grid);
|
||||
|
||||
const int Even=0;
|
||||
const int Odd=1;
|
||||
std::cout<<"=========================================================="<<std::endl;
|
||||
std::cout<<"= Testing that Deo + Doe = Dunprec "<<std::endl;
|
||||
std::cout<<"=========================================================="<<std::endl;
|
||||
|
||||
pickCheckerboard(Even,src_e,src);
|
||||
pickCheckerboard(Odd,src_o,src);
|
||||
|
||||
Dw.Meooe(src_e,r_o); std::cout<<"Applied Meo"<<std::endl;
|
||||
Dw.Meooe(src_o,r_e); std::cout<<"Applied Moe"<<std::endl;
|
||||
Dw.Dhop (src,ref,0);
|
||||
|
||||
setCheckerboard(r_eo,r_o);
|
||||
setCheckerboard(r_eo,r_e);
|
||||
|
||||
ref = (-0.5)*ref;
|
||||
err= ref - r_eo;
|
||||
std::cout << "EO norm diff "<< norm2(err)<< " "<<norm2(ref)<< " " << norm2(r_eo) <<std::endl;
|
||||
|
||||
LatticeComplex cerr(&Grid);
|
||||
cerr = localInnerProduct(err,err);
|
||||
|
||||
std::cout<<"=============================================================="<<std::endl;
|
||||
std::cout<<"= Test Ddagger is the dagger of D by requiring "<<std::endl;
|
||||
std::cout<<"= < phi | Deo | chi > * = < chi | Deo^dag| phi> "<<std::endl;
|
||||
std::cout<<"=============================================================="<<std::endl;
|
||||
|
||||
LatticeFermion chi_e (&RBGrid);
|
||||
LatticeFermion chi_o (&RBGrid);
|
||||
|
||||
LatticeFermion dchi_e (&RBGrid);
|
||||
LatticeFermion dchi_o (&RBGrid);
|
||||
|
||||
LatticeFermion phi_e (&RBGrid);
|
||||
LatticeFermion phi_o (&RBGrid);
|
||||
|
||||
LatticeFermion dphi_e (&RBGrid);
|
||||
LatticeFermion dphi_o (&RBGrid);
|
||||
|
||||
|
||||
pickCheckerboard(Even,chi_e,chi);
|
||||
pickCheckerboard(Odd ,chi_o,chi);
|
||||
pickCheckerboard(Even,phi_e,phi);
|
||||
pickCheckerboard(Odd ,phi_o,phi);
|
||||
|
||||
Dw.Meooe(chi_e,dchi_o);
|
||||
Dw.Meooe(chi_o,dchi_e);
|
||||
Dw.MeooeDag(phi_e,dphi_o);
|
||||
Dw.MeooeDag(phi_o,dphi_e);
|
||||
|
||||
ComplexD pDce = innerProduct(phi_e,dchi_e);
|
||||
ComplexD pDco = innerProduct(phi_o,dchi_o);
|
||||
ComplexD cDpe = innerProduct(chi_e,dphi_e);
|
||||
ComplexD cDpo = innerProduct(chi_o,dphi_o);
|
||||
|
||||
std::cout <<"e "<<pDce<<" "<<cDpe <<std::endl;
|
||||
std::cout <<"o "<<pDco<<" "<<cDpo <<std::endl;
|
||||
|
||||
std::cout <<"pDce - conj(cDpo) "<< pDce-conj(cDpo) <<std::endl;
|
||||
std::cout <<"pDco - conj(cDpe) "<< pDco-conj(cDpe) <<std::endl;
|
||||
|
||||
std::cout<<"=============================================================="<<std::endl;
|
||||
std::cout<<"= Test MeeInv Mee = 1 "<<std::endl;
|
||||
std::cout<<"=============================================================="<<std::endl;
|
||||
|
||||
pickCheckerboard(Even,chi_e,chi);
|
||||
pickCheckerboard(Odd ,chi_o,chi);
|
||||
|
||||
Dw.Mooee(chi_e,src_e);
|
||||
Dw.MooeeInv(src_e,phi_e);
|
||||
|
||||
Dw.Mooee(chi_o,src_o);
|
||||
Dw.MooeeInv(src_o,phi_o);
|
||||
|
||||
setCheckerboard(phi,phi_e);
|
||||
setCheckerboard(phi,phi_o);
|
||||
|
||||
err = phi-chi;
|
||||
std::cout << "norm diff "<< norm2(err)<< std::endl;
|
||||
|
||||
std::cout<<"=============================================================="<<std::endl;
|
||||
std::cout<<"= Test MeeInvDag MeeDag = 1 "<<std::endl;
|
||||
std::cout<<"=============================================================="<<std::endl;
|
||||
|
||||
pickCheckerboard(Even,chi_e,chi);
|
||||
pickCheckerboard(Odd ,chi_o,chi);
|
||||
|
||||
Dw.MooeeDag(chi_e,src_e);
|
||||
Dw.MooeeInvDag(src_e,phi_e);
|
||||
|
||||
Dw.MooeeDag(chi_o,src_o);
|
||||
Dw.MooeeInvDag(src_o,phi_o);
|
||||
|
||||
setCheckerboard(phi,phi_e);
|
||||
setCheckerboard(phi,phi_o);
|
||||
|
||||
err = phi-chi;
|
||||
std::cout << "norm diff "<< norm2(err)<< std::endl;
|
||||
|
||||
std::cout<<"=============================================================="<<std::endl;
|
||||
std::cout<<"= Test MpcDagMpc is Hermitian "<<std::endl;
|
||||
std::cout<<"=============================================================="<<std::endl;
|
||||
|
||||
random(pRNG,phi);
|
||||
random(pRNG,chi);
|
||||
pickCheckerboard(Even,chi_e,chi);
|
||||
pickCheckerboard(Odd ,chi_o,chi);
|
||||
pickCheckerboard(Even,phi_e,phi);
|
||||
pickCheckerboard(Odd ,phi_o,phi);
|
||||
RealD t1,t2;
|
||||
|
||||
Dw.MpcDagMpc(chi_e,dchi_e,t1,t2);
|
||||
Dw.MpcDagMpc(chi_o,dchi_o,t1,t2);
|
||||
|
||||
Dw.MpcDagMpc(phi_e,dphi_e,t1,t2);
|
||||
Dw.MpcDagMpc(phi_o,dphi_o,t1,t2);
|
||||
|
||||
pDce = innerProduct(phi_e,dchi_e);
|
||||
pDco = innerProduct(phi_o,dchi_o);
|
||||
cDpe = innerProduct(chi_e,dphi_e);
|
||||
cDpo = innerProduct(chi_o,dphi_o);
|
||||
|
||||
std::cout <<"e "<<pDce<<" "<<cDpe <<std::endl;
|
||||
std::cout <<"o "<<pDco<<" "<<cDpo <<std::endl;
|
||||
|
||||
std::cout <<"pDce - conj(cDpo) "<< pDco-conj(cDpo) <<std::endl;
|
||||
std::cout <<"pDco - conj(cDpe) "<< pDce-conj(cDpe) <<std::endl;
|
||||
|
||||
Grid_finalize();
|
||||
}
|
@ -5,18 +5,27 @@ AM_LDFLAGS = -L$(top_builddir)/lib
|
||||
#
|
||||
# Test code
|
||||
#
|
||||
bin_PROGRAMS = Grid_wilson Grid_comms Grid_memory_bandwidth Grid_su3 Grid_wilson_cg_unprec
|
||||
bin_PROGRAMS = Grid_wilson Grid_comms Grid_memory_bandwidth Grid_su3 Grid_wilson_cg_unprec Grid_wilson_evenodd Grid_wilson_cg_prec Grid_wilson_cg_schur
|
||||
|
||||
Grid_wilson_SOURCES = Grid_wilson.cc
|
||||
Grid_wilson_LDADD = -lGrid
|
||||
|
||||
Grid_wilson_evenodd_SOURCES = Grid_wilson_evenodd.cc
|
||||
Grid_wilson_evenodd_LDADD = -lGrid
|
||||
|
||||
Grid_wilson_cg_unprec_SOURCES = Grid_wilson_cg_unprec.cc
|
||||
Grid_wilson_cg_unprec_LDADD = -lGrid
|
||||
|
||||
Grid_wilson_cg_prec_SOURCES = Grid_wilson_cg_prec.cc
|
||||
Grid_wilson_cg_prec_LDADD = -lGrid
|
||||
|
||||
Grid_wilson_cg_schur_SOURCES = Grid_wilson_cg_schur.cc
|
||||
Grid_wilson_cg_schur_LDADD = -lGrid
|
||||
|
||||
Grid_comms_SOURCES = Grid_comms.cc
|
||||
Grid_comms_LDADD = -lGrid
|
||||
|
||||
Grid_su3_SOURCES = Grid_su3.cc
|
||||
Grid_su3_SOURCES = Grid_su3.cc Grid_su3_test.cc Grid_su3_expr.cc
|
||||
Grid_su3_LDADD = -lGrid
|
||||
|
||||
Grid_memory_bandwidth_SOURCES = Grid_memory_bandwidth.cc
|
||||
|
@ -11,6 +11,7 @@
|
||||
#include <sys/time.h>
|
||||
#include <signal.h>
|
||||
#include <iostream>
|
||||
#include <iterator>
|
||||
#include <Grid.h>
|
||||
#include <algorithm>
|
||||
#include <iterator>
|
||||
|
@ -13,28 +13,6 @@
|
||||
|
||||
typedef uint32_t Integer;
|
||||
|
||||
#ifdef SSE4
|
||||
#include <pmmintrin.h>
|
||||
#endif
|
||||
#if defined(AVX1) || defined (AVX2)
|
||||
#include <immintrin.h>
|
||||
|
||||
// _mm256_set_m128i(hi,lo); // not defined in all versions of immintrin.h
|
||||
#ifndef _mm256_set_m128i
|
||||
#define _mm256_set_m128i(hi,lo) _mm256_insertf128_si256(_mm256_castsi128_si256(lo),(hi),1)
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
||||
#ifdef AVX512
|
||||
#include <immintrin.h>
|
||||
#ifndef KNC_ONLY_STORES
|
||||
#define _mm512_storenrngo_ps _mm512_store_ps // not present in AVX512
|
||||
#define _mm512_storenrngo_pd _mm512_store_pd // not present in AVX512
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
||||
namespace Grid {
|
||||
|
||||
typedef float RealF;
|
||||
@ -117,7 +95,7 @@ namespace Grid {
|
||||
template<> inline void zeroit(RealF &arg){ arg=0; };
|
||||
template<> inline void zeroit(RealD &arg){ arg=0; };
|
||||
|
||||
|
||||
// Eventually delete this part
|
||||
#if defined (SSE4)
|
||||
typedef __m128 fvec;
|
||||
typedef __m128d dvec;
|
||||
@ -146,6 +124,7 @@ namespace Grid {
|
||||
typedef vector4double dvec;
|
||||
typedef vector4double zvec;
|
||||
#endif
|
||||
|
||||
#if defined (AVX1) || defined (AVX2) || defined (AVX512)
|
||||
inline void v_prefetch0(int size, const char *ptr){
|
||||
for(int i=0;i<size;i+=64){ // Define L1 linesize above// What about SSE?
|
||||
|
@ -120,8 +120,8 @@ namespace Grid {
|
||||
// Map to always positive shift modulo global full dimension.
|
||||
int shift = (displacement+fd)%fd;
|
||||
|
||||
int checkerboard = _grid->CheckerBoardDestination(source.checkerboard,shift);
|
||||
assert (checkerboard== _checkerboard);
|
||||
// int checkerboard = _grid->CheckerBoardDestination(source.checkerboard,shift);
|
||||
assert (source.checkerboard== _checkerboard);
|
||||
|
||||
// the permute type
|
||||
int simd_layout = _grid->_simd_layout[dimension];
|
||||
|
@ -5,6 +5,8 @@
|
||||
#define GRID_OMP
|
||||
#endif
|
||||
|
||||
#define UNROLL _Pragma("unroll")
|
||||
|
||||
#ifdef GRID_OMP
|
||||
#include <omp.h>
|
||||
#define PARALLEL_FOR_LOOP _Pragma("omp parallel for")
|
||||
|
@ -25,7 +25,7 @@ namespace Grid {
|
||||
/////////////////////////////////////////////////////////////////////////////////////////////
|
||||
template<class Field> class HermitianOperatorBase : public LinearOperatorBase<Field> {
|
||||
public:
|
||||
virtual void OpAndNorm(const Field &in, Field &out,double &n1,double &n2);
|
||||
virtual void OpAndNorm(const Field &in, Field &out,double &n1,double &n2)=0;
|
||||
void AdjOp(const Field &in, Field &out) {
|
||||
Op(in,out);
|
||||
};
|
||||
@ -106,6 +106,10 @@ namespace Grid {
|
||||
public:
|
||||
virtual void operator() (LinearOperatorBase<Field> &Linop, const Field &in, Field &out) = 0;
|
||||
};
|
||||
template<class Field> class HermitianOperatorFunction {
|
||||
public:
|
||||
virtual void operator() (HermitianOperatorBase<Field> &Linop, const Field &in, Field &out) = 0;
|
||||
};
|
||||
|
||||
// FIXME : To think about
|
||||
|
||||
|
@ -10,6 +10,7 @@ namespace Grid {
|
||||
/////////////////////////////////////////////////////////////////////////////////////////////
|
||||
template<class Field> class SparseMatrixBase {
|
||||
public:
|
||||
GridBase *_grid;
|
||||
// Full checkerboar operations
|
||||
virtual RealD M (const Field &in, Field &out)=0;
|
||||
virtual RealD Mdag (const Field &in, Field &out)=0;
|
||||
@ -18,6 +19,7 @@ namespace Grid {
|
||||
ni=M(in,tmp);
|
||||
no=Mdag(tmp,out);
|
||||
}
|
||||
SparseMatrixBase(GridBase *grid) : _grid(grid) {};
|
||||
};
|
||||
|
||||
/////////////////////////////////////////////////////////////////////////////////////////////
|
||||
@ -25,7 +27,7 @@ namespace Grid {
|
||||
/////////////////////////////////////////////////////////////////////////////////////////////
|
||||
template<class Field> class CheckerBoardedSparseMatrixBase : public SparseMatrixBase<Field> {
|
||||
public:
|
||||
|
||||
GridBase *_cbgrid;
|
||||
// half checkerboard operaions
|
||||
virtual void Meooe (const Field &in, Field &out)=0;
|
||||
virtual void Mooee (const Field &in, Field &out)=0;
|
||||
@ -44,9 +46,7 @@ namespace Grid {
|
||||
Meooe(out,tmp);
|
||||
|
||||
Mooee(in,out);
|
||||
out=out-tmp; // axpy_norm
|
||||
RealD n=norm2(out);
|
||||
return n;
|
||||
return axpy_norm(out,-1.0,tmp,out);
|
||||
}
|
||||
virtual RealD MpcDag (const Field &in, Field &out){
|
||||
Field tmp(in._grid);
|
||||
@ -56,15 +56,15 @@ namespace Grid {
|
||||
MeooeDag(out,tmp);
|
||||
|
||||
MooeeDag(in,out);
|
||||
out=out-tmp; // axpy_norm
|
||||
RealD n=norm2(out);
|
||||
return n;
|
||||
return axpy_norm(out,-1.0,tmp,out);
|
||||
}
|
||||
virtual void MpcDagMpc(const Field &in, Field &out,RealD ni,RealD no) {
|
||||
virtual void MpcDagMpc(const Field &in, Field &out,RealD &ni,RealD &no) {
|
||||
Field tmp(in._grid);
|
||||
ni=Mpc(in,tmp);
|
||||
no=Mpc(tmp,out);
|
||||
no=MpcDag(tmp,out);
|
||||
// std::cout<<"MpcDagMpc "<<ni<<" "<<no<<std::endl;
|
||||
}
|
||||
CheckerBoardedSparseMatrixBase(GridBase *grid,GridBase *cbgrid) : SparseMatrixBase<Field>(grid), _cbgrid(cbgrid) {};
|
||||
};
|
||||
|
||||
}
|
||||
|
@ -31,7 +31,7 @@ public:
|
||||
bigfloat(const double d) { mpf_init_set_d(x, d); }
|
||||
bigfloat(const char *str) { mpf_init_set_str(x, (char*)str, 10); }
|
||||
~bigfloat(void) { mpf_clear(x); }
|
||||
operator const double (void) const { return (double)mpf_get_d(x); }
|
||||
operator double (void) const { return (double)mpf_get_d(x); }
|
||||
static void setDefaultPrecision(unsigned long dprec) {
|
||||
unsigned long bprec = (unsigned long)(3.321928094 * (double)dprec);
|
||||
mpf_set_default_prec(bprec);
|
||||
|
@ -9,18 +9,21 @@ namespace Grid {
|
||||
/////////////////////////////////////////////////////////////
|
||||
|
||||
template<class Field>
|
||||
class ConjugateGradient : public OperatorFunction<Field> {
|
||||
class ConjugateGradient : public HermitianOperatorFunction<Field> {
|
||||
public:
|
||||
RealD Tolerance;
|
||||
Integer MaxIterations;
|
||||
|
||||
int verbose;
|
||||
ConjugateGradient(RealD tol,Integer maxit) : Tolerance(tol), MaxIterations(maxit) {
|
||||
std::cout << Tolerance<<std::endl;
|
||||
verbose=0;
|
||||
};
|
||||
|
||||
void operator() (LinearOperatorBase<Field> &Linop,const Field &src, Field &psi) {assert(0);};
|
||||
|
||||
void operator() (HermitianOperatorBase<Field> &Linop,const Field &src, Field &psi){
|
||||
|
||||
psi.checkerboard = src.checkerboard;
|
||||
conformable(psi,src);
|
||||
|
||||
RealD cp,c,a,d,b,ssq,qq,b_pred;
|
||||
|
||||
Field p(src);
|
||||
@ -38,14 +41,16 @@ public:
|
||||
a =norm2(p);
|
||||
cp =a;
|
||||
ssq=norm2(src);
|
||||
|
||||
std::cout <<std::setprecision(4)<< "ConjugateGradient: guess "<<guess<<std::endl;
|
||||
std::cout <<std::setprecision(4)<< "ConjugateGradient: src "<<ssq <<std::endl;
|
||||
std::cout <<std::setprecision(4)<< "ConjugateGradient: mp "<<d <<std::endl;
|
||||
std::cout <<std::setprecision(4)<< "ConjugateGradient: mmp "<<b <<std::endl;
|
||||
std::cout <<std::setprecision(4)<< "ConjugateGradient: r "<<cp <<std::endl;
|
||||
std::cout <<std::setprecision(4)<< "ConjugateGradient: p "<<a <<std::endl;
|
||||
|
||||
|
||||
if ( verbose ) {
|
||||
std::cout <<std::setprecision(4)<< "ConjugateGradient: guess "<<guess<<std::endl;
|
||||
std::cout <<std::setprecision(4)<< "ConjugateGradient: src "<<ssq <<std::endl;
|
||||
std::cout <<std::setprecision(4)<< "ConjugateGradient: mp "<<d <<std::endl;
|
||||
std::cout <<std::setprecision(4)<< "ConjugateGradient: mmp "<<b <<std::endl;
|
||||
std::cout <<std::setprecision(4)<< "ConjugateGradient: cp,r "<<cp <<std::endl;
|
||||
std::cout <<std::setprecision(4)<< "ConjugateGradient: p "<<a <<std::endl;
|
||||
}
|
||||
|
||||
RealD rsq = Tolerance* Tolerance*ssq;
|
||||
|
||||
//Check if guess is really REALLY good :)
|
||||
@ -61,14 +66,16 @@ public:
|
||||
c=cp;
|
||||
|
||||
Linop.OpAndNorm(p,mmp,d,qq);
|
||||
|
||||
// std::cout <<std::setprecision(4)<< "ConjugateGradient: d,qq "<<d<< " "<<qq <<std::endl;
|
||||
|
||||
RealD qqck = norm2(mmp);
|
||||
ComplexD dck = innerProduct(p,mmp);
|
||||
// if (verbose) std::cout <<std::setprecision(4)<< "ConjugateGradient: d,qq "<<d<< " "<<qq <<" qqcheck "<< qqck<< " dck "<< dck<<std::endl;
|
||||
|
||||
a = c/d;
|
||||
b_pred = a*(a*qq-d)/c;
|
||||
|
||||
// std::cout <<std::setprecision(4)<< "ConjugateGradient: a,bp "<<a<< " "<<b_pred <<std::endl;
|
||||
|
||||
|
||||
// if (verbose) std::cout <<std::setprecision(4)<< "ConjugateGradient: a,bp "<<a<< " "<<b_pred <<std::endl;
|
||||
cp = axpy_norm(r,-a,mmp,r);
|
||||
b = cp/c;
|
||||
// std::cout <<std::setprecision(4)<< "ConjugateGradient: cp,b "<<cp<< " "<<b <<std::endl;
|
||||
@ -77,7 +84,16 @@ public:
|
||||
psi= a*p+psi;
|
||||
p = p*b+r;
|
||||
|
||||
std::cout<<"ConjugateGradient: Iteration " <<k<<" residual "<<cp<< " target"<< rsq<<std::endl;
|
||||
if (verbose) std::cout<<"ConjugateGradient: Iteration " <<k<<" residual "<<cp<< " target"<< rsq<<std::endl;
|
||||
|
||||
// Hack
|
||||
if (0) {
|
||||
Field tt(src);
|
||||
Linop.Op(psi,mmp);
|
||||
tt=mmp-src;
|
||||
RealD resnorm = norm2(tt);
|
||||
std::cout<<"ConjugateGradient: Iteration " <<k<<" true residual "<<resnorm << " computed " << cp <<std::endl;
|
||||
}
|
||||
|
||||
// Stopping condition
|
||||
if ( cp <= rsq ) {
|
||||
|
@ -1,6 +1,7 @@
|
||||
#ifndef GRID_SCHUR_RED_BLACK_H
|
||||
#define GRID_SCHUR_RED_BLACK_H
|
||||
|
||||
|
||||
/*
|
||||
* Red black Schur decomposition
|
||||
*
|
||||
@ -25,80 +26,86 @@
|
||||
* M psi = eta
|
||||
***********************
|
||||
*Odd
|
||||
* i) (D_oo)^{\dag} D_oo psi_o = (D_oo)^\dag L^{-1} eta_o
|
||||
* eta_o' = D_oo (eta_o - Moe Mee^{-1} eta_e)
|
||||
* i) (D_oo)^{\dag} D_oo psi_o = (D_oo)^dag L^{-1} eta_o
|
||||
* eta_o' = (D_oo)^dag (eta_o - Moe Mee^{-1} eta_e)
|
||||
*Even
|
||||
* ii) Mee psi_e + Meo psi_o = src_e
|
||||
*
|
||||
* => sol_e = M_ee^-1 * ( src_e - Meo sol_o )...
|
||||
*
|
||||
*/
|
||||
|
||||
namespace Grid {
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// Take a matrix and form a Red Black solver calling a Herm solver
|
||||
// Use of RB info prevents making SchurRedBlackSolve conform to standard interface
|
||||
///////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
template<class Field> class SchurRedBlackSolve : public OperatorFunction<Field>{
|
||||
template<class Field> class SchurRedBlackSolve {
|
||||
private:
|
||||
SparseMatrixBase<Field> & _Matrix;
|
||||
OperatorFunction<Field> & _HermitianRBSolver;
|
||||
HermitianOperatorFunction<Field> & _HermitianRBSolver;
|
||||
int CBfactorise;
|
||||
public:
|
||||
|
||||
/////////////////////////////////////////////////////
|
||||
// Wrap the usual normal equations Schur trick
|
||||
/////////////////////////////////////////////////////
|
||||
SchurRedBlackSolve(SparseMatrixBase<Field> &Matrix, OperatorFunction<Field> &HermitianRBSolver)
|
||||
: _Matrix(Matrix), _HermitianRBSolver(HermitianRBSolver) {
|
||||
SchurRedBlackSolve(HermitianOperatorFunction<Field> &HermitianRBSolver) :
|
||||
_HermitianRBSolver(HermitianRBSolver)
|
||||
{
|
||||
CBfactorise=0;
|
||||
};
|
||||
|
||||
void operator() (const Field &in, Field &out){
|
||||
template<class Matrix>
|
||||
void operator() (Matrix & _Matrix,const Field &in, Field &out){
|
||||
|
||||
// FIXME CGdiagonalMee not implemented virtual function
|
||||
// FIXME need to make eo grid from full grid.
|
||||
// FIXME use CBfactorise to control schur decomp
|
||||
const int Even=0;
|
||||
const int Odd =1;
|
||||
|
||||
// Make a cartesianRedBlack from full Grid
|
||||
GridRedBlackCartesian grid(in._grid);
|
||||
GridBase *grid = _Matrix._cbgrid;
|
||||
GridBase *fgrid= _Matrix._grid;
|
||||
|
||||
Field src_e(&grid);
|
||||
Field src_o(&grid);
|
||||
Field sol_e(&grid);
|
||||
Field sol_o(&grid);
|
||||
Field tmp(&grid);
|
||||
Field Mtmp(&grid);
|
||||
|
||||
Field src_e(grid);
|
||||
Field src_o(grid);
|
||||
Field sol_e(grid);
|
||||
Field sol_o(grid);
|
||||
Field tmp(grid);
|
||||
Field Mtmp(grid);
|
||||
Field resid(fgrid);
|
||||
|
||||
pickCheckerboard(Even,src_e,in);
|
||||
pickCheckerboard(Odd ,src_o,in);
|
||||
|
||||
/////////////////////////////////////////////////////
|
||||
// src_o = Mdag * (source_o - Moe MeeInv source_e)
|
||||
/////////////////////////////////////////////////////
|
||||
_Matrix.MooeeInv(src_e,tmp); // MooeeInv(source[Even],tmp,DaggerNo,Even);
|
||||
_Matrix.Meooe (tmp,Mtmp); // Meo (tmp,src,Odd,DaggerNo);
|
||||
tmp=src_o-Mtmp; // axpy (tmp,src,source[Odd],-1.0);
|
||||
_Matrix.MpcDag(tmp,src_o); // Mprec(tmp,src,Mtmp,DaggerYes);
|
||||
_Matrix.MooeeInv(src_e,tmp); assert( tmp.checkerboard ==Even);
|
||||
_Matrix.Meooe (tmp,Mtmp); assert( Mtmp.checkerboard ==Odd);
|
||||
tmp=src_o-Mtmp; assert( tmp.checkerboard ==Odd);
|
||||
_Matrix.MpcDag(tmp,src_o); assert(src_o.checkerboard ==Odd);
|
||||
|
||||
//////////////////////////////////////////////////////////////
|
||||
// Call the red-black solver
|
||||
//////////////////////////////////////////////////////////////
|
||||
_HermitianRBSolver(src_o,sol_o); // CGNE_prec_MdagM(solution[Odd],src);
|
||||
HermitianCheckerBoardedOperator<Matrix,Field> _HermOpEO(_Matrix);
|
||||
std::cout << "SchurRedBlack solver calling the MpcDagMp solver" <<std::endl;
|
||||
_HermitianRBSolver(_HermOpEO,src_o,sol_o); assert(sol_o.checkerboard==Odd);
|
||||
|
||||
///////////////////////////////////////////////////
|
||||
// sol_e = M_ee^-1 * ( src_e - Meo sol_o )...
|
||||
///////////////////////////////////////////////////
|
||||
_Matrix.Meooe(sol_o,tmp); // Meo(solution[Odd],tmp,Even,DaggerNo);
|
||||
src_e = src_e-tmp; // axpy(src,tmp,source[Even],-1.0);
|
||||
_Matrix.MooeeInv(src_e,sol_e); // MooeeInv(src,solution[Even],DaggerNo,Even);
|
||||
_Matrix.Meooe(sol_o,tmp); assert( tmp.checkerboard ==Even);
|
||||
src_e = src_e-tmp; assert( src_e.checkerboard ==Even);
|
||||
_Matrix.MooeeInv(src_e,sol_e); assert( sol_e.checkerboard ==Even);
|
||||
|
||||
setCheckerboard(out,sol_e);
|
||||
setCheckerboard(out,sol_o);
|
||||
|
||||
setCheckerboard(out,sol_e); assert( sol_e.checkerboard ==Even);
|
||||
setCheckerboard(out,sol_o); assert( sol_o.checkerboard ==Odd );
|
||||
|
||||
// Verify the unprec residual
|
||||
_Matrix.M(out,resid);
|
||||
resid = resid-in;
|
||||
RealD ns = norm2(in);
|
||||
RealD nr = norm2(resid);
|
||||
|
||||
std::cout << "SchurRedBlack solver true unprec resid "<< sqrt(nr/ns) <<" nr "<< nr <<" ns "<<ns << std::endl;
|
||||
}
|
||||
};
|
||||
|
||||
|
@ -4,6 +4,13 @@
|
||||
|
||||
namespace Grid {
|
||||
|
||||
static const int CbRed =0;
|
||||
static const int CbBlack=1;
|
||||
static const int Even =CbRed;
|
||||
static const int Odd =CbBlack;
|
||||
static const int DaggerNo=0;
|
||||
static const int DaggerYes=1;
|
||||
|
||||
// Specialise this for red black grids storing half the data like a chess board.
|
||||
class GridRedBlackCartesian : public GridBase
|
||||
{
|
||||
@ -44,6 +51,9 @@ public:
|
||||
return source_cb;
|
||||
}
|
||||
};
|
||||
|
||||
GridRedBlackCartesian(GridBase *base) : GridRedBlackCartesian(base->_fdimensions,base->_simd_layout,base->_processors) {};
|
||||
|
||||
GridRedBlackCartesian(std::vector<int> &dimensions,
|
||||
std::vector<int> &simd_layout,
|
||||
std::vector<int> &processor_grid ) : GridBase(processor_grid)
|
||||
|
@ -91,6 +91,47 @@ inline void GridFromExpression( GridBase * &grid,const LatticeTrinaryExpression<
|
||||
GridFromExpression(grid,std::get<2>(expr.second));
|
||||
}
|
||||
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
// Obtain the CB from an expression, ensuring conformable. This must follow a tree recursion
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
template<class T1, typename std::enable_if<is_lattice<T1>::value, T1>::type * =nullptr >
|
||||
inline void CBFromExpression(int &cb,const T1& lat) // Lattice leaf
|
||||
{
|
||||
if ( (cb==Odd) || (cb==Even) ) {
|
||||
assert(cb==lat.checkerboard);
|
||||
}
|
||||
cb=lat.checkerboard;
|
||||
// std::cout<<"Lattice leaf cb "<<cb<<std::endl;
|
||||
}
|
||||
template<class T1,typename std::enable_if<!is_lattice<T1>::value, T1>::type * = nullptr >
|
||||
inline void CBFromExpression(int &cb,const T1& notlat) // non-lattice leaf
|
||||
{
|
||||
// std::cout<<"Non lattice leaf cb"<<cb<<std::endl;
|
||||
}
|
||||
template <typename Op, typename T1>
|
||||
inline void CBFromExpression(int &cb,const LatticeUnaryExpression<Op,T1 > &expr)
|
||||
{
|
||||
CBFromExpression(cb,std::get<0>(expr.second));// recurse
|
||||
// std::cout<<"Unary node cb "<<cb<<std::endl;
|
||||
}
|
||||
|
||||
template <typename Op, typename T1, typename T2>
|
||||
inline void CBFromExpression(int &cb,const LatticeBinaryExpression<Op,T1,T2> &expr)
|
||||
{
|
||||
CBFromExpression(cb,std::get<0>(expr.second));// recurse
|
||||
CBFromExpression(cb,std::get<1>(expr.second));
|
||||
// std::cout<<"Binary node cb "<<cb<<std::endl;
|
||||
}
|
||||
template <typename Op, typename T1, typename T2, typename T3>
|
||||
inline void CBFromExpression( int &cb,const LatticeTrinaryExpression<Op,T1,T2,T3 > &expr)
|
||||
{
|
||||
CBFromExpression(cb,std::get<0>(expr.second));// recurse
|
||||
CBFromExpression(cb,std::get<1>(expr.second));
|
||||
CBFromExpression(cb,std::get<2>(expr.second));
|
||||
// std::cout<<"Trinary node cb "<<cb<<std::endl;
|
||||
}
|
||||
|
||||
////////////////////////////////////////////
|
||||
// Unary operators and funcs
|
||||
////////////////////////////////////////////
|
||||
|
@ -9,45 +9,68 @@ namespace Grid {
|
||||
//////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
template<class obj1,class obj2,class obj3> strong_inline
|
||||
void mult(Lattice<obj1> &ret,const Lattice<obj2> &lhs,const Lattice<obj3> &rhs){
|
||||
ret.checkerboard = lhs.checkerboard;
|
||||
conformable(ret,rhs);
|
||||
conformable(lhs,rhs);
|
||||
PARALLEL_FOR_LOOP
|
||||
for(int ss=0;ss<lhs._grid->oSites();ss++){
|
||||
#ifdef STREAMING_STORES
|
||||
obj1 tmp;
|
||||
mult(&tmp,&lhs._odata[ss],&rhs._odata[ss]);
|
||||
vstream(ret._odata[ss],tmp);
|
||||
// mult(&ret._odata[ss],&lhs._odata[ss],&rhs._odata[ss]);
|
||||
#else
|
||||
mult(&ret._odata[ss],&lhs._odata[ss],&rhs._odata[ss]);
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
||||
template<class obj1,class obj2,class obj3> strong_inline
|
||||
void mac(Lattice<obj1> &ret,const Lattice<obj2> &lhs,const Lattice<obj3> &rhs){
|
||||
ret.checkerboard = lhs.checkerboard;
|
||||
conformable(ret,rhs);
|
||||
conformable(lhs,rhs);
|
||||
PARALLEL_FOR_LOOP
|
||||
for(int ss=0;ss<lhs._grid->oSites();ss++){
|
||||
#ifdef STREAMING_STORES
|
||||
obj1 tmp;
|
||||
mac(&tmp,&lhs._odata[ss],&rhs._odata[ss]);
|
||||
vstream(ret._odata[ss],tmp);
|
||||
#else
|
||||
mac(&ret._odata[ss],&lhs._odata[ss],&rhs._odata[ss]);
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
||||
template<class obj1,class obj2,class obj3> strong_inline
|
||||
void sub(Lattice<obj1> &ret,const Lattice<obj2> &lhs,const Lattice<obj3> &rhs){
|
||||
ret.checkerboard = lhs.checkerboard;
|
||||
conformable(ret,rhs);
|
||||
conformable(lhs,rhs);
|
||||
PARALLEL_FOR_LOOP
|
||||
for(int ss=0;ss<lhs._grid->oSites();ss++){
|
||||
#ifdef STREAMING_STORES
|
||||
obj1 tmp;
|
||||
sub(&tmp,&lhs._odata[ss],&rhs._odata[ss]);
|
||||
vstream(ret._odata[ss],tmp);
|
||||
#else
|
||||
sub(&ret._odata[ss],&lhs._odata[ss],&rhs._odata[ss]);
|
||||
#endif
|
||||
}
|
||||
}
|
||||
template<class obj1,class obj2,class obj3> strong_inline
|
||||
void add(Lattice<obj1> &ret,const Lattice<obj2> &lhs,const Lattice<obj3> &rhs){
|
||||
ret.checkerboard = lhs.checkerboard;
|
||||
conformable(ret,rhs);
|
||||
conformable(lhs,rhs);
|
||||
PARALLEL_FOR_LOOP
|
||||
for(int ss=0;ss<lhs._grid->oSites();ss++){
|
||||
#ifdef STREAMING_STORES
|
||||
obj1 tmp;
|
||||
add(&tmp,&lhs._odata[ss],&rhs._odata[ss]);
|
||||
vstream(ret._odata[ss],tmp);
|
||||
#else
|
||||
add(&ret._odata[ss],&lhs._odata[ss],&rhs._odata[ss]);
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
||||
@ -56,6 +79,7 @@ PARALLEL_FOR_LOOP
|
||||
//////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
template<class obj1,class obj2,class obj3> strong_inline
|
||||
void mult(Lattice<obj1> &ret,const Lattice<obj2> &lhs,const obj3 &rhs){
|
||||
ret.checkerboard = lhs.checkerboard;
|
||||
conformable(lhs,ret);
|
||||
PARALLEL_FOR_LOOP
|
||||
for(int ss=0;ss<lhs._grid->oSites();ss++){
|
||||
@ -67,7 +91,8 @@ PARALLEL_FOR_LOOP
|
||||
|
||||
template<class obj1,class obj2,class obj3> strong_inline
|
||||
void mac(Lattice<obj1> &ret,const Lattice<obj2> &lhs,const obj3 &rhs){
|
||||
conformable(lhs,ret);
|
||||
ret.checkerboard = lhs.checkerboard;
|
||||
conformable(ret,lhs);
|
||||
PARALLEL_FOR_LOOP
|
||||
for(int ss=0;ss<lhs._grid->oSites();ss++){
|
||||
obj1 tmp;
|
||||
@ -78,22 +103,32 @@ PARALLEL_FOR_LOOP
|
||||
|
||||
template<class obj1,class obj2,class obj3> strong_inline
|
||||
void sub(Lattice<obj1> &ret,const Lattice<obj2> &lhs,const obj3 &rhs){
|
||||
conformable(lhs,ret);
|
||||
ret.checkerboard = lhs.checkerboard;
|
||||
conformable(ret,lhs);
|
||||
PARALLEL_FOR_LOOP
|
||||
for(int ss=0;ss<lhs._grid->oSites();ss++){
|
||||
#ifdef STREAMING_STORES
|
||||
obj1 tmp;
|
||||
sub(&tmp,&lhs._odata[ss],&rhs);
|
||||
vstream(ret._odata[ss],tmp);
|
||||
#else
|
||||
sub(&ret._odata[ss],&lhs._odata[ss],&rhs);
|
||||
#endif
|
||||
}
|
||||
}
|
||||
template<class obj1,class obj2,class obj3> strong_inline
|
||||
void add(Lattice<obj1> &ret,const Lattice<obj2> &lhs,const obj3 &rhs){
|
||||
ret.checkerboard = lhs.checkerboard;
|
||||
conformable(lhs,ret);
|
||||
PARALLEL_FOR_LOOP
|
||||
for(int ss=0;ss<lhs._grid->oSites();ss++){
|
||||
#ifdef STREAMING_STORES
|
||||
obj1 tmp;
|
||||
add(&tmp,&lhs._odata[ss],&rhs);
|
||||
vstream(ret._odata[ss],tmp);
|
||||
#else
|
||||
add(&ret._odata[ss],&lhs._odata[ss],&rhs);
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
||||
@ -102,84 +137,112 @@ PARALLEL_FOR_LOOP
|
||||
//////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
template<class obj1,class obj2,class obj3> strong_inline
|
||||
void mult(Lattice<obj1> &ret,const obj2 &lhs,const Lattice<obj3> &rhs){
|
||||
ret.checkerboard = rhs.checkerboard;
|
||||
conformable(ret,rhs);
|
||||
PARALLEL_FOR_LOOP
|
||||
for(int ss=0;ss<rhs._grid->oSites();ss++){
|
||||
#ifdef STREAMING_STORES
|
||||
obj1 tmp;
|
||||
mult(&tmp,&lhs,&rhs._odata[ss]);
|
||||
vstream(ret._odata[ss],tmp);
|
||||
#else
|
||||
mult(&ret._odata[ss],&lhs,&rhs._odata[ss]);
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
||||
template<class obj1,class obj2,class obj3> strong_inline
|
||||
void mac(Lattice<obj1> &ret,const obj2 &lhs,const Lattice<obj3> &rhs){
|
||||
ret.checkerboard = rhs.checkerboard;
|
||||
conformable(ret,rhs);
|
||||
PARALLEL_FOR_LOOP
|
||||
for(int ss=0;ss<rhs._grid->oSites();ss++){
|
||||
#ifdef STREAMING_STORES
|
||||
obj1 tmp;
|
||||
mac(&tmp,&lhs,&rhs._odata[ss]);
|
||||
vstream(ret._odata[ss],tmp);
|
||||
#else
|
||||
mac(&ret._odata[ss],&lhs,&rhs._odata[ss]);
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
||||
template<class obj1,class obj2,class obj3> strong_inline
|
||||
void sub(Lattice<obj1> &ret,const obj2 &lhs,const Lattice<obj3> &rhs){
|
||||
ret.checkerboard = rhs.checkerboard;
|
||||
conformable(ret,rhs);
|
||||
PARALLEL_FOR_LOOP
|
||||
for(int ss=0;ss<rhs._grid->oSites();ss++){
|
||||
#ifdef STREAMING_STORES
|
||||
obj1 tmp;
|
||||
sub(&tmp,&lhs,&rhs._odata[ss]);
|
||||
vstream(ret._odata[ss],tmp);
|
||||
#else
|
||||
sub(&ret._odata[ss],&lhs,&rhs._odata[ss]);
|
||||
#endif
|
||||
}
|
||||
}
|
||||
template<class obj1,class obj2,class obj3> strong_inline
|
||||
void add(Lattice<obj1> &ret,const obj2 &lhs,const Lattice<obj3> &rhs){
|
||||
ret.checkerboard = rhs.checkerboard;
|
||||
conformable(ret,rhs);
|
||||
PARALLEL_FOR_LOOP
|
||||
for(int ss=0;ss<rhs._grid->oSites();ss++){
|
||||
#ifdef STREAMING_STORES
|
||||
obj1 tmp;
|
||||
add(&tmp,&lhs,&rhs._odata[ss]);
|
||||
vstream(ret._odata[ss],tmp);
|
||||
#else
|
||||
add(&ret._odata[ss],&lhs,&rhs._odata[ss]);
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
||||
template<class sobj,class vobj> strong_inline
|
||||
void axpy(Lattice<vobj> &ret,sobj a,const Lattice<vobj> &x,const Lattice<vobj> &y){
|
||||
ret.checkerboard = x.checkerboard;
|
||||
conformable(ret,x);
|
||||
conformable(x,y);
|
||||
#pragma omp parallel for
|
||||
PARALLEL_FOR_LOOP
|
||||
for(int ss=0;ss<x._grid->oSites();ss++){
|
||||
#ifdef STREAMING_STORES
|
||||
vobj tmp = a*x._odata[ss]+y._odata[ss];
|
||||
vstream(ret._odata[ss],tmp);
|
||||
#else
|
||||
ret._odata[ss]=a*x._odata[ss]+y._odata[ss];
|
||||
#endif
|
||||
}
|
||||
}
|
||||
template<class sobj,class vobj> strong_inline
|
||||
void axpby(Lattice<vobj> &ret,sobj a,sobj b,const Lattice<vobj> &x,const Lattice<vobj> &y){
|
||||
ret.checkerboard = x.checkerboard;
|
||||
conformable(ret,x);
|
||||
conformable(x,y);
|
||||
#pragma omp parallel for
|
||||
PARALLEL_FOR_LOOP
|
||||
for(int ss=0;ss<x._grid->oSites();ss++){
|
||||
#ifdef STREAMING_STORES
|
||||
vobj tmp = a*x._odata[ss]+b*y._odata[ss];
|
||||
vstream(ret._odata[ss],tmp);
|
||||
#else
|
||||
ret._odata[ss]=a*x._odata[ss]+b*y._odata[ss];
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
||||
template<class sobj,class vobj> strong_inline
|
||||
RealD axpy_norm(Lattice<vobj> &ret,sobj a,const Lattice<vobj> &x,const Lattice<vobj> &y){
|
||||
ret.checkerboard = x.checkerboard;
|
||||
conformable(ret,x);
|
||||
conformable(x,y);
|
||||
#pragma omp parallel for
|
||||
for(int ss=0;ss<x._grid->oSites();ss++){
|
||||
vobj tmp = a*x._odata[ss]+y._odata[ss];
|
||||
vstream(ret._odata[ss],tmp);
|
||||
}
|
||||
axpy(ret,a,x,y);
|
||||
return norm2(ret);
|
||||
}
|
||||
template<class sobj,class vobj> strong_inline
|
||||
RealD axpby_norm(Lattice<vobj> &ret,sobj a,sobj b,const Lattice<vobj> &x,const Lattice<vobj> &y){
|
||||
ret.checkerboard = x.checkerboard;
|
||||
conformable(ret,x);
|
||||
conformable(x,y);
|
||||
#pragma omp parallel for
|
||||
for(int ss=0;ss<x._grid->oSites();ss++){
|
||||
vobj tmp = a*x._odata[ss]+b*y._odata[ss];
|
||||
vstream(ret._odata[ss],tmp);
|
||||
}
|
||||
axpby(ret,a,b,x,y);
|
||||
return norm2(ret); // FIXME implement parallel norm in ss loop
|
||||
}
|
||||
|
||||
|
@ -1,6 +1,8 @@
|
||||
#ifndef GRID_LATTICE_BASE_H
|
||||
#define GRID_LATTICE_BASE_H
|
||||
|
||||
#define STREAMING_STORES
|
||||
|
||||
namespace Grid {
|
||||
|
||||
// TODO:
|
||||
@ -64,60 +66,136 @@ public:
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
template <typename Op, typename T1> strong_inline Lattice<vobj> & operator=(const LatticeUnaryExpression<Op,T1> &expr)
|
||||
{
|
||||
GridBase *egrid(nullptr);
|
||||
GridFromExpression(egrid,expr);
|
||||
assert(egrid!=nullptr);
|
||||
conformable(_grid,egrid);
|
||||
|
||||
int cb=-1;
|
||||
CBFromExpression(cb,expr);
|
||||
assert( (cb==Odd) || (cb==Even));
|
||||
checkerboard=cb;
|
||||
|
||||
PARALLEL_FOR_LOOP
|
||||
for(int ss=0;ss<_grid->oSites();ss++){
|
||||
vobj tmp= eval(ss,expr);
|
||||
#ifdef STREAMING_STORES
|
||||
vobj tmp = eval(ss,expr);
|
||||
vstream(_odata[ss] ,tmp);
|
||||
#else
|
||||
_odata[ss]=eval(ss,expr);
|
||||
#endif
|
||||
}
|
||||
return *this;
|
||||
}
|
||||
template <typename Op, typename T1,typename T2> strong_inline Lattice<vobj> & operator=(const LatticeBinaryExpression<Op,T1,T2> &expr)
|
||||
{
|
||||
GridBase *egrid(nullptr);
|
||||
GridFromExpression(egrid,expr);
|
||||
assert(egrid!=nullptr);
|
||||
conformable(_grid,egrid);
|
||||
|
||||
int cb=-1;
|
||||
CBFromExpression(cb,expr);
|
||||
assert( (cb==Odd) || (cb==Even));
|
||||
checkerboard=cb;
|
||||
|
||||
PARALLEL_FOR_LOOP
|
||||
for(int ss=0;ss<_grid->oSites();ss++){
|
||||
vobj tmp= eval(ss,expr);
|
||||
#ifdef STREAMING_STORES
|
||||
vobj tmp = eval(ss,expr);
|
||||
vstream(_odata[ss] ,tmp);
|
||||
#else
|
||||
_odata[ss]=eval(ss,expr);
|
||||
#endif
|
||||
}
|
||||
return *this;
|
||||
}
|
||||
template <typename Op, typename T1,typename T2,typename T3> strong_inline Lattice<vobj> & operator=(const LatticeTrinaryExpression<Op,T1,T2,T3> &expr)
|
||||
{
|
||||
GridBase *egrid(nullptr);
|
||||
GridFromExpression(egrid,expr);
|
||||
assert(egrid!=nullptr);
|
||||
conformable(_grid,egrid);
|
||||
|
||||
int cb=-1;
|
||||
CBFromExpression(cb,expr);
|
||||
assert( (cb==Odd) || (cb==Even));
|
||||
checkerboard=cb;
|
||||
|
||||
PARALLEL_FOR_LOOP
|
||||
for(int ss=0;ss<_grid->oSites();ss++){
|
||||
vobj tmp= eval(ss,expr);
|
||||
#ifdef STREAMING_STORES
|
||||
vobj tmp = eval(ss,expr);
|
||||
vstream(_odata[ss] ,tmp);
|
||||
#else
|
||||
_odata[ss] = eval(ss,expr);
|
||||
#endif
|
||||
}
|
||||
return *this;
|
||||
}
|
||||
//GridFromExpression is tricky to do
|
||||
template<class Op,class T1>
|
||||
Lattice(const LatticeUnaryExpression<Op,T1> & expr): _grid(nullptr){
|
||||
|
||||
GridFromExpression(_grid,expr);
|
||||
assert(_grid!=nullptr);
|
||||
|
||||
int cb=-1;
|
||||
CBFromExpression(cb,expr);
|
||||
assert( (cb==Odd) || (cb==Even));
|
||||
checkerboard=cb;
|
||||
|
||||
_odata.resize(_grid->oSites());
|
||||
PARALLEL_FOR_LOOP
|
||||
for(int ss=0;ss<_grid->oSites();ss++){
|
||||
_odata[ss] = eval(ss,expr);
|
||||
#ifdef STREAMING_STORES
|
||||
vobj tmp = eval(ss,expr);
|
||||
vstream(_odata[ss] ,tmp);
|
||||
#else
|
||||
_odata[ss]=_eval(ss,expr);
|
||||
#endif
|
||||
}
|
||||
};
|
||||
template<class Op,class T1, class T2>
|
||||
Lattice(const LatticeBinaryExpression<Op,T1,T2> & expr): _grid(nullptr){
|
||||
GridFromExpression(_grid,expr);
|
||||
assert(_grid!=nullptr);
|
||||
|
||||
int cb=-1;
|
||||
CBFromExpression(cb,expr);
|
||||
assert( (cb==Odd) || (cb==Even));
|
||||
checkerboard=cb;
|
||||
|
||||
_odata.resize(_grid->oSites());
|
||||
PARALLEL_FOR_LOOP
|
||||
for(int ss=0;ss<_grid->oSites();ss++){
|
||||
_odata[ss] = eval(ss,expr);
|
||||
#ifdef STREAMING_STORES
|
||||
vobj tmp = eval(tmp,ss,expr);
|
||||
vstream(_odata[ss] ,tmp);
|
||||
#else
|
||||
_odata[ss]=eval(ss,expr);
|
||||
#endif
|
||||
}
|
||||
};
|
||||
template<class Op,class T1, class T2, class T3>
|
||||
Lattice(const LatticeTrinaryExpression<Op,T1,T2,T3> & expr): _grid(nullptr){
|
||||
GridFromExpression(_grid,expr);
|
||||
assert(_grid!=nullptr);
|
||||
|
||||
int cb=-1;
|
||||
CBFromExpression(cb,expr);
|
||||
assert( (cb==Odd) || (cb==Even));
|
||||
checkerboard=cb;
|
||||
|
||||
_odata.resize(_grid->oSites());
|
||||
PARALLEL_FOR_LOOP
|
||||
for(int ss=0;ss<_grid->oSites();ss++){
|
||||
_odata[ss] = eval(ss,expr);
|
||||
#ifdef STREAMING_STORES
|
||||
vobj tmp = eval(ss,expr);
|
||||
vstream(_odata[ss] ,tmp);
|
||||
#else
|
||||
_odata[ss]=eval(ss,expr);
|
||||
#endif
|
||||
}
|
||||
};
|
||||
|
||||
@ -140,6 +218,7 @@ PARALLEL_FOR_LOOP
|
||||
return *this;
|
||||
}
|
||||
template<class robj> strong_inline Lattice<vobj> & operator = (const Lattice<robj> & r){
|
||||
this->checkerboard = r.checkerboard;
|
||||
conformable(*this,r);
|
||||
std::cout<<"Lattice operator ="<<std::endl;
|
||||
PARALLEL_FOR_LOOP
|
||||
|
@ -16,19 +16,18 @@ strong_inline void mult(iScalar<rtype> * __restrict__ ret,const iScalar<mtype> *
|
||||
template<class rrtype,class ltype,class rtype,int N>
|
||||
strong_inline void mult(iMatrix<rrtype,N> * __restrict__ ret,const iMatrix<ltype,N> * __restrict__ lhs,const iMatrix<rtype,N> * __restrict__ rhs){
|
||||
for(int c1=0;c1<N;c1++){
|
||||
int c3=0;
|
||||
for(int c2=0;c2<N;c2++){
|
||||
mult(&ret->_internal[c1][c2],&lhs->_internal[c1][c3],&rhs->_internal[c3][c2]);
|
||||
mult(&ret->_internal[c1][c2],&lhs->_internal[c1][0],&rhs->_internal[0][c2]);
|
||||
}
|
||||
}
|
||||
for(int c3=1;c3<N;c3++){
|
||||
for(int c1=0;c1<N;c1++){
|
||||
for(int c1=0;c1<N;c1++){
|
||||
for(int c3=1;c3<N;c3++){
|
||||
for(int c2=0;c2<N;c2++){
|
||||
mac(&ret->_internal[c1][c2],&lhs->_internal[c1][c3],&rhs->_internal[c3][c2]);
|
||||
}
|
||||
}
|
||||
}
|
||||
return;
|
||||
return;
|
||||
}
|
||||
|
||||
template<class rrtype,class ltype,class rtype,int N>
|
||||
|
@ -34,66 +34,64 @@ public:
|
||||
|
||||
// Scalar no action
|
||||
// template<int Level> using tensor_reduce_level = typename iScalar<GridTypeMapper<vtype>::tensor_reduce_level<Level> >;
|
||||
iScalar()=default;
|
||||
iScalar() = default;
|
||||
/*
|
||||
iScalar(const iScalar<vtype> ©me)=default;
|
||||
iScalar(iScalar<vtype> &©me)=default;
|
||||
iScalar<vtype> & operator= (const iScalar<vtype> ©me) = default;
|
||||
iScalar<vtype> & operator= (iScalar<vtype> &©me) = default;
|
||||
*/
|
||||
iScalar(scalar_type s) : _internal(s) {};// recurse down and hit the constructor for vector_type
|
||||
iScalar(const Zero &z){ *this = zero; };
|
||||
|
||||
iScalar<vtype> & operator= (const Zero &hero){
|
||||
zeroit(*this);
|
||||
return *this;
|
||||
}
|
||||
friend strong_inline void vstream(iScalar<vtype> &out,const iScalar<vtype> &in){
|
||||
vstream(out._internal,in._internal);
|
||||
}
|
||||
friend strong_inline void zeroit(iScalar<vtype> &that){
|
||||
zeroit(that._internal);
|
||||
}
|
||||
friend strong_inline void prefetch(iScalar<vtype> &that){
|
||||
prefetch(that._internal);
|
||||
}
|
||||
friend strong_inline void permute(iScalar<vtype> &out,const iScalar<vtype> &in,int permutetype){
|
||||
permute(out._internal,in._internal,permutetype);
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
iScalar<vtype> & operator= (const Zero &hero){
|
||||
zeroit(*this);
|
||||
return *this;
|
||||
}
|
||||
friend strong_inline void vstream(iScalar<vtype> &out,const iScalar<vtype> &in){
|
||||
vstream(out._internal,in._internal);
|
||||
}
|
||||
|
||||
|
||||
friend strong_inline void zeroit(iScalar<vtype> &that){
|
||||
zeroit(that._internal);
|
||||
}
|
||||
friend strong_inline void prefetch(iScalar<vtype> &that){
|
||||
prefetch(that._internal);
|
||||
}
|
||||
friend strong_inline void permute(iScalar<vtype> &out,const iScalar<vtype> &in,int permutetype){
|
||||
permute(out._internal,in._internal,permutetype);
|
||||
}
|
||||
|
||||
// Unary negation
|
||||
friend strong_inline iScalar<vtype> operator -(const iScalar<vtype> &r) {
|
||||
iScalar<vtype> ret;
|
||||
ret._internal= -r._internal;
|
||||
return ret;
|
||||
}
|
||||
// *=,+=,-= operators inherit from corresponding "*,-,+" behaviour
|
||||
strong_inline iScalar<vtype> &operator *=(const iScalar<vtype> &r) {
|
||||
*this = (*this)*r;
|
||||
return *this;
|
||||
}
|
||||
strong_inline iScalar<vtype> &operator -=(const iScalar<vtype> &r) {
|
||||
*this = (*this)-r;
|
||||
return *this;
|
||||
}
|
||||
strong_inline iScalar<vtype> &operator +=(const iScalar<vtype> &r) {
|
||||
*this = (*this)+r;
|
||||
return *this;
|
||||
}
|
||||
|
||||
strong_inline vtype & operator ()(void) {
|
||||
return _internal;
|
||||
}
|
||||
|
||||
strong_inline const vtype & operator ()(void) const {
|
||||
return _internal;
|
||||
}
|
||||
|
||||
operator ComplexD () const { return(TensorRemove(_internal)); };
|
||||
operator RealD () const { return(real(TensorRemove(_internal))); }
|
||||
|
||||
// convert from a something to a scalar
|
||||
template<class T,typename std::enable_if<!isGridTensor<T>::value, T>::type* = nullptr > strong_inline auto operator = (T arg) -> iScalar<vtype>
|
||||
// Unary negation
|
||||
friend strong_inline iScalar<vtype> operator -(const iScalar<vtype> &r) {
|
||||
iScalar<vtype> ret;
|
||||
ret._internal= -r._internal;
|
||||
return ret;
|
||||
}
|
||||
// *=,+=,-= operators inherit from corresponding "*,-,+" behaviour
|
||||
strong_inline iScalar<vtype> &operator *=(const iScalar<vtype> &r) {
|
||||
*this = (*this)*r;
|
||||
return *this;
|
||||
}
|
||||
strong_inline iScalar<vtype> &operator -=(const iScalar<vtype> &r) {
|
||||
*this = (*this)-r;
|
||||
return *this;
|
||||
}
|
||||
strong_inline iScalar<vtype> &operator +=(const iScalar<vtype> &r) {
|
||||
*this = (*this)+r;
|
||||
return *this;
|
||||
}
|
||||
strong_inline vtype & operator ()(void) {
|
||||
return _internal;
|
||||
}
|
||||
strong_inline const vtype & operator ()(void) const {
|
||||
return _internal;
|
||||
}
|
||||
|
||||
operator ComplexD () const { return(TensorRemove(_internal)); };
|
||||
operator RealD () const { return(real(TensorRemove(_internal))); }
|
||||
|
||||
// convert from a something to a scalar
|
||||
template<class T,typename std::enable_if<!isGridTensor<T>::value, T>::type* = nullptr > strong_inline auto operator = (T arg) -> iScalar<vtype>
|
||||
{
|
||||
_internal = vtype(arg);
|
||||
return *this;
|
||||
@ -129,67 +127,72 @@ public:
|
||||
enum { TensorLevel = GridTypeMapper<vtype>::TensorLevel + 1};
|
||||
iVector(const Zero &z){ *this = zero; };
|
||||
iVector() =default;
|
||||
/*
|
||||
iVector(const iVector<vtype,N> ©me)=default;
|
||||
iVector(iVector<vtype,N> &©me)=default;
|
||||
iVector<vtype,N> & operator= (const iVector<vtype,N> ©me) = default;
|
||||
iVector<vtype,N> & operator= (iVector<vtype,N> &©me) = default;
|
||||
*/
|
||||
|
||||
iVector<vtype,N> & operator= (const Zero &hero){
|
||||
zeroit(*this);
|
||||
return *this;
|
||||
iVector<vtype,N> & operator= (const Zero &hero){
|
||||
zeroit(*this);
|
||||
return *this;
|
||||
}
|
||||
friend strong_inline void zeroit(iVector<vtype,N> &that){
|
||||
for(int i=0;i<N;i++){
|
||||
zeroit(that._internal[i]);
|
||||
}
|
||||
friend strong_inline void zeroit(iVector<vtype,N> &that){
|
||||
for(int i=0;i<N;i++){
|
||||
zeroit(that._internal[i]);
|
||||
}
|
||||
}
|
||||
friend strong_inline void prefetch(iVector<vtype,N> &that){
|
||||
for(int i=0;i<N;i++) prefetch(that._internal[i]);
|
||||
}
|
||||
friend strong_inline void vstream(iVector<vtype,N> &out,const iVector<vtype,N> &in){
|
||||
for(int i=0;i<N;i++){
|
||||
vstream(out._internal[i],in._internal[i]);
|
||||
}
|
||||
friend strong_inline void prefetch(iVector<vtype,N> &that){
|
||||
for(int i=0;i<N;i++) prefetch(that._internal[i]);
|
||||
}
|
||||
friend strong_inline void permute(iVector<vtype,N> &out,const iVector<vtype,N> &in,int permutetype){
|
||||
for(int i=0;i<N;i++){
|
||||
permute(out._internal[i],in._internal[i],permutetype);
|
||||
}
|
||||
friend strong_inline void vstream(iVector<vtype,N> &out,const iVector<vtype,N> &in){
|
||||
for(int i=0;i<N;i++){
|
||||
vstream(out._internal[i],in._internal[i]);
|
||||
}
|
||||
}
|
||||
// Unary negation
|
||||
friend strong_inline iVector<vtype,N> operator -(const iVector<vtype,N> &r) {
|
||||
iVector<vtype,N> ret;
|
||||
for(int i=0;i<N;i++) ret._internal[i]= -r._internal[i];
|
||||
return ret;
|
||||
}
|
||||
// *=,+=,-= operators inherit from corresponding "*,-,+" behaviour
|
||||
strong_inline iVector<vtype,N> &operator *=(const iScalar<vtype> &r) {
|
||||
*this = (*this)*r;
|
||||
return *this;
|
||||
}
|
||||
strong_inline iVector<vtype,N> &operator -=(const iVector<vtype,N> &r) {
|
||||
*this = (*this)-r;
|
||||
return *this;
|
||||
}
|
||||
strong_inline iVector<vtype,N> &operator +=(const iVector<vtype,N> &r) {
|
||||
*this = (*this)+r;
|
||||
return *this;
|
||||
}
|
||||
strong_inline vtype & operator ()(int i) {
|
||||
return _internal[i];
|
||||
}
|
||||
strong_inline const vtype & operator ()(int i) const {
|
||||
return _internal[i];
|
||||
}
|
||||
friend std::ostream& operator<< (std::ostream& stream, const iVector<vtype,N> &o){
|
||||
stream<< "V<"<<N<<">{";
|
||||
for(int i=0;i<N;i++) {
|
||||
stream<<o._internal[i];
|
||||
if (i<N-1) stream<<",";
|
||||
}
|
||||
|
||||
friend strong_inline void permute(iVector<vtype,N> &out,const iVector<vtype,N> &in,int permutetype){
|
||||
for(int i=0;i<N;i++){
|
||||
permute(out._internal[i],in._internal[i],permutetype);
|
||||
}
|
||||
}
|
||||
// Unary negation
|
||||
friend strong_inline iVector<vtype,N> operator -(const iVector<vtype,N> &r) {
|
||||
iVector<vtype,N> ret;
|
||||
for(int i=0;i<N;i++) ret._internal[i]= -r._internal[i];
|
||||
return ret;
|
||||
}
|
||||
// *=,+=,-= operators inherit from corresponding "*,-,+" behaviour
|
||||
strong_inline iVector<vtype,N> &operator *=(const iScalar<vtype> &r) {
|
||||
*this = (*this)*r;
|
||||
return *this;
|
||||
}
|
||||
strong_inline iVector<vtype,N> &operator -=(const iVector<vtype,N> &r) {
|
||||
*this = (*this)-r;
|
||||
return *this;
|
||||
}
|
||||
strong_inline iVector<vtype,N> &operator +=(const iVector<vtype,N> &r) {
|
||||
*this = (*this)+r;
|
||||
return *this;
|
||||
}
|
||||
strong_inline vtype & operator ()(int i) {
|
||||
return _internal[i];
|
||||
}
|
||||
strong_inline const vtype & operator ()(int i) const {
|
||||
return _internal[i];
|
||||
}
|
||||
friend std::ostream& operator<< (std::ostream& stream, const iVector<vtype,N> &o){
|
||||
stream<< "V<"<<N<<">{";
|
||||
for(int i=0;i<N;i++) {
|
||||
stream<<o._internal[i];
|
||||
if (i<N-1) stream<<",";
|
||||
}
|
||||
stream<<"}";
|
||||
return stream;
|
||||
};
|
||||
// strong_inline vtype && operator ()(int i) {
|
||||
// return _internal[i];
|
||||
// }
|
||||
stream<<"}";
|
||||
return stream;
|
||||
};
|
||||
// strong_inline vtype && operator ()(int i) {
|
||||
// return _internal[i];
|
||||
// }
|
||||
};
|
||||
|
||||
template<class vtype,int N> class iMatrix
|
||||
@ -209,8 +212,6 @@ public:
|
||||
|
||||
iMatrix(const Zero &z){ *this = zero; };
|
||||
iMatrix() =default;
|
||||
|
||||
// No copy constructor...
|
||||
|
||||
iMatrix& operator=(const iMatrix& rhs){
|
||||
for(int i=0;i<N;i++)
|
||||
@ -221,6 +222,17 @@ public:
|
||||
|
||||
|
||||
|
||||
iMatrix(scalar_type s) { (*this) = s ;};// recurse down and hit the constructor for vector_type
|
||||
|
||||
/*
|
||||
iMatrix(const iMatrix<vtype,N> ©me)=default;
|
||||
iMatrix(iMatrix<vtype,N> &©me)=default;
|
||||
iMatrix<vtype,N> & operator= (const iMatrix<vtype,N> ©me) = default;
|
||||
iMatrix<vtype,N> & operator= (iMatrix<vtype,N> &©me) = default;
|
||||
*/
|
||||
|
||||
|
||||
|
||||
iMatrix<vtype,N> & operator= (const Zero &hero){
|
||||
zeroit(*this);
|
||||
return *this;
|
||||
|
@ -10,9 +10,6 @@ namespace QCD {
|
||||
static const int Nhs=2; // half spinor
|
||||
static const int Nds=8; // double stored gauge field
|
||||
|
||||
static const int CbRed =0;
|
||||
static const int CbBlack=1;
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////
|
||||
// QCD iMatrix types
|
||||
// Index conventions: Lorentz x Spin x Colour
|
||||
|
@ -4,8 +4,8 @@
|
||||
namespace Grid {
|
||||
namespace QCD {
|
||||
|
||||
const std::vector<int> WilsonMatrix::directions ({0,1,2,3, 0, 1, 2, 3,0});
|
||||
const std::vector<int> WilsonMatrix::displacements({1,1,1,1,-1,-1,-1,-1,0});
|
||||
const std::vector<int> WilsonMatrix::directions ({0,1,2,3, 0, 1, 2, 3});
|
||||
const std::vector<int> WilsonMatrix::displacements({1,1,1,1,-1,-1,-1,-1});
|
||||
|
||||
// Should be in header?
|
||||
const int WilsonMatrix::Xp = 0;
|
||||
@ -16,7 +16,6 @@ const int WilsonMatrix::Xm = 4;
|
||||
const int WilsonMatrix::Ym = 5;
|
||||
const int WilsonMatrix::Zm = 6;
|
||||
const int WilsonMatrix::Tm = 7;
|
||||
//const int WilsonMatrix::X0 = 8;
|
||||
|
||||
class WilsonCompressor {
|
||||
public:
|
||||
@ -72,20 +71,27 @@ const int WilsonMatrix::Tm = 7;
|
||||
}
|
||||
};
|
||||
|
||||
WilsonMatrix::WilsonMatrix(LatticeGaugeField &_Umu,double _mass)
|
||||
: Stencil(Umu._grid,npoint,0,directions,displacements),
|
||||
WilsonMatrix::WilsonMatrix(LatticeGaugeField &_Umu,GridCartesian &Fgrid,GridRedBlackCartesian &Hgrid, double _mass) :
|
||||
CheckerBoardedSparseMatrixBase<LatticeFermion>(&Fgrid,&Hgrid),
|
||||
Stencil ( _grid,npoint,Even,directions,displacements),
|
||||
StencilEven(_cbgrid,npoint,Even,directions,displacements), // source is Even
|
||||
StencilOdd (_cbgrid,npoint,Odd ,directions,displacements), // source is Odd
|
||||
mass(_mass),
|
||||
Umu(_Umu._grid)
|
||||
{
|
||||
// Allocate the required comms buffer
|
||||
grid = _Umu._grid;
|
||||
comm_buf.resize(Stencil._unified_buffer_size);
|
||||
DoubleStore(Umu,_Umu);
|
||||
}
|
||||
Umu(_grid),
|
||||
UmuEven(_cbgrid),
|
||||
UmuOdd (_cbgrid)
|
||||
{
|
||||
// Allocate the required comms buffer
|
||||
comm_buf.resize(Stencil._unified_buffer_size); // this is always big enough to contain EO
|
||||
|
||||
DoubleStore(Umu,_Umu);
|
||||
pickCheckerboard(Even,UmuEven,Umu);
|
||||
pickCheckerboard(Odd ,UmuOdd,Umu);
|
||||
}
|
||||
|
||||
void WilsonMatrix::DoubleStore(LatticeDoubledGaugeField &Uds,const LatticeGaugeField &Umu)
|
||||
{
|
||||
LatticeColourMatrix U(grid);
|
||||
LatticeColourMatrix U(_grid);
|
||||
|
||||
for(int mu=0;mu<Nd;mu++){
|
||||
U = peekIndex<LorentzIndex>(Umu,mu);
|
||||
@ -97,48 +103,63 @@ void WilsonMatrix::DoubleStore(LatticeDoubledGaugeField &Uds,const LatticeGaugeF
|
||||
|
||||
RealD WilsonMatrix::M(const LatticeFermion &in, LatticeFermion &out)
|
||||
{
|
||||
Dhop(in,out,0);
|
||||
out.checkerboard=in.checkerboard;
|
||||
Dhop(in,out,DaggerNo);
|
||||
out = (4+mass)*in - 0.5*out ; // FIXME : axpby_norm! fusion fun
|
||||
return norm2(out);
|
||||
}
|
||||
RealD WilsonMatrix::Mdag(const LatticeFermion &in, LatticeFermion &out)
|
||||
{
|
||||
Dhop(in,out,1);
|
||||
out.checkerboard=in.checkerboard;
|
||||
Dhop(in,out,DaggerYes);
|
||||
out = (4+mass)*in - 0.5*out ; // FIXME : axpby_norm! fusion fun
|
||||
return norm2(out);
|
||||
}
|
||||
|
||||
void WilsonMatrix::Meooe(const LatticeFermion &in, LatticeFermion &out)
|
||||
{
|
||||
Dhop(in,out,0);
|
||||
out = 0.5*out; // FIXME : scale factor in Dhop
|
||||
if ( in.checkerboard == Odd ) {
|
||||
DhopEO(in,out,DaggerNo);
|
||||
} else {
|
||||
DhopOE(in,out,DaggerNo);
|
||||
}
|
||||
out = (-0.5)*out; // FIXME : scale factor in Dhop
|
||||
}
|
||||
void WilsonMatrix::MeooeDag(const LatticeFermion &in, LatticeFermion &out)
|
||||
{
|
||||
Dhop(in,out,1);
|
||||
if ( in.checkerboard == Odd ) {
|
||||
DhopEO(in,out,DaggerYes);
|
||||
} else {
|
||||
DhopOE(in,out,DaggerYes);
|
||||
}
|
||||
out = (-0.5)*out; // FIXME : scale factor in Dhop
|
||||
}
|
||||
void WilsonMatrix::Mooee(const LatticeFermion &in, LatticeFermion &out)
|
||||
{
|
||||
out.checkerboard = in.checkerboard;
|
||||
out = (4.0+mass)*in;
|
||||
return ;
|
||||
}
|
||||
void WilsonMatrix::MooeeInv(const LatticeFermion &in, LatticeFermion &out)
|
||||
{
|
||||
out = (1.0/(4.0+mass))*in;
|
||||
return ;
|
||||
}
|
||||
void WilsonMatrix::MooeeDag(const LatticeFermion &in, LatticeFermion &out)
|
||||
{
|
||||
out.checkerboard = in.checkerboard;
|
||||
Mooee(in,out);
|
||||
}
|
||||
void WilsonMatrix::MooeeInv(const LatticeFermion &in, LatticeFermion &out)
|
||||
{
|
||||
out.checkerboard = in.checkerboard;
|
||||
out = (1.0/(4.0+mass))*in;
|
||||
return ;
|
||||
}
|
||||
void WilsonMatrix::MooeeInvDag(const LatticeFermion &in, LatticeFermion &out)
|
||||
{
|
||||
out = (1.0/(4.0+mass))*in;
|
||||
return ;
|
||||
out.checkerboard = in.checkerboard;
|
||||
MooeeInv(in,out);
|
||||
}
|
||||
|
||||
void WilsonMatrix::DhopSite(int ss,const LatticeFermion &in, LatticeFermion &out)
|
||||
void WilsonMatrix::DhopSite(CartesianStencil &st,LatticeDoubledGaugeField &U,
|
||||
std::vector<vHalfSpinColourVector,alignedAllocator<vHalfSpinColourVector> > &buf,
|
||||
int ss,const LatticeFermion &in, LatticeFermion &out)
|
||||
{
|
||||
vHalfSpinColourVector tmp;
|
||||
vHalfSpinColourVector chi;
|
||||
@ -146,79 +167,78 @@ void WilsonMatrix::DhopSite(int ss,const LatticeFermion &in, LatticeFermion &out
|
||||
vHalfSpinColourVector Uchi;
|
||||
int offset,local,perm, ptype;
|
||||
|
||||
// int ss = Stencil._LebesgueReorder[sss];
|
||||
int ssu= ss;
|
||||
//#define VERBOSE( A) if ( ss<10 ) { std::cout << "site " <<ss << " " #A " neigh " << offset << " perm "<< perm <<std::endl;}
|
||||
|
||||
// Xp
|
||||
offset = Stencil._offsets [Xp][ss];
|
||||
local = Stencil._is_local[Xp][ss];
|
||||
perm = Stencil._permute[Xp][ss];
|
||||
offset = st._offsets [Xp][ss];
|
||||
local = st._is_local[Xp][ss];
|
||||
perm = st._permute[Xp][ss];
|
||||
|
||||
ptype = Stencil._permute_type[Xp];
|
||||
ptype = st._permute_type[Xp];
|
||||
if ( local && perm ) {
|
||||
spProjXp(tmp,in._odata[offset]);
|
||||
permute(chi,tmp,ptype);
|
||||
} else if ( local ) {
|
||||
spProjXp(chi,in._odata[offset]);
|
||||
} else {
|
||||
chi=comm_buf[offset];
|
||||
chi=buf[offset];
|
||||
}
|
||||
mult(&Uchi(),&Umu._odata[ssu](Xp),&chi());
|
||||
mult(&Uchi(),&U._odata[ss](Xp),&chi());
|
||||
spReconXp(result,Uchi);
|
||||
|
||||
// Yp
|
||||
offset = Stencil._offsets [Yp][ss];
|
||||
local = Stencil._is_local[Yp][ss];
|
||||
perm = Stencil._permute[Yp][ss];
|
||||
ptype = Stencil._permute_type[Yp];
|
||||
offset = st._offsets [Yp][ss];
|
||||
local = st._is_local[Yp][ss];
|
||||
perm = st._permute[Yp][ss];
|
||||
ptype = st._permute_type[Yp];
|
||||
if ( local && perm ) {
|
||||
spProjYp(tmp,in._odata[offset]);
|
||||
permute(chi,tmp,ptype);
|
||||
} else if ( local ) {
|
||||
spProjYp(chi,in._odata[offset]);
|
||||
} else {
|
||||
chi=comm_buf[offset];
|
||||
chi=buf[offset];
|
||||
}
|
||||
mult(&Uchi(),&Umu._odata[ssu](Yp),&chi());
|
||||
mult(&Uchi(),&U._odata[ss](Yp),&chi());
|
||||
accumReconYp(result,Uchi);
|
||||
|
||||
// Zp
|
||||
offset = Stencil._offsets [Zp][ss];
|
||||
local = Stencil._is_local[Zp][ss];
|
||||
perm = Stencil._permute[Zp][ss];
|
||||
ptype = Stencil._permute_type[Zp];
|
||||
offset = st._offsets [Zp][ss];
|
||||
local = st._is_local[Zp][ss];
|
||||
perm = st._permute[Zp][ss];
|
||||
ptype = st._permute_type[Zp];
|
||||
if ( local && perm ) {
|
||||
spProjZp(tmp,in._odata[offset]);
|
||||
permute(chi,tmp,ptype);
|
||||
} else if ( local ) {
|
||||
spProjZp(chi,in._odata[offset]);
|
||||
} else {
|
||||
chi=comm_buf[offset];
|
||||
chi=buf[offset];
|
||||
}
|
||||
mult(&Uchi(),&Umu._odata[ssu](Zp),&chi());
|
||||
mult(&Uchi(),&U._odata[ss](Zp),&chi());
|
||||
accumReconZp(result,Uchi);
|
||||
|
||||
// Tp
|
||||
offset = Stencil._offsets [Tp][ss];
|
||||
local = Stencil._is_local[Tp][ss];
|
||||
perm = Stencil._permute[Tp][ss];
|
||||
ptype = Stencil._permute_type[Tp];
|
||||
offset = st._offsets [Tp][ss];
|
||||
local = st._is_local[Tp][ss];
|
||||
perm = st._permute[Tp][ss];
|
||||
ptype = st._permute_type[Tp];
|
||||
if ( local && perm ) {
|
||||
spProjTp(tmp,in._odata[offset]);
|
||||
permute(chi,tmp,ptype);
|
||||
} else if ( local ) {
|
||||
spProjTp(chi,in._odata[offset]);
|
||||
} else {
|
||||
chi=comm_buf[offset];
|
||||
chi=buf[offset];
|
||||
}
|
||||
mult(&Uchi(),&Umu._odata[ssu](Tp),&chi());
|
||||
mult(&Uchi(),&U._odata[ss](Tp),&chi());
|
||||
accumReconTp(result,Uchi);
|
||||
|
||||
// Xm
|
||||
offset = Stencil._offsets [Xm][ss];
|
||||
local = Stencil._is_local[Xm][ss];
|
||||
perm = Stencil._permute[Xm][ss];
|
||||
ptype = Stencil._permute_type[Xm];
|
||||
offset = st._offsets [Xm][ss];
|
||||
local = st._is_local[Xm][ss];
|
||||
perm = st._permute[Xm][ss];
|
||||
ptype = st._permute_type[Xm];
|
||||
|
||||
if ( local && perm )
|
||||
{
|
||||
@ -227,17 +247,17 @@ void WilsonMatrix::DhopSite(int ss,const LatticeFermion &in, LatticeFermion &out
|
||||
} else if ( local ) {
|
||||
spProjXm(chi,in._odata[offset]);
|
||||
} else {
|
||||
chi=comm_buf[offset];
|
||||
chi=buf[offset];
|
||||
}
|
||||
mult(&Uchi(),&Umu._odata[ssu](Xm),&chi());
|
||||
mult(&Uchi(),&U._odata[ss](Xm),&chi());
|
||||
accumReconXm(result,Uchi);
|
||||
|
||||
|
||||
// Ym
|
||||
offset = Stencil._offsets [Ym][ss];
|
||||
local = Stencil._is_local[Ym][ss];
|
||||
perm = Stencil._permute[Ym][ss];
|
||||
ptype = Stencil._permute_type[Ym];
|
||||
offset = st._offsets [Ym][ss];
|
||||
local = st._is_local[Ym][ss];
|
||||
perm = st._permute[Ym][ss];
|
||||
ptype = st._permute_type[Ym];
|
||||
|
||||
if ( local && perm ) {
|
||||
spProjYm(tmp,in._odata[offset]);
|
||||
@ -245,46 +265,49 @@ void WilsonMatrix::DhopSite(int ss,const LatticeFermion &in, LatticeFermion &out
|
||||
} else if ( local ) {
|
||||
spProjYm(chi,in._odata[offset]);
|
||||
} else {
|
||||
chi=comm_buf[offset];
|
||||
chi=buf[offset];
|
||||
}
|
||||
mult(&Uchi(),&Umu._odata[ssu](Ym),&chi());
|
||||
mult(&Uchi(),&U._odata[ss](Ym),&chi());
|
||||
accumReconYm(result,Uchi);
|
||||
|
||||
// Zm
|
||||
offset = Stencil._offsets [Zm][ss];
|
||||
local = Stencil._is_local[Zm][ss];
|
||||
perm = Stencil._permute[Zm][ss];
|
||||
ptype = Stencil._permute_type[Zm];
|
||||
offset = st._offsets [Zm][ss];
|
||||
local = st._is_local[Zm][ss];
|
||||
perm = st._permute[Zm][ss];
|
||||
ptype = st._permute_type[Zm];
|
||||
if ( local && perm ) {
|
||||
spProjZm(tmp,in._odata[offset]);
|
||||
permute(chi,tmp,ptype);
|
||||
} else if ( local ) {
|
||||
spProjZm(chi,in._odata[offset]);
|
||||
} else {
|
||||
chi=comm_buf[offset];
|
||||
chi=buf[offset];
|
||||
}
|
||||
mult(&Uchi(),&Umu._odata[ssu](Zm),&chi());
|
||||
mult(&Uchi(),&U._odata[ss](Zm),&chi());
|
||||
accumReconZm(result,Uchi);
|
||||
|
||||
// Tm
|
||||
offset = Stencil._offsets [Tm][ss];
|
||||
local = Stencil._is_local[Tm][ss];
|
||||
perm = Stencil._permute[Tm][ss];
|
||||
ptype = Stencil._permute_type[Tm];
|
||||
offset = st._offsets [Tm][ss];
|
||||
local = st._is_local[Tm][ss];
|
||||
perm = st._permute[Tm][ss];
|
||||
ptype = st._permute_type[Tm];
|
||||
if ( local && perm ) {
|
||||
spProjTm(tmp,in._odata[offset]);
|
||||
permute(chi,tmp,ptype);
|
||||
} else if ( local ) {
|
||||
spProjTm(chi,in._odata[offset]);
|
||||
} else {
|
||||
chi=comm_buf[offset];
|
||||
chi=buf[offset];
|
||||
}
|
||||
mult(&Uchi(),&Umu._odata[ssu](Tm),&chi());
|
||||
mult(&Uchi(),&U._odata[ss](Tm),&chi());
|
||||
accumReconTm(result,Uchi);
|
||||
|
||||
vstream(out._odata[ss],result);
|
||||
}
|
||||
void WilsonMatrix::DhopSiteDag(int ss,const LatticeFermion &in, LatticeFermion &out)
|
||||
|
||||
void WilsonMatrix::DhopSiteDag(CartesianStencil &st,LatticeDoubledGaugeField &U,
|
||||
std::vector<vHalfSpinColourVector,alignedAllocator<vHalfSpinColourVector> > &buf,
|
||||
int ss,const LatticeFermion &in, LatticeFermion &out)
|
||||
{
|
||||
vHalfSpinColourVector tmp;
|
||||
vHalfSpinColourVector chi;
|
||||
@ -292,78 +315,76 @@ void WilsonMatrix::DhopSiteDag(int ss,const LatticeFermion &in, LatticeFermion &
|
||||
vHalfSpinColourVector Uchi;
|
||||
int offset,local,perm, ptype;
|
||||
|
||||
int ssu= ss;
|
||||
|
||||
// Xp
|
||||
offset = Stencil._offsets [Xm][ss];
|
||||
local = Stencil._is_local[Xm][ss];
|
||||
perm = Stencil._permute[Xm][ss];
|
||||
offset = st._offsets [Xm][ss];
|
||||
local = st._is_local[Xm][ss];
|
||||
perm = st._permute[Xm][ss];
|
||||
|
||||
ptype = Stencil._permute_type[Xm];
|
||||
ptype = st._permute_type[Xm];
|
||||
if ( local && perm ) {
|
||||
spProjXp(tmp,in._odata[offset]);
|
||||
permute(chi,tmp,ptype);
|
||||
} else if ( local ) {
|
||||
spProjXp(chi,in._odata[offset]);
|
||||
} else {
|
||||
chi=comm_buf[offset];
|
||||
chi=buf[offset];
|
||||
}
|
||||
mult(&Uchi(),&Umu._odata[ssu](Xm),&chi());
|
||||
mult(&Uchi(),&U._odata[ss](Xm),&chi());
|
||||
spReconXp(result,Uchi);
|
||||
|
||||
// Yp
|
||||
offset = Stencil._offsets [Ym][ss];
|
||||
local = Stencil._is_local[Ym][ss];
|
||||
perm = Stencil._permute[Ym][ss];
|
||||
ptype = Stencil._permute_type[Ym];
|
||||
offset = st._offsets [Ym][ss];
|
||||
local = st._is_local[Ym][ss];
|
||||
perm = st._permute[Ym][ss];
|
||||
ptype = st._permute_type[Ym];
|
||||
if ( local && perm ) {
|
||||
spProjYp(tmp,in._odata[offset]);
|
||||
permute(chi,tmp,ptype);
|
||||
} else if ( local ) {
|
||||
spProjYp(chi,in._odata[offset]);
|
||||
} else {
|
||||
chi=comm_buf[offset];
|
||||
chi=buf[offset];
|
||||
}
|
||||
mult(&Uchi(),&Umu._odata[ssu](Ym),&chi());
|
||||
mult(&Uchi(),&U._odata[ss](Ym),&chi());
|
||||
accumReconYp(result,Uchi);
|
||||
|
||||
// Zp
|
||||
offset = Stencil._offsets [Zm][ss];
|
||||
local = Stencil._is_local[Zm][ss];
|
||||
perm = Stencil._permute[Zm][ss];
|
||||
ptype = Stencil._permute_type[Zm];
|
||||
offset = st._offsets [Zm][ss];
|
||||
local = st._is_local[Zm][ss];
|
||||
perm = st._permute[Zm][ss];
|
||||
ptype = st._permute_type[Zm];
|
||||
if ( local && perm ) {
|
||||
spProjZp(tmp,in._odata[offset]);
|
||||
permute(chi,tmp,ptype);
|
||||
} else if ( local ) {
|
||||
spProjZp(chi,in._odata[offset]);
|
||||
} else {
|
||||
chi=comm_buf[offset];
|
||||
chi=buf[offset];
|
||||
}
|
||||
mult(&Uchi(),&Umu._odata[ssu](Zm),&chi());
|
||||
mult(&Uchi(),&U._odata[ss](Zm),&chi());
|
||||
accumReconZp(result,Uchi);
|
||||
|
||||
// Tp
|
||||
offset = Stencil._offsets [Tm][ss];
|
||||
local = Stencil._is_local[Tm][ss];
|
||||
perm = Stencil._permute[Tm][ss];
|
||||
ptype = Stencil._permute_type[Tm];
|
||||
offset = st._offsets [Tm][ss];
|
||||
local = st._is_local[Tm][ss];
|
||||
perm = st._permute[Tm][ss];
|
||||
ptype = st._permute_type[Tm];
|
||||
if ( local && perm ) {
|
||||
spProjTp(tmp,in._odata[offset]);
|
||||
permute(chi,tmp,ptype);
|
||||
} else if ( local ) {
|
||||
spProjTp(chi,in._odata[offset]);
|
||||
} else {
|
||||
chi=comm_buf[offset];
|
||||
chi=buf[offset];
|
||||
}
|
||||
mult(&Uchi(),&Umu._odata[ssu](Tm),&chi());
|
||||
mult(&Uchi(),&U._odata[ss](Tm),&chi());
|
||||
accumReconTp(result,Uchi);
|
||||
|
||||
// Xm
|
||||
offset = Stencil._offsets [Xp][ss];
|
||||
local = Stencil._is_local[Xp][ss];
|
||||
perm = Stencil._permute[Xp][ss];
|
||||
ptype = Stencil._permute_type[Xp];
|
||||
offset = st._offsets [Xp][ss];
|
||||
local = st._is_local[Xp][ss];
|
||||
perm = st._permute[Xp][ss];
|
||||
ptype = st._permute_type[Xp];
|
||||
|
||||
if ( local && perm )
|
||||
{
|
||||
@ -372,17 +393,16 @@ void WilsonMatrix::DhopSiteDag(int ss,const LatticeFermion &in, LatticeFermion &
|
||||
} else if ( local ) {
|
||||
spProjXm(chi,in._odata[offset]);
|
||||
} else {
|
||||
chi=comm_buf[offset];
|
||||
chi=buf[offset];
|
||||
}
|
||||
mult(&Uchi(),&Umu._odata[ssu](Xp),&chi());
|
||||
mult(&Uchi(),&U._odata[ss](Xp),&chi());
|
||||
accumReconXm(result,Uchi);
|
||||
|
||||
|
||||
// Ym
|
||||
offset = Stencil._offsets [Yp][ss];
|
||||
local = Stencil._is_local[Yp][ss];
|
||||
perm = Stencil._permute[Yp][ss];
|
||||
ptype = Stencil._permute_type[Yp];
|
||||
offset = st._offsets [Yp][ss];
|
||||
local = st._is_local[Yp][ss];
|
||||
perm = st._permute[Yp][ss];
|
||||
ptype = st._permute_type[Yp];
|
||||
|
||||
if ( local && perm ) {
|
||||
spProjYm(tmp,in._odata[offset]);
|
||||
@ -390,69 +410,97 @@ void WilsonMatrix::DhopSiteDag(int ss,const LatticeFermion &in, LatticeFermion &
|
||||
} else if ( local ) {
|
||||
spProjYm(chi,in._odata[offset]);
|
||||
} else {
|
||||
chi=comm_buf[offset];
|
||||
chi=buf[offset];
|
||||
}
|
||||
mult(&Uchi(),&Umu._odata[ssu](Yp),&chi());
|
||||
mult(&Uchi(),&U._odata[ss](Yp),&chi());
|
||||
accumReconYm(result,Uchi);
|
||||
|
||||
// Zm
|
||||
offset = Stencil._offsets [Zp][ss];
|
||||
local = Stencil._is_local[Zp][ss];
|
||||
perm = Stencil._permute[Zp][ss];
|
||||
ptype = Stencil._permute_type[Zp];
|
||||
offset = st._offsets [Zp][ss];
|
||||
local = st._is_local[Zp][ss];
|
||||
perm = st._permute[Zp][ss];
|
||||
ptype = st._permute_type[Zp];
|
||||
if ( local && perm ) {
|
||||
spProjZm(tmp,in._odata[offset]);
|
||||
permute(chi,tmp,ptype);
|
||||
} else if ( local ) {
|
||||
spProjZm(chi,in._odata[offset]);
|
||||
} else {
|
||||
chi=comm_buf[offset];
|
||||
chi=buf[offset];
|
||||
}
|
||||
mult(&Uchi(),&Umu._odata[ssu](Zp),&chi());
|
||||
mult(&Uchi(),&U._odata[ss](Zp),&chi());
|
||||
accumReconZm(result,Uchi);
|
||||
|
||||
// Tm
|
||||
offset = Stencil._offsets [Tp][ss];
|
||||
local = Stencil._is_local[Tp][ss];
|
||||
perm = Stencil._permute[Tp][ss];
|
||||
ptype = Stencil._permute_type[Tp];
|
||||
offset = st._offsets [Tp][ss];
|
||||
local = st._is_local[Tp][ss];
|
||||
perm = st._permute[Tp][ss];
|
||||
ptype = st._permute_type[Tp];
|
||||
if ( local && perm ) {
|
||||
spProjTm(tmp,in._odata[offset]);
|
||||
permute(chi,tmp,ptype);
|
||||
} else if ( local ) {
|
||||
spProjTm(chi,in._odata[offset]);
|
||||
} else {
|
||||
chi=comm_buf[offset];
|
||||
chi=buf[offset];
|
||||
}
|
||||
mult(&Uchi(),&Umu._odata[ssu](Tp),&chi());
|
||||
mult(&Uchi(),&U._odata[ss](Tp),&chi());
|
||||
accumReconTm(result,Uchi);
|
||||
|
||||
vstream(out._odata[ss],result);
|
||||
}
|
||||
|
||||
void WilsonMatrix::Dhop(const LatticeFermion &in, LatticeFermion &out,int dag)
|
||||
void WilsonMatrix::DhopInternal(CartesianStencil & st,LatticeDoubledGaugeField & U,
|
||||
const LatticeFermion &in, LatticeFermion &out,int dag)
|
||||
{
|
||||
assert((dag==0) ||(dag==1));
|
||||
|
||||
assert((dag==DaggerNo) ||(dag==DaggerYes));
|
||||
WilsonCompressor compressor(dag);
|
||||
Stencil.HaloExchange<vSpinColourVector,vHalfSpinColourVector,WilsonCompressor>(in,comm_buf,compressor);
|
||||
|
||||
if ( dag ) {
|
||||
st.HaloExchange<vSpinColourVector,vHalfSpinColourVector,WilsonCompressor>(in,comm_buf,compressor);
|
||||
|
||||
if ( dag == DaggerYes ) {
|
||||
PARALLEL_FOR_LOOP
|
||||
for(int sss=0;sss<grid->oSites();sss++){
|
||||
DhopSiteDag(sss,in,out);
|
||||
for(int sss=0;sss<in._grid->oSites();sss++){
|
||||
DhopSiteDag(st,U,comm_buf,sss,in,out);
|
||||
}
|
||||
} else {
|
||||
PARALLEL_FOR_LOOP
|
||||
for(int sss=0;sss<grid->oSites();sss++){
|
||||
DhopSite(sss,in,out);
|
||||
for(int sss=0;sss<in._grid->oSites();sss++){
|
||||
DhopSite(st,U,comm_buf,sss,in,out);
|
||||
}
|
||||
}
|
||||
}
|
||||
void WilsonMatrix::DhopOE(const LatticeFermion &in, LatticeFermion &out,int dag)
|
||||
{
|
||||
conformable(in._grid,_cbgrid); // verifies half grid
|
||||
conformable(in._grid,out._grid); // drops the cb check
|
||||
|
||||
assert(in.checkerboard==Even);
|
||||
out.checkerboard = Odd;
|
||||
|
||||
DhopInternal(StencilEven,UmuOdd,in,out,dag);
|
||||
}
|
||||
void WilsonMatrix::DhopEO(const LatticeFermion &in, LatticeFermion &out,int dag)
|
||||
{
|
||||
conformable(in._grid,_cbgrid); // verifies half grid
|
||||
conformable(in._grid,out._grid); // drops the cb check
|
||||
|
||||
assert(in.checkerboard==Odd);
|
||||
out.checkerboard = Even;
|
||||
|
||||
DhopInternal(StencilOdd,UmuEven,in,out,dag);
|
||||
}
|
||||
void WilsonMatrix::Dhop(const LatticeFermion &in, LatticeFermion &out,int dag)
|
||||
{
|
||||
conformable(in._grid,_grid); // verifies full grid
|
||||
conformable(in._grid,out._grid);
|
||||
|
||||
out.checkerboard = in.checkerboard;
|
||||
|
||||
DhopInternal(Stencil,Umu,in,out,dag);
|
||||
}
|
||||
|
||||
}}
|
||||
|
||||
|
||||
|
||||
|
@ -11,14 +11,20 @@ namespace Grid {
|
||||
//NB r=1;
|
||||
public:
|
||||
double mass;
|
||||
GridBase *grid;
|
||||
// GridBase * grid; // Inherited
|
||||
// GridBase * cbgrid;
|
||||
|
||||
// Copy of the gauge field
|
||||
LatticeDoubledGaugeField Umu;
|
||||
//Defines the stencils for even and odd
|
||||
CartesianStencil Stencil;
|
||||
CartesianStencil StencilEven;
|
||||
CartesianStencil StencilOdd;
|
||||
|
||||
//Defines the stencil
|
||||
CartesianStencil Stencil;
|
||||
static const int npoint=9;
|
||||
// Copy of the gauge field , with even and odd subsets
|
||||
LatticeDoubledGaugeField Umu;
|
||||
LatticeDoubledGaugeField UmuEven;
|
||||
LatticeDoubledGaugeField UmuOdd;
|
||||
|
||||
static const int npoint=8;
|
||||
static const std::vector<int> directions ;
|
||||
static const std::vector<int> displacements;
|
||||
static const int Xp,Xm,Yp,Ym,Zp,Zm,Tp,Tm;
|
||||
@ -27,7 +33,7 @@ namespace Grid {
|
||||
std::vector<vHalfSpinColourVector,alignedAllocator<vHalfSpinColourVector> > comm_buf;
|
||||
|
||||
// Constructor
|
||||
WilsonMatrix(LatticeGaugeField &Umu,double mass);
|
||||
WilsonMatrix(LatticeGaugeField &_Umu,GridCartesian &Fgrid,GridRedBlackCartesian &Hgrid,double _mass);
|
||||
|
||||
// DoubleStore
|
||||
void DoubleStore(LatticeDoubledGaugeField &Uds,const LatticeGaugeField &Umu);
|
||||
@ -45,9 +51,19 @@ namespace Grid {
|
||||
virtual void MooeeInvDag (const LatticeFermion &in, LatticeFermion &out);
|
||||
|
||||
// non-hermitian hopping term; half cb or both
|
||||
void Dhop(const LatticeFermion &in, LatticeFermion &out,int dag);
|
||||
void DhopSite (int ss,const LatticeFermion &in, LatticeFermion &out);
|
||||
void DhopSiteDag(int ss,const LatticeFermion &in, LatticeFermion &out);
|
||||
void Dhop (const LatticeFermion &in, LatticeFermion &out,int dag);
|
||||
void DhopOE(const LatticeFermion &in, LatticeFermion &out,int dag);
|
||||
void DhopEO(const LatticeFermion &in, LatticeFermion &out,int dag);
|
||||
void DhopInternal(CartesianStencil & st,LatticeDoubledGaugeField &U,
|
||||
const LatticeFermion &in, LatticeFermion &out,int dag);
|
||||
// These ones will need to be package intelligently. WilsonType base class
|
||||
// for use by DWF etc..
|
||||
void DhopSite(CartesianStencil &st,LatticeDoubledGaugeField &U,
|
||||
std::vector<vHalfSpinColourVector,alignedAllocator<vHalfSpinColourVector> > &buf,
|
||||
int ss,const LatticeFermion &in, LatticeFermion &out);
|
||||
void DhopSiteDag(CartesianStencil &st,LatticeDoubledGaugeField &U,
|
||||
std::vector<vHalfSpinColourVector,alignedAllocator<vHalfSpinColourVector> > &buf,
|
||||
int ss,const LatticeFermion &in, LatticeFermion &out);
|
||||
|
||||
typedef iScalar<iMatrix<vComplex, Nc> > matrix;
|
||||
|
||||
|
@ -283,6 +283,7 @@ namespace Optimization {
|
||||
//////////////////////////////////////////////////////////////////////////////////////
|
||||
// Here assign types
|
||||
namespace Grid {
|
||||
|
||||
typedef __m128 SIMD_Ftype; // Single precision type
|
||||
typedef __m128d SIMD_Dtype; // Double precision type
|
||||
typedef __m128i SIMD_Itype; // Integer type
|
||||
|
@ -2,7 +2,7 @@
|
||||
/*! @file Grid_vector_types.h
|
||||
@brief Defines templated class Grid_simd to deal with inner vector types
|
||||
*/
|
||||
// Time-stamp: <2015-05-22 17:08:19 neo>
|
||||
// Time-stamp: <2015-05-26 12:05:39 neo>
|
||||
//---------------------------------------------------------------------------
|
||||
#ifndef GRID_VECTOR_TYPES
|
||||
#define GRID_VECTOR_TYPES
|
||||
@ -21,31 +21,24 @@
|
||||
namespace Grid {
|
||||
|
||||
// To take the floating point type of real/complex type
|
||||
template <typename T>
|
||||
struct RealPart {
|
||||
typedef T type;
|
||||
};
|
||||
template <typename T>
|
||||
struct RealPart< std::complex<T> >{
|
||||
template <typename T> struct RealPart {
|
||||
typedef T type;
|
||||
};
|
||||
template <typename T> struct RealPart< std::complex<T> >{
|
||||
typedef T type;
|
||||
};
|
||||
|
||||
// type alias used to simplify the syntax of std::enable_if
|
||||
template <typename T> using Invoke =
|
||||
typename T::type;
|
||||
template <typename Condition, typename ReturnType> using EnableIf =
|
||||
Invoke<std::enable_if<Condition::value, ReturnType>>;
|
||||
template <typename Condition, typename ReturnType> using NotEnableIf =
|
||||
Invoke<std::enable_if<!Condition::value, ReturnType>>;
|
||||
template <typename T> using Invoke = typename T::type;
|
||||
template <typename Condition, typename ReturnType> using EnableIf = Invoke<std::enable_if<Condition::value, ReturnType>>;
|
||||
template <typename Condition, typename ReturnType> using NotEnableIf= Invoke<std::enable_if<!Condition::value, ReturnType>>;
|
||||
|
||||
|
||||
|
||||
////////////////////////////////////////////////////////
|
||||
// Check for complexity with type traits
|
||||
template <typename T>
|
||||
struct is_complex : std::false_type {};
|
||||
template < typename T >
|
||||
struct is_complex< std::complex<T> >: std::true_type {};
|
||||
template <typename T> struct is_complex : std::false_type {};
|
||||
template < typename T > struct is_complex< std::complex<T> >: std::true_type {};
|
||||
////////////////////////////////////////////////////////
|
||||
// Define the operation templates functors
|
||||
// general forms to allow for vsplat syntax
|
||||
@ -102,8 +95,6 @@ namespace Grid {
|
||||
Grid_simd(const Real a){
|
||||
vsplat(*this,Scalar_type(a));
|
||||
};
|
||||
|
||||
|
||||
|
||||
///////////////////////////////////////////////
|
||||
// mac, mult, sub, add, adj
|
||||
@ -145,10 +136,6 @@ namespace Grid {
|
||||
friend inline void vtrue (Grid_simd &ret){vsplat(ret,0xFFFFFFFF);}
|
||||
template < class S = Scalar_type, EnableIf<std::is_integral < S >, int> = 0 >
|
||||
friend inline void vfalse(Grid_simd &ret){vsplat(ret,0);}
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
////////////////////////////////////
|
||||
// Arithmetic operator overloads +,-,*
|
||||
@ -184,7 +171,6 @@ namespace Grid {
|
||||
ret.v = binary<Vector_type>(a.v,b.v, MultSIMD());
|
||||
return ret;
|
||||
};
|
||||
|
||||
|
||||
////////////////////////////////////////////////////////////////////////
|
||||
// FIXME: gonna remove these load/store, get, set, prefetch
|
||||
|
@ -345,20 +345,30 @@ friend inline void vstore(const vComplexD &ret, ComplexD *a){
|
||||
// REDUCE FIXME must be a cleaner implementation
|
||||
friend inline ComplexD Reduce(const vComplexD & in)
|
||||
{
|
||||
vComplexD v1,v2;
|
||||
union {
|
||||
zvec v;
|
||||
double f[sizeof(zvec)/sizeof(double)];
|
||||
} conv;
|
||||
|
||||
#ifdef SSE4
|
||||
return ComplexD(in.v[0],in.v[1]);
|
||||
v1=in;
|
||||
#endif
|
||||
#if defined(AVX1) || defined (AVX2)
|
||||
vComplexD v1;
|
||||
permute(v1,in,0); // sse 128; paired complex single
|
||||
v1=v1+in;
|
||||
return ComplexD(v1.v[0],v1.v[1]);
|
||||
#endif
|
||||
#ifdef AVX512
|
||||
return ComplexD(_mm512_mask_reduce_add_pd(0x55, in.v),_mm512_mask_reduce_add_pd(0xAA, in.v));
|
||||
permute(v1,in,0); // sse 128; paired complex single
|
||||
v1=v1+in;
|
||||
permute(v2,v1,1); // avx 256; quad complex single
|
||||
v1=v1+v2;
|
||||
#endif
|
||||
#ifdef QPX
|
||||
#error
|
||||
#endif
|
||||
conv.v = v1.v;
|
||||
return ComplexD(conv.f[0],conv.f[1]);
|
||||
}
|
||||
|
||||
// Unary negation
|
||||
|
@ -234,26 +234,34 @@ namespace Grid {
|
||||
}
|
||||
friend inline ComplexF Reduce(const vComplexF & in)
|
||||
{
|
||||
vComplexF v1,v2;
|
||||
union {
|
||||
cvec v;
|
||||
float f[sizeof(cvec)/sizeof(float)];
|
||||
} conv;
|
||||
#ifdef SSE4
|
||||
vComplexF v1;
|
||||
permute(v1,in,0); // sse 128; paired complex single
|
||||
v1=v1+in;
|
||||
return ComplexF(v1.v[0],v1.v[1]);
|
||||
#endif
|
||||
#if defined(AVX1) || defined (AVX2)
|
||||
vComplexF v1,v2;
|
||||
permute(v1,in,0); // sse 128; paired complex single
|
||||
v1=v1+in;
|
||||
permute(v2,v1,1); // avx 256; quad complex single
|
||||
v1=v1+v2;
|
||||
return ComplexF(v1.v[0],v1.v[1]);
|
||||
#endif
|
||||
#ifdef AVX512
|
||||
return ComplexF(_mm512_mask_reduce_add_ps(0x5555, in.v),_mm512_mask_reduce_add_ps(0xAAAA, in.v));
|
||||
permute(v1,in,0); // avx512 octo-complex single
|
||||
v1=v1+in;
|
||||
permute(v2,v1,1);
|
||||
v1=v1+v2;
|
||||
permute(v2,v1,2);
|
||||
v1=v1+v2;
|
||||
#endif
|
||||
#ifdef QPX
|
||||
#error
|
||||
#endif
|
||||
conv.v = v1.v;
|
||||
return ComplexF(conv.f[0],conv.f[1]);
|
||||
}
|
||||
|
||||
friend inline vComplexF operator * (const ComplexF &a, vComplexF b){
|
||||
|
@ -210,25 +210,33 @@ namespace Grid {
|
||||
|
||||
friend inline RealD Reduce(const vRealD & in)
|
||||
{
|
||||
vRealD v1,v2;
|
||||
union {
|
||||
dvec v;
|
||||
double f[sizeof(dvec)/sizeof(double)];
|
||||
} conv;
|
||||
#ifdef SSE4
|
||||
vRealD v1;
|
||||
permute(v1,in,0); // sse 128; paired real double
|
||||
v1=v1+in;
|
||||
return RealD(v1.v[0]);
|
||||
#endif
|
||||
#if defined(AVX1) || defined (AVX2)
|
||||
vRealD v1,v2;
|
||||
permute(v1,in,0); // avx 256; quad double
|
||||
v1=v1+in;
|
||||
permute(v2,v1,1);
|
||||
v1=v1+v2;
|
||||
return v1.v[0];
|
||||
#endif
|
||||
#ifdef AVX512
|
||||
return _mm512_reduce_add_pd(in.v);
|
||||
permute(v1,in,0); // avx 512; octo-double
|
||||
v1=v1+in;
|
||||
permute(v2,v1,1);
|
||||
v1=v1+v2;
|
||||
permute(v2,v1,2);
|
||||
v1=v1+v2;
|
||||
#endif
|
||||
#ifdef QPX
|
||||
#endif
|
||||
conv.v=v1.v;
|
||||
return conv.f[0];
|
||||
}
|
||||
|
||||
// *=,+=,-= operators
|
||||
|
@ -243,29 +243,39 @@ friend inline void vstore(const vRealF &ret, float *a){
|
||||
}
|
||||
friend inline RealF Reduce(const vRealF & in)
|
||||
{
|
||||
#ifdef SSE4
|
||||
vRealF v1,v2;
|
||||
union {
|
||||
fvec v;
|
||||
float f[sizeof(fvec)/sizeof(double)];
|
||||
} conv;
|
||||
#ifdef SSE4
|
||||
permute(v1,in,0); // sse 128; quad single
|
||||
v1=v1+in;
|
||||
permute(v2,v1,1);
|
||||
v1=v1+v2;
|
||||
return v1.v[0];
|
||||
#endif
|
||||
#if defined(AVX1) || defined (AVX2)
|
||||
vRealF v1,v2;
|
||||
permute(v1,in,0); // avx 256; octo-double
|
||||
v1=v1+in;
|
||||
permute(v2,v1,1);
|
||||
v1=v1+v2;
|
||||
permute(v2,v1,2);
|
||||
v1=v1+v2;
|
||||
return v1.v[0];
|
||||
#endif
|
||||
#ifdef AVX512
|
||||
return _mm512_reduce_add_ps(in.v);
|
||||
permute(v1,in,0); // avx 256; octo-double
|
||||
v1=v1+in;
|
||||
permute(v2,v1,1);
|
||||
v1=v1+v2;
|
||||
permute(v2,v1,2);
|
||||
v1=v1+v2;
|
||||
permute(v2,v1,3);
|
||||
v1=v1+v2;
|
||||
#endif
|
||||
#ifdef QPX
|
||||
#endif
|
||||
conv.v=v1.v;
|
||||
return conv.f[0];
|
||||
}
|
||||
|
||||
// *=,+=,-= operators
|
||||
|
@ -1,6 +1,6 @@
|
||||
#!/bin/bash
|
||||
|
||||
DIRS="clang-avx clang-avx-openmp clang-avx-openmp-mpi clang-avx-mpi clang-avx2 clang-avx2-openmp clang-avx2-openmp-mpi clang-avx2-mpi icpc-avx icpc-avx2 icpc-avx512 g++-sse4 g++-avx clang-sse"
|
||||
DIRS="clang-avx clang-avx-openmp clang-avx-openmp-mpi clang-avx-mpi clang-avx2 clang-avx2-openmp clang-avx2-openmp-mpi clang-avx2-mpi icpc-avx icpc-avx2 icpc-avx512 g++-sse4 g++-avx clang-sse icpc-avx-openmp-mpi icpc-avx-openmp"
|
||||
|
||||
for D in $DIRS
|
||||
do
|
||||
|
@ -25,6 +25,12 @@ g++-avx)
|
||||
icpc-avx)
|
||||
CXX=icpc ../../configure --enable-simd=AVX CXXFLAGS="-mavx -O3 -std=c++11" LIBS="-lgmp -lmpfr" --enable-comms=none
|
||||
;;
|
||||
icpc-avx-openmp-mpi)
|
||||
CXX=icpc ../../configure --enable-simd=AVX CXXFLAGS="-mavx -fopenmp -O3 -I/opt/local/include/openmpi-mp/ -std=c++11" LDFLAGS=-L/opt/local/lib/openmpi-mp/ LIBS="-lmpi -lmpi_cxx -fopenmp -lgmp -lmpfr" --enable-comms=mpi
|
||||
;;
|
||||
icpc-avx-openmp)
|
||||
CXX=icpc ../../configure --enable-simd=AVX CXXFLAGS="-mavx -fopenmp -O3 -std=c++11" LIBS="-fopenmp -lgmp -lmpfr" --enable-comms=mpi
|
||||
;;
|
||||
icpc-avx2)
|
||||
CXX=icpc ../../configure --enable-simd=AVX2 CXXFLAGS="-mavx2 -mfma -O3 -std=c++11" LIBS="-lgmp -lmpfr" --enable-comms=none
|
||||
;;
|
||||
|
@ -1,8 +1,9 @@
|
||||
#include "Grid.h"
|
||||
|
||||
//DEBUG
|
||||
#ifdef SSE4
|
||||
#include "simd/Grid_vector_types.h"
|
||||
|
||||
#endif
|
||||
|
||||
using namespace std;
|
||||
using namespace Grid;
|
||||
|
@ -13,7 +13,7 @@ int main (int argc, char ** argv)
|
||||
|
||||
std::vector<int> simd_layout = GridDefaultSimd(4,vComplexF::Nsimd());
|
||||
std::vector<int> mpi_layout = GridDefaultMpi();
|
||||
std::vector<int> latt_size ({16,16,16,32});
|
||||
std::vector<int> latt_size = GridDefaultLatt();
|
||||
std::vector<int> clatt_size ({4,4,4,8});
|
||||
int orthodir=3;
|
||||
int orthosz =latt_size[orthodir];
|
||||
@ -44,13 +44,15 @@ int main (int argc, char ** argv)
|
||||
// (1+2+3)=6 = N(N-1)/2 terms
|
||||
LatticeComplex Plaq(&Fine);
|
||||
LatticeComplex cPlaq(&Coarse);
|
||||
|
||||
Plaq = zero;
|
||||
#if 1
|
||||
for(int mu=1;mu<Nd;mu++){
|
||||
for(int nu=0;nu<mu;nu++){
|
||||
Plaq = Plaq + trace(U[mu]*Cshift(U[nu],mu,1)*adj(Cshift(U[mu],nu,1))*adj(U[nu]));
|
||||
}
|
||||
}
|
||||
|
||||
#endif
|
||||
double vol = Fine.gSites();
|
||||
Complex PlaqScale(1.0/vol/6.0/3.0);
|
||||
std::cout <<"PlaqScale" << PlaqScale<<std::endl;
|
||||
|
0
tests/InvSqrt.gnu
Normal file
0
tests/InvSqrt.gnu
Normal file
2
tests/Sqrt.gnu
Normal file
2
tests/Sqrt.gnu
Normal file
@ -0,0 +1,2 @@
|
||||
f(x) = 6.81384+(-2.34645e-06/(x+0.000228091))+(-1.51593e-05/(x+0.00112084))+(-6.89254e-05/(x+0.003496))+(-0.000288983/(x+0.00954309))+(-0.00119277/(x+0.024928))+(-0.0050183/(x+0.0646627))+(-0.0226449/(x+0.171576))+(-0.123767/(x+0.491792))+(-1.1705/(x+1.78667))+(-102.992/(x+18.4866));
|
||||
f(x) = 0.14676+(0.00952992/(x+5.40933e-05))+(0.0115952/(x+0.000559699))+(0.0161824/(x+0.00203338))+(0.0243252/(x+0.00582831))+(0.0379533/(x+0.0154649))+(0.060699/(x+0.0401156))+(0.100345/(x+0.104788))+(0.178335/(x+0.286042))+(0.381586/(x+0.892189))+(1.42625/(x+4.38422));
|
Loading…
Reference in New Issue
Block a user