diff --git a/Makefile.am b/Makefile.am index 5bb858eb..fc3f6a0a 100644 --- a/Makefile.am +++ b/Makefile.am @@ -1,11 +1,3 @@ # additional include paths necessary to compile the C++ library AM_CXXFLAGS = -I$(top_srcdir)/ -SUBDIRS = lib tests benchmarks docs - - -if DOXYGEN_DOC -directory = $(top_srcdir)/docs/ - -doxyfile: - (cd $(directory) && $(MAKE) $(AM_MAKEFLAGS) $@) || exit 1; -endif +SUBDIRS = lib tests benchmarks diff --git a/Makefile.in b/Makefile.in index 8c277dae..cc3284b3 100644 --- a/Makefile.in +++ b/Makefile.in @@ -304,8 +304,7 @@ top_srcdir = @top_srcdir@ # additional include paths necessary to compile the C++ library AM_CXXFLAGS = -I$(top_srcdir)/ -SUBDIRS = lib tests benchmarks docs -@DOXYGEN_DOC_TRUE@directory = $(top_srcdir)/docs/ +SUBDIRS = lib tests benchmarks all: all-recursive .SUFFIXES: @@ -762,9 +761,6 @@ uninstall-am: pdf-am ps ps-am tags tags-am uninstall uninstall-am -@DOXYGEN_DOC_TRUE@doxyfile: -@DOXYGEN_DOC_TRUE@ (cd $(directory) && $(MAKE) $(AM_MAKEFLAGS) $@) || exit 1; - # Tell versions [3.59,3.63) of GNU make to not export all variables. # Otherwise a system limit (for SysV at least) may be exceeded. .NOEXPORT: diff --git a/benchmarks/Grid_wilson.cc b/benchmarks/Grid_wilson.cc index 32255b3e..3b0d04bc 100644 --- a/benchmarks/Grid_wilson.cc +++ b/benchmarks/Grid_wilson.cc @@ -31,11 +31,9 @@ int main (int argc, char ** argv) std::cout << "Grid is setup to use "< seeds({1,2,3,4}); - GridParallelRNG pRNG(&Grid); - // std::vector seeds({1,2,3,4}); - // pRNG.SeedFixedIntegers(seeds); - pRNG.SeedRandomDevice(); + pRNG.SeedFixedIntegers(seeds); + // pRNG.SeedRandomDevice(); LatticeFermion src (&Grid); random(pRNG,src); LatticeFermion result(&Grid); result=zero; @@ -55,8 +53,10 @@ int main (int argc, char ** argv) Complex cone(1.0,0.0); for(int nn=0;nn(Umu,U[nn],nn); } @@ -85,7 +85,7 @@ int main (int argc, char ** argv) WilsonMatrix Dw(Umu,Grid,RBGrid,mass); std::cout << "Calling Dw"< >, publ LatticeTrinaryExpression(const std::pair > &arg): std::pair >(arg) {}; }; +void inline conformable(GridBase *lhs,GridBase *rhs) +{ + assert(lhs == rhs); +} + template class Lattice : public LatticeBase { @@ -60,7 +65,8 @@ public: typedef typename vobj::scalar_type scalar_type; typedef typename vobj::vector_type vector_type; typedef vobj vector_object; - + + //////////////////////////////////////////////////////////////////////////////// // Expression Template closure support //////////////////////////////////////////////////////////////////////////////// @@ -276,17 +282,15 @@ PARALLEL_FOR_LOOP } -#include +#include #define GRID_LATTICE_EXPRESSION_TEMPLATES #ifdef GRID_LATTICE_EXPRESSION_TEMPLATES #include #else #include #endif - #include - #include #include #include diff --git a/lib/lattice/Grid_lattice_conformable.h b/lib/lattice/Grid_lattice_conformable.h index faa8c7a7..a77e57af 100644 --- a/lib/lattice/Grid_lattice_conformable.h +++ b/lib/lattice/Grid_lattice_conformable.h @@ -3,16 +3,11 @@ namespace Grid { - template - void conformable(const Lattice &lhs,const Lattice &rhs) + template void conformable(const Lattice &lhs,const Lattice &rhs) { assert(lhs._grid == rhs._grid); assert(lhs.checkerboard == rhs.checkerboard); } - void inline conformable(const GridBase *lhs,GridBase *rhs) - { - assert(lhs == rhs); - } } #endif diff --git a/lib/qcd/Grid_qcd_dhop.cc b/lib/qcd/Grid_qcd_dhop.cc new file mode 100644 index 00000000..1e5dcd16 --- /dev/null +++ b/lib/qcd/Grid_qcd_dhop.cc @@ -0,0 +1,309 @@ +#include + +namespace Grid { +namespace QCD { + +void DiracOpt::DhopSite(CartesianStencil &st,LatticeDoubledGaugeField &U, + std::vector > &buf, + int ss,const LatticeFermion &in, LatticeFermion &out) +{ + vHalfSpinColourVector tmp; + vHalfSpinColourVector chi; + vSpinColourVector result; + vHalfSpinColourVector Uchi; + int offset,local,perm, ptype; + + //#define VERBOSE( A) if ( ss<10 ) { std::cout << "site " < > &buf, + int ss,const LatticeFermion &in, LatticeFermion &out) +{ + vHalfSpinColourVector tmp; + vHalfSpinColourVector chi; + vSpinColourVector result; + vHalfSpinColourVector Uchi; + int offset,local,perm, ptype; + + // Xp + offset = st._offsets [Xm][ss]; + local = st._is_local[Xm][ss]; + perm = st._permute[Xm][ss]; + + ptype = st._permute_type[Xm]; + if ( local && perm ) { + spProjXp(tmp,in._odata[offset]); + permute(chi,tmp,ptype); + } else if ( local ) { + spProjXp(chi,in._odata[offset]); + } else { + chi=buf[offset]; + } + mult(&Uchi(),&U._odata[ss](Xm),&chi()); + spReconXp(result,Uchi); + + // Yp + offset = st._offsets [Ym][ss]; + local = st._is_local[Ym][ss]; + perm = st._permute[Ym][ss]; + ptype = st._permute_type[Ym]; + if ( local && perm ) { + spProjYp(tmp,in._odata[offset]); + permute(chi,tmp,ptype); + } else if ( local ) { + spProjYp(chi,in._odata[offset]); + } else { + chi=buf[offset]; + } + mult(&Uchi(),&U._odata[ss](Ym),&chi()); + accumReconYp(result,Uchi); + + // Zp + offset = st._offsets [Zm][ss]; + local = st._is_local[Zm][ss]; + perm = st._permute[Zm][ss]; + ptype = st._permute_type[Zm]; + if ( local && perm ) { + spProjZp(tmp,in._odata[offset]); + permute(chi,tmp,ptype); + } else if ( local ) { + spProjZp(chi,in._odata[offset]); + } else { + chi=buf[offset]; + } + mult(&Uchi(),&U._odata[ss](Zm),&chi()); + accumReconZp(result,Uchi); + + // Tp + offset = st._offsets [Tm][ss]; + local = st._is_local[Tm][ss]; + perm = st._permute[Tm][ss]; + ptype = st._permute_type[Tm]; + if ( local && perm ) { + spProjTp(tmp,in._odata[offset]); + permute(chi,tmp,ptype); + } else if ( local ) { + spProjTp(chi,in._odata[offset]); + } else { + chi=buf[offset]; + } + mult(&Uchi(),&U._odata[ss](Tm),&chi()); + accumReconTp(result,Uchi); + + // Xm + offset = st._offsets [Xp][ss]; + local = st._is_local[Xp][ss]; + perm = st._permute[Xp][ss]; + ptype = st._permute_type[Xp]; + + if ( local && perm ) + { + spProjXm(tmp,in._odata[offset]); + permute(chi,tmp,ptype); + } else if ( local ) { + spProjXm(chi,in._odata[offset]); + } else { + chi=buf[offset]; + } + mult(&Uchi(),&U._odata[ss](Xp),&chi()); + accumReconXm(result,Uchi); + + // Ym + offset = st._offsets [Yp][ss]; + local = st._is_local[Yp][ss]; + perm = st._permute[Yp][ss]; + ptype = st._permute_type[Yp]; + + if ( local && perm ) { + spProjYm(tmp,in._odata[offset]); + permute(chi,tmp,ptype); + } else if ( local ) { + spProjYm(chi,in._odata[offset]); + } else { + chi=buf[offset]; + } + mult(&Uchi(),&U._odata[ss](Yp),&chi()); + accumReconYm(result,Uchi); + + // Zm + offset = st._offsets [Zp][ss]; + local = st._is_local[Zp][ss]; + perm = st._permute[Zp][ss]; + ptype = st._permute_type[Zp]; + if ( local && perm ) { + spProjZm(tmp,in._odata[offset]); + permute(chi,tmp,ptype); + } else if ( local ) { + spProjZm(chi,in._odata[offset]); + } else { + chi=buf[offset]; + } + mult(&Uchi(),&U._odata[ss](Zp),&chi()); + accumReconZm(result,Uchi); + + // Tm + offset = st._offsets [Tp][ss]; + local = st._is_local[Tp][ss]; + perm = st._permute[Tp][ss]; + ptype = st._permute_type[Tp]; + if ( local && perm ) { + spProjTm(tmp,in._odata[offset]); + permute(chi,tmp,ptype); + } else if ( local ) { + spProjTm(chi,in._odata[offset]); + } else { + chi=buf[offset]; + } + mult(&Uchi(),&U._odata[ss](Tp),&chi()); + accumReconTm(result,Uchi); + + vstream(out._odata[ss],result); +} +}} diff --git a/lib/qcd/Grid_qcd_dhop_hand.cc b/lib/qcd/Grid_qcd_dhop_hand.cc new file mode 100644 index 00000000..f8d464fb --- /dev/null +++ b/lib/qcd/Grid_qcd_dhop_hand.cc @@ -0,0 +1,769 @@ +#include + +#define REGISTER + +#define LOAD_CHIMU \ + const vSpinColourVector & ref (in._odata[offset]); \ + Chimu_00=ref()(0)(0);\ + Chimu_01=ref()(0)(1);\ + Chimu_02=ref()(0)(2);\ + Chimu_10=ref()(1)(0);\ + Chimu_11=ref()(1)(1);\ + Chimu_12=ref()(1)(2);\ + Chimu_20=ref()(2)(0);\ + Chimu_21=ref()(2)(1);\ + Chimu_22=ref()(2)(2);\ + Chimu_30=ref()(3)(0);\ + Chimu_31=ref()(3)(1);\ + Chimu_32=ref()(3)(2); + +#define LOAD_CHI\ + const vHalfSpinColourVector &ref(buf[offset]); \ + Chi_00 = ref()(0)(0);\ + Chi_01 = ref()(0)(1);\ + Chi_02 = ref()(0)(2);\ + Chi_10 = ref()(1)(0);\ + Chi_11 = ref()(1)(1);\ + Chi_12 = ref()(1)(2); + +#define MULT_2SPIN(A)\ + auto & ref(U._odata[ss](A)); \ + U_00 = ref()(0,0);\ + U_10 = ref()(1,0);\ + U_20 = ref()(2,0);\ + U_01 = ref()(0,1);\ + U_11 = ref()(1,1); \ + U_21 = ref()(2,1);\ + UChi_00 = U_00*Chi_00;\ + UChi_10 = U_00*Chi_10;\ + UChi_01 = U_10*Chi_00;\ + UChi_11 = U_10*Chi_10;\ + UChi_02 = U_20*Chi_00;\ + UChi_12 = U_20*Chi_10;\ + UChi_00+= U_01*Chi_01;\ + UChi_10+= U_01*Chi_11;\ + UChi_01+= U_11*Chi_01;\ + UChi_11+= U_11*Chi_11;\ + UChi_02+= U_21*Chi_01;\ + UChi_12+= U_21*Chi_11;\ + U_00 = ref()(0,2);\ + U_10 = ref()(1,2);\ + U_20 = ref()(2,2);\ + UChi_00+= U_00*Chi_02;\ + UChi_10+= U_00*Chi_12;\ + UChi_01+= U_10*Chi_02;\ + UChi_11+= U_10*Chi_12;\ + UChi_02+= U_20*Chi_02;\ + UChi_12+= U_20*Chi_12; + +#define PERMUTE\ + permute(Chi_00,Chi_00,ptype);\ + permute(Chi_01,Chi_01,ptype);\ + permute(Chi_02,Chi_02,ptype);\ + permute(Chi_10,Chi_10,ptype);\ + permute(Chi_11,Chi_11,ptype);\ + permute(Chi_12,Chi_12,ptype); + +// hspin(0)=fspin(0)+timesI(fspin(3)); +// hspin(1)=fspin(1)+timesI(fspin(2)); +#define XP_PROJ \ + Chi_00 = Chimu_00+timesI(Chimu_30);\ + Chi_01 = Chimu_01+timesI(Chimu_31);\ + Chi_02 = Chimu_02+timesI(Chimu_32);\ + Chi_10 = Chimu_10+timesI(Chimu_20);\ + Chi_11 = Chimu_11+timesI(Chimu_21);\ + Chi_12 = Chimu_12+timesI(Chimu_22); + +#define YP_PROJ \ + Chi_00 = Chimu_00-Chimu_30;\ + Chi_01 = Chimu_01-Chimu_31;\ + Chi_02 = Chimu_02-Chimu_32;\ + Chi_10 = Chimu_10+Chimu_20;\ + Chi_11 = Chimu_11+Chimu_21;\ + Chi_12 = Chimu_12+Chimu_22; + +#define ZP_PROJ \ + Chi_00 = Chimu_00+timesI(Chimu_20); \ + Chi_01 = Chimu_01+timesI(Chimu_21); \ + Chi_02 = Chimu_02+timesI(Chimu_22); \ + Chi_10 = Chimu_10-timesI(Chimu_30); \ + Chi_11 = Chimu_11-timesI(Chimu_31); \ + Chi_12 = Chimu_12-timesI(Chimu_32); + +#define TP_PROJ \ + Chi_00 = Chimu_00+Chimu_20; \ + Chi_01 = Chimu_01+Chimu_21; \ + Chi_02 = Chimu_02+Chimu_22; \ + Chi_10 = Chimu_10+Chimu_30; \ + Chi_11 = Chimu_11+Chimu_31; \ + Chi_12 = Chimu_12+Chimu_32; + + +// hspin(0)=fspin(0)-timesI(fspin(3)); +// hspin(1)=fspin(1)-timesI(fspin(2)); +#define XM_PROJ \ + Chi_00 = Chimu_00-timesI(Chimu_30);\ + Chi_01 = Chimu_01-timesI(Chimu_31);\ + Chi_02 = Chimu_02-timesI(Chimu_32);\ + Chi_10 = Chimu_10-timesI(Chimu_20);\ + Chi_11 = Chimu_11-timesI(Chimu_21);\ + Chi_12 = Chimu_12-timesI(Chimu_22); + +#define YM_PROJ \ + Chi_00 = Chimu_00+Chimu_30;\ + Chi_01 = Chimu_01+Chimu_31;\ + Chi_02 = Chimu_02+Chimu_32;\ + Chi_10 = Chimu_10-Chimu_20;\ + Chi_11 = Chimu_11-Chimu_21;\ + Chi_12 = Chimu_12-Chimu_22; + +#define ZM_PROJ \ + Chi_00 = Chimu_00-timesI(Chimu_20); \ + Chi_01 = Chimu_01-timesI(Chimu_21); \ + Chi_02 = Chimu_02-timesI(Chimu_22); \ + Chi_10 = Chimu_10+timesI(Chimu_30); \ + Chi_11 = Chimu_11+timesI(Chimu_31); \ + Chi_12 = Chimu_12+timesI(Chimu_32); + +#define TM_PROJ \ + Chi_00 = Chimu_00-Chimu_20; \ + Chi_01 = Chimu_01-Chimu_21; \ + Chi_02 = Chimu_02-Chimu_22; \ + Chi_10 = Chimu_10-Chimu_30; \ + Chi_11 = Chimu_11-Chimu_31; \ + Chi_12 = Chimu_12-Chimu_32; + +// fspin(0)=hspin(0); +// fspin(1)=hspin(1); +// fspin(2)=timesMinusI(hspin(1)); +// fspin(3)=timesMinusI(hspin(0)); +#define XP_RECON\ + result_00 = UChi_00;\ + result_01 = UChi_01;\ + result_02 = UChi_02;\ + result_10 = UChi_10;\ + result_11 = UChi_11;\ + result_12 = UChi_12;\ + result_20 = timesMinusI(UChi_10);\ + result_21 = timesMinusI(UChi_11);\ + result_22 = timesMinusI(UChi_12);\ + result_30 = timesMinusI(UChi_00);\ + result_31 = timesMinusI(UChi_01);\ + result_32 = timesMinusI(UChi_02); + +#define XP_RECON_ACCUM\ + result_00+=UChi_00;\ + result_01+=UChi_01;\ + result_02+=UChi_02;\ + result_10+=UChi_10;\ + result_11+=UChi_11;\ + result_12+=UChi_12;\ + result_20-=timesI(UChi_10);\ + result_21-=timesI(UChi_11);\ + result_22-=timesI(UChi_12);\ + result_30-=timesI(UChi_00);\ + result_31-=timesI(UChi_01);\ + result_32-=timesI(UChi_02); + +#define XM_RECON\ + result_00 = UChi_00;\ + result_01 = UChi_01;\ + result_02 = UChi_02;\ + result_10 = UChi_10;\ + result_11 = UChi_11;\ + result_12 = UChi_12;\ + result_20 = timesI(UChi_10);\ + result_21 = timesI(UChi_11);\ + result_22 = timesI(UChi_12);\ + result_30 = timesI(UChi_00);\ + result_31 = timesI(UChi_01);\ + result_32 = timesI(UChi_02); + +#define XM_RECON_ACCUM\ + result_00+= UChi_00;\ + result_01+= UChi_01;\ + result_02+= UChi_02;\ + result_10+= UChi_10;\ + result_11+= UChi_11;\ + result_12+= UChi_12;\ + result_20+= timesI(UChi_10);\ + result_21+= timesI(UChi_11);\ + result_22+= timesI(UChi_12);\ + result_30+= timesI(UChi_00);\ + result_31+= timesI(UChi_01);\ + result_32+= timesI(UChi_02); + +#define YP_RECON_ACCUM\ + result_00+= UChi_00;\ + result_01+= UChi_01;\ + result_02+= UChi_02;\ + result_10+= UChi_10;\ + result_11+= UChi_11;\ + result_12+= UChi_12;\ + result_20+= UChi_10;\ + result_21+= UChi_11;\ + result_22+= UChi_12;\ + result_30-= UChi_00;\ + result_31-= UChi_01;\ + result_32-= UChi_02; + +#define YM_RECON_ACCUM\ + result_00+= UChi_00;\ + result_01+= UChi_01;\ + result_02+= UChi_02;\ + result_10+= UChi_10;\ + result_11+= UChi_11;\ + result_12+= UChi_12;\ + result_20-= UChi_10;\ + result_21-= UChi_11;\ + result_22-= UChi_12;\ + result_30+= UChi_00;\ + result_31+= UChi_01;\ + result_32+= UChi_02; + +#define ZP_RECON_ACCUM\ + result_00+= UChi_00;\ + result_01+= UChi_01;\ + result_02+= UChi_02;\ + result_10+= UChi_10;\ + result_11+= UChi_11;\ + result_12+= UChi_12;\ + result_20-= timesI(UChi_00); \ + result_21-= timesI(UChi_01); \ + result_22-= timesI(UChi_02); \ + result_30+= timesI(UChi_10); \ + result_31+= timesI(UChi_11); \ + result_32+= timesI(UChi_12); + +#define ZM_RECON_ACCUM\ + result_00+= UChi_00;\ + result_01+= UChi_01;\ + result_02+= UChi_02;\ + result_10+= UChi_10;\ + result_11+= UChi_11;\ + result_12+= UChi_12;\ + result_20+= timesI(UChi_00); \ + result_21+= timesI(UChi_01); \ + result_22+= timesI(UChi_02); \ + result_30-= timesI(UChi_10); \ + result_31-= timesI(UChi_11); \ + result_32-= timesI(UChi_12); + +#define TP_RECON_ACCUM\ + result_00+= UChi_00;\ + result_01+= UChi_01;\ + result_02+= UChi_02;\ + result_10+= UChi_10;\ + result_11+= UChi_11;\ + result_12+= UChi_12;\ + result_20+= UChi_00; \ + result_21+= UChi_01; \ + result_22+= UChi_02; \ + result_30+= UChi_10; \ + result_31+= UChi_11; \ + result_32+= UChi_12; + +#define TM_RECON_ACCUM\ + result_00+= UChi_00;\ + result_01+= UChi_01;\ + result_02+= UChi_02;\ + result_10+= UChi_10;\ + result_11+= UChi_11;\ + result_12+= UChi_12;\ + result_20-= UChi_00; \ + result_21-= UChi_01; \ + result_22-= UChi_02; \ + result_30-= UChi_10; \ + result_31-= UChi_11; \ + result_32-= UChi_12; + +namespace Grid { +namespace QCD { + +void DiracOptHand::DhopSite(CartesianStencil &st,LatticeDoubledGaugeField &U, + std::vector > &buf, + int ss,const LatticeFermion &in, LatticeFermion &out) +{ + REGISTER vComplex result_00; // 12 regs on knc + REGISTER vComplex result_01; + REGISTER vComplex result_02; + + REGISTER vComplex result_10; + REGISTER vComplex result_11; + REGISTER vComplex result_12; + + REGISTER vComplex result_20; + REGISTER vComplex result_21; + REGISTER vComplex result_22; + + REGISTER vComplex result_30; + REGISTER vComplex result_31; + REGISTER vComplex result_32; // 20 left + + REGISTER vComplex Chi_00; // two spinor; 6 regs + REGISTER vComplex Chi_01; + REGISTER vComplex Chi_02; + + REGISTER vComplex Chi_10; + REGISTER vComplex Chi_11; + REGISTER vComplex Chi_12; // 14 left + + REGISTER vComplex UChi_00; // two spinor; 6 regs + REGISTER vComplex UChi_01; + REGISTER vComplex UChi_02; + + REGISTER vComplex UChi_10; + REGISTER vComplex UChi_11; + REGISTER vComplex UChi_12; // 8 left + + REGISTER vComplex U_00; // two rows of U matrix + REGISTER vComplex U_10; + REGISTER vComplex U_20; + REGISTER vComplex U_01; + REGISTER vComplex U_11; + REGISTER vComplex U_21; // 2 reg left. + +#define Chimu_00 Chi_00 +#define Chimu_01 Chi_01 +#define Chimu_02 Chi_02 +#define Chimu_10 Chi_10 +#define Chimu_11 Chi_11 +#define Chimu_12 Chi_12 +#define Chimu_20 UChi_00 +#define Chimu_21 UChi_01 +#define Chimu_22 UChi_02 +#define Chimu_30 UChi_10 +#define Chimu_31 UChi_11 +#define Chimu_32 UChi_12 + + + int offset,local,perm, ptype; + + // Xp + offset = st._offsets [Xp][ss]; + local = st._is_local[Xp][ss]; + perm = st._permute[Xp][ss]; + ptype = st._permute_type[Xp]; + + if ( local ) { + LOAD_CHIMU; + XP_PROJ; + if ( perm) { + PERMUTE; + } + } else { + LOAD_CHI; + } + + { + MULT_2SPIN(Xp); + } + XP_RECON; + // std::cout << "XP_RECON"< > &buf, + int ss,const LatticeFermion &in, LatticeFermion &out) +{ + REGISTER vComplex result_00; // 12 regs on knc + REGISTER vComplex result_01; + REGISTER vComplex result_02; + + REGISTER vComplex result_10; + REGISTER vComplex result_11; + REGISTER vComplex result_12; + + REGISTER vComplex result_20; + REGISTER vComplex result_21; + REGISTER vComplex result_22; + + REGISTER vComplex result_30; + REGISTER vComplex result_31; + REGISTER vComplex result_32; // 20 left + + REGISTER vComplex Chi_00; // two spinor; 6 regs + REGISTER vComplex Chi_01; + REGISTER vComplex Chi_02; + + REGISTER vComplex Chi_10; + REGISTER vComplex Chi_11; + REGISTER vComplex Chi_12; // 14 left + + REGISTER vComplex UChi_00; // two spinor; 6 regs + REGISTER vComplex UChi_01; + REGISTER vComplex UChi_02; + + REGISTER vComplex UChi_10; + REGISTER vComplex UChi_11; + REGISTER vComplex UChi_12; // 8 left + + REGISTER vComplex U_00; // two rows of U matrix + REGISTER vComplex U_10; + REGISTER vComplex U_20; + REGISTER vComplex U_01; + REGISTER vComplex U_11; + REGISTER vComplex U_21; // 2 reg left. + +#define Chimu_00 Chi_00 +#define Chimu_01 Chi_01 +#define Chimu_02 Chi_02 +#define Chimu_10 Chi_10 +#define Chimu_11 Chi_11 +#define Chimu_12 Chi_12 +#define Chimu_20 UChi_00 +#define Chimu_21 UChi_01 +#define Chimu_22 UChi_02 +#define Chimu_30 UChi_10 +#define Chimu_31 UChi_11 +#define Chimu_32 UChi_12 + + + int offset,local,perm, ptype; + + // Xp + offset = st._offsets [Xp][ss]; + local = st._is_local[Xp][ss]; + perm = st._permute[Xp][ss]; + ptype = st._permute_type[Xp]; + + if ( local ) { + LOAD_CHIMU; + XM_PROJ; + if ( perm) { + PERMUTE; + } + } else { + LOAD_CHI; + } + { + MULT_2SPIN(Xp); + } + XM_RECON; + + // Yp + offset = st._offsets [Yp][ss]; + local = st._is_local[Yp][ss]; + perm = st._permute[Yp][ss]; + ptype = st._permute_type[Yp]; + + if ( local ) { + LOAD_CHIMU; + YM_PROJ; + if ( perm) { + PERMUTE; + } + } else { + LOAD_CHI; + } + { + MULT_2SPIN(Yp); + } + YM_RECON_ACCUM; + + + // Zp + offset = st._offsets [Zp][ss]; + local = st._is_local[Zp][ss]; + perm = st._permute[Zp][ss]; + ptype = st._permute_type[Zp]; + + if ( local ) { + LOAD_CHIMU; + ZM_PROJ; + if ( perm) { + PERMUTE; + } + } else { + LOAD_CHI; + } + { + MULT_2SPIN(Zp); + } + ZM_RECON_ACCUM; + + // Tp + offset = st._offsets [Tp][ss]; + local = st._is_local[Tp][ss]; + perm = st._permute[Tp][ss]; + ptype = st._permute_type[Tp]; + + if ( local ) { + LOAD_CHIMU; + TM_PROJ; + if ( perm) { + PERMUTE; + } + } else { + LOAD_CHI; + } + { + MULT_2SPIN(Tp); + } + TM_RECON_ACCUM; + + // Xm + offset = st._offsets [Xm][ss]; + local = st._is_local[Xm][ss]; + perm = st._permute[Xm][ss]; + ptype = st._permute_type[Xm]; + + if ( local ) { + LOAD_CHIMU; + XP_PROJ; + if ( perm) { + PERMUTE; + } + } else { + LOAD_CHI; + } + { + MULT_2SPIN(Xm); + } + XP_RECON_ACCUM; + + + // Ym + offset = st._offsets [Ym][ss]; + local = st._is_local[Ym][ss]; + perm = st._permute[Ym][ss]; + ptype = st._permute_type[Ym]; + + if ( local ) { + LOAD_CHIMU; + YP_PROJ; + if ( perm) { + PERMUTE; + } + } else { + LOAD_CHI; + } + { + MULT_2SPIN(Ym); + } + YP_RECON_ACCUM; + + // Zm + offset = st._offsets [Zm][ss]; + local = st._is_local[Zm][ss]; + perm = st._permute[Zm][ss]; + ptype = st._permute_type[Zm]; + + if ( local ) { + LOAD_CHIMU; + ZP_PROJ; + if ( perm) { + PERMUTE; + } + } else { + LOAD_CHI; + } + { + MULT_2SPIN(Zm); + } + ZP_RECON_ACCUM; + + // Tm + offset = st._offsets [Tm][ss]; + local = st._is_local[Tm][ss]; + perm = st._permute[Tm][ss]; + ptype = st._permute_type[Tm]; + + if ( local ) { + LOAD_CHIMU; + TP_PROJ; + if ( perm) { + PERMUTE; + } + } else { + LOAD_CHI; + } + { + MULT_2SPIN(Tm); + } + TP_RECON_ACCUM; + + { + vSpinColourVector & ref (out._odata[ss]); + vstream(ref()(0)(0),result_00); + vstream(ref()(0)(1),result_01); + vstream(ref()(0)(2),result_02); + vstream(ref()(1)(0),result_10); + vstream(ref()(1)(1),result_11); + vstream(ref()(1)(2),result_12); + vstream(ref()(2)(0),result_20); + vstream(ref()(2)(1),result_21); + vstream(ref()(2)(2),result_22); + vstream(ref()(3)(0),result_30); + vstream(ref()(3)(1),result_31); + vstream(ref()(3)(2),result_32); + } +} +}} diff --git a/lib/qcd/Grid_qcd_wilson_dop.cc b/lib/qcd/Grid_qcd_wilson_dop.cc index 318e18df..9a3f5f6a 100644 --- a/lib/qcd/Grid_qcd_wilson_dop.cc +++ b/lib/qcd/Grid_qcd_wilson_dop.cc @@ -1,4 +1,3 @@ - #include namespace Grid { @@ -7,15 +6,7 @@ namespace QCD { const std::vector WilsonMatrix::directions ({0,1,2,3, 0, 1, 2, 3}); const std::vector WilsonMatrix::displacements({1,1,1,1,-1,-1,-1,-1}); - // Should be in header? -const int WilsonMatrix::Xp = 0; -const int WilsonMatrix::Yp = 1; -const int WilsonMatrix::Zp = 2; -const int WilsonMatrix::Tp = 3; -const int WilsonMatrix::Xm = 4; -const int WilsonMatrix::Ym = 5; -const int WilsonMatrix::Zm = 6; -const int WilsonMatrix::Tm = 7; + int WilsonMatrix::HandOptDslash; class WilsonCompressor { public: @@ -39,28 +30,28 @@ const int WilsonMatrix::Tm = 7; mudag=(mu+Nd)%(2*Nd); } switch(mudag) { - case WilsonMatrix::Xp: + case Xp: spProjXp(ret,in); break; - case WilsonMatrix::Yp: + case Yp: spProjYp(ret,in); break; - case WilsonMatrix::Zp: + case Zp: spProjZp(ret,in); break; - case WilsonMatrix::Tp: + case Tp: spProjTp(ret,in); break; - case WilsonMatrix::Xm: + case Xm: spProjXm(ret,in); break; - case WilsonMatrix::Ym: + case Ym: spProjYm(ret,in); break; - case WilsonMatrix::Zm: + case Zm: spProjZm(ret,in); break; - case WilsonMatrix::Tm: + case Tm: spProjTm(ret,in); break; default: @@ -157,316 +148,36 @@ void WilsonMatrix::MooeeInvDag(const LatticeFermion &in, LatticeFermion &out) MooeeInv(in,out); } -void WilsonMatrix::DhopSite(CartesianStencil &st,LatticeDoubledGaugeField &U, - std::vector > &buf, - int ss,const LatticeFermion &in, LatticeFermion &out) -{ - vHalfSpinColourVector tmp; - vHalfSpinColourVector chi; - vSpinColourVector result; - vHalfSpinColourVector Uchi; - int offset,local,perm, ptype; - - //#define VERBOSE( A) if ( ss<10 ) { std::cout << "site " < > &buf, - int ss,const LatticeFermion &in, LatticeFermion &out) -{ - vHalfSpinColourVector tmp; - vHalfSpinColourVector chi; - vSpinColourVector result; - vHalfSpinColourVector Uchi; - int offset,local,perm, ptype; - - // Xp - offset = st._offsets [Xm][ss]; - local = st._is_local[Xm][ss]; - perm = st._permute[Xm][ss]; - - ptype = st._permute_type[Xm]; - if ( local && perm ) { - spProjXp(tmp,in._odata[offset]); - permute(chi,tmp,ptype); - } else if ( local ) { - spProjXp(chi,in._odata[offset]); - } else { - chi=buf[offset]; - } - mult(&Uchi(),&U._odata[ss](Xm),&chi()); - spReconXp(result,Uchi); - - // Yp - offset = st._offsets [Ym][ss]; - local = st._is_local[Ym][ss]; - perm = st._permute[Ym][ss]; - ptype = st._permute_type[Ym]; - if ( local && perm ) { - spProjYp(tmp,in._odata[offset]); - permute(chi,tmp,ptype); - } else if ( local ) { - spProjYp(chi,in._odata[offset]); - } else { - chi=buf[offset]; - } - mult(&Uchi(),&U._odata[ss](Ym),&chi()); - accumReconYp(result,Uchi); - - // Zp - offset = st._offsets [Zm][ss]; - local = st._is_local[Zm][ss]; - perm = st._permute[Zm][ss]; - ptype = st._permute_type[Zm]; - if ( local && perm ) { - spProjZp(tmp,in._odata[offset]); - permute(chi,tmp,ptype); - } else if ( local ) { - spProjZp(chi,in._odata[offset]); - } else { - chi=buf[offset]; - } - mult(&Uchi(),&U._odata[ss](Zm),&chi()); - accumReconZp(result,Uchi); - - // Tp - offset = st._offsets [Tm][ss]; - local = st._is_local[Tm][ss]; - perm = st._permute[Tm][ss]; - ptype = st._permute_type[Tm]; - if ( local && perm ) { - spProjTp(tmp,in._odata[offset]); - permute(chi,tmp,ptype); - } else if ( local ) { - spProjTp(chi,in._odata[offset]); - } else { - chi=buf[offset]; - } - mult(&Uchi(),&U._odata[ss](Tm),&chi()); - accumReconTp(result,Uchi); - - // Xm - offset = st._offsets [Xp][ss]; - local = st._is_local[Xp][ss]; - perm = st._permute[Xp][ss]; - ptype = st._permute_type[Xp]; - - if ( local && perm ) - { - spProjXm(tmp,in._odata[offset]); - permute(chi,tmp,ptype); - } else if ( local ) { - spProjXm(chi,in._odata[offset]); - } else { - chi=buf[offset]; - } - mult(&Uchi(),&U._odata[ss](Xp),&chi()); - accumReconXm(result,Uchi); - - // Ym - offset = st._offsets [Yp][ss]; - local = st._is_local[Yp][ss]; - perm = st._permute[Yp][ss]; - ptype = st._permute_type[Yp]; - - if ( local && perm ) { - spProjYm(tmp,in._odata[offset]); - permute(chi,tmp,ptype); - } else if ( local ) { - spProjYm(chi,in._odata[offset]); - } else { - chi=buf[offset]; - } - mult(&Uchi(),&U._odata[ss](Yp),&chi()); - accumReconYm(result,Uchi); - - // Zm - offset = st._offsets [Zp][ss]; - local = st._is_local[Zp][ss]; - perm = st._permute[Zp][ss]; - ptype = st._permute_type[Zp]; - if ( local && perm ) { - spProjZm(tmp,in._odata[offset]); - permute(chi,tmp,ptype); - } else if ( local ) { - spProjZm(chi,in._odata[offset]); - } else { - chi=buf[offset]; - } - mult(&Uchi(),&U._odata[ss](Zp),&chi()); - accumReconZm(result,Uchi); - - // Tm - offset = st._offsets [Tp][ss]; - local = st._is_local[Tp][ss]; - perm = st._permute[Tp][ss]; - ptype = st._permute_type[Tp]; - if ( local && perm ) { - spProjTm(tmp,in._odata[offset]); - permute(chi,tmp,ptype); - } else if ( local ) { - spProjTm(chi,in._odata[offset]); - } else { - chi=buf[offset]; - } - mult(&Uchi(),&U._odata[ss](Tp),&chi()); - accumReconTm(result,Uchi); - - vstream(out._odata[ss],result); -} - void WilsonMatrix::DhopInternal(CartesianStencil & st,LatticeDoubledGaugeField & U, const LatticeFermion &in, LatticeFermion &out,int dag) { assert((dag==DaggerNo) ||(dag==DaggerYes)); WilsonCompressor compressor(dag); - st.HaloExchange(in,comm_buf,compressor); if ( dag == DaggerYes ) { + if( HandOptDslash ) { PARALLEL_FOR_LOOP - for(int sss=0;sssoSites();sss++){ - DhopSiteDag(st,U,comm_buf,sss,in,out); + for(int sss=0;sssoSites();sss++){ + DiracOptHand::DhopSiteDag(st,U,comm_buf,sss,in,out); + } + } else { +PARALLEL_FOR_LOOP + for(int sss=0;sssoSites();sss++){ + DiracOpt::DhopSiteDag(st,U,comm_buf,sss,in,out); + } } } else { + if( HandOptDslash ) { PARALLEL_FOR_LOOP - for(int sss=0;sssoSites();sss++){ - DhopSite(st,U,comm_buf,sss,in,out); + for(int sss=0;sssoSites();sss++){ + DiracOptHand::DhopSite(st,U,comm_buf,sss,in,out); + } + } else { +PARALLEL_FOR_LOOP + for(int sss=0;sssoSites();sss++){ + DiracOpt::DhopSite(st,U,comm_buf,sss,in,out); + } } } } diff --git a/lib/qcd/Grid_qcd_wilson_dop.h b/lib/qcd/Grid_qcd_wilson_dop.h index 96b29cd0..87418603 100644 --- a/lib/qcd/Grid_qcd_wilson_dop.h +++ b/lib/qcd/Grid_qcd_wilson_dop.h @@ -6,10 +6,22 @@ namespace Grid { namespace QCD { + // Should be in header? + const int Xp = 0; + const int Yp = 1; + const int Zp = 2; + const int Tp = 3; + const int Xm = 4; + const int Ym = 5; + const int Zm = 6; + const int Tm = 7; + class WilsonMatrix : public CheckerBoardedSparseMatrixBase { //NB r=1; public: + static int HandOptDslash; + double mass; // GridBase * grid; // Inherited // GridBase * cbgrid; @@ -56,14 +68,6 @@ namespace Grid { void DhopEO(const LatticeFermion &in, LatticeFermion &out,int dag); void DhopInternal(CartesianStencil & st,LatticeDoubledGaugeField &U, const LatticeFermion &in, LatticeFermion &out,int dag); - // These ones will need to be package intelligently. WilsonType base class - // for use by DWF etc.. - void DhopSite(CartesianStencil &st,LatticeDoubledGaugeField &U, - std::vector > &buf, - int ss,const LatticeFermion &in, LatticeFermion &out); - void DhopSiteDag(CartesianStencil &st,LatticeDoubledGaugeField &U, - std::vector > &buf, - int ss,const LatticeFermion &in, LatticeFermion &out); typedef iScalar > matrix; @@ -71,6 +75,31 @@ namespace Grid { }; + class DiracOpt { + public: + // These ones will need to be package intelligently. WilsonType base class + // for use by DWF etc.. + static void DhopSite(CartesianStencil &st,LatticeDoubledGaugeField &U, + std::vector > &buf, + int ss,const LatticeFermion &in, LatticeFermion &out); + static void DhopSiteDag(CartesianStencil &st,LatticeDoubledGaugeField &U, + std::vector > &buf, + int ss,const LatticeFermion &in, LatticeFermion &out); + + }; + class DiracOptHand { + public: + // These ones will need to be package intelligently. WilsonType base class + // for use by DWF etc.. + static void DhopSite(CartesianStencil &st,LatticeDoubledGaugeField &U, + std::vector > &buf, + int ss,const LatticeFermion &in, LatticeFermion &out); + static void DhopSiteDag(CartesianStencil &st,LatticeDoubledGaugeField &U, + std::vector > &buf, + int ss,const LatticeFermion &in, LatticeFermion &out); + + }; + } } #endif diff --git a/reconfigure_script b/reconfigure_script new file mode 100755 index 00000000..6bdb081e --- /dev/null +++ b/reconfigure_script @@ -0,0 +1,4 @@ +aclocal -I m4 +autoheader -f +automake -f +autoconf -f