diff --git a/bootstrap.sh b/bootstrap.sh index 461eb121..f847b7ab 100755 --- a/bootstrap.sh +++ b/bootstrap.sh @@ -4,7 +4,7 @@ EIGEN_URL='http://bitbucket.org/eigen/eigen/get/3.2.9.tar.bz2' FFTW_URL=http://www.fftw.org/fftw-3.3.4.tar.gz echo "-- deploying Eigen source..." -wget ${EIGEN_URL} +wget ${EIGEN_URL} --no-check-certificate ./scripts/update_eigen.sh `basename ${EIGEN_URL}` rm `basename ${EIGEN_URL}` diff --git a/lib/parallelIO/BinaryIO.h b/lib/parallelIO/BinaryIO.h index 184209dc..5eddb57d 100644 --- a/lib/parallelIO/BinaryIO.h +++ b/lib/parallelIO/BinaryIO.h @@ -194,22 +194,22 @@ class BinaryIO { std::vector site({x,y,z,t}); - if ( grid->IsBoss() ) { - fin.read((char *)&file_object,sizeof(file_object)); - bytes += sizeof(file_object); - if(ieee32big) be32toh_v((void *)&file_object,sizeof(file_object)); - if(ieee32) le32toh_v((void *)&file_object,sizeof(file_object)); - if(ieee64big) be64toh_v((void *)&file_object,sizeof(file_object)); - if(ieee64) le64toh_v((void *)&file_object,sizeof(file_object)); + if (grid->IsBoss()) { + fin.read((char *)&file_object, sizeof(file_object)); + bytes += sizeof(file_object); + if (ieee32big) be32toh_v((void *)&file_object, sizeof(file_object)); + if (ieee32) le32toh_v((void *)&file_object, sizeof(file_object)); + if (ieee64big) be64toh_v((void *)&file_object, sizeof(file_object)); + if (ieee64) le64toh_v((void *)&file_object, sizeof(file_object)); - munge(file_object,munged,csum); + munge(file_object, munged, csum); } // The boss who read the file has their value poked pokeSite(munged,Umu,site); }}}} timer.Stop(); std::cout<IsBoss() ) { - - if(ieee32big) htobe32_v((void *)&file_object,sizeof(file_object)); - if(ieee32) htole32_v((void *)&file_object,sizeof(file_object)); - if(ieee64big) htobe64_v((void *)&file_object,sizeof(file_object)); - if(ieee64) htole64_v((void *)&file_object,sizeof(file_object)); + + if(ieee32big) htobe32_v((void *)&file_object,sizeof(file_object)); + if(ieee32) htole32_v((void *)&file_object,sizeof(file_object)); + if(ieee64big) htobe64_v((void *)&file_object,sizeof(file_object)); + if(ieee64) htole64_v((void *)&file_object,sizeof(file_object)); - // NB could gather an xstrip as an optimisation. - fout.write((char *)&file_object,sizeof(file_object)); - bytes+=sizeof(file_object); + // NB could gather an xstrip as an optimisation. + fout.write((char *)&file_object,sizeof(file_object)); + bytes+=sizeof(file_object); } }}}} timer.Stop(); std::cout<ThisRank() ){ - // std::cout << "rank" << rank<<" Getting state for index "<Broadcast(rank,(void *)&saved[0],bytes); if ( grid->IsBoss() ) { - Uint32Checksum((uint32_t *)&saved[0],bytes,csum); - fout.write((char *)&saved[0],bytes); + Uint32Checksum((uint32_t *)&saved[0],bytes,csum); + fout.write((char *)&saved[0],bytes); } } @@ -355,14 +355,14 @@ class BinaryIO { int l_idx=parallel.generator_idx(o_idx,i_idx); if ( grid->IsBoss() ) { - fin.read((char *)&saved[0],bytes); - Uint32Checksum((uint32_t *)&saved[0],bytes,csum); + fin.read((char *)&saved[0],bytes); + Uint32Checksum((uint32_t *)&saved[0],bytes,csum); } grid->Broadcast(0,(void *)&saved[0],bytes); if( rank == grid->ThisRank() ){ - parallel.SetState(saved,l_idx); + parallel.SetState(saved,l_idx); } } @@ -415,15 +415,15 @@ class BinaryIO { if ( d == 0 ) parallel[d] = 0; if (parallel[d]) { - range[d] = grid->_ldimensions[d]; - start[d] = grid->_processor_coor[d]*range[d]; - ioproc[d]= grid->_processor_coor[d]; + range[d] = grid->_ldimensions[d]; + start[d] = grid->_processor_coor[d]*range[d]; + ioproc[d]= grid->_processor_coor[d]; } else { - range[d] = grid->_gdimensions[d]; - start[d] = 0; - ioproc[d]= 0; + range[d] = grid->_gdimensions[d]; + start[d] = 0; + ioproc[d]= 0; - if ( grid->_processor_coor[d] != 0 ) IOnode = 0; + if ( grid->_processor_coor[d] != 0 ) IOnode = 0; } slice_vol = slice_vol * range[d]; } @@ -434,9 +434,9 @@ class BinaryIO { std::cout<< std::dec ; std::cout<< GridLogMessage<< "Parallel read I/O to "<< file << " with " <_ndimension;d++){ - std::cout<< range[d]; - if( d< grid->_ndimension-1 ) - std::cout<< " x "; + std::cout<< range[d]; + if( d< grid->_ndimension-1 ) + std::cout<< " x "; } std::cout << std::endl; } @@ -463,7 +463,7 @@ class BinaryIO { // need to implement these loops in Nd independent way with a lexico conversion for(int tlex=0;tlex tsite(nd); // temporary mixed up site std::vector gsite(nd); std::vector lsite(nd); @@ -472,8 +472,8 @@ class BinaryIO { Lexicographic::CoorFromIndex(tsite,tlex,range); for(int d=0;d_ldimensions[d]; // local site - gsite[d] = tsite[d]+start[d]; // global site + lsite[d] = tsite[d]%grid->_ldimensions[d]; // local site + gsite[d] = tsite[d]+start[d]; // global site } ///////////////////////// @@ -487,29 +487,29 @@ class BinaryIO { // iorank reads from the seek //////////////////////////////// if (myrank == iorank) { - - fin.seekg(offset+g_idx*sizeof(fileObj)); - fin.read((char *)&fileObj,sizeof(fileObj)); - bytes+=sizeof(fileObj); - - if(ieee32big) be32toh_v((void *)&fileObj,sizeof(fileObj)); - if(ieee32) le32toh_v((void *)&fileObj,sizeof(fileObj)); - if(ieee64big) be64toh_v((void *)&fileObj,sizeof(fileObj)); - if(ieee64) le64toh_v((void *)&fileObj,sizeof(fileObj)); - - munge(fileObj,siteObj,csum); + + fin.seekg(offset+g_idx*sizeof(fileObj)); + fin.read((char *)&fileObj,sizeof(fileObj)); + bytes+=sizeof(fileObj); + + if(ieee32big) be32toh_v((void *)&fileObj,sizeof(fileObj)); + if(ieee32) le32toh_v((void *)&fileObj,sizeof(fileObj)); + if(ieee64big) be64toh_v((void *)&fileObj,sizeof(fileObj)); + if(ieee64) le64toh_v((void *)&fileObj,sizeof(fileObj)); + + munge(fileObj,siteObj,csum); - } + } // Possibly do transport through pt2pt if ( rank != iorank ) { - if ( (myrank == rank) || (myrank==iorank) ) { - grid->SendRecvPacket((void *)&siteObj,(void *)&siteObj,iorank,rank,sizeof(siteObj)); - } + if ( (myrank == rank) || (myrank==iorank) ) { + grid->SendRecvPacket((void *)&siteObj,(void *)&siteObj,iorank,rank,sizeof(siteObj)); + } } // Poke at destination if ( myrank == rank ) { - pokeLocalSite(siteObj,Umu,lsite); + pokeLocalSite(siteObj,Umu,lsite); } grid->Barrier(); // necessary? } @@ -520,7 +520,7 @@ class BinaryIO { timer.Stop(); std::cout<_ndimension-1 ) parallel[d] = 0; if (parallel[d]) { - range[d] = grid->_ldimensions[d]; - start[d] = grid->_processor_coor[d]*range[d]; - ioproc[d]= grid->_processor_coor[d]; + range[d] = grid->_ldimensions[d]; + start[d] = grid->_processor_coor[d]*range[d]; + ioproc[d]= grid->_processor_coor[d]; } else { - range[d] = grid->_gdimensions[d]; - start[d] = 0; - ioproc[d]= 0; + range[d] = grid->_gdimensions[d]; + start[d] = 0; + ioproc[d]= 0; - if ( grid->_processor_coor[d] != 0 ) IOnode = 0; + if ( grid->_processor_coor[d] != 0 ) IOnode = 0; } slice_vol = slice_vol * range[d]; @@ -577,9 +577,9 @@ class BinaryIO { grid->GlobalSum(tmp); std::cout<< GridLogMessage<< "Parallel write I/O from "<< file << " with " <_ndimension;d++){ - std::cout<< range[d]; - if( d< grid->_ndimension-1 ) - std::cout<< " x "; + std::cout<< range[d]; + if( d< grid->_ndimension-1 ) + std::cout<< " x "; } std::cout << std::endl; } @@ -610,7 +610,7 @@ class BinaryIO { // should aggregate a whole chunk and then write. // need to implement these loops in Nd independent way with a lexico conversion for(int tlex=0;tlex tsite(nd); // temporary mixed up site std::vector gsite(nd); std::vector lsite(nd); @@ -619,8 +619,8 @@ class BinaryIO { Lexicographic::CoorFromIndex(tsite,tlex,range); for(int d=0;d_ldimensions[d]; // local site - gsite[d] = tsite[d]+start[d]; // global site + lsite[d] = tsite[d]%grid->_ldimensions[d]; // local site + gsite[d] = tsite[d]+start[d]; // global site } @@ -640,26 +640,26 @@ class BinaryIO { // Pair of nodes may need to do pt2pt send if ( rank != iorank ) { // comms is necessary - if ( (myrank == rank) || (myrank==iorank) ) { // and we have to do it - // Send to IOrank - grid->SendRecvPacket((void *)&siteObj,(void *)&siteObj,rank,iorank,sizeof(siteObj)); - } + if ( (myrank == rank) || (myrank==iorank) ) { // and we have to do it + // Send to IOrank + grid->SendRecvPacket((void *)&siteObj,(void *)&siteObj,rank,iorank,sizeof(siteObj)); + } } grid->Barrier(); // necessary? if (myrank == iorank) { - - munge(siteObj,fileObj,csum); + + munge(siteObj,fileObj,csum); - if(ieee32big) htobe32_v((void *)&fileObj,sizeof(fileObj)); - if(ieee32) htole32_v((void *)&fileObj,sizeof(fileObj)); - if(ieee64big) htobe64_v((void *)&fileObj,sizeof(fileObj)); - if(ieee64) htole64_v((void *)&fileObj,sizeof(fileObj)); - - fout.seekp(offset+g_idx*sizeof(fileObj)); - fout.write((char *)&fileObj,sizeof(fileObj)); - bytes+=sizeof(fileObj); + if(ieee32big) htobe32_v((void *)&fileObj,sizeof(fileObj)); + if(ieee32) htole32_v((void *)&fileObj,sizeof(fileObj)); + if(ieee64big) htobe64_v((void *)&fileObj,sizeof(fileObj)); + if(ieee64) htole64_v((void *)&fileObj,sizeof(fileObj)); + + fout.seekp(offset+g_idx*sizeof(fileObj)); + fout.write((char *)&fileObj,sizeof(fileObj)); + bytes+=sizeof(fileObj); } } @@ -668,7 +668,7 @@ class BinaryIO { timer.Stop(); std::cout< class WilsonKernels : public FermionOperator , public WilsonKernelsStatic { public: - INHERIT_IMPL_TYPES(Impl); - typedef FermionOperator Base; + INHERIT_IMPL_TYPES(Impl); + typedef FermionOperator Base; public: - template - typename std::enable_if::type - DiracOptDhopSite( - StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, - std::vector > &buf, - int sF, int sU, int Ls, int Ns, const FermionField &in, - FermionField &out) { + template + typename std::enable_if::type + DiracOptDhopSite( + StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, + std::vector > &buf, + int sF, int sU, int Ls, int Ns, const FermionField &in, + FermionField &out) { #ifdef AVX512 - if (AsmOpt) { - WilsonKernels::DiracOptAsmDhopSite(st, lo, U, buf, sF, sU, Ls, Ns, - in, out); + if (AsmOpt) { + WilsonKernels::DiracOptAsmDhopSite(st, lo, U, buf, sF, sU, Ls, Ns, + in, out); - } else { + } else { #else - { + { #endif - for (int site = 0; site < Ns; site++) { - for (int s = 0; s < Ls; s++) { - if (HandOpt) - WilsonKernels::DiracOptHandDhopSite(st, lo, U, buf, sF, sU, - in, out); - else - WilsonKernels::DiracOptGenericDhopSite(st, lo, U, buf, sF, sU, - in, out); - sF++; - } - sU++; - } - } - } + for (int site = 0; site < Ns; site++) { + for (int s = 0; s < Ls; s++) { + if (HandOpt) + WilsonKernels::DiracOptHandDhopSite(st, lo, U, buf, sF, sU, + in, out); + else + WilsonKernels::DiracOptGenericDhopSite(st, lo, U, buf, sF, sU, + in, out); + sF++; + } + sU++; + } + } + } - template - typename std::enable_if<(Impl::Dimension != 3 || (Impl::Dimension == 3 && Nc != 3)) && EnableBool, void>::type - DiracOptDhopSite( - StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, - std::vector > &buf, - int sF, int sU, int Ls, int Ns, const FermionField &in, - FermionField &out) { - for (int site = 0; site < Ns; site++) { - for (int s = 0; s < Ls; s++) { - WilsonKernels::DiracOptGenericDhopSite(st, lo, U, buf, sF, sU, in, - out); - sF++; - } - sU++; - } - } + template + typename std::enable_if<(Impl::Dimension != 3 || (Impl::Dimension == 3 && Nc != 3)) && EnableBool, void>::type + DiracOptDhopSite( + StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, + std::vector > &buf, + int sF, int sU, int Ls, int Ns, const FermionField &in, + FermionField &out) { + for (int site = 0; site < Ns; site++) { + for (int s = 0; s < Ls; s++) { + WilsonKernels::DiracOptGenericDhopSite(st, lo, U, buf, sF, sU, in, + out); + sF++; + } + sU++; + } + } - template - typename std::enable_if::type - DiracOptDhopSiteDag( - StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, - std::vector > &buf, - int sF, int sU, int Ls, int Ns, const FermionField &in, - FermionField &out) { + template + typename std::enable_if::type + DiracOptDhopSiteDag( + StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, + std::vector > &buf, + int sF, int sU, int Ls, int Ns, const FermionField &in, + FermionField &out) { #ifdef AVX512 - if (AsmOpt) { - WilsonKernels::DiracOptAsmDhopSiteDag(st, lo, U, buf, sF, sU, Ls, - Ns, in, out); - } else { + if (AsmOpt) { + WilsonKernels::DiracOptAsmDhopSiteDag(st, lo, U, buf, sF, sU, Ls, + Ns, in, out); + } else { #else - { + { #endif - for (int site = 0; site < Ns; site++) { - for (int s = 0; s < Ls; s++) { - if (HandOpt) - WilsonKernels::DiracOptHandDhopSiteDag(st, lo, U, buf, sF, sU, - in, out); - else - WilsonKernels::DiracOptGenericDhopSiteDag(st, lo, U, buf, sF, - sU, in, out); - sF++; - } - sU++; + for (int site = 0; site < Ns; site++) { + for (int s = 0; s < Ls; s++) { + if (HandOpt) + WilsonKernels::DiracOptHandDhopSiteDag(st, lo, U, buf, sF, sU, + in, out); + else + WilsonKernels::DiracOptGenericDhopSiteDag(st, lo, U, buf, sF, + sU, in, out); + sF++; + } + sU++; + } + } + } + + template + typename std::enable_if< + (Impl::Dimension != 3 || (Impl::Dimension == 3 && Nc != 3)) && EnableBool, + void>::type + DiracOptDhopSiteDag( + StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, + std::vector > &buf, + int sF, int sU, int Ls, int Ns, const FermionField &in, + FermionField &out) { + for (int site = 0; site < Ns; site++) { + for (int s = 0; s < Ls; s++) { + WilsonKernels::DiracOptGenericDhopSiteDag(st, lo, U, buf, sF, sU, + in, out); + sF++; + } + sU++; + } + } + + void DiracOptDhopDir( + StencilImpl &st, DoubledGaugeField &U, + std::vector > &buf, + int sF, int sU, const FermionField &in, FermionField &out, int dirdisp, + int gamma); + + private: + // Specialised variants + void DiracOptGenericDhopSite( + StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, + std::vector > &buf, + int sF, int sU, const FermionField &in, FermionField &out); + + void DiracOptGenericDhopSiteDag( + StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, + std::vector > &buf, + int sF, int sU, const FermionField &in, FermionField &out); + + void DiracOptAsmDhopSite( + StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, + std::vector > &buf, + int sF, int sU, int Ls, int Ns, const FermionField &in, + FermionField &out); + + void DiracOptAsmDhopSiteDag( + StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, + std::vector > &buf, + int sF, int sU, int Ls, int Ns, const FermionField &in, + FermionField &out); + + void DiracOptHandDhopSite( + StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, + std::vector > &buf, + int sF, int sU, const FermionField &in, FermionField &out); + + void DiracOptHandDhopSiteDag( + StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, + std::vector > &buf, + int sF, int sU, const FermionField &in, FermionField &out); + + public: + WilsonKernels(const ImplParams &p = ImplParams()); + }; + + /////////////////////////////////////////////////////////// + // Default to no assembler implementation + /////////////////////////////////////////////////////////// + template + void WilsonKernels::DiracOptAsmDhopSite(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, + std::vector > &buf, + int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out) + { + assert(0); + } + template + void WilsonKernels::DiracOptAsmDhopSiteDag(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, + std::vector > &buf, + int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out) + { + assert(0); + } + } } - } - - template - typename std::enable_if< - (Impl::Dimension != 3 || (Impl::Dimension == 3 && Nc != 3)) && EnableBool, - void>::type - DiracOptDhopSiteDag( - StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, - std::vector > &buf, - int sF, int sU, int Ls, int Ns, const FermionField &in, - FermionField &out) { - for (int site = 0; site < Ns; site++) { - for (int s = 0; s < Ls; s++) { - WilsonKernels::DiracOptGenericDhopSiteDag(st, lo, U, buf, sF, sU, - in, out); - sF++; - } - sU++; - } - } - - void DiracOptDhopDir( - StencilImpl &st, DoubledGaugeField &U, - std::vector > &buf, - int sF, int sU, const FermionField &in, FermionField &out, int dirdisp, - int gamma); - - private: - // Specialised variants - void DiracOptGenericDhopSite( - StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, - std::vector > &buf, - int sF, int sU, const FermionField &in, FermionField &out); - - void DiracOptGenericDhopSiteDag( - StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, - std::vector > &buf, - int sF, int sU, const FermionField &in, FermionField &out); - - void DiracOptAsmDhopSite( - StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, - std::vector > &buf, - int sF, int sU, int Ls, int Ns, const FermionField &in, - FermionField &out); - - void DiracOptAsmDhopSiteDag( - StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, - std::vector > &buf, - int sF, int sU, int Ls, int Ns, const FermionField &in, - FermionField &out); - - void DiracOptHandDhopSite( - StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, - std::vector > &buf, - int sF, int sU, const FermionField &in, FermionField &out); - - void DiracOptHandDhopSiteDag( - StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, - std::vector > &buf, - int sF, int sU, const FermionField &in, FermionField &out); - - public: - WilsonKernels(const ImplParams &p = ImplParams()); - }; - - - } -} #endif diff --git a/lib/qcd/action/fermion/WilsonKernelsAsm.cc b/lib/qcd/action/fermion/WilsonKernelsAsm.cc index aa0c229c..d2cb4285 100644 --- a/lib/qcd/action/fermion/WilsonKernelsAsm.cc +++ b/lib/qcd/action/fermion/WilsonKernelsAsm.cc @@ -1,4 +1,4 @@ - /************************************************************************************* +/************************************************************************************* Grid physics library, www.github.com/paboyle/Grid @@ -26,68 +26,56 @@ Author: paboyle 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. See the full license in the file "LICENSE" in the top level distribution directory - *************************************************************************************/ - /* END LEGAL */ +*************************************************************************************/ +/* END LEGAL */ #include namespace Grid { -namespace QCD { - - - /////////////////////////////////////////////////////////// - // Default to no assembler implementation - /////////////////////////////////////////////////////////// - template - void WilsonKernels::DiracOptAsmDhopSite(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, - std::vector > &buf, - int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out) -{ - assert(0); -} - + namespace QCD { + #if defined(AVX512) - - - /////////////////////////////////////////////////////////// - // If we are AVX512 specialise the single precision routine - /////////////////////////////////////////////////////////// - + + + /////////////////////////////////////////////////////////// + // If we are AVX512 specialise the single precision routine + /////////////////////////////////////////////////////////// + #include #include - -static Vector signs; - -int setupSigns(void ){ - Vector bother(2); - signs = bother; - vrsign(signs[0]); - visign(signs[1]); - return 1; -} -static int signInit = setupSigns(); - + + static Vector signs; + + int setupSigns(void ){ + Vector bother(2); + signs = bother; + vrsign(signs[0]); + visign(signs[1]); + return 1; + } + static int signInit = setupSigns(); + #define label(A) ilabel(A) #define ilabel(A) ".globl\n" #A ":\n" - + #define MAYBEPERM(A,perm) if (perm) { A ; } #define MULT_2SPIN(ptr,pf) MULT_ADDSUB_2SPIN(ptr,pf) #define FX(A) WILSONASM_ ##A - + #undef KERNEL_DAG -template<> -void WilsonKernels::DiracOptAsmDhopSite(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, - std::vector > &buf, - int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out) + template<> + void WilsonKernels::DiracOptAsmDhopSite(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, + std::vector > &buf, + int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out) #include - + #define KERNEL_DAG -template<> -void WilsonKernels::DiracOptAsmDhopSiteDag(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, - std::vector > &buf, - int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out) + template<> + void WilsonKernels::DiracOptAsmDhopSiteDag(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, + std::vector > &buf, + int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out) #include - + #undef VMOVIDUP #undef VMOVRDUP #undef MAYBEPERM @@ -98,43 +86,22 @@ void WilsonKernels::DiracOptAsmDhopSiteDag(StencilImpl &st,Lebesgue #define VMOVIDUP(A,B,C) VBCASTIDUPf(A,B,C) #define VMOVRDUP(A,B,C) VBCASTRDUPf(A,B,C) #define MULT_2SPIN(ptr,pf) MULT_ADDSUB_2SPIN_LS(ptr,pf) - + #undef KERNEL_DAG -template<> -void WilsonKernels::DiracOptAsmDhopSite(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, - std::vector > &buf, - int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out) + template<> + void WilsonKernels::DiracOptAsmDhopSite(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, + std::vector > &buf, + int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out) #include - + #define KERNEL_DAG -template<> -void WilsonKernels::DiracOptAsmDhopSiteDag(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, - std::vector > &buf, - int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out) + template<> + void WilsonKernels::DiracOptAsmDhopSiteDag(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, + std::vector > &buf, + int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out) #include - + #endif - - - -template void WilsonKernels::DiracOptAsmDhopSite(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, - std::vector > &buf, - int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out); - -template void WilsonKernels::DiracOptAsmDhopSite(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, - std::vector > &buf, - int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out); -template void WilsonKernels::DiracOptAsmDhopSite(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, - std::vector > &buf, - int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out); -template void WilsonKernels::DiracOptAsmDhopSite(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, - std::vector > &buf, - int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out); -template void WilsonKernels::DiracOptAsmDhopSite(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, - std::vector > &buf, - int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out); -template void WilsonKernels::DiracOptAsmDhopSite(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, - std::vector > &buf, - int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out); -}} + } +}