diff --git a/Grid/qcd/action/fermion/WilsonKernelsAsm.cc b/Grid/qcd/action/fermion/WilsonKernelsAsm.cc new file mode 100644 index 00000000..55911988 --- /dev/null +++ b/Grid/qcd/action/fermion/WilsonKernelsAsm.cc @@ -0,0 +1,127 @@ +/************************************************************************************* + + Grid physics library, www.github.com/paboyle/Grid + + + + Source file: ./lib/qcd/action/fermion/WilsonKernelsAsm.cc + + Copyright (C) 2015 + +Author: Peter Boyle +Author: paboyle +Author: Guido Cossu + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + + See the full license in the file "LICENSE" in the top level distribution directory +*************************************************************************************/ +/* END LEGAL */ + +#include + +namespace Grid { +namespace QCD { + + +/////////////////////////////////////////////////////////// +// Default to no assembler implementation +/////////////////////////////////////////////////////////// +template void +WilsonKernels::AsmDhopSite(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U,SiteHalfSpinor *buf, + int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out) +{ + assert(0); +} + +template void +WilsonKernels::AsmDhopSiteDag(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U,SiteHalfSpinor *buf, + int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out) +{ + assert(0); +} + +template void +WilsonKernels::AsmDhopSiteInt(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U,SiteHalfSpinor *buf, + int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out) +{ + assert(0); +} + +template void +WilsonKernels::AsmDhopSiteDagInt(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U,SiteHalfSpinor *buf, + int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out) +{ + assert(0); +} + +template void +WilsonKernels::AsmDhopSiteExt(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U,SiteHalfSpinor *buf, + int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out) +{ + assert(0); +} + +template void +WilsonKernels::AsmDhopSiteDagExt(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U,SiteHalfSpinor *buf, + int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out) +{ + assert(0); +} + +#include +#include + +#define INSTANTIATE_ASM(A)\ +template void WilsonKernels::AsmDhopSite(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf,\ + int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out);\ + \ +template void WilsonKernels::AsmDhopSiteDag(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf,\ + int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out);\ +template void WilsonKernels::AsmDhopSiteInt(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf,\ + int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out);\ + \ +template void WilsonKernels::AsmDhopSiteDagInt(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf,\ + int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out);\ +template void WilsonKernels::AsmDhopSiteExt(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf,\ + int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out);\ + \ +template void WilsonKernels::AsmDhopSiteDagExt(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf,\ + int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out);\ + +INSTANTIATE_ASM(WilsonImplF); +INSTANTIATE_ASM(WilsonImplD); +INSTANTIATE_ASM(ZWilsonImplF); +INSTANTIATE_ASM(ZWilsonImplD); +INSTANTIATE_ASM(GparityWilsonImplF); +INSTANTIATE_ASM(GparityWilsonImplD); +INSTANTIATE_ASM(DomainWallVec5dImplF); +INSTANTIATE_ASM(DomainWallVec5dImplD); +INSTANTIATE_ASM(ZDomainWallVec5dImplF); +INSTANTIATE_ASM(ZDomainWallVec5dImplD); + +INSTANTIATE_ASM(WilsonImplFH); +INSTANTIATE_ASM(WilsonImplDF); +INSTANTIATE_ASM(ZWilsonImplFH); +INSTANTIATE_ASM(ZWilsonImplDF); +INSTANTIATE_ASM(GparityWilsonImplFH); +INSTANTIATE_ASM(GparityWilsonImplDF); +INSTANTIATE_ASM(DomainWallVec5dImplFH); +INSTANTIATE_ASM(DomainWallVec5dImplDF); +INSTANTIATE_ASM(ZDomainWallVec5dImplFH); +INSTANTIATE_ASM(ZDomainWallVec5dImplDF); + +}} + diff --git a/Grid/qcd/action/fermion/implementation/ImprovedStaggeredFermionImplementation.h b/Grid/qcd/action/fermion/implementation/ImprovedStaggeredFermionImplementation.h index 8bd01d57..e2605d81 100644 --- a/Grid/qcd/action/fermion/implementation/ImprovedStaggeredFermionImplementation.h +++ b/Grid/qcd/action/fermion/implementation/ImprovedStaggeredFermionImplementation.h @@ -25,8 +25,8 @@ with this program; if not, write to the Free Software Foundation, Inc., See the full license in the file "LICENSE" in the top level distribution directory *************************************************************************************/ - /* END LEGAL */ -#include +/* END LEGAL */ +#include #pragma once diff --git a/Grid/qcd/action/fermion/implementation/StaggeredKernelsAsm.h b/Grid/qcd/action/fermion/implementation/StaggeredKernelsAsm.h index 2a8435f4..1a13e73a 100644 --- a/Grid/qcd/action/fermion/implementation/StaggeredKernelsAsm.h +++ b/Grid/qcd/action/fermion/implementation/StaggeredKernelsAsm.h @@ -28,11 +28,11 @@ Author: paboyle /* END LEGAL */ #pragma once -#include +#include #ifdef AVX512 -#include -#include +#include +#include #endif // Interleave operations from two directions @@ -681,7 +681,7 @@ void StaggeredKernels::DhopSiteAsm(StencilImpl &st, LebesgueOrder &lo, gauge3 =(uint64_t)&UU[sU]( T ); // This is the single precision 5th direction vectorised kernel -#include +#include template <> void StaggeredKernels::DhopSiteAsm(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeFieldView &U, DoubledGaugeFieldView &UUU, @@ -735,7 +735,7 @@ template <> void StaggeredKernels::DhopSiteAsm(StencilImpl } -#include +#include template <> void StaggeredKernels::DhopSiteAsm(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeFieldView &U, DoubledGaugeFieldView &UUU, @@ -820,7 +820,7 @@ template <> void StaggeredKernels::DhopSiteAsm(StencilImpl // This is the single precision 5th direction vectorised kernel -#include +#include template <> void StaggeredKernels::DhopSiteAsm(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeFieldView &U, DoubledGaugeFieldView &UUU, @@ -889,7 +889,7 @@ template <> void StaggeredKernels::DhopSiteAsm(StencilImpl &st, #endif } -#include +#include template <> void StaggeredKernels::DhopSiteAsm(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeFieldView &U, DoubledGaugeFieldView &UUU, diff --git a/Grid/qcd/action/fermion/implementation/StaggeredKernelsHand.h b/Grid/qcd/action/fermion/implementation/StaggeredKernelsHand.h index e1685957..62370d75 100644 --- a/Grid/qcd/action/fermion/implementation/StaggeredKernelsHand.h +++ b/Grid/qcd/action/fermion/implementation/StaggeredKernelsHand.h @@ -26,7 +26,7 @@ Author: paboyle See the full license in the file "LICENSE" in the top level distribution directory *************************************************************************************/ /* END LEGAL */ -#include +#include #pragma once diff --git a/benchmarks/Benchmark_comms.cc b/benchmarks/Benchmark_comms.cc index 66d37b2f..2ee66005 100644 --- a/benchmarks/Benchmark_comms.cc +++ b/benchmarks/Benchmark_comms.cc @@ -29,7 +29,6 @@ Author: Peter Boyle using namespace std; using namespace Grid; - ; struct time_statistics{ double mean; @@ -187,9 +186,9 @@ int main (int argc, char ** argv) rbuf[mu].resize(lat*lat*lat*Ls); // std::cout << " buffers " << std::hex << (uint64_t)&xbuf[mu][0] <<" " << (uint64_t)&rbuf[mu][0] < xbuf(8); std::vector rbuf(8); Grid.ShmBufferFreeAll(); + uint64_t bytes = lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD); for(int d=0;d<8;d++){ +<<<<<<< HEAD xbuf[d] = (HalfSpinColourVectorD *)Grid.ShmBufferMalloc(lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD)); rbuf[d] = (HalfSpinColourVectorD *)Grid.ShmBufferMalloc(lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD)); // bzero((void *)xbuf[d],lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD)); // bzero((void *)rbuf[d],lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD)); +======= + xbuf[d] = (HalfSpinColourVectorD *)Grid.ShmBufferMalloc(bytes); + rbuf[d] = (HalfSpinColourVectorD *)Grid.ShmBufferMalloc(bytes); + bzero((void *)xbuf[d],bytes); + bzero((void *)rbuf[d],bytes); +>>>>>>> develop } int ncomm; - int bytes=lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD); double dbytes; for(int i=0;i xbuf(8); std::vector rbuf(8); Grid.ShmBufferFreeAll(); + uint64_t bytes=lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD); for(int d=0;d<8;d++){ +<<<<<<< HEAD xbuf[d] = (HalfSpinColourVectorD *)Grid.ShmBufferMalloc(lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD)); rbuf[d] = (HalfSpinColourVectorD *)Grid.ShmBufferMalloc(lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD)); // bzero((void *)xbuf[d],lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD)); // bzero((void *)rbuf[d],lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD)); +======= + xbuf[d] = (HalfSpinColourVectorD *)Grid.ShmBufferMalloc(bytes); + rbuf[d] = (HalfSpinColourVectorD *)Grid.ShmBufferMalloc(bytes); + bzero((void *)xbuf[d],bytes); + bzero((void *)rbuf[d],bytes); +>>>>>>> develop } int ncomm; - int bytes=lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD); double dbytes; for(int i=0;i xbuf(8); std::vector rbuf(8); Grid.ShmBufferFreeAll(); + uint64_t bytes = lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD); for(int d=0;d<8;d++){ - xbuf[d] = (HalfSpinColourVectorD *)Grid.ShmBufferMalloc(lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD)); - rbuf[d] = (HalfSpinColourVectorD *)Grid.ShmBufferMalloc(lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD)); - // bzero((void *)xbuf[d],lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD)); - // bzero((void *)rbuf[d],lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD)); + xbuf[d] = (HalfSpinColourVectorD *)Grid.ShmBufferMalloc(bytes); + rbuf[d] = (HalfSpinColourVectorD *)Grid.ShmBufferMalloc(bytes); } int ncomm; - int bytes=lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD); double dbytes; for(int i=0;i