mirror of
https://github.com/paboyle/Grid.git
synced 2025-04-10 06:00:45 +01:00
Merge branch 'develop' into feature/gpu-port
Conflicts: Grid/qcd/action/fermion/WilsonKernelsAsm.cc Grid/qcd/action/fermion/implementation/ImprovedStaggeredFermionImplementation.h Grid/qcd/action/fermion/implementation/StaggeredKernelsAsm.h benchmarks/Benchmark_comms.cc
This commit is contained in:
commit
48e6efc7c9
127
Grid/qcd/action/fermion/WilsonKernelsAsm.cc
Normal file
127
Grid/qcd/action/fermion/WilsonKernelsAsm.cc
Normal file
@ -0,0 +1,127 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
|
||||
|
||||
Source file: ./lib/qcd/action/fermion/WilsonKernelsAsm.cc
|
||||
|
||||
Copyright (C) 2015
|
||||
|
||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||
Author: paboyle <paboyle@ph.ed.ac.uk>
|
||||
Author: Guido Cossu <guido.cossu@ed.ac.uk>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
|
||||
#include <Grid/qcd/action/fermion/FermionCore.h>
|
||||
|
||||
namespace Grid {
|
||||
namespace QCD {
|
||||
|
||||
|
||||
///////////////////////////////////////////////////////////
|
||||
// Default to no assembler implementation
|
||||
///////////////////////////////////////////////////////////
|
||||
template<class Impl> void
|
||||
WilsonKernels<Impl >::AsmDhopSite(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U,SiteHalfSpinor *buf,
|
||||
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
|
||||
{
|
||||
assert(0);
|
||||
}
|
||||
|
||||
template<class Impl> void
|
||||
WilsonKernels<Impl >::AsmDhopSiteDag(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U,SiteHalfSpinor *buf,
|
||||
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
|
||||
{
|
||||
assert(0);
|
||||
}
|
||||
|
||||
template<class Impl> void
|
||||
WilsonKernels<Impl >::AsmDhopSiteInt(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U,SiteHalfSpinor *buf,
|
||||
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
|
||||
{
|
||||
assert(0);
|
||||
}
|
||||
|
||||
template<class Impl> void
|
||||
WilsonKernels<Impl >::AsmDhopSiteDagInt(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U,SiteHalfSpinor *buf,
|
||||
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
|
||||
{
|
||||
assert(0);
|
||||
}
|
||||
|
||||
template<class Impl> void
|
||||
WilsonKernels<Impl >::AsmDhopSiteExt(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U,SiteHalfSpinor *buf,
|
||||
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
|
||||
{
|
||||
assert(0);
|
||||
}
|
||||
|
||||
template<class Impl> void
|
||||
WilsonKernels<Impl >::AsmDhopSiteDagExt(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U,SiteHalfSpinor *buf,
|
||||
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
|
||||
{
|
||||
assert(0);
|
||||
}
|
||||
|
||||
#include <Grid/qcd/action/fermion/WilsonKernelsAsmAvx512.h>
|
||||
#include <Grid/qcd/action/fermion/WilsonKernelsAsmQPX.h>
|
||||
|
||||
#define INSTANTIATE_ASM(A)\
|
||||
template void WilsonKernels<A>::AsmDhopSite(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf,\
|
||||
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out);\
|
||||
\
|
||||
template void WilsonKernels<A>::AsmDhopSiteDag(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf,\
|
||||
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out);\
|
||||
template void WilsonKernels<A>::AsmDhopSiteInt(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf,\
|
||||
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out);\
|
||||
\
|
||||
template void WilsonKernels<A>::AsmDhopSiteDagInt(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf,\
|
||||
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out);\
|
||||
template void WilsonKernels<A>::AsmDhopSiteExt(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf,\
|
||||
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out);\
|
||||
\
|
||||
template void WilsonKernels<A>::AsmDhopSiteDagExt(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf,\
|
||||
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out);\
|
||||
|
||||
INSTANTIATE_ASM(WilsonImplF);
|
||||
INSTANTIATE_ASM(WilsonImplD);
|
||||
INSTANTIATE_ASM(ZWilsonImplF);
|
||||
INSTANTIATE_ASM(ZWilsonImplD);
|
||||
INSTANTIATE_ASM(GparityWilsonImplF);
|
||||
INSTANTIATE_ASM(GparityWilsonImplD);
|
||||
INSTANTIATE_ASM(DomainWallVec5dImplF);
|
||||
INSTANTIATE_ASM(DomainWallVec5dImplD);
|
||||
INSTANTIATE_ASM(ZDomainWallVec5dImplF);
|
||||
INSTANTIATE_ASM(ZDomainWallVec5dImplD);
|
||||
|
||||
INSTANTIATE_ASM(WilsonImplFH);
|
||||
INSTANTIATE_ASM(WilsonImplDF);
|
||||
INSTANTIATE_ASM(ZWilsonImplFH);
|
||||
INSTANTIATE_ASM(ZWilsonImplDF);
|
||||
INSTANTIATE_ASM(GparityWilsonImplFH);
|
||||
INSTANTIATE_ASM(GparityWilsonImplDF);
|
||||
INSTANTIATE_ASM(DomainWallVec5dImplFH);
|
||||
INSTANTIATE_ASM(DomainWallVec5dImplDF);
|
||||
INSTANTIATE_ASM(ZDomainWallVec5dImplFH);
|
||||
INSTANTIATE_ASM(ZDomainWallVec5dImplDF);
|
||||
|
||||
}}
|
||||
|
@ -25,8 +25,8 @@ with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
See the full license in the file "LICENSE" in the top level distribution
|
||||
directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
#include <Grid.h>
|
||||
/* END LEGAL */
|
||||
#include <Grid/Grid.h>
|
||||
|
||||
#pragma once
|
||||
|
||||
|
@ -28,11 +28,11 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
|
||||
/* END LEGAL */
|
||||
#pragma once
|
||||
|
||||
#include <Grid.h>
|
||||
#include <Grid/Grid.h>
|
||||
|
||||
#ifdef AVX512
|
||||
#include <simd/Intel512common.h>
|
||||
#include <simd/Intel512avx.h>
|
||||
#include <Grid/simd/Intel512common.h>
|
||||
#include <Grid/simd/Intel512avx.h>
|
||||
#endif
|
||||
|
||||
// Interleave operations from two directions
|
||||
@ -681,7 +681,7 @@ void StaggeredKernels<Impl>::DhopSiteAsm(StencilImpl &st, LebesgueOrder &lo,
|
||||
gauge3 =(uint64_t)&UU[sU]( T );
|
||||
|
||||
// This is the single precision 5th direction vectorised kernel
|
||||
#include <simd/Intel512single.h>
|
||||
#include <Grid/simd/Intel512single.h>
|
||||
template <> void StaggeredKernels<StaggeredVec5dImplF>::DhopSiteAsm(StencilImpl &st, LebesgueOrder &lo,
|
||||
DoubledGaugeFieldView &U,
|
||||
DoubledGaugeFieldView &UUU,
|
||||
@ -735,7 +735,7 @@ template <> void StaggeredKernels<StaggeredVec5dImplF>::DhopSiteAsm(StencilImpl
|
||||
|
||||
}
|
||||
|
||||
#include <simd/Intel512double.h>
|
||||
#include <Grid/simd/Intel512double.h>
|
||||
template <> void StaggeredKernels<StaggeredVec5dImplD>::DhopSiteAsm(StencilImpl &st, LebesgueOrder &lo,
|
||||
DoubledGaugeFieldView &U,
|
||||
DoubledGaugeFieldView &UUU,
|
||||
@ -820,7 +820,7 @@ template <> void StaggeredKernels<StaggeredVec5dImplD>::DhopSiteAsm(StencilImpl
|
||||
|
||||
// This is the single precision 5th direction vectorised kernel
|
||||
|
||||
#include <simd/Intel512single.h>
|
||||
#include <Grid/simd/Intel512single.h>
|
||||
template <> void StaggeredKernels<StaggeredImplF>::DhopSiteAsm(StencilImpl &st, LebesgueOrder &lo,
|
||||
DoubledGaugeFieldView &U,
|
||||
DoubledGaugeFieldView &UUU,
|
||||
@ -889,7 +889,7 @@ template <> void StaggeredKernels<StaggeredImplF>::DhopSiteAsm(StencilImpl &st,
|
||||
#endif
|
||||
}
|
||||
|
||||
#include <simd/Intel512double.h>
|
||||
#include <Grid/simd/Intel512double.h>
|
||||
template <> void StaggeredKernels<StaggeredImplD>::DhopSiteAsm(StencilImpl &st, LebesgueOrder &lo,
|
||||
DoubledGaugeFieldView &U,
|
||||
DoubledGaugeFieldView &UUU,
|
||||
|
@ -26,7 +26,7 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
|
||||
See the full license in the file "LICENSE" in the top level distribution directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
#include <Grid.h>
|
||||
#include <Grid/Grid.h>
|
||||
|
||||
#pragma once
|
||||
|
||||
|
@ -29,7 +29,6 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||
|
||||
using namespace std;
|
||||
using namespace Grid;
|
||||
;
|
||||
|
||||
struct time_statistics{
|
||||
double mean;
|
||||
@ -187,9 +186,9 @@ int main (int argc, char ** argv)
|
||||
rbuf[mu].resize(lat*lat*lat*Ls);
|
||||
// std::cout << " buffers " << std::hex << (uint64_t)&xbuf[mu][0] <<" " << (uint64_t)&rbuf[mu][0] <<std::endl;
|
||||
}
|
||||
uint64_t bytes=lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD);
|
||||
|
||||
int ncomm;
|
||||
int bytes=lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD);
|
||||
|
||||
for(int i=0;i<Nloop;i++){
|
||||
double start=usecond();
|
||||
@ -276,15 +275,22 @@ int main (int argc, char ** argv)
|
||||
std::vector<HalfSpinColourVectorD *> xbuf(8);
|
||||
std::vector<HalfSpinColourVectorD *> rbuf(8);
|
||||
Grid.ShmBufferFreeAll();
|
||||
uint64_t bytes = lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD);
|
||||
for(int d=0;d<8;d++){
|
||||
<<<<<<< HEAD
|
||||
xbuf[d] = (HalfSpinColourVectorD *)Grid.ShmBufferMalloc(lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD));
|
||||
rbuf[d] = (HalfSpinColourVectorD *)Grid.ShmBufferMalloc(lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD));
|
||||
// bzero((void *)xbuf[d],lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD));
|
||||
// bzero((void *)rbuf[d],lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD));
|
||||
=======
|
||||
xbuf[d] = (HalfSpinColourVectorD *)Grid.ShmBufferMalloc(bytes);
|
||||
rbuf[d] = (HalfSpinColourVectorD *)Grid.ShmBufferMalloc(bytes);
|
||||
bzero((void *)xbuf[d],bytes);
|
||||
bzero((void *)rbuf[d],bytes);
|
||||
>>>>>>> develop
|
||||
}
|
||||
|
||||
int ncomm;
|
||||
int bytes=lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD);
|
||||
|
||||
double dbytes;
|
||||
for(int i=0;i<Nloop;i++){
|
||||
@ -373,15 +379,22 @@ int main (int argc, char ** argv)
|
||||
std::vector<HalfSpinColourVectorD *> xbuf(8);
|
||||
std::vector<HalfSpinColourVectorD *> rbuf(8);
|
||||
Grid.ShmBufferFreeAll();
|
||||
uint64_t bytes=lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD);
|
||||
for(int d=0;d<8;d++){
|
||||
<<<<<<< HEAD
|
||||
xbuf[d] = (HalfSpinColourVectorD *)Grid.ShmBufferMalloc(lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD));
|
||||
rbuf[d] = (HalfSpinColourVectorD *)Grid.ShmBufferMalloc(lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD));
|
||||
// bzero((void *)xbuf[d],lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD));
|
||||
// bzero((void *)rbuf[d],lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD));
|
||||
=======
|
||||
xbuf[d] = (HalfSpinColourVectorD *)Grid.ShmBufferMalloc(bytes);
|
||||
rbuf[d] = (HalfSpinColourVectorD *)Grid.ShmBufferMalloc(bytes);
|
||||
bzero((void *)xbuf[d],bytes);
|
||||
bzero((void *)rbuf[d],bytes);
|
||||
>>>>>>> develop
|
||||
}
|
||||
|
||||
int ncomm;
|
||||
int bytes=lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD);
|
||||
double dbytes;
|
||||
for(int i=0;i<Nloop;i++){
|
||||
double start=usecond();
|
||||
@ -471,15 +484,13 @@ int main (int argc, char ** argv)
|
||||
std::vector<HalfSpinColourVectorD *> xbuf(8);
|
||||
std::vector<HalfSpinColourVectorD *> rbuf(8);
|
||||
Grid.ShmBufferFreeAll();
|
||||
uint64_t bytes = lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD);
|
||||
for(int d=0;d<8;d++){
|
||||
xbuf[d] = (HalfSpinColourVectorD *)Grid.ShmBufferMalloc(lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD));
|
||||
rbuf[d] = (HalfSpinColourVectorD *)Grid.ShmBufferMalloc(lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD));
|
||||
// bzero((void *)xbuf[d],lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD));
|
||||
// bzero((void *)rbuf[d],lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD));
|
||||
xbuf[d] = (HalfSpinColourVectorD *)Grid.ShmBufferMalloc(bytes);
|
||||
rbuf[d] = (HalfSpinColourVectorD *)Grid.ShmBufferMalloc(bytes);
|
||||
}
|
||||
|
||||
int ncomm;
|
||||
int bytes=lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD);
|
||||
double dbytes;
|
||||
for(int i=0;i<Nloop;i++){
|
||||
double start=usecond();
|
||||
|
@ -262,7 +262,6 @@ Set HEADER_SEARCH_PATHS to:
|
||||
|
||||
$Grid/build$(CONFIGURATION)/Grid
|
||||
$Grid
|
||||
$Grid/Grid
|
||||
|
||||
followed by (***the order is important***) the locations reported by `grid-config --cxxflags`, ignoring duplicates, e.g.:
|
||||
|
||||
@ -272,7 +271,7 @@ followed by (***the order is important***) the locations reported by `grid-confi
|
||||
|
||||
**Note: the easiest way to set this value is to put it all on one line, space separated, and edit the text to the right of `HEADER_SEARCH_PATHS`**, i.e.:
|
||||
|
||||
$Grid/build$(CONFIGURATION)/Grid $Grid $Grid/Grid $GridPre/openmpi/include $GridPkg/include $GridPre/lime/include
|
||||
$Grid/build$(CONFIGURATION)/Grid $Grid $GridPre/openmpi/include $GridPkg/include $GridPre/lime/include
|
||||
|
||||
#### LIBRARY_SEARCH_PATHS
|
||||
|
||||
@ -298,7 +297,7 @@ The easiest way to link to all required libraries is to obtain a list of all lib
|
||||
|
||||
and pasting the output ***with `-lGrid -lHadrons ` prepended*** (including the `-l` switches) directly into `OTHER_LDFLAGS`, e.g.:
|
||||
|
||||
-lGrid -lHadrons -lmpi -lhdf5_cpp -lz -lcrypto -llime -lfftw3f -lfftw3 -lmpfr -lgmp -lstdc++ -lm -lz -lhdf5
|
||||
-lGrid -lHadrons -lmpi -lhdf5_cpp -lhdf5 -lz -lcrypto -llime -lfftw3f -lfftw3 -lmpfr -lgmp -lm
|
||||
|
||||
## Make additional configurations
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user