mirror of
https://github.com/paboyle/Grid.git
synced 2024-11-09 23:45:36 +00:00
Enhanced SIMD interfacing
This commit is contained in:
parent
6103c29ee3
commit
6cec662ac5
@ -8,7 +8,7 @@ int main (int argc, char ** argv)
|
|||||||
{
|
{
|
||||||
Grid_init(&argc,&argv);
|
Grid_init(&argc,&argv);
|
||||||
|
|
||||||
std::vector<int> simd_layout = GridDefaultSimd();
|
std::vector<int> simd_layout = GridDefaultSimd(Nd,vComplexD::Nsimd());
|
||||||
std::vector<int> mpi_layout = GridDefaultMpi();
|
std::vector<int> mpi_layout = GridDefaultMpi();
|
||||||
|
|
||||||
int Nloop=10;
|
int Nloop=10;
|
||||||
|
@ -13,7 +13,7 @@ int main (int argc, char ** argv)
|
|||||||
|
|
||||||
int Nloop=1000;
|
int Nloop=1000;
|
||||||
|
|
||||||
std::vector<int> simd_layout = GridDefaultSimd();
|
std::vector<int> simd_layout = GridDefaultSimd(Nd,vReal::Nsimd());
|
||||||
std::vector<int> mpi_layout = GridDefaultMpi();
|
std::vector<int> mpi_layout = GridDefaultMpi();
|
||||||
|
|
||||||
std::cout << "===================================================================================================="<<std::endl;
|
std::cout << "===================================================================================================="<<std::endl;
|
||||||
@ -54,7 +54,6 @@ int main (int argc, char ** argv)
|
|||||||
std::cout << "===================================================================================================="<<std::endl;
|
std::cout << "===================================================================================================="<<std::endl;
|
||||||
std::cout << " L "<<"\t\t"<<"bytes"<<"\t\t"<<"GB/s"<<std::endl;
|
std::cout << " L "<<"\t\t"<<"bytes"<<"\t\t"<<"GB/s"<<std::endl;
|
||||||
std::cout << "----------------------------------------------------------"<<std::endl;
|
std::cout << "----------------------------------------------------------"<<std::endl;
|
||||||
|
|
||||||
|
|
||||||
for(int lat=4;lat<=32;lat+=4){
|
for(int lat=4;lat<=32;lat+=4){
|
||||||
|
|
||||||
@ -69,7 +68,6 @@ int main (int argc, char ** argv)
|
|||||||
LatticeVec y(&Grid); //random(pRNG,y);
|
LatticeVec y(&Grid); //random(pRNG,y);
|
||||||
double a=2.0;
|
double a=2.0;
|
||||||
|
|
||||||
|
|
||||||
double start=usecond();
|
double start=usecond();
|
||||||
for(int i=0;i<Nloop;i++){
|
for(int i=0;i<Nloop;i++){
|
||||||
z=a*x-y;
|
z=a*x-y;
|
||||||
|
@ -22,7 +22,7 @@ int main (int argc, char ** argv)
|
|||||||
|
|
||||||
|
|
||||||
std::vector<int> latt_size = GridDefaultLatt();
|
std::vector<int> latt_size = GridDefaultLatt();
|
||||||
std::vector<int> simd_layout = GridDefaultSimd();
|
std::vector<int> simd_layout = GridDefaultSimd(Nd,vComplexF::Nsimd());
|
||||||
std::vector<int> mpi_layout = GridDefaultMpi();
|
std::vector<int> mpi_layout = GridDefaultMpi();
|
||||||
GridCartesian Grid(latt_size,simd_layout,mpi_layout);
|
GridCartesian Grid(latt_size,simd_layout,mpi_layout);
|
||||||
|
|
||||||
|
@ -71,7 +71,7 @@ namespace Grid {
|
|||||||
// C++11 time facilities better?
|
// C++11 time facilities better?
|
||||||
double usecond(void);
|
double usecond(void);
|
||||||
|
|
||||||
const std::vector<int> &GridDefaultSimd(void);
|
const std::vector<int> GridDefaultSimd(int dims,int nsimd);
|
||||||
const std::vector<int> &GridDefaultLatt(void);
|
const std::vector<int> &GridDefaultLatt(void);
|
||||||
const std::vector<int> &GridDefaultMpi(void);
|
const std::vector<int> &GridDefaultMpi(void);
|
||||||
const int &GridThreads(void) ;
|
const int &GridThreads(void) ;
|
||||||
|
@ -27,14 +27,28 @@ namespace Grid {
|
|||||||
// Convenience functions to access standard command line arg
|
// Convenience functions to access standard command line arg
|
||||||
// driven parallelism controls
|
// driven parallelism controls
|
||||||
//////////////////////////////////////////////////////
|
//////////////////////////////////////////////////////
|
||||||
static std::vector<int> Grid_default_simd;
|
|
||||||
static std::vector<int> Grid_default_latt;
|
static std::vector<int> Grid_default_latt;
|
||||||
static std::vector<int> Grid_default_mpi;
|
static std::vector<int> Grid_default_mpi;
|
||||||
|
|
||||||
int GridThread::_threads;
|
int GridThread::_threads;
|
||||||
|
|
||||||
|
//////////////////////////////////////////////////////
// Build a default SIMD decomposition for a vector type with nsimd lanes
// on a dims-dimensional lattice.
//
// Factors of two are handed out one per dimension, starting from the last
// dimension and working towards the first; every remaining dimension gets 1.
// The product of the returned entries therefore equals nsimd.
// Example: dims=4, nsimd=4 -> {1,1,2,2}.
//
// dims  : number of lattice dimensions (must be >= 0)
// nsimd : number of SIMD lanes; must be a power of two no larger than 2^dims
//
// Invalid arguments trip an assert (consistent with the original trailing
// assert); they are not reported any other way.
//////////////////////////////////////////////////////
const std::vector<int> GridDefaultSimd(int dims,int nsimd)
{
  // A negative dimension count would otherwise be converted to a huge
  // unsigned vector size.
  assert(dims>=0);

  // Only a power of two can be split exactly into factors of two. Without
  // this check e.g. nsimd=3 halves (by integer division) down to 1 and would
  // silently yield a layout whose product is 2, not 3.
  assert(nsimd>=1 && (nsimd&(nsimd-1))==0);

  std::vector<int> layout(dims,1);
  int nn=nsimd;
  for(int d=dims-1;d>=0 && nn>1;d--){
    layout[d]=2;
    nn/=2;
  }

  // Lanes left over here means nsimd > 2^dims: more lanes than can be
  // placed with at most one factor of two per dimension.
  assert(nn==1);
  return layout;
}
|
||||||
|
|
||||||
const std::vector<int> &GridDefaultSimd(void) {return Grid_default_simd;};
|
|
||||||
const std::vector<int> &GridDefaultLatt(void) {return Grid_default_latt;};
|
const std::vector<int> &GridDefaultLatt(void) {return Grid_default_latt;};
|
||||||
const std::vector<int> &GridDefaultMpi(void) {return Grid_default_mpi;};
|
const std::vector<int> &GridDefaultMpi(void) {return Grid_default_mpi;};
|
||||||
|
|
||||||
@ -71,22 +85,11 @@ void GridCmdOptionIntVector(std::string &str,std::vector<int> & vec)
|
|||||||
|
|
||||||
void GridParseLayout(char **argv,int argc,
|
void GridParseLayout(char **argv,int argc,
|
||||||
std::vector<int> &latt,
|
std::vector<int> &latt,
|
||||||
std::vector<int> &simd,
|
|
||||||
std::vector<int> &mpi)
|
std::vector<int> &mpi)
|
||||||
{
|
{
|
||||||
mpi =std::vector<int>({1,1,1,1});
|
mpi =std::vector<int>({1,1,1,1});
|
||||||
latt=std::vector<int>({8,8,8,8});
|
latt=std::vector<int>({8,8,8,8});
|
||||||
|
|
||||||
#if defined(SSE4)
|
|
||||||
simd=std::vector<int>({1,1,1,2});
|
|
||||||
#endif
|
|
||||||
#if defined(AVX1) || defined (AVX2)
|
|
||||||
simd=std::vector<int>({1,1,2,2});
|
|
||||||
#endif
|
|
||||||
#if defined(AVX512)
|
|
||||||
simd=std::vector<int>({1,2,2,2});
|
|
||||||
#endif
|
|
||||||
|
|
||||||
GridThread::SetMaxThreads();
|
GridThread::SetMaxThreads();
|
||||||
|
|
||||||
std::string arg;
|
std::string arg;
|
||||||
@ -94,10 +97,6 @@ void GridParseLayout(char **argv,int argc,
|
|||||||
arg = GridCmdOptionPayload(argv,argv+argc,"--mpi");
|
arg = GridCmdOptionPayload(argv,argv+argc,"--mpi");
|
||||||
GridCmdOptionIntVector(arg,mpi);
|
GridCmdOptionIntVector(arg,mpi);
|
||||||
}
|
}
|
||||||
if( GridCmdOptionExists(argv,argv+argc,"--simd") ){
|
|
||||||
arg= GridCmdOptionPayload(argv,argv+argc,"--simd");
|
|
||||||
GridCmdOptionIntVector(arg,simd);
|
|
||||||
}
|
|
||||||
if( GridCmdOptionExists(argv,argv+argc,"--grid") ){
|
if( GridCmdOptionExists(argv,argv+argc,"--grid") ){
|
||||||
arg= GridCmdOptionPayload(argv,argv+argc,"--grid");
|
arg= GridCmdOptionPayload(argv,argv+argc,"--grid");
|
||||||
GridCmdOptionIntVector(arg,latt);
|
GridCmdOptionIntVector(arg,latt);
|
||||||
@ -129,7 +128,6 @@ void Grid_init(int *argc,char ***argv)
|
|||||||
}
|
}
|
||||||
GridParseLayout(*argv,*argc,
|
GridParseLayout(*argv,*argc,
|
||||||
Grid_default_latt,
|
Grid_default_latt,
|
||||||
Grid_default_simd,
|
|
||||||
Grid_default_mpi);
|
Grid_default_mpi);
|
||||||
|
|
||||||
}
|
}
|
||||||
|
@ -67,6 +67,10 @@ inline void GridFromExpression(GridBase * &grid,const T1& lat) // Lattice leaf
|
|||||||
}
|
}
|
||||||
grid=lat._grid;
|
grid=lat._grid;
|
||||||
}
|
}
|
||||||
|
template<class T1,typename std::enable_if<!is_lattice<T1>::value, T1>::type * = nullptr >
|
||||||
|
inline void GridFromExpression(GridBase * &grid,const T1& notlat) // non-lattice leaf
|
||||||
|
{
|
||||||
|
}
|
||||||
template <typename Op, typename T1>
|
template <typename Op, typename T1>
|
||||||
inline void GridFromExpression(GridBase * &grid,const LatticeUnaryExpression<Op,T1 > &expr)
|
inline void GridFromExpression(GridBase * &grid,const LatticeUnaryExpression<Op,T1 > &expr)
|
||||||
{
|
{
|
||||||
@ -86,10 +90,6 @@ inline void GridFromExpression( GridBase * &grid,const LatticeTrinaryExpression<
|
|||||||
GridFromExpression(grid,std::get<1>(expr.second));
|
GridFromExpression(grid,std::get<1>(expr.second));
|
||||||
GridFromExpression(grid,std::get<2>(expr.second));
|
GridFromExpression(grid,std::get<2>(expr.second));
|
||||||
}
|
}
|
||||||
template<class T1,typename std::enable_if<!is_lattice<T1>::value, T1>::type * = nullptr >
|
|
||||||
inline void GridFromExpression(GridBase * &grid,const T1& notlat) // non-lattice leaf
|
|
||||||
{
|
|
||||||
}
|
|
||||||
|
|
||||||
////////////////////////////////////////////
|
////////////////////////////////////////////
|
||||||
// Unary operators and funcs
|
// Unary operators and funcs
|
||||||
|
@ -145,7 +145,7 @@ PARALLEL_FOR_LOOP
|
|||||||
template<class sobj,class vobj>
|
template<class sobj,class vobj>
|
||||||
inline void axpy(Lattice<vobj> &ret,sobj a,const Lattice<vobj> &lhs,const Lattice<vobj> &rhs){
|
inline void axpy(Lattice<vobj> &ret,sobj a,const Lattice<vobj> &lhs,const Lattice<vobj> &rhs){
|
||||||
conformable(lhs,rhs);
|
conformable(lhs,rhs);
|
||||||
PARALLEL_FOR_LOOP
|
#pragma omp parallel for
|
||||||
for(int ss=0;ss<lhs._grid->oSites();ss++){
|
for(int ss=0;ss<lhs._grid->oSites();ss++){
|
||||||
vobj tmp = a*lhs._odata[ss];
|
vobj tmp = a*lhs._odata[ss];
|
||||||
vstream(ret._odata[ss],tmp+rhs._odata[ss]);
|
vstream(ret._odata[ss],tmp+rhs._odata[ss]);
|
||||||
|
@ -64,7 +64,8 @@ public:
|
|||||||
////////////////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
template <typename Op, typename T1> inline Lattice<vobj> & operator=(const LatticeUnaryExpression<Op,T1> &expr)
|
template <typename Op, typename T1> inline Lattice<vobj> & operator=(const LatticeUnaryExpression<Op,T1> &expr)
|
||||||
{
|
{
|
||||||
PARALLEL_FOR_LOOP
|
//PARALLEL_FOR_LOOP
|
||||||
|
#pragma omp parallel for
|
||||||
for(int ss=0;ss<_grid->oSites();ss++){
|
for(int ss=0;ss<_grid->oSites();ss++){
|
||||||
vobj tmp= eval(ss,expr);
|
vobj tmp= eval(ss,expr);
|
||||||
vstream(_odata[ss] ,tmp);
|
vstream(_odata[ss] ,tmp);
|
||||||
@ -73,7 +74,8 @@ PARALLEL_FOR_LOOP
|
|||||||
}
|
}
|
||||||
template <typename Op, typename T1,typename T2> inline Lattice<vobj> & operator=(const LatticeBinaryExpression<Op,T1,T2> &expr)
|
template <typename Op, typename T1,typename T2> inline Lattice<vobj> & operator=(const LatticeBinaryExpression<Op,T1,T2> &expr)
|
||||||
{
|
{
|
||||||
PARALLEL_FOR_LOOP
|
// PARALLEL_FOR_LOOP
|
||||||
|
#pragma omp parallel for
|
||||||
for(int ss=0;ss<_grid->oSites();ss++){
|
for(int ss=0;ss<_grid->oSites();ss++){
|
||||||
vobj tmp= eval(ss,expr);
|
vobj tmp= eval(ss,expr);
|
||||||
vstream(_odata[ss] ,tmp);
|
vstream(_odata[ss] ,tmp);
|
||||||
@ -82,7 +84,8 @@ PARALLEL_FOR_LOOP
|
|||||||
}
|
}
|
||||||
template <typename Op, typename T1,typename T2,typename T3> inline Lattice<vobj> & operator=(const LatticeTrinaryExpression<Op,T1,T2,T3> &expr)
|
template <typename Op, typename T1,typename T2,typename T3> inline Lattice<vobj> & operator=(const LatticeTrinaryExpression<Op,T1,T2,T3> &expr)
|
||||||
{
|
{
|
||||||
PARALLEL_FOR_LOOP
|
//PARALLEL_FOR_LOOP
|
||||||
|
#pragma omp parallel for
|
||||||
for(int ss=0;ss<_grid->oSites();ss++){
|
for(int ss=0;ss<_grid->oSites();ss++){
|
||||||
vobj tmp= eval(ss,expr);
|
vobj tmp= eval(ss,expr);
|
||||||
vstream(_odata[ss] ,tmp);
|
vstream(_odata[ss] ,tmp);
|
||||||
@ -176,15 +179,16 @@ PARALLEL_FOR_LOOP
|
|||||||
}; // class Lattice
|
}; // class Lattice
|
||||||
}
|
}
|
||||||
|
|
||||||
#undef GRID_LATTICE_EXPRESSION_TEMPLATES
|
|
||||||
|
|
||||||
#include <lattice/Grid_lattice_conformable.h>
|
#include <lattice/Grid_lattice_conformable.h>
|
||||||
|
|
||||||
#ifdef GRID_LATTICE_EXPRESSION_TEMPLATES
|
#define GRID_LATTICE_EXPRESSION_TEMPLATES
|
||||||
|
#ifdef GRID_LATTICE_EXPRESSION_TEMPLATES
|
||||||
#include <lattice/Grid_lattice_ET.h>
|
#include <lattice/Grid_lattice_ET.h>
|
||||||
#else
|
#else
|
||||||
#include <lattice/Grid_lattice_overload.h>
|
#include <lattice/Grid_lattice_overload.h>
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#include <lattice/Grid_lattice_arith.h>
|
#include <lattice/Grid_lattice_arith.h>
|
||||||
|
|
||||||
#include <lattice/Grid_lattice_trace.h>
|
#include <lattice/Grid_lattice_trace.h>
|
||||||
|
@ -28,9 +28,9 @@ namespace Grid {
|
|||||||
vzero(*this);
|
vzero(*this);
|
||||||
return (*this);
|
return (*this);
|
||||||
}
|
}
|
||||||
vComplexF( Zero & z){
|
// vComplexF( Zero & z){
|
||||||
vzero(*this);
|
// vzero(*this);
|
||||||
}
|
// }
|
||||||
vComplexF()=default;
|
vComplexF()=default;
|
||||||
vComplexF(ComplexF a){
|
vComplexF(ComplexF a){
|
||||||
vsplat(*this,a);
|
vsplat(*this,a);
|
||||||
|
@ -9,7 +9,7 @@ int main (int argc, char ** argv)
|
|||||||
Grid_init(&argc,&argv);
|
Grid_init(&argc,&argv);
|
||||||
|
|
||||||
std::vector<int> latt_size = GridDefaultLatt();
|
std::vector<int> latt_size = GridDefaultLatt();
|
||||||
std::vector<int> simd_layout = GridDefaultSimd();
|
std::vector<int> simd_layout = GridDefaultSimd(4,vComplexF::Nsimd());
|
||||||
std::vector<int> mpi_layout = GridDefaultMpi();
|
std::vector<int> mpi_layout = GridDefaultMpi();
|
||||||
|
|
||||||
GridCartesian Fine(latt_size,simd_layout,mpi_layout);
|
GridCartesian Fine(latt_size,simd_layout,mpi_layout);
|
||||||
|
@ -15,7 +15,7 @@ int main (int argc, char ** argv)
|
|||||||
Grid_init(&argc,&argv);
|
Grid_init(&argc,&argv);
|
||||||
|
|
||||||
std::vector<int> latt_size = GridDefaultLatt();
|
std::vector<int> latt_size = GridDefaultLatt();
|
||||||
std::vector<int> simd_layout = GridDefaultSimd();
|
std::vector<int> simd_layout = GridDefaultSimd(4,vComplexF::Nsimd());
|
||||||
std::vector<int> mpi_layout = GridDefaultMpi();
|
std::vector<int> mpi_layout = GridDefaultMpi();
|
||||||
|
|
||||||
GridCartesian Grid(latt_size,simd_layout,mpi_layout);
|
GridCartesian Grid(latt_size,simd_layout,mpi_layout);
|
||||||
|
@ -26,7 +26,7 @@ int main (int argc, char ** argv)
|
|||||||
Grid_init(&argc,&argv);
|
Grid_init(&argc,&argv);
|
||||||
|
|
||||||
std::vector<int> latt_size = GridDefaultLatt();
|
std::vector<int> latt_size = GridDefaultLatt();
|
||||||
std::vector<int> simd_layout = GridDefaultSimd();
|
std::vector<int> simd_layout = GridDefaultSimd(4,vComplexF::Nsimd());
|
||||||
std::vector<int> mpi_layout = GridDefaultMpi();
|
std::vector<int> mpi_layout = GridDefaultMpi();
|
||||||
|
|
||||||
latt_size.resize(4);
|
latt_size.resize(4);
|
||||||
|
@ -11,7 +11,7 @@ int main (int argc, char ** argv)
|
|||||||
Grid_init(&argc,&argv);
|
Grid_init(&argc,&argv);
|
||||||
|
|
||||||
|
|
||||||
std::vector<int> simd_layout = GridDefaultSimd();
|
std::vector<int> simd_layout = GridDefaultSimd(4,vComplexF::Nsimd());
|
||||||
std::vector<int> mpi_layout = GridDefaultMpi();
|
std::vector<int> mpi_layout = GridDefaultMpi();
|
||||||
std::vector<int> latt_size ({16,16,16,32});
|
std::vector<int> latt_size ({16,16,16,32});
|
||||||
std::vector<int> clatt_size ({4,4,4,8});
|
std::vector<int> clatt_size ({4,4,4,8});
|
||||||
|
@ -107,7 +107,7 @@ int main (int argc, char ** argv)
|
|||||||
Grid_init(&argc,&argv);
|
Grid_init(&argc,&argv);
|
||||||
|
|
||||||
std::vector<int> latt_size = GridDefaultLatt();
|
std::vector<int> latt_size = GridDefaultLatt();
|
||||||
std::vector<int> simd_layout = GridDefaultSimd();
|
std::vector<int> simd_layout = GridDefaultSimd(4,vComplexF::Nsimd());
|
||||||
std::vector<int> mpi_layout = GridDefaultMpi();
|
std::vector<int> mpi_layout = GridDefaultMpi();
|
||||||
|
|
||||||
GridCartesian Grid(latt_size,simd_layout,mpi_layout);
|
GridCartesian Grid(latt_size,simd_layout,mpi_layout);
|
||||||
|
@ -10,7 +10,7 @@ int main (int argc, char ** argv)
|
|||||||
|
|
||||||
|
|
||||||
std::vector<int> latt_size = GridDefaultLatt();
|
std::vector<int> latt_size = GridDefaultLatt();
|
||||||
std::vector<int> simd_layout = GridDefaultSimd();
|
std::vector<int> simd_layout = GridDefaultSimd(Nd,vComplexF::Nsimd());
|
||||||
std::vector<int> mpi_layout = GridDefaultMpi();
|
std::vector<int> mpi_layout = GridDefaultMpi();
|
||||||
|
|
||||||
double volume = latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3];
|
double volume = latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3];
|
||||||
|
Loading…
Reference in New Issue
Block a user