mirror of
https://github.com/paboyle/Grid.git
synced 2024-11-10 07:55:35 +00:00
Shrinking and organising the files
This commit is contained in:
parent
0fce523792
commit
d964d01d6a
@ -1,5 +1,5 @@
|
||||
//
|
||||
// Grid.cpp
|
||||
// Grid.h
|
||||
// simd
|
||||
//
|
||||
// Created by Peter Boyle on 09/05/2014.
|
||||
|
@ -139,30 +139,6 @@ namespace QCD {
|
||||
return peekIndex<LorentzIndex>(rhs,i,j);
|
||||
}
|
||||
|
||||
// FIXME this is rather generic and should find a way to place it earlier.
|
||||
inline void LatticeCoordinate(LatticeInteger &l,int mu){
|
||||
GridBase *grid = l._grid;
|
||||
int Nsimd = grid->iSites();
|
||||
std::vector<int> gcoor;
|
||||
std::vector<Integer> mergebuf(Nsimd);
|
||||
std::vector<Integer *> mergeptr(Nsimd);
|
||||
for(int o=0;o<grid->oSites();o++){
|
||||
for(int i=0;i<grid->iSites();i++){
|
||||
grid->RankIndexToGlobalCoor(grid->ThisRank(),o,i,gcoor);
|
||||
// grid->RankIndexToGlobalCoor(0,o,i,gcoor);
|
||||
mergebuf[i]=gcoor[mu];
|
||||
mergeptr[i]=&mergebuf[i];
|
||||
}
|
||||
merge(l._odata[o],mergeptr);
|
||||
}
|
||||
};
|
||||
|
||||
#include <Grid_predicated.h>
|
||||
|
||||
#if 0
|
||||
|
||||
#endif
|
||||
|
||||
} //namespace QCD
|
||||
} // Grid
|
||||
#endif
|
||||
|
@ -1,10 +1,11 @@
|
||||
#ifndef _GRID_CSHIFT_COMMON_H_
|
||||
#define _GRID_CSHIFT_COMMON_H_
|
||||
|
||||
namespace Grid {
|
||||
//////////////////////////////////////////////////////
|
||||
// Gather for when there is no need to SIMD split
|
||||
//////////////////////////////////////////////////////
|
||||
friend void Gather_plane_simple (Lattice<vobj> &rhs,std::vector<vobj,alignedAllocator<vobj> > &buffer, int dimension,int plane,int cbmask)
|
||||
template<class vobj> void Gather_plane_simple (Lattice<vobj> &rhs,std::vector<vobj,alignedAllocator<vobj> > &buffer, int dimension,int plane,int cbmask)
|
||||
{
|
||||
int rd = rhs._grid->_rdimensions[dimension];
|
||||
|
||||
@ -48,7 +49,7 @@ friend void Gather_plane_simple (Lattice<vobj> &rhs,std::vector<vobj,alignedAllo
|
||||
//////////////////////////////////////////////////////
|
||||
// Gather for when there *is* need to SIMD split
|
||||
//////////////////////////////////////////////////////
|
||||
friend void Gather_plane_extract(Lattice<vobj> &rhs,std::vector<scalar_type *> pointers,int dimension,int plane,int cbmask)
|
||||
template<class vobj,class scalar_type> void Gather_plane_extract(Lattice<vobj> &rhs,std::vector<scalar_type *> pointers,int dimension,int plane,int cbmask)
|
||||
{
|
||||
int rd = rhs._grid->_rdimensions[dimension];
|
||||
|
||||
@ -91,7 +92,7 @@ friend void Gather_plane_extract(Lattice<vobj> &rhs,std::vector<scalar_type *> p
|
||||
//////////////////////////////////////////////////////
|
||||
// Scatter for when there is no need to SIMD split
|
||||
//////////////////////////////////////////////////////
|
||||
friend void Scatter_plane_simple (Lattice<vobj> &rhs,std::vector<vobj,alignedAllocator<vobj> > &buffer, int dimension,int plane,int cbmask)
|
||||
template<class vobj> void Scatter_plane_simple (Lattice<vobj> &rhs,std::vector<vobj,alignedAllocator<vobj> > &buffer, int dimension,int plane,int cbmask)
|
||||
{
|
||||
int rd = rhs._grid->_rdimensions[dimension];
|
||||
|
||||
@ -134,7 +135,7 @@ friend void Scatter_plane_simple (Lattice<vobj> &rhs,std::vector<vobj,alignedAll
|
||||
//////////////////////////////////////////////////////
|
||||
// Scatter for when there *is* need to SIMD split
|
||||
//////////////////////////////////////////////////////
|
||||
friend void Scatter_plane_merge(Lattice<vobj> &rhs,std::vector<scalar_type *> pointers,int dimension,int plane,int cbmask)
|
||||
template<class vobj,class scalar_type> void Scatter_plane_merge(Lattice<vobj> &rhs,std::vector<scalar_type *> pointers,int dimension,int plane,int cbmask)
|
||||
{
|
||||
int rd = rhs._grid->_rdimensions[dimension];
|
||||
|
||||
@ -177,7 +178,7 @@ friend void Scatter_plane_merge(Lattice<vobj> &rhs,std::vector<scalar_type *> po
|
||||
//////////////////////////////////////////////////////
|
||||
// local to node block strided copies
|
||||
//////////////////////////////////////////////////////
|
||||
friend void Copy_plane(Lattice<vobj>& lhs,Lattice<vobj> &rhs, int dimension,int lplane,int rplane,int cbmask)
|
||||
template<class vobj> void Copy_plane(Lattice<vobj>& lhs,Lattice<vobj> &rhs, int dimension,int lplane,int rplane,int cbmask)
|
||||
{
|
||||
int rd = rhs._grid->_rdimensions[dimension];
|
||||
|
||||
@ -219,7 +220,7 @@ friend void Copy_plane(Lattice<vobj>& lhs,Lattice<vobj> &rhs, int dimension,int
|
||||
}
|
||||
}
|
||||
|
||||
friend void Copy_plane_permute(Lattice<vobj>& lhs,Lattice<vobj> &rhs, int dimension,int lplane,int rplane,int cbmask,int permute_type)
|
||||
template<class vobj> void Copy_plane_permute(Lattice<vobj>& lhs,Lattice<vobj> &rhs, int dimension,int lplane,int rplane,int cbmask,int permute_type)
|
||||
{
|
||||
int rd = rhs._grid->_rdimensions[dimension];
|
||||
|
||||
@ -265,7 +266,7 @@ friend void Copy_plane_permute(Lattice<vobj>& lhs,Lattice<vobj> &rhs, int dimens
|
||||
//////////////////////////////////////////////////////
|
||||
// Local to node Cshift
|
||||
//////////////////////////////////////////////////////
|
||||
friend void Cshift_local(Lattice<vobj>& ret,Lattice<vobj> &rhs,int dimension,int shift)
|
||||
template<class vobj> void Cshift_local(Lattice<vobj>& ret,Lattice<vobj> &rhs,int dimension,int shift)
|
||||
{
|
||||
int sshift[2];
|
||||
|
||||
@ -280,7 +281,7 @@ friend void Cshift_local(Lattice<vobj>& ret,Lattice<vobj> &rhs,int dimension,int
|
||||
}
|
||||
}
|
||||
|
||||
friend Lattice<vobj> Cshift_local(Lattice<vobj> &ret,Lattice<vobj> &rhs,int dimension,int shift,int cbmask)
|
||||
template<class vobj> Lattice<vobj> Cshift_local(Lattice<vobj> &ret,Lattice<vobj> &rhs,int dimension,int shift,int cbmask)
|
||||
{
|
||||
GridBase *grid = rhs._grid;
|
||||
int fd = grid->_fdimensions[dimension];
|
||||
@ -322,5 +323,5 @@ friend Lattice<vobj> Cshift_local(Lattice<vobj> &ret,Lattice<vobj> &rhs,int dime
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
}
|
||||
#endif
|
||||
|
@ -6,7 +6,9 @@
|
||||
#define MIN(x,y) ((x)>(y)?(y):(x))
|
||||
#endif
|
||||
|
||||
friend Lattice<vobj> Cshift(Lattice<vobj> &rhs,int dimension,int shift)
|
||||
namespace Grid {
|
||||
|
||||
template<class vobj> Lattice<vobj> Cshift(Lattice<vobj> &rhs,int dimension,int shift)
|
||||
{
|
||||
typedef typename vobj::vector_type vector_type;
|
||||
typedef typename vobj::scalar_type scalar_type;
|
||||
@ -37,7 +39,7 @@ friend Lattice<vobj> Cshift(Lattice<vobj> &rhs,int dimension,int shift)
|
||||
return ret;
|
||||
}
|
||||
|
||||
friend void Cshift_comms(Lattice<vobj>& ret,Lattice<vobj> &rhs,int dimension,int shift)
|
||||
template<class vobj> void Cshift_comms(Lattice<vobj>& ret,Lattice<vobj> &rhs,int dimension,int shift)
|
||||
{
|
||||
int sshift[2];
|
||||
|
||||
@ -52,7 +54,7 @@ friend void Cshift_comms(Lattice<vobj>& ret,Lattice<vobj> &rhs,int dimension,int
|
||||
}
|
||||
}
|
||||
|
||||
friend void Cshift_comms_simd(Lattice<vobj>& ret,Lattice<vobj> &rhs,int dimension,int shift)
|
||||
template<class vobj> void Cshift_comms_simd(Lattice<vobj>& ret,Lattice<vobj> &rhs,int dimension,int shift)
|
||||
{
|
||||
int sshift[2];
|
||||
|
||||
@ -67,8 +69,7 @@ friend void Cshift_comms_simd(Lattice<vobj>& ret,Lattice<vobj> &rhs,int dimensio
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
friend void Cshift_comms(Lattice<vobj> &ret,Lattice<vobj> &rhs,int dimension,int shift,int cbmask)
|
||||
template<class vobj> void Cshift_comms(Lattice<vobj> &ret,Lattice<vobj> &rhs,int dimension,int shift,int cbmask)
|
||||
{
|
||||
typedef typename vobj::vector_type vector_type;
|
||||
typedef typename vobj::scalar_type scalar_type;
|
||||
@ -127,8 +128,7 @@ friend void Cshift_comms(Lattice<vobj> &ret,Lattice<vobj> &rhs,int dimension,int
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
friend void Cshift_comms_simd(Lattice<vobj> &ret,Lattice<vobj> &rhs,int dimension,int shift,int cbmask)
|
||||
template<class vobj> void Cshift_comms_simd(Lattice<vobj> &ret,Lattice<vobj> &rhs,int dimension,int shift,int cbmask)
|
||||
{
|
||||
GridBase *grid=rhs._grid;
|
||||
const int Nsimd = grid->Nsimd();
|
||||
@ -260,4 +260,5 @@ friend void Cshift_comms_simd(Lattice<vobj> &ret,Lattice<vobj> &rhs,int dimensi
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
@ -1,12 +1,12 @@
|
||||
#ifndef _GRID_CSHIFT_NONE_H_
|
||||
#define _GRID_CSHIFT_NONE_H_
|
||||
|
||||
friend Lattice<vobj> Cshift(Lattice<vobj> &rhs,int dimension,int shift)
|
||||
namespace Grid {
|
||||
template<class vobj> Lattice<vobj> Cshift(Lattice<vobj> &rhs,int dimension,int shift)
|
||||
{
|
||||
Lattice<vobj> ret(rhs._grid);
|
||||
ret.checkerboard = rhs._grid->CheckerBoardDestination(rhs.checkerboard,shift);
|
||||
Cshift_local(ret,rhs,dimension,shift);
|
||||
return ret;
|
||||
}
|
||||
|
||||
}
|
||||
#endif
|
||||
|
@ -3,10 +3,10 @@
|
||||
|
||||
namespace Grid {
|
||||
|
||||
// TODO: Indexing ()
|
||||
// TODO:
|
||||
// mac,real,imag
|
||||
//
|
||||
// Functionality required:
|
||||
|
||||
// Functionality:
|
||||
// -=,+=,*=,()
|
||||
// add,+,sub,-,mult,mac,*
|
||||
// adj,conj
|
||||
@ -17,7 +17,6 @@ namespace Grid {
|
||||
// innerProduct,outerProduct,
|
||||
// localNorm2
|
||||
// localInnerProduct
|
||||
//
|
||||
|
||||
extern int GridCshiftPermuteMap[4][16];
|
||||
|
||||
@ -33,51 +32,39 @@ public:
|
||||
typedef typename vobj::scalar_type scalar_type;
|
||||
typedef typename vobj::vector_type vector_type;
|
||||
|
||||
//////////////////////////////////////////////////////////////////
|
||||
// Constructor requires "grid" passed.
|
||||
// what about a default grid?
|
||||
//////////////////////////////////////////////////////////////////
|
||||
Lattice(GridBase *grid) : _grid(grid) {
|
||||
_odata.reserve(_grid->oSites());
|
||||
assert((((uint64_t)&_odata[0])&0xF) ==0);
|
||||
checkerboard=0;
|
||||
}
|
||||
|
||||
#include <Grid_cshift.h>
|
||||
|
||||
template<class obj1,class obj2>
|
||||
friend void conformable(const Lattice<obj1> &lhs,const Lattice<obj2> &rhs);
|
||||
template<class sobj> inline Lattice<vobj> & operator = (const sobj & r){
|
||||
#pragma omp parallel for
|
||||
for(int ss=0;ss<_grid->oSites();ss++){
|
||||
this->_odata[ss]=r;
|
||||
}
|
||||
return *this;
|
||||
}
|
||||
|
||||
// FIXME Performance difference between operator * and mult is troubling.
|
||||
// Auto move constructor seems to lose surprisingly much.
|
||||
|
||||
// Site wise binary operations
|
||||
// We eliminate a temporary object assignment if use the mult,add,sub routines.
|
||||
// For the operator versions we rely on move constructor to eliminate the
|
||||
// vector copy back.
|
||||
template<class obj1,class obj2,class obj3>
|
||||
friend void mult(Lattice<obj1> &ret,const Lattice<obj2> &lhs,const Lattice<obj3> &rhs);
|
||||
|
||||
template<class obj1,class obj2,class obj3>
|
||||
friend void mac(Lattice<obj1> &ret,const Lattice<obj2> &lhs,const Lattice<obj3> &rhs);
|
||||
|
||||
template<class obj1,class obj2,class obj3>
|
||||
friend void sub(Lattice<obj1> &ret,const Lattice<obj2> &lhs,const Lattice<obj3> &rhs);
|
||||
|
||||
template<class obj1,class obj2,class obj3>
|
||||
friend void add(Lattice<obj1> &ret,const Lattice<obj2> &lhs,const Lattice<obj3> &rhs);
|
||||
// *=,+=,-= operators inherit behvour from correspond */+/- operation
|
||||
template<class T> inline Lattice<vobj> &operator *=(const T &r) {
|
||||
*this = (*this)*r;
|
||||
return *this;
|
||||
}
|
||||
|
||||
template<class T> inline Lattice<vobj> &operator -=(const T &r) {
|
||||
*this = (*this)-r;
|
||||
return *this;
|
||||
}
|
||||
template<class T> inline Lattice<vobj> &operator +=(const T &r) {
|
||||
*this = (*this)+r;
|
||||
return *this;
|
||||
}
|
||||
|
||||
friend void axpy(Lattice<vobj> &ret,double a,const Lattice<vobj> &lhs,const Lattice<vobj> &rhs){
|
||||
conformable(lhs,rhs);
|
||||
#pragma omp parallel for
|
||||
for(int ss=0;ss<lhs._grid->oSites();ss++){
|
||||
axpy(&ret._odata[ss],a,&lhs._odata[ss],&rhs._odata[ss]);
|
||||
}
|
||||
}
|
||||
friend void axpy(Lattice<vobj> &ret,std::complex<double> a,const Lattice<vobj> &lhs,const Lattice<vobj> &rhs){
|
||||
conformable(lhs,rhs);
|
||||
#pragma omp parallel for
|
||||
for(int ss=0;ss<lhs._grid->oSites();ss++){
|
||||
axpy(&ret._odata[ss],a,&lhs._odata[ss],&rhs._odata[ss]);
|
||||
}
|
||||
}
|
||||
inline friend Lattice<vobj> operator / (const Lattice<vobj> &lhs,const Lattice<vobj> &rhs){
|
||||
conformable(lhs,rhs);
|
||||
Lattice<vobj> ret(lhs._grid);
|
||||
@ -87,550 +74,23 @@ public:
|
||||
}
|
||||
return ret;
|
||||
};
|
||||
|
||||
template<class sobj>
|
||||
inline Lattice<vobj> & operator = (const sobj & r){
|
||||
#pragma omp parallel for
|
||||
for(int ss=0;ss<_grid->oSites();ss++){
|
||||
this->_odata[ss]=r;
|
||||
}
|
||||
return *this;
|
||||
}
|
||||
|
||||
// Poke a scalar object into the SIMD array
|
||||
template<class sobj>
|
||||
friend void pokeSite(const sobj &s,Lattice<vobj> &l,std::vector<int> &site){
|
||||
|
||||
GridBase *grid=l._grid;
|
||||
|
||||
typedef typename vobj::scalar_type scalar_type;
|
||||
typedef typename vobj::vector_type vector_type;
|
||||
|
||||
int Nsimd = grid->Nsimd();
|
||||
|
||||
assert( l.checkerboard== l._grid->CheckerBoard(site));
|
||||
assert( sizeof(sobj)*Nsimd == sizeof(vobj));
|
||||
|
||||
int rank,odx,idx;
|
||||
grid->GlobalCoorToRankIndex(rank,odx,idx,site);
|
||||
|
||||
// Optional to broadcast from node 0.
|
||||
grid->Broadcast(0,s);
|
||||
|
||||
std::vector<sobj> buf(Nsimd);
|
||||
std::vector<scalar_type *> pointers(Nsimd);
|
||||
for(int i=0;i<Nsimd;i++) pointers[i] = (scalar_type *)&buf[i];
|
||||
|
||||
// extract-modify-merge cycle is easiest way and this is not perf critical
|
||||
extract(l._odata[odx],pointers);
|
||||
|
||||
buf[idx] = s;
|
||||
|
||||
for(int i=0;i<Nsimd;i++) pointers[i] = (scalar_type *)&buf[i];
|
||||
merge(l._odata[odx],pointers);
|
||||
|
||||
return;
|
||||
};
|
||||
|
||||
// Peek a scalar object from the SIMD array
|
||||
template<class sobj>
|
||||
friend void peekSite(sobj &s,Lattice<vobj> &l,std::vector<int> &site){
|
||||
|
||||
GridBase *grid=l._grid;
|
||||
|
||||
typedef typename vobj::scalar_type scalar_type;
|
||||
typedef typename vobj::vector_type vector_type;
|
||||
|
||||
int Nsimd = grid->Nsimd();
|
||||
|
||||
assert( l.checkerboard== l._grid->CheckerBoard(site));
|
||||
assert( sizeof(sobj)*Nsimd == sizeof(vobj));
|
||||
|
||||
int rank,odx,idx;
|
||||
grid->GlobalCoorToRankIndex(rank,odx,idx,site);
|
||||
std::vector<sobj> buf(Nsimd);
|
||||
std::vector<scalar_type *> pointers(Nsimd);
|
||||
for(int i=0;i<Nsimd;i++) pointers[i] = (scalar_type *)&buf[i];
|
||||
|
||||
extract(l._odata[odx],pointers);
|
||||
|
||||
s = buf[idx];
|
||||
grid->Broadcast(rank,s);
|
||||
|
||||
return;
|
||||
};
|
||||
|
||||
// FIXME Randomise; deprecate this
|
||||
friend void random(Lattice<vobj> &l){
|
||||
Real *v_ptr = (Real *)&l._odata[0];
|
||||
size_t v_len = l._grid->oSites()*sizeof(vobj);
|
||||
size_t d_len = v_len/sizeof(Real);
|
||||
|
||||
for(int i=0;i<d_len;i++){
|
||||
|
||||
v_ptr[i]=drand48();
|
||||
}
|
||||
};
|
||||
|
||||
// FIXME for debug; deprecate this; made obscelete by
|
||||
// LatticeCoordinate();
|
||||
friend void lex_sites(Lattice<vobj> &l){
|
||||
Real *v_ptr = (Real *)&l._odata[0];
|
||||
size_t o_len = l._grid->oSites();
|
||||
size_t v_len = sizeof(vobj)/sizeof(vRealF);
|
||||
size_t vec_len = vRealF::Nsimd();
|
||||
|
||||
for(int i=0;i<o_len;i++){
|
||||
for(int j=0;j<v_len;j++){
|
||||
for(int vv=0;vv<vec_len;vv+=2){
|
||||
v_ptr[i*v_len*vec_len+j*vec_len+vv ]= i+vv*500;
|
||||
v_ptr[i*v_len*vec_len+j*vec_len+vv+1]= i+vv*500;
|
||||
}
|
||||
}}
|
||||
}
|
||||
|
||||
// FIXME Implement a consistent seed management strategy
|
||||
friend void gaussian(Lattice<vobj> &l){
|
||||
// Zero mean, unit variance.
|
||||
std::normal_distribution<double> distribution(0.0,1.0);
|
||||
Real *v_ptr = (Real *)&l._odata[0];
|
||||
size_t v_len = l._grid->oSites()*sizeof(vobj);
|
||||
size_t d_len = v_len/sizeof(Real);
|
||||
|
||||
for(int i=0;i<d_len;i++){
|
||||
v_ptr[i]= drand48();
|
||||
}
|
||||
};
|
||||
|
||||
// Unary functions and Unops
|
||||
friend inline Lattice<vobj> operator -(const Lattice<vobj> &r) {
|
||||
Lattice<vobj> ret(r._grid);
|
||||
#pragma omp parallel for
|
||||
for(int ss=0;ss<r._grid->oSites();ss++){
|
||||
ret._odata[ss]= -r._odata[ss];
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
// *=,+=,-= operators inherit behvour from correspond */+/- operation
|
||||
template<class T>
|
||||
inline Lattice<vobj> &operator *=(const T &r) {
|
||||
*this = (*this)*r;
|
||||
return *this;
|
||||
}
|
||||
template<class T>
|
||||
inline Lattice<vobj> &operator -=(const T &r) {
|
||||
*this = (*this)-r;
|
||||
return *this;
|
||||
}
|
||||
template<class T>
|
||||
inline Lattice<vobj> &operator +=(const T &r) {
|
||||
*this = (*this)+r;
|
||||
return *this;
|
||||
}
|
||||
|
||||
inline friend Lattice<vobj> adj(const Lattice<vobj> &lhs){
|
||||
Lattice<vobj> ret(lhs._grid);
|
||||
#pragma omp parallel for
|
||||
for(int ss=0;ss<lhs._grid->oSites();ss++){
|
||||
ret._odata[ss] = adj(lhs._odata[ss]);
|
||||
}
|
||||
return ret;
|
||||
};
|
||||
|
||||
inline friend Lattice<vobj> transpose(const Lattice<vobj> &lhs){
|
||||
Lattice<vobj> ret(lhs._grid);
|
||||
#pragma omp parallel for
|
||||
for(int ss=0;ss<lhs._grid->oSites();ss++){
|
||||
ret._odata[ss] = transpose(lhs._odata[ss]);
|
||||
}
|
||||
return ret;
|
||||
};
|
||||
|
||||
|
||||
inline friend Lattice<vobj> conj(const Lattice<vobj> &lhs){
|
||||
Lattice<vobj> ret(lhs._grid);
|
||||
#pragma omp parallel for
|
||||
for(int ss=0;ss<lhs._grid->oSites();ss++){
|
||||
ret._odata[ss] = conj(lhs._odata[ss]);
|
||||
}
|
||||
return ret;
|
||||
};
|
||||
|
||||
// remove and insert a half checkerboard
|
||||
friend void pickCheckerboard(int cb,Lattice<vobj> &half,const Lattice<vobj> &full){
|
||||
half.checkerboard = cb;
|
||||
int ssh=0;
|
||||
#pragma omp parallel for
|
||||
for(int ss=0;ss<full._grid->oSites();ss++){
|
||||
std::vector<int> coor;
|
||||
int cbos;
|
||||
|
||||
full._grid->oCoorFromOindex(coor,ss);
|
||||
cbos=half._grid->CheckerBoard(coor);
|
||||
|
||||
if (cbos==cb) {
|
||||
|
||||
half._odata[ssh] = full._odata[ss];
|
||||
ssh++;
|
||||
}
|
||||
}
|
||||
}
|
||||
friend void setCheckerboard(Lattice<vobj> &full,const Lattice<vobj> &half){
|
||||
int cb = half.checkerboard;
|
||||
int ssh=0;
|
||||
#pragma omp parallel for
|
||||
for(int ss=0;ss<full._grid->oSites();ss++){
|
||||
std::vector<int> coor;
|
||||
int cbos;
|
||||
|
||||
full._grid->oCoorFromOindex(coor,ss);
|
||||
cbos=half._grid->CheckerBoard(coor);
|
||||
|
||||
if (cbos==cb) {
|
||||
full._odata[ss]=half._odata[ssh];
|
||||
ssh++;
|
||||
}
|
||||
}
|
||||
}
|
||||
}; // class Lattice
|
||||
|
||||
template<class obj1,class obj2>
|
||||
void conformable(const Lattice<obj1> &lhs,const Lattice<obj2> &rhs)
|
||||
{
|
||||
assert(lhs._grid == rhs._grid);
|
||||
assert(lhs.checkerboard == rhs.checkerboard);
|
||||
}
|
||||
|
||||
template<class obj1,class obj2,class obj3>
|
||||
void mult(Lattice<obj1> &ret,const Lattice<obj2> &lhs,const Lattice<obj3> &rhs){
|
||||
conformable(lhs,rhs);
|
||||
uint32_t vec_len = lhs._grid->oSites();
|
||||
#pragma omp parallel for
|
||||
for(int ss=0;ss<vec_len;ss++){
|
||||
mult(&ret._odata[ss],&lhs._odata[ss],&rhs._odata[ss]);
|
||||
}
|
||||
}
|
||||
|
||||
template<class obj1,class obj2,class obj3>
|
||||
void mac(Lattice<obj1> &ret,const Lattice<obj2> &lhs,const Lattice<obj3> &rhs){
|
||||
conformable(lhs,rhs);
|
||||
uint32_t vec_len = lhs._grid->oSites();
|
||||
#pragma omp parallel for
|
||||
for(int ss=0;ss<vec_len;ss++){
|
||||
mac(&ret._odata[ss],&lhs._odata[ss],&rhs._odata[ss]);
|
||||
}
|
||||
}
|
||||
|
||||
template<class obj1,class obj2,class obj3>
|
||||
void sub(Lattice<obj1> &ret,const Lattice<obj2> &lhs,const Lattice<obj3> &rhs){
|
||||
conformable(lhs,rhs);
|
||||
#pragma omp parallel for
|
||||
for(int ss=0;ss<lhs._grid->oSites();ss++){
|
||||
sub(&ret._odata[ss],&lhs._odata[ss],&rhs._odata[ss]);
|
||||
}
|
||||
}
|
||||
template<class obj1,class obj2,class obj3>
|
||||
void add(Lattice<obj1> &ret,const Lattice<obj2> &lhs,const Lattice<obj3> &rhs){
|
||||
conformable(lhs,rhs);
|
||||
#pragma omp parallel for
|
||||
for(int ss=0;ss<lhs._grid->oSites();ss++){
|
||||
add(&ret._odata[ss],&lhs._odata[ss],&rhs._odata[ss]);
|
||||
}
|
||||
}
|
||||
|
||||
// Lattice BinOp Lattice,
|
||||
template<class left,class right>
|
||||
inline auto operator * (const Lattice<left> &lhs,const Lattice<right> &rhs)-> Lattice<decltype(lhs._odata[0]*rhs._odata[0])>
|
||||
{
|
||||
//NB mult performs conformable check. Do not reapply here for performance.
|
||||
Lattice<decltype(lhs._odata[0]*rhs._odata[0])> ret(rhs._grid);
|
||||
mult(ret,lhs,rhs);
|
||||
return ret;
|
||||
}
|
||||
template<class left,class right>
|
||||
inline auto operator + (const Lattice<left> &lhs,const Lattice<right> &rhs)-> Lattice<decltype(lhs._odata[0]*rhs._odata[0])>
|
||||
{
|
||||
//NB mult performs conformable check. Do not reapply here for performance.
|
||||
Lattice<decltype(lhs._odata[0]*rhs._odata[0])> ret(rhs._grid);
|
||||
add(ret,lhs,rhs);
|
||||
return ret;
|
||||
}
|
||||
template<class left,class right>
|
||||
inline auto operator - (const Lattice<left> &lhs,const Lattice<right> &rhs)-> Lattice<decltype(lhs._odata[0]*rhs._odata[0])>
|
||||
{
|
||||
//NB mult performs conformable check. Do not reapply here for performance.
|
||||
Lattice<decltype(lhs._odata[0]*rhs._odata[0])> ret(rhs._grid);
|
||||
sub(ret,lhs,rhs);
|
||||
return ret;
|
||||
}
|
||||
|
||||
// Scalar BinOp Lattice ;generate return type
|
||||
template<class left,class right>
|
||||
inline auto operator * (const left &lhs,const Lattice<right> &rhs) -> Lattice<decltype(lhs*rhs._odata[0])>
|
||||
{
|
||||
Lattice<decltype(lhs*rhs._odata[0])> ret(rhs._grid);
|
||||
#pragma omp parallel for
|
||||
for(int ss=0;ss<rhs._grid->oSites(); ss++){
|
||||
ret._odata[ss]=lhs*rhs._odata[ss];
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
template<class left,class right>
|
||||
inline auto operator + (const left &lhs,const Lattice<right> &rhs) -> Lattice<decltype(lhs*rhs._odata[0])>
|
||||
{
|
||||
Lattice<decltype(lhs*rhs._odata[0])> ret(rhs._grid);
|
||||
#pragma omp parallel for
|
||||
for(int ss=0;ss<rhs._grid->oSites(); ss++){
|
||||
ret._odata[ss]=lhs+rhs._odata[ss];
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
template<class left,class right>
|
||||
inline auto operator - (const left &lhs,const Lattice<right> &rhs) -> Lattice<decltype(lhs*rhs._odata[0])>
|
||||
{
|
||||
Lattice<decltype(lhs*rhs._odata[0])> ret(rhs._grid);
|
||||
#pragma omp parallel for
|
||||
for(int ss=0;ss<rhs._grid->oSites(); ss++){
|
||||
ret._odata[ss]=lhs-rhs._odata[ss];
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
template<class left,class right>
|
||||
inline auto operator * (const Lattice<left> &lhs,const right &rhs) -> Lattice<decltype(lhs._odata[0]*rhs)>
|
||||
{
|
||||
Lattice<decltype(lhs._odata[0]*rhs)> ret(lhs._grid);
|
||||
#pragma omp parallel for
|
||||
for(int ss=0;ss<lhs._grid->oSites(); ss++){
|
||||
ret._odata[ss]=lhs._odata[ss]*rhs;
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
template<class left,class right>
|
||||
inline auto operator + (const Lattice<left> &lhs,const right &rhs) -> Lattice<decltype(lhs._odata[0]*rhs)>
|
||||
{
|
||||
Lattice<decltype(lhs._odata[0]*rhs)> ret(lhs._grid);
|
||||
#pragma omp parallel for
|
||||
for(int ss=0;ss<rhs._grid->oSites(); ss++){
|
||||
ret._odata[ss]=lhs._odata[ss]+rhs;
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
template<class left,class right>
|
||||
inline auto operator - (const Lattice<left> &lhs,const right &rhs) -> Lattice<decltype(lhs._odata[0]*rhs)>
|
||||
{
|
||||
Lattice<decltype(lhs._odata[0]*rhs)> ret(lhs._grid);
|
||||
#pragma omp parallel for
|
||||
for(int ss=0;ss<rhs._grid->oSites(); ss++){
|
||||
ret._odata[ss]=lhs._odata[ss]-rhs;
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// Trace
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
template<class vobj>
|
||||
inline auto trace(const Lattice<vobj> &lhs)
|
||||
-> Lattice<decltype(trace(lhs._odata[0]))>
|
||||
{
|
||||
Lattice<decltype(trace(lhs._odata[0]))> ret(lhs._grid);
|
||||
#pragma omp parallel for
|
||||
for(int ss=0;ss<lhs._grid->oSites();ss++){
|
||||
ret._odata[ss] = trace(lhs._odata[ss]);
|
||||
}
|
||||
return ret;
|
||||
};
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// Index level dependent operations
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
template<int Index,class vobj>
|
||||
inline auto traceIndex(const Lattice<vobj> &lhs)
|
||||
-> Lattice<decltype(traceIndex<Index>(lhs._odata[0]))>
|
||||
{
|
||||
Lattice<decltype(traceIndex<Index>(lhs._odata[0]))> ret(lhs._grid);
|
||||
#pragma omp parallel for
|
||||
for(int ss=0;ss<lhs._grid->oSites();ss++){
|
||||
ret._odata[ss] = traceIndex<Index>(lhs._odata[ss]);
|
||||
}
|
||||
return ret;
|
||||
};
|
||||
template<int Index,class vobj>
|
||||
inline auto transposeIndex(const Lattice<vobj> &lhs)
|
||||
-> Lattice<decltype(transposeIndex<Index>(lhs._odata[0]))>
|
||||
{
|
||||
Lattice<decltype(transposeIndex<Index>(lhs._odata[0]))> ret(lhs._grid);
|
||||
#pragma omp parallel for
|
||||
for(int ss=0;ss<lhs._grid->oSites();ss++){
|
||||
ret._odata[ss] = transposeIndex<Index>(lhs._odata[ss]);
|
||||
}
|
||||
return ret;
|
||||
};
|
||||
|
||||
// Fixme; this is problematic since the number of args is variable and
|
||||
// may mismatch...
|
||||
template<int Index,class vobj>
|
||||
inline auto peekIndex(const Lattice<vobj> &lhs)
|
||||
-> Lattice<decltype(peekIndex<Index>(lhs._odata[0]))>
|
||||
{
|
||||
Lattice<decltype(peekIndex<Index>(lhs._odata[0]))> ret(lhs._grid);
|
||||
#pragma omp parallel for
|
||||
for(int ss=0;ss<lhs._grid->oSites();ss++){
|
||||
ret._odata[ss] = peekIndex<Index>(lhs._odata[ss]);
|
||||
}
|
||||
return ret;
|
||||
};
|
||||
template<int Index,class vobj>
|
||||
inline auto peekIndex(const Lattice<vobj> &lhs,int i)
|
||||
-> Lattice<decltype(peekIndex<Index>(lhs._odata[0],i))>
|
||||
{
|
||||
Lattice<decltype(peekIndex<Index>(lhs._odata[0],i))> ret(lhs._grid);
|
||||
#pragma omp parallel for
|
||||
for(int ss=0;ss<lhs._grid->oSites();ss++){
|
||||
ret._odata[ss] = peekIndex<Index>(lhs._odata[ss],i);
|
||||
}
|
||||
return ret;
|
||||
};
|
||||
template<int Index,class vobj>
|
||||
inline auto peekIndex(const Lattice<vobj> &lhs,int i,int j)
|
||||
-> Lattice<decltype(peekIndex<Index>(lhs._odata[0],i,j))>
|
||||
{
|
||||
Lattice<decltype(peekIndex<Index>(lhs._odata[0],i,j))> ret(lhs._grid);
|
||||
#pragma omp parallel for
|
||||
for(int ss=0;ss<lhs._grid->oSites();ss++){
|
||||
ret._odata[ss] = peekIndex<Index>(lhs._odata[ss],i,j);
|
||||
}
|
||||
return ret;
|
||||
};
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// Poke internal indices of a Lattice object
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
template<int Index,class vobj> inline
|
||||
void pokeIndex(Lattice<vobj> &lhs,const Lattice<decltype(peekIndex<Index>(lhs._odata[0]))> & rhs)
|
||||
{
|
||||
#pragma omp parallel for
|
||||
for(int ss=0;ss<lhs._grid->oSites();ss++){
|
||||
pokeIndex<Index>(lhs._odata[ss],rhs._odata[ss]);
|
||||
}
|
||||
}
|
||||
template<int Index,class vobj> inline
|
||||
void pokeIndex(Lattice<vobj> &lhs,const Lattice<decltype(peekIndex<Index>(lhs._odata[0],0))> & rhs,int i)
|
||||
{
|
||||
#pragma omp parallel for
|
||||
for(int ss=0;ss<lhs._grid->oSites();ss++){
|
||||
pokeIndex<Index>(lhs._odata[ss],rhs._odata[ss],i);
|
||||
}
|
||||
}
|
||||
template<int Index,class vobj> inline
|
||||
void pokeIndex(Lattice<vobj> &lhs,const Lattice<decltype(peekIndex<Index>(lhs._odata[0],0,0))> & rhs,int i,int j)
|
||||
{
|
||||
#pragma omp parallel for
|
||||
for(int ss=0;ss<lhs._grid->oSites();ss++){
|
||||
pokeIndex<Index>(lhs._odata[ss],rhs._odata[ss],i,j);
|
||||
}
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// Reduction operations
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
template<class vobj>
|
||||
inline RealD norm2(const Lattice<vobj> &arg){
|
||||
|
||||
typedef typename vobj::scalar_type scalar;
|
||||
typedef typename vobj::vector_type vector;
|
||||
decltype(innerProduct(arg._odata[0],arg._odata[0])) vnrm=zero;
|
||||
scalar nrm;
|
||||
//FIXME make this loop parallelisable
|
||||
vnrm=zero;
|
||||
for(int ss=0;ss<arg._grid->oSites(); ss++){
|
||||
vnrm = vnrm + innerProduct(arg._odata[ss],arg._odata[ss]);
|
||||
}
|
||||
vector vvnrm =TensorRemove(vnrm) ;
|
||||
nrm = Reduce(vvnrm);
|
||||
arg._grid->GlobalSum(nrm);
|
||||
return real(nrm);
|
||||
}
|
||||
|
||||
template<class vobj>
|
||||
inline auto innerProduct(const Lattice<vobj> &left,const Lattice<vobj> &right) ->decltype(innerProduct(left._odata[0],right._odata[0]))
|
||||
{
|
||||
typedef typename vobj::scalar_type scalar;
|
||||
decltype(innerProduct(left._odata[0],right._odata[0])) vnrm=zero;
|
||||
|
||||
scalar nrm;
|
||||
//FIXME make this loop parallelisable
|
||||
for(int ss=0;ss<left._grid->oSites(); ss++){
|
||||
vnrm = vnrm + innerProduct(left._odata[ss],right._odata[ss]);
|
||||
}
|
||||
nrm = Reduce(vnrm);
|
||||
right._grid->GlobalSum(nrm);
|
||||
return nrm;
|
||||
}
|
||||
|
||||
/////////////////////////////////////////////////////
|
||||
// Non site reduced routines
|
||||
/////////////////////////////////////////////////////
|
||||
|
||||
// localNorm2,
|
||||
template<class vobj>
|
||||
inline auto localNorm2 (const Lattice<vobj> &rhs)-> Lattice<typename vobj::tensor_reduced>
|
||||
{
|
||||
Lattice<typename vobj::tensor_reduced> ret(rhs._grid);
|
||||
#pragma omp parallel for
|
||||
for(int ss=0;ss<rhs._grid->oSites(); ss++){
|
||||
ret._odata[ss]=innerProduct(rhs._odata[ss],rhs._odata[ss]);
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
template<class vobj>
|
||||
inline auto real(const Lattice<vobj> &z) -> Lattice<decltype(real(z._odata[0]))>
|
||||
{
|
||||
Lattice<decltype(real(z._odata[0]))> ret(z._grid);
|
||||
#pragma omp parallel for
|
||||
for(int ss=0;ss<z._grid->oSites();ss++){
|
||||
ret._odata[ss] = real(z._odata[ss]);
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
template<class vobj>
|
||||
inline auto imag(const Lattice<vobj> &z) -> Lattice<decltype(imag(z._odata[0]))>
|
||||
{
|
||||
Lattice<decltype(imag(z._odata[0]))> ret(z._grid);
|
||||
#pragma omp parallel for
|
||||
for(int ss=0;ss<z._grid->oSites();ss++){
|
||||
ret._odata[ss] = imag(z._odata[ss]);
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
// localInnerProduct
|
||||
template<class vobj>
|
||||
inline auto localInnerProduct (const Lattice<vobj> &lhs,const Lattice<vobj> &rhs)
|
||||
-> Lattice<typename vobj::tensor_reduced>
|
||||
{
|
||||
Lattice<typename vobj::tensor_reduced> ret(rhs._grid);
|
||||
#pragma omp parallel for
|
||||
for(int ss=0;ss<rhs._grid->oSites(); ss++){
|
||||
ret._odata[ss]=innerProduct(lhs._odata[ss],rhs._odata[ss]);
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
// outerProduct Scalar x Scalar -> Scalar
|
||||
// Vector x Vector -> Matrix
|
||||
template<class ll,class rr>
|
||||
inline auto outerProduct (const Lattice<ll> &lhs,const Lattice<rr> &rhs) -> Lattice<decltype(outerProduct(lhs._odata[0],rhs._odata[0]))>
|
||||
{
|
||||
Lattice<decltype(outerProduct(lhs._odata[0],rhs._odata[0]))> ret(rhs._grid);
|
||||
#pragma omp parallel for
|
||||
for(int ss=0;ss<rhs._grid->oSites(); ss++){
|
||||
ret._odata[ss]=outerProduct(lhs._odata[ss],rhs._odata[ss]);
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
|
||||
}; // class Lattice
|
||||
}
|
||||
|
||||
#include <Grid_lattice_conformable.h>
|
||||
#include <Grid_lattice_arith.h>
|
||||
#include <Grid_lattice_trace.h>
|
||||
#include <Grid_lattice_transpose.h>
|
||||
#include <Grid_lattice_local.h>
|
||||
#include <Grid_lattice_reduction.h>
|
||||
#include <Grid_lattice_peekpoke.h>
|
||||
#include <Grid_lattice_reality.h>
|
||||
#include <Grid_cshift.h>
|
||||
#include <Grid_where.h>
|
||||
#include <Grid_lattice_coordinate.h>
|
||||
#include <Grid_lattice_rng.h>
|
||||
#include <Grid_lattice_transfer.h>
|
||||
|
||||
|
||||
|
||||
#endif
|
||||
|
160
lib/Grid_lattice_arith.h
Normal file
160
lib/Grid_lattice_arith.h
Normal file
@ -0,0 +1,160 @@
|
||||
#ifndef GRID_LATTICE_ARITH_H
|
||||
#define GRID_LATTICE_ARITH_H
|
||||
|
||||
namespace Grid {
|
||||
|
||||
template<class vobj>
|
||||
inline Lattice<vobj> operator -(const Lattice<vobj> &r)
|
||||
{
|
||||
Lattice<vobj> ret(r._grid);
|
||||
#pragma omp parallel for
|
||||
for(int ss=0;ss<r._grid->oSites();ss++){
|
||||
ret._odata[ss]= -r._odata[ss];
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
template<class vobj>
|
||||
inline void axpy(Lattice<vobj> &ret,double a,const Lattice<vobj> &lhs,const Lattice<vobj> &rhs){
|
||||
conformable(lhs,rhs);
|
||||
#pragma omp parallel for
|
||||
for(int ss=0;ss<lhs._grid->oSites();ss++){
|
||||
axpy(&ret._odata[ss],a,&lhs._odata[ss],&rhs._odata[ss]);
|
||||
}
|
||||
}
|
||||
template<class vobj>
|
||||
inline void axpy(Lattice<vobj> &ret,std::complex<double> a,const Lattice<vobj> &lhs,const Lattice<vobj> &rhs){
|
||||
conformable(lhs,rhs);
|
||||
#pragma omp parallel for
|
||||
for(int ss=0;ss<lhs._grid->oSites();ss++){
|
||||
axpy(&ret._odata[ss],a,&lhs._odata[ss],&rhs._odata[ss]);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
template<class obj1,class obj2,class obj3>
|
||||
void mult(Lattice<obj1> &ret,const Lattice<obj2> &lhs,const Lattice<obj3> &rhs){
|
||||
conformable(lhs,rhs);
|
||||
uint32_t vec_len = lhs._grid->oSites();
|
||||
#pragma omp parallel for
|
||||
for(int ss=0;ss<vec_len;ss++){
|
||||
mult(&ret._odata[ss],&lhs._odata[ss],&rhs._odata[ss]);
|
||||
}
|
||||
}
|
||||
|
||||
template<class obj1,class obj2,class obj3>
|
||||
void mac(Lattice<obj1> &ret,const Lattice<obj2> &lhs,const Lattice<obj3> &rhs){
|
||||
conformable(lhs,rhs);
|
||||
uint32_t vec_len = lhs._grid->oSites();
|
||||
#pragma omp parallel for
|
||||
for(int ss=0;ss<vec_len;ss++){
|
||||
mac(&ret._odata[ss],&lhs._odata[ss],&rhs._odata[ss]);
|
||||
}
|
||||
}
|
||||
|
||||
template<class obj1,class obj2,class obj3>
|
||||
void sub(Lattice<obj1> &ret,const Lattice<obj2> &lhs,const Lattice<obj3> &rhs){
|
||||
conformable(lhs,rhs);
|
||||
#pragma omp parallel for
|
||||
for(int ss=0;ss<lhs._grid->oSites();ss++){
|
||||
sub(&ret._odata[ss],&lhs._odata[ss],&rhs._odata[ss]);
|
||||
}
|
||||
}
|
||||
template<class obj1,class obj2,class obj3>
|
||||
void add(Lattice<obj1> &ret,const Lattice<obj2> &lhs,const Lattice<obj3> &rhs){
|
||||
conformable(lhs,rhs);
|
||||
#pragma omp parallel for
|
||||
for(int ss=0;ss<lhs._grid->oSites();ss++){
|
||||
add(&ret._odata[ss],&lhs._odata[ss],&rhs._odata[ss]);
|
||||
}
|
||||
}
|
||||
|
||||
// Lattice BinOp Lattice,
|
||||
template<class left,class right>
|
||||
inline auto operator * (const Lattice<left> &lhs,const Lattice<right> &rhs)-> Lattice<decltype(lhs._odata[0]*rhs._odata[0])>
|
||||
{
|
||||
//NB mult performs conformable check. Do not reapply here for performance.
|
||||
Lattice<decltype(lhs._odata[0]*rhs._odata[0])> ret(rhs._grid);
|
||||
mult(ret,lhs,rhs);
|
||||
return ret;
|
||||
}
|
||||
template<class left,class right>
|
||||
inline auto operator + (const Lattice<left> &lhs,const Lattice<right> &rhs)-> Lattice<decltype(lhs._odata[0]*rhs._odata[0])>
|
||||
{
|
||||
//NB mult performs conformable check. Do not reapply here for performance.
|
||||
Lattice<decltype(lhs._odata[0]*rhs._odata[0])> ret(rhs._grid);
|
||||
add(ret,lhs,rhs);
|
||||
return ret;
|
||||
}
|
||||
template<class left,class right>
|
||||
inline auto operator - (const Lattice<left> &lhs,const Lattice<right> &rhs)-> Lattice<decltype(lhs._odata[0]*rhs._odata[0])>
|
||||
{
|
||||
//NB mult performs conformable check. Do not reapply here for performance.
|
||||
Lattice<decltype(lhs._odata[0]*rhs._odata[0])> ret(rhs._grid);
|
||||
sub(ret,lhs,rhs);
|
||||
return ret;
|
||||
}
|
||||
|
||||
// Scalar BinOp Lattice ;generate return type
|
||||
template<class left,class right>
|
||||
inline auto operator * (const left &lhs,const Lattice<right> &rhs) -> Lattice<decltype(lhs*rhs._odata[0])>
|
||||
{
|
||||
Lattice<decltype(lhs*rhs._odata[0])> ret(rhs._grid);
|
||||
#pragma omp parallel for
|
||||
for(int ss=0;ss<rhs._grid->oSites(); ss++){
|
||||
ret._odata[ss]=lhs*rhs._odata[ss];
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
template<class left,class right>
|
||||
inline auto operator + (const left &lhs,const Lattice<right> &rhs) -> Lattice<decltype(lhs*rhs._odata[0])>
|
||||
{
|
||||
Lattice<decltype(lhs*rhs._odata[0])> ret(rhs._grid);
|
||||
#pragma omp parallel for
|
||||
for(int ss=0;ss<rhs._grid->oSites(); ss++){
|
||||
ret._odata[ss]=lhs+rhs._odata[ss];
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
template<class left,class right>
|
||||
inline auto operator - (const left &lhs,const Lattice<right> &rhs) -> Lattice<decltype(lhs*rhs._odata[0])>
|
||||
{
|
||||
Lattice<decltype(lhs*rhs._odata[0])> ret(rhs._grid);
|
||||
#pragma omp parallel for
|
||||
for(int ss=0;ss<rhs._grid->oSites(); ss++){
|
||||
ret._odata[ss]=lhs-rhs._odata[ss];
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
template<class left,class right>
|
||||
inline auto operator * (const Lattice<left> &lhs,const right &rhs) -> Lattice<decltype(lhs._odata[0]*rhs)>
|
||||
{
|
||||
Lattice<decltype(lhs._odata[0]*rhs)> ret(lhs._grid);
|
||||
#pragma omp parallel for
|
||||
for(int ss=0;ss<lhs._grid->oSites(); ss++){
|
||||
ret._odata[ss]=lhs._odata[ss]*rhs;
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
template<class left,class right>
|
||||
inline auto operator + (const Lattice<left> &lhs,const right &rhs) -> Lattice<decltype(lhs._odata[0]*rhs)>
|
||||
{
|
||||
Lattice<decltype(lhs._odata[0]*rhs)> ret(lhs._grid);
|
||||
#pragma omp parallel for
|
||||
for(int ss=0;ss<rhs._grid->oSites(); ss++){
|
||||
ret._odata[ss]=lhs._odata[ss]+rhs;
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
template<class left,class right>
|
||||
inline auto operator - (const Lattice<left> &lhs,const right &rhs) -> Lattice<decltype(lhs._odata[0]*rhs)>
|
||||
{
|
||||
Lattice<decltype(lhs._odata[0]*rhs)> ret(lhs._grid);
|
||||
#pragma omp parallel for
|
||||
for(int ss=0;ss<rhs._grid->oSites(); ss++){
|
||||
ret._odata[ss]=lhs._odata[ss]-rhs;
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
}
|
||||
#endif
|
14
lib/Grid_lattice_conformable.h
Normal file
14
lib/Grid_lattice_conformable.h
Normal file
@ -0,0 +1,14 @@
|
||||
#ifndef GRID_LATTICE_CONFORMABLE_H
|
||||
#define GRID_LATTICE_CONFORMABLE_H
|
||||
|
||||
namespace Grid {
|
||||
|
||||
template<class obj1,class obj2>
|
||||
void conformable(const Lattice<obj1> &lhs,const Lattice<obj2> &rhs)
|
||||
{
|
||||
assert(lhs._grid == rhs._grid);
|
||||
assert(lhs.checkerboard == rhs.checkerboard);
|
||||
}
|
||||
|
||||
}
|
||||
#endif
|
45
lib/Grid_lattice_coordinate.h
Normal file
45
lib/Grid_lattice_coordinate.h
Normal file
@ -0,0 +1,45 @@
|
||||
#ifndef GRID_LATTICE_COORDINATE_H
|
||||
#define GRID_LATTICE_COORDINATE_H
|
||||
|
||||
namespace Grid {
|
||||
|
||||
template<class iobj> inline void LatticeCoordinate(Lattice<iobj> &l,int mu)
|
||||
{
|
||||
GridBase *grid = l._grid;
|
||||
int Nsimd = grid->iSites();
|
||||
std::vector<int> gcoor;
|
||||
std::vector<Integer> mergebuf(Nsimd);
|
||||
std::vector<Integer *> mergeptr(Nsimd);
|
||||
vInteger vI;
|
||||
for(int o=0;o<grid->oSites();o++){
|
||||
for(int i=0;i<grid->iSites();i++){
|
||||
grid->RankIndexToGlobalCoor(grid->ThisRank(),o,i,gcoor);
|
||||
// grid->RankIndexToGlobalCoor(0,o,i,gcoor);
|
||||
mergebuf[i]=gcoor[mu];
|
||||
mergeptr[i]=&mergebuf[i];
|
||||
}
|
||||
merge(vI,mergeptr);
|
||||
l._odata[o]=vI;
|
||||
}
|
||||
};
|
||||
|
||||
// LatticeCoordinate();
|
||||
// FIXME for debug; deprecate this; made obscelete by
|
||||
template<class vobj> void lex_sites(Lattice<vobj> &l){
|
||||
Real *v_ptr = (Real *)&l._odata[0];
|
||||
size_t o_len = l._grid->oSites();
|
||||
size_t v_len = sizeof(vobj)/sizeof(vRealF);
|
||||
size_t vec_len = vRealF::Nsimd();
|
||||
|
||||
for(int i=0;i<o_len;i++){
|
||||
for(int j=0;j<v_len;j++){
|
||||
for(int vv=0;vv<vec_len;vv+=2){
|
||||
v_ptr[i*v_len*vec_len+j*vec_len+vv ]= i+vv*500;
|
||||
v_ptr[i*v_len*vec_len+j*vec_len+vv+1]= i+vv*500;
|
||||
}
|
||||
}}
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
#endif
|
54
lib/Grid_lattice_local.h
Normal file
54
lib/Grid_lattice_local.h
Normal file
@ -0,0 +1,54 @@
|
||||
#ifndef GRID_LATTICE_LOCALREDUCTION_H
|
||||
#define GRID_LATTICE_LOCALREDUCTION_H
|
||||
|
||||
///////////////////////////////////////////////
|
||||
// localInner, localNorm, outerProduct
|
||||
///////////////////////////////////////////////
|
||||
|
||||
namespace Grid {
|
||||
|
||||
/////////////////////////////////////////////////////
|
||||
// Non site, reduced locally reduced routines
|
||||
/////////////////////////////////////////////////////
|
||||
|
||||
// localNorm2,
|
||||
template<class vobj>
|
||||
inline auto localNorm2 (const Lattice<vobj> &rhs)-> Lattice<typename vobj::tensor_reduced>
|
||||
{
|
||||
Lattice<typename vobj::tensor_reduced> ret(rhs._grid);
|
||||
#pragma omp parallel for
|
||||
for(int ss=0;ss<rhs._grid->oSites(); ss++){
|
||||
ret._odata[ss]=innerProduct(rhs._odata[ss],rhs._odata[ss]);
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
// localInnerProduct
|
||||
template<class vobj>
|
||||
inline auto localInnerProduct (const Lattice<vobj> &lhs,const Lattice<vobj> &rhs)
|
||||
-> Lattice<typename vobj::tensor_reduced>
|
||||
{
|
||||
Lattice<typename vobj::tensor_reduced> ret(rhs._grid);
|
||||
#pragma omp parallel for
|
||||
for(int ss=0;ss<rhs._grid->oSites(); ss++){
|
||||
ret._odata[ss]=innerProduct(lhs._odata[ss],rhs._odata[ss]);
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
// outerProduct Scalar x Scalar -> Scalar
|
||||
// Vector x Vector -> Matrix
|
||||
template<class ll,class rr>
|
||||
inline auto outerProduct (const Lattice<ll> &lhs,const Lattice<rr> &rhs) -> Lattice<decltype(outerProduct(lhs._odata[0],rhs._odata[0]))>
|
||||
{
|
||||
Lattice<decltype(outerProduct(lhs._odata[0],rhs._odata[0]))> ret(rhs._grid);
|
||||
#pragma omp parallel for
|
||||
for(int ss=0;ss<rhs._grid->oSites(); ss++){
|
||||
ret._odata[ss]=outerProduct(lhs._odata[ss],rhs._odata[ss]);
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
#endif
|
143
lib/Grid_lattice_peekpoke.h
Normal file
143
lib/Grid_lattice_peekpoke.h
Normal file
@ -0,0 +1,143 @@
|
||||
#ifndef GRID_LATTICE_PEEK_H
|
||||
#define GRID_LATTICE_PEEK_H
|
||||
|
||||
///////////////////////////////////////////////
|
||||
// Peeking and poking around
|
||||
///////////////////////////////////////////////
|
||||
|
||||
namespace Grid {
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// Peek internal indices of a Lattice object
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
template<int Index,class vobj>
|
||||
inline auto peekIndex(const Lattice<vobj> &lhs)
|
||||
-> Lattice<decltype(peekIndex<Index>(lhs._odata[0]))>
|
||||
{
|
||||
Lattice<decltype(peekIndex<Index>(lhs._odata[0]))> ret(lhs._grid);
|
||||
#pragma omp parallel for
|
||||
for(int ss=0;ss<lhs._grid->oSites();ss++){
|
||||
ret._odata[ss] = peekIndex<Index>(lhs._odata[ss]);
|
||||
}
|
||||
return ret;
|
||||
};
|
||||
template<int Index,class vobj>
|
||||
inline auto peekIndex(const Lattice<vobj> &lhs,int i)
|
||||
-> Lattice<decltype(peekIndex<Index>(lhs._odata[0],i))>
|
||||
{
|
||||
Lattice<decltype(peekIndex<Index>(lhs._odata[0],i))> ret(lhs._grid);
|
||||
#pragma omp parallel for
|
||||
for(int ss=0;ss<lhs._grid->oSites();ss++){
|
||||
ret._odata[ss] = peekIndex<Index>(lhs._odata[ss],i);
|
||||
}
|
||||
return ret;
|
||||
};
|
||||
template<int Index,class vobj>
|
||||
inline auto peekIndex(const Lattice<vobj> &lhs,int i,int j)
|
||||
-> Lattice<decltype(peekIndex<Index>(lhs._odata[0],i,j))>
|
||||
{
|
||||
Lattice<decltype(peekIndex<Index>(lhs._odata[0],i,j))> ret(lhs._grid);
|
||||
#pragma omp parallel for
|
||||
for(int ss=0;ss<lhs._grid->oSites();ss++){
|
||||
ret._odata[ss] = peekIndex<Index>(lhs._odata[ss],i,j);
|
||||
}
|
||||
return ret;
|
||||
};
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// Poke internal indices of a Lattice object
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
template<int Index,class vobj> inline
|
||||
void pokeIndex(Lattice<vobj> &lhs,const Lattice<decltype(peekIndex<Index>(lhs._odata[0]))> & rhs)
|
||||
{
|
||||
#pragma omp parallel for
|
||||
for(int ss=0;ss<lhs._grid->oSites();ss++){
|
||||
pokeIndex<Index>(lhs._odata[ss],rhs._odata[ss]);
|
||||
}
|
||||
}
|
||||
template<int Index,class vobj> inline
|
||||
void pokeIndex(Lattice<vobj> &lhs,const Lattice<decltype(peekIndex<Index>(lhs._odata[0],0))> & rhs,int i)
|
||||
{
|
||||
#pragma omp parallel for
|
||||
for(int ss=0;ss<lhs._grid->oSites();ss++){
|
||||
pokeIndex<Index>(lhs._odata[ss],rhs._odata[ss],i);
|
||||
}
|
||||
}
|
||||
template<int Index,class vobj> inline
|
||||
void pokeIndex(Lattice<vobj> &lhs,const Lattice<decltype(peekIndex<Index>(lhs._odata[0],0,0))> & rhs,int i,int j)
|
||||
{
|
||||
#pragma omp parallel for
|
||||
for(int ss=0;ss<lhs._grid->oSites();ss++){
|
||||
pokeIndex<Index>(lhs._odata[ss],rhs._odata[ss],i,j);
|
||||
}
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////
|
||||
// Poke a scalar object into the SIMD array
|
||||
//////////////////////////////////////////////////////
|
||||
template<class vobj,class sobj>
|
||||
void pokeSite(const sobj &s,Lattice<vobj> &l,std::vector<int> &site){
|
||||
|
||||
GridBase *grid=l._grid;
|
||||
|
||||
typedef typename vobj::scalar_type scalar_type;
|
||||
typedef typename vobj::vector_type vector_type;
|
||||
|
||||
int Nsimd = grid->Nsimd();
|
||||
|
||||
assert( l.checkerboard== l._grid->CheckerBoard(site));
|
||||
assert( sizeof(sobj)*Nsimd == sizeof(vobj));
|
||||
|
||||
int rank,odx,idx;
|
||||
grid->GlobalCoorToRankIndex(rank,odx,idx,site);
|
||||
|
||||
// Optional to broadcast from node 0.
|
||||
grid->Broadcast(0,s);
|
||||
|
||||
std::vector<sobj> buf(Nsimd);
|
||||
std::vector<scalar_type *> pointers(Nsimd);
|
||||
for(int i=0;i<Nsimd;i++) pointers[i] = (scalar_type *)&buf[i];
|
||||
|
||||
// extract-modify-merge cycle is easiest way and this is not perf critical
|
||||
extract(l._odata[odx],pointers);
|
||||
|
||||
buf[idx] = s;
|
||||
|
||||
for(int i=0;i<Nsimd;i++) pointers[i] = (scalar_type *)&buf[i];
|
||||
merge(l._odata[odx],pointers);
|
||||
|
||||
return;
|
||||
};
|
||||
|
||||
//////////////////////////////////////////////////////////
|
||||
// Peek a scalar object from the SIMD array
|
||||
//////////////////////////////////////////////////////////
|
||||
template<class vobj,class sobj>
|
||||
void peekSite(sobj &s,Lattice<vobj> &l,std::vector<int> &site){
|
||||
|
||||
GridBase *grid=l._grid;
|
||||
|
||||
typedef typename vobj::scalar_type scalar_type;
|
||||
typedef typename vobj::vector_type vector_type;
|
||||
|
||||
int Nsimd = grid->Nsimd();
|
||||
|
||||
assert( l.checkerboard== l._grid->CheckerBoard(site));
|
||||
assert( sizeof(sobj)*Nsimd == sizeof(vobj));
|
||||
|
||||
int rank,odx,idx;
|
||||
grid->GlobalCoorToRankIndex(rank,odx,idx,site);
|
||||
std::vector<sobj> buf(Nsimd);
|
||||
std::vector<scalar_type *> pointers(Nsimd);
|
||||
for(int i=0;i<Nsimd;i++) pointers[i] = (scalar_type *)&buf[i];
|
||||
|
||||
extract(l._odata[odx],pointers);
|
||||
|
||||
s = buf[idx];
|
||||
grid->Broadcast(rank,s);
|
||||
|
||||
return;
|
||||
};
|
||||
|
||||
}
|
||||
#endif
|
||||
|
52
lib/Grid_lattice_reality.h
Normal file
52
lib/Grid_lattice_reality.h
Normal file
@ -0,0 +1,52 @@
|
||||
#ifndef GRID_LATTICE_REALITY_H
|
||||
#define GRID_LATTICE_REALITY_H
|
||||
|
||||
|
||||
// FIXME .. this is the sector of the code
|
||||
// I am most worried about the directions
|
||||
// The choice of burying complex in the SIMD
|
||||
// is making the use of "real" and "imag" very cumbersome
|
||||
|
||||
namespace Grid {
|
||||
|
||||
template<class vobj> inline Lattice<vobj> adj(const Lattice<vobj> &lhs){
|
||||
Lattice<vobj> ret(lhs._grid);
|
||||
#pragma omp parallel for
|
||||
for(int ss=0;ss<lhs._grid->oSites();ss++){
|
||||
ret._odata[ss] = adj(lhs._odata[ss]);
|
||||
}
|
||||
return ret;
|
||||
};
|
||||
|
||||
template<class vobj> inline Lattice<vobj> conj(const Lattice<vobj> &lhs){
|
||||
Lattice<vobj> ret(lhs._grid);
|
||||
#pragma omp parallel for
|
||||
for(int ss=0;ss<lhs._grid->oSites();ss++){
|
||||
ret._odata[ss] = conj(lhs._odata[ss]);
|
||||
}
|
||||
return ret;
|
||||
};
|
||||
|
||||
template<class vobj> inline auto real(const Lattice<vobj> &z) -> Lattice<decltype(real(z._odata[0]))>
|
||||
{
|
||||
Lattice<decltype(real(z._odata[0]))> ret(z._grid);
|
||||
#pragma omp parallel for
|
||||
for(int ss=0;ss<z._grid->oSites();ss++){
|
||||
ret._odata[ss] = real(z._odata[ss]);
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
template<class vobj> inline auto imag(const Lattice<vobj> &z) -> Lattice<decltype(imag(z._odata[0]))>
|
||||
{
|
||||
Lattice<decltype(imag(z._odata[0]))> ret(z._grid);
|
||||
#pragma omp parallel for
|
||||
for(int ss=0;ss<z._grid->oSites();ss++){
|
||||
ret._odata[ss] = imag(z._odata[ss]);
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
#endif
|
45
lib/Grid_lattice_reduction.h
Normal file
45
lib/Grid_lattice_reduction.h
Normal file
@ -0,0 +1,45 @@
|
||||
#ifndef GRID_LATTICE_REDUCTION_H
|
||||
#define GRID_LATTICE_REDUCTION_H
|
||||
|
||||
namespace Grid {
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// Reduction operations
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
template<class vobj>
|
||||
inline RealD norm2(const Lattice<vobj> &arg){
|
||||
|
||||
typedef typename vobj::scalar_type scalar;
|
||||
typedef typename vobj::vector_type vector;
|
||||
decltype(innerProduct(arg._odata[0],arg._odata[0])) vnrm=zero;
|
||||
scalar nrm;
|
||||
//FIXME make this loop parallelisable
|
||||
vnrm=zero;
|
||||
for(int ss=0;ss<arg._grid->oSites(); ss++){
|
||||
vnrm = vnrm + innerProduct(arg._odata[ss],arg._odata[ss]);
|
||||
}
|
||||
vector vvnrm =TensorRemove(vnrm) ;
|
||||
nrm = Reduce(vvnrm);
|
||||
arg._grid->GlobalSum(nrm);
|
||||
return real(nrm);
|
||||
}
|
||||
|
||||
template<class vobj>
|
||||
inline auto innerProduct(const Lattice<vobj> &left,const Lattice<vobj> &right) ->decltype(innerProduct(left._odata[0],right._odata[0]))
|
||||
{
|
||||
typedef typename vobj::scalar_type scalar;
|
||||
decltype(innerProduct(left._odata[0],right._odata[0])) vnrm=zero;
|
||||
|
||||
scalar nrm;
|
||||
//FIXME make this loop parallelisable
|
||||
for(int ss=0;ss<left._grid->oSites(); ss++){
|
||||
vnrm = vnrm + innerProduct(left._odata[ss],right._odata[ss]);
|
||||
}
|
||||
nrm = Reduce(vnrm);
|
||||
right._grid->GlobalSum(nrm);
|
||||
return nrm;
|
||||
}
|
||||
|
||||
}
|
||||
#endif
|
||||
|
32
lib/Grid_lattice_rng.h
Normal file
32
lib/Grid_lattice_rng.h
Normal file
@ -0,0 +1,32 @@
|
||||
#ifndef GRID_LATTICE_RNG_H
|
||||
#define GRID_LATTICE_RNG_H
|
||||
|
||||
namespace Grid {
|
||||
|
||||
// FIXME Randomise; deprecate this
|
||||
template <class vobj> inline void random(Lattice<vobj> &l){
|
||||
Real *v_ptr = (Real *)&l._odata[0];
|
||||
size_t v_len = l._grid->oSites()*sizeof(vobj);
|
||||
size_t d_len = v_len/sizeof(Real);
|
||||
|
||||
for(int i=0;i<d_len;i++){
|
||||
|
||||
v_ptr[i]=drand48();
|
||||
}
|
||||
};
|
||||
|
||||
// FIXME Implement a consistent seed management strategy
|
||||
template <class vobj> inline void gaussian(Lattice<vobj> &l){
|
||||
// Zero mean, unit variance.
|
||||
std::normal_distribution<double> distribution(0.0,1.0);
|
||||
Real *v_ptr = (Real *)&l._odata[0];
|
||||
size_t v_len = l._grid->oSites()*sizeof(vobj);
|
||||
size_t d_len = v_len/sizeof(Real);
|
||||
|
||||
for(int i=0;i<d_len;i++){
|
||||
v_ptr[i]= drand48();
|
||||
}
|
||||
};
|
||||
|
||||
}
|
||||
#endif
|
43
lib/Grid_lattice_trace.h
Normal file
43
lib/Grid_lattice_trace.h
Normal file
@ -0,0 +1,43 @@
|
||||
#ifndef GRID_LATTICE_TRACE_H
|
||||
#define GRID_LATTICE_TRACE_H
|
||||
|
||||
///////////////////////////////////////////////
|
||||
// Tracing, transposing, peeking, poking
|
||||
///////////////////////////////////////////////
|
||||
|
||||
namespace Grid {
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// Trace
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
template<class vobj>
|
||||
inline auto trace(const Lattice<vobj> &lhs)
|
||||
-> Lattice<decltype(trace(lhs._odata[0]))>
|
||||
{
|
||||
Lattice<decltype(trace(lhs._odata[0]))> ret(lhs._grid);
|
||||
#pragma omp parallel for
|
||||
for(int ss=0;ss<lhs._grid->oSites();ss++){
|
||||
ret._odata[ss] = trace(lhs._odata[ss]);
|
||||
}
|
||||
return ret;
|
||||
};
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// Trace Index level dependent operation
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
template<int Index,class vobj>
|
||||
inline auto traceIndex(const Lattice<vobj> &lhs)
|
||||
-> Lattice<decltype(traceIndex<Index>(lhs._odata[0]))>
|
||||
{
|
||||
Lattice<decltype(traceIndex<Index>(lhs._odata[0]))> ret(lhs._grid);
|
||||
#pragma omp parallel for
|
||||
for(int ss=0;ss<lhs._grid->oSites();ss++){
|
||||
ret._odata[ss] = traceIndex<Index>(lhs._odata[ss]);
|
||||
}
|
||||
return ret;
|
||||
};
|
||||
|
||||
|
||||
}
|
||||
#endif
|
||||
|
46
lib/Grid_lattice_transfer.h
Normal file
46
lib/Grid_lattice_transfer.h
Normal file
@ -0,0 +1,46 @@
|
||||
#ifndef GRID_LATTICE_TRANSFER_H
|
||||
#define GRID_LATTICE_TRANSFER_H
|
||||
|
||||
namespace Grid {
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// remove and insert a half checkerboard
|
||||
////////////////////////////////////////////////////////////////////////////////////////////
|
||||
template<class vobj> inline void pickCheckerboard(int cb,Lattice<vobj> &half,const Lattice<vobj> &full){
|
||||
half.checkerboard = cb;
|
||||
int ssh=0;
|
||||
#pragma omp parallel for
|
||||
for(int ss=0;ss<full._grid->oSites();ss++){
|
||||
std::vector<int> coor;
|
||||
int cbos;
|
||||
|
||||
full._grid->oCoorFromOindex(coor,ss);
|
||||
cbos=half._grid->CheckerBoard(coor);
|
||||
|
||||
if (cbos==cb) {
|
||||
|
||||
half._odata[ssh] = full._odata[ss];
|
||||
ssh++;
|
||||
}
|
||||
}
|
||||
}
|
||||
template<class vobj> inline void setCheckerboard(Lattice<vobj> &full,const Lattice<vobj> &half){
|
||||
int cb = half.checkerboard;
|
||||
int ssh=0;
|
||||
#pragma omp parallel for
|
||||
for(int ss=0;ss<full._grid->oSites();ss++){
|
||||
std::vector<int> coor;
|
||||
int cbos;
|
||||
|
||||
full._grid->oCoorFromOindex(coor,ss);
|
||||
cbos=half._grid->CheckerBoard(coor);
|
||||
|
||||
if (cbos==cb) {
|
||||
full._odata[ss]=half._odata[ssh];
|
||||
ssh++;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
#endif
|
39
lib/Grid_lattice_transpose.h
Normal file
39
lib/Grid_lattice_transpose.h
Normal file
@ -0,0 +1,39 @@
|
||||
#ifndef GRID_LATTICE_TRANSPOSE_H
|
||||
#define GRID_LATTICE_TRANSPOSE_H
|
||||
|
||||
///////////////////////////////////////////////
|
||||
// Transpose
|
||||
///////////////////////////////////////////////
|
||||
|
||||
namespace Grid {
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// Transpose
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
template<class vobj>
|
||||
inline Lattice<vobj> transpose(const Lattice<vobj> &lhs){
|
||||
Lattice<vobj> ret(lhs._grid);
|
||||
#pragma omp parallel for
|
||||
for(int ss=0;ss<lhs._grid->oSites();ss++){
|
||||
ret._odata[ss] = transpose(lhs._odata[ss]);
|
||||
}
|
||||
return ret;
|
||||
};
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// Index level dependent transpose
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
template<int Index,class vobj>
|
||||
inline auto transposeIndex(const Lattice<vobj> &lhs)
|
||||
-> Lattice<decltype(transposeIndex<Index>(lhs._odata[0]))>
|
||||
{
|
||||
Lattice<decltype(transposeIndex<Index>(lhs._odata[0]))> ret(lhs._grid);
|
||||
#pragma omp parallel for
|
||||
for(int ss=0;ss<lhs._grid->oSites();ss++){
|
||||
ret._odata[ss] = transposeIndex<Index>(lhs._odata[ss]);
|
||||
}
|
||||
return ret;
|
||||
};
|
||||
|
||||
}
|
||||
#endif
|
@ -1,14 +1,14 @@
|
||||
#ifndef GRID_PREDICATED_H
|
||||
#define GRID_PREDICATED_H
|
||||
|
||||
#ifndef GRID_WHERE_H
|
||||
#define GRID_WHERE_H
|
||||
namespace Grid {
|
||||
// Must implement the predicate gating the
|
||||
// Must be able to reduce the predicate down to a single vInteger per site.
|
||||
// Must be able to require the type be iScalar x iScalar x ....
|
||||
// give a GetVtype method in iScalar
|
||||
// and blow away the tensor structures.
|
||||
//
|
||||
template<class vobj>
|
||||
inline void where(Lattice<vobj> &ret,const LatticeInteger &predicate,Lattice<vobj> &iftrue,Lattice<vobj> &iffalse)
|
||||
template<class vobj,class iobj>
|
||||
inline void where(Lattice<vobj> &ret,const Lattice<iobj> &predicate,Lattice<vobj> &iftrue,Lattice<vobj> &iffalse)
|
||||
{
|
||||
conformable(iftrue,iffalse);
|
||||
conformable(iftrue,predicate);
|
||||
@ -17,6 +17,7 @@ inline void where(Lattice<vobj> &ret,const LatticeInteger &predicate,Lattice<vob
|
||||
GridBase *grid=iftrue._grid;
|
||||
typedef typename vobj::scalar_type scalar_type;
|
||||
typedef typename vobj::vector_type vector_type;
|
||||
typedef typename iobj::vector_type mask_type;
|
||||
|
||||
const int Nsimd = grid->Nsimd();
|
||||
const int words = sizeof(vobj)/sizeof(vector_type);
|
||||
@ -35,7 +36,7 @@ inline void where(Lattice<vobj> &ret,const LatticeInteger &predicate,Lattice<vob
|
||||
for(int s=0;s<Nsimd;s++) pointers[s] = & falsevals[s][0];
|
||||
extract(iffalse._odata[ss] ,pointers);
|
||||
|
||||
extract(predicate._odata[ss],mask);
|
||||
extract(TensorRemove(predicate._odata[ss]),mask);
|
||||
|
||||
for(int s=0;s<Nsimd;s++){
|
||||
if (mask[s]) pointers[s]=&truevals[s][0];
|
||||
@ -46,8 +47,8 @@ inline void where(Lattice<vobj> &ret,const LatticeInteger &predicate,Lattice<vob
|
||||
}
|
||||
}
|
||||
|
||||
template<class vobj>
|
||||
inline Lattice<vobj> where(const LatticeInteger &predicate,Lattice<vobj> &iftrue,Lattice<vobj> &iffalse)
|
||||
template<class vobj,class iobj>
|
||||
inline Lattice<vobj> where(const Lattice<iobj> &predicate,Lattice<vobj> &iftrue,Lattice<vobj> &iffalse)
|
||||
{
|
||||
conformable(iftrue,iffalse);
|
||||
conformable(iftrue,predicate);
|
||||
@ -58,5 +59,5 @@ inline Lattice<vobj> where(const LatticeInteger &predicate,Lattice<vobj> &iftrue
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
}
|
||||
#endif
|
Loading…
Reference in New Issue
Block a user