1
0
mirror of https://github.com/paboyle/Grid.git synced 2025-06-12 20:27:06 +01:00

Build reorg with which I am a bit happier

This commit is contained in:
Peter Boyle
2015-04-18 21:22:50 +01:00
parent a17ce0695b
commit aee6669d0b
41 changed files with 0 additions and 0 deletions

View File

@ -0,0 +1,160 @@
#ifndef GRID_LATTICE_ARITH_H
#define GRID_LATTICE_ARITH_H
namespace Grid {
template<class vobj>
inline Lattice<vobj> operator -(const Lattice<vobj> &r)
{
Lattice<vobj> ret(r._grid);
#pragma omp parallel for
for(int ss=0;ss<r._grid->oSites();ss++){
ret._odata[ss]= -r._odata[ss];
}
return ret;
}
template<class vobj>
inline void axpy(Lattice<vobj> &ret,double a,const Lattice<vobj> &lhs,const Lattice<vobj> &rhs){
conformable(lhs,rhs);
#pragma omp parallel for
for(int ss=0;ss<lhs._grid->oSites();ss++){
axpy(&ret._odata[ss],a,&lhs._odata[ss],&rhs._odata[ss]);
}
}
template<class vobj>
inline void axpy(Lattice<vobj> &ret,std::complex<double> a,const Lattice<vobj> &lhs,const Lattice<vobj> &rhs){
conformable(lhs,rhs);
#pragma omp parallel for
for(int ss=0;ss<lhs._grid->oSites();ss++){
axpy(&ret._odata[ss],a,&lhs._odata[ss],&rhs._odata[ss]);
}
}
template<class obj1,class obj2,class obj3>
void mult(Lattice<obj1> &ret,const Lattice<obj2> &lhs,const Lattice<obj3> &rhs){
conformable(lhs,rhs);
uint32_t vec_len = lhs._grid->oSites();
#pragma omp parallel for
for(int ss=0;ss<vec_len;ss++){
mult(&ret._odata[ss],&lhs._odata[ss],&rhs._odata[ss]);
}
}
template<class obj1,class obj2,class obj3>
void mac(Lattice<obj1> &ret,const Lattice<obj2> &lhs,const Lattice<obj3> &rhs){
conformable(lhs,rhs);
uint32_t vec_len = lhs._grid->oSites();
#pragma omp parallel for
for(int ss=0;ss<vec_len;ss++){
mac(&ret._odata[ss],&lhs._odata[ss],&rhs._odata[ss]);
}
}
template<class obj1,class obj2,class obj3>
void sub(Lattice<obj1> &ret,const Lattice<obj2> &lhs,const Lattice<obj3> &rhs){
conformable(lhs,rhs);
#pragma omp parallel for
for(int ss=0;ss<lhs._grid->oSites();ss++){
sub(&ret._odata[ss],&lhs._odata[ss],&rhs._odata[ss]);
}
}
template<class obj1,class obj2,class obj3>
void add(Lattice<obj1> &ret,const Lattice<obj2> &lhs,const Lattice<obj3> &rhs){
conformable(lhs,rhs);
#pragma omp parallel for
for(int ss=0;ss<lhs._grid->oSites();ss++){
add(&ret._odata[ss],&lhs._odata[ss],&rhs._odata[ss]);
}
}
// Lattice BinOp Lattice,
template<class left,class right>
inline auto operator * (const Lattice<left> &lhs,const Lattice<right> &rhs)-> Lattice<decltype(lhs._odata[0]*rhs._odata[0])>
{
//NB mult performs conformable check. Do not reapply here for performance.
Lattice<decltype(lhs._odata[0]*rhs._odata[0])> ret(rhs._grid);
mult(ret,lhs,rhs);
return ret;
}
template<class left,class right>
inline auto operator + (const Lattice<left> &lhs,const Lattice<right> &rhs)-> Lattice<decltype(lhs._odata[0]*rhs._odata[0])>
{
//NB mult performs conformable check. Do not reapply here for performance.
Lattice<decltype(lhs._odata[0]*rhs._odata[0])> ret(rhs._grid);
add(ret,lhs,rhs);
return ret;
}
template<class left,class right>
inline auto operator - (const Lattice<left> &lhs,const Lattice<right> &rhs)-> Lattice<decltype(lhs._odata[0]*rhs._odata[0])>
{
//NB mult performs conformable check. Do not reapply here for performance.
Lattice<decltype(lhs._odata[0]*rhs._odata[0])> ret(rhs._grid);
sub(ret,lhs,rhs);
return ret;
}
// Scalar BinOp Lattice ;generate return type
template<class left,class right>
inline auto operator * (const left &lhs,const Lattice<right> &rhs) -> Lattice<decltype(lhs*rhs._odata[0])>
{
Lattice<decltype(lhs*rhs._odata[0])> ret(rhs._grid);
#pragma omp parallel for
for(int ss=0;ss<rhs._grid->oSites(); ss++){
ret._odata[ss]=lhs*rhs._odata[ss];
}
return ret;
}
template<class left,class right>
inline auto operator + (const left &lhs,const Lattice<right> &rhs) -> Lattice<decltype(lhs*rhs._odata[0])>
{
Lattice<decltype(lhs*rhs._odata[0])> ret(rhs._grid);
#pragma omp parallel for
for(int ss=0;ss<rhs._grid->oSites(); ss++){
ret._odata[ss]=lhs+rhs._odata[ss];
}
return ret;
}
template<class left,class right>
inline auto operator - (const left &lhs,const Lattice<right> &rhs) -> Lattice<decltype(lhs*rhs._odata[0])>
{
Lattice<decltype(lhs*rhs._odata[0])> ret(rhs._grid);
#pragma omp parallel for
for(int ss=0;ss<rhs._grid->oSites(); ss++){
ret._odata[ss]=lhs-rhs._odata[ss];
}
return ret;
}
template<class left,class right>
inline auto operator * (const Lattice<left> &lhs,const right &rhs) -> Lattice<decltype(lhs._odata[0]*rhs)>
{
Lattice<decltype(lhs._odata[0]*rhs)> ret(lhs._grid);
#pragma omp parallel for
for(int ss=0;ss<lhs._grid->oSites(); ss++){
ret._odata[ss]=lhs._odata[ss]*rhs;
}
return ret;
}
template<class left,class right>
inline auto operator + (const Lattice<left> &lhs,const right &rhs) -> Lattice<decltype(lhs._odata[0]*rhs)>
{
Lattice<decltype(lhs._odata[0]*rhs)> ret(lhs._grid);
#pragma omp parallel for
for(int ss=0;ss<rhs._grid->oSites(); ss++){
ret._odata[ss]=lhs._odata[ss]+rhs;
}
return ret;
}
template<class left,class right>
inline auto operator - (const Lattice<left> &lhs,const right &rhs) -> Lattice<decltype(lhs._odata[0]*rhs)>
{
Lattice<decltype(lhs._odata[0]*rhs)> ret(lhs._grid);
#pragma omp parallel for
for(int ss=0;ss<rhs._grid->oSites(); ss++){
ret._odata[ss]=lhs._odata[ss]-rhs;
}
return ret;
}
}
#endif

View File

@ -0,0 +1,14 @@
#ifndef GRID_LATTICE_CONFORMABLE_H
#define GRID_LATTICE_CONFORMABLE_H
namespace Grid {
template<class obj1,class obj2>
void conformable(const Lattice<obj1> &lhs,const Lattice<obj2> &rhs)
{
assert(lhs._grid == rhs._grid);
assert(lhs.checkerboard == rhs.checkerboard);
}
}
#endif

View File

@ -0,0 +1,45 @@
#ifndef GRID_LATTICE_COORDINATE_H
#define GRID_LATTICE_COORDINATE_H
namespace Grid {
template<class iobj> inline void LatticeCoordinate(Lattice<iobj> &l,int mu)
{
GridBase *grid = l._grid;
int Nsimd = grid->iSites();
std::vector<int> gcoor;
std::vector<Integer> mergebuf(Nsimd);
std::vector<Integer *> mergeptr(Nsimd);
vInteger vI;
for(int o=0;o<grid->oSites();o++){
for(int i=0;i<grid->iSites();i++){
grid->RankIndexToGlobalCoor(grid->ThisRank(),o,i,gcoor);
// grid->RankIndexToGlobalCoor(0,o,i,gcoor);
mergebuf[i]=gcoor[mu];
mergeptr[i]=&mergebuf[i];
}
merge(vI,mergeptr);
l._odata[o]=vI;
}
};
// LatticeCoordinate();
// FIXME for debug; deprecate this; made obscelete by
template<class vobj> void lex_sites(Lattice<vobj> &l){
Real *v_ptr = (Real *)&l._odata[0];
size_t o_len = l._grid->oSites();
size_t v_len = sizeof(vobj)/sizeof(vRealF);
size_t vec_len = vRealF::Nsimd();
for(int i=0;i<o_len;i++){
for(int j=0;j<v_len;j++){
for(int vv=0;vv<vec_len;vv+=2){
v_ptr[i*v_len*vec_len+j*vec_len+vv ]= i+vv*500;
v_ptr[i*v_len*vec_len+j*vec_len+vv+1]= i+vv*500;
}
}}
}
}
#endif

View File

@ -0,0 +1,54 @@
#ifndef GRID_LATTICE_LOCALREDUCTION_H
#define GRID_LATTICE_LOCALREDUCTION_H
///////////////////////////////////////////////
// localInner, localNorm, outerProduct
///////////////////////////////////////////////
namespace Grid {
/////////////////////////////////////////////////////
// Non site, reduced locally reduced routines
/////////////////////////////////////////////////////
// localNorm2,
template<class vobj>
inline auto localNorm2 (const Lattice<vobj> &rhs)-> Lattice<typename vobj::tensor_reduced>
{
Lattice<typename vobj::tensor_reduced> ret(rhs._grid);
#pragma omp parallel for
for(int ss=0;ss<rhs._grid->oSites(); ss++){
ret._odata[ss]=innerProduct(rhs._odata[ss],rhs._odata[ss]);
}
return ret;
}
// localInnerProduct
template<class vobj>
inline auto localInnerProduct (const Lattice<vobj> &lhs,const Lattice<vobj> &rhs)
-> Lattice<typename vobj::tensor_reduced>
{
Lattice<typename vobj::tensor_reduced> ret(rhs._grid);
#pragma omp parallel for
for(int ss=0;ss<rhs._grid->oSites(); ss++){
ret._odata[ss]=innerProduct(lhs._odata[ss],rhs._odata[ss]);
}
return ret;
}
// outerProduct Scalar x Scalar -> Scalar
// Vector x Vector -> Matrix
template<class ll,class rr>
inline auto outerProduct (const Lattice<ll> &lhs,const Lattice<rr> &rhs) -> Lattice<decltype(outerProduct(lhs._odata[0],rhs._odata[0]))>
{
Lattice<decltype(outerProduct(lhs._odata[0],rhs._odata[0]))> ret(rhs._grid);
#pragma omp parallel for
for(int ss=0;ss<rhs._grid->oSites(); ss++){
ret._odata[ss]=outerProduct(lhs._odata[ss],rhs._odata[ss]);
}
return ret;
}
}
#endif

View File

@ -0,0 +1,143 @@
#ifndef GRID_LATTICE_PEEK_H
#define GRID_LATTICE_PEEK_H
///////////////////////////////////////////////
// Peeking and poking around
///////////////////////////////////////////////
namespace Grid {
////////////////////////////////////////////////////////////////////////////////////////////////////
// Peek internal indices of a Lattice object
////////////////////////////////////////////////////////////////////////////////////////////////////
template<int Index,class vobj>
inline auto peekIndex(const Lattice<vobj> &lhs)
-> Lattice<decltype(peekIndex<Index>(lhs._odata[0]))>
{
Lattice<decltype(peekIndex<Index>(lhs._odata[0]))> ret(lhs._grid);
#pragma omp parallel for
for(int ss=0;ss<lhs._grid->oSites();ss++){
ret._odata[ss] = peekIndex<Index>(lhs._odata[ss]);
}
return ret;
};
template<int Index,class vobj>
inline auto peekIndex(const Lattice<vobj> &lhs,int i)
-> Lattice<decltype(peekIndex<Index>(lhs._odata[0],i))>
{
Lattice<decltype(peekIndex<Index>(lhs._odata[0],i))> ret(lhs._grid);
#pragma omp parallel for
for(int ss=0;ss<lhs._grid->oSites();ss++){
ret._odata[ss] = peekIndex<Index>(lhs._odata[ss],i);
}
return ret;
};
template<int Index,class vobj>
inline auto peekIndex(const Lattice<vobj> &lhs,int i,int j)
-> Lattice<decltype(peekIndex<Index>(lhs._odata[0],i,j))>
{
Lattice<decltype(peekIndex<Index>(lhs._odata[0],i,j))> ret(lhs._grid);
#pragma omp parallel for
for(int ss=0;ss<lhs._grid->oSites();ss++){
ret._odata[ss] = peekIndex<Index>(lhs._odata[ss],i,j);
}
return ret;
};
////////////////////////////////////////////////////////////////////////////////////////////////////
// Poke internal indices of a Lattice object
////////////////////////////////////////////////////////////////////////////////////////////////////
template<int Index,class vobj> inline
void pokeIndex(Lattice<vobj> &lhs,const Lattice<decltype(peekIndex<Index>(lhs._odata[0]))> & rhs)
{
#pragma omp parallel for
for(int ss=0;ss<lhs._grid->oSites();ss++){
pokeIndex<Index>(lhs._odata[ss],rhs._odata[ss]);
}
}
template<int Index,class vobj> inline
void pokeIndex(Lattice<vobj> &lhs,const Lattice<decltype(peekIndex<Index>(lhs._odata[0],0))> & rhs,int i)
{
#pragma omp parallel for
for(int ss=0;ss<lhs._grid->oSites();ss++){
pokeIndex<Index>(lhs._odata[ss],rhs._odata[ss],i);
}
}
template<int Index,class vobj> inline
void pokeIndex(Lattice<vobj> &lhs,const Lattice<decltype(peekIndex<Index>(lhs._odata[0],0,0))> & rhs,int i,int j)
{
#pragma omp parallel for
for(int ss=0;ss<lhs._grid->oSites();ss++){
pokeIndex<Index>(lhs._odata[ss],rhs._odata[ss],i,j);
}
}
//////////////////////////////////////////////////////
// Poke a scalar object into the SIMD array
//////////////////////////////////////////////////////
template<class vobj,class sobj>
void pokeSite(const sobj &s,Lattice<vobj> &l,std::vector<int> &site){
GridBase *grid=l._grid;
typedef typename vobj::scalar_type scalar_type;
typedef typename vobj::vector_type vector_type;
int Nsimd = grid->Nsimd();
assert( l.checkerboard== l._grid->CheckerBoard(site));
assert( sizeof(sobj)*Nsimd == sizeof(vobj));
int rank,odx,idx;
grid->GlobalCoorToRankIndex(rank,odx,idx,site);
// Optional to broadcast from node 0.
grid->Broadcast(0,s);
std::vector<sobj> buf(Nsimd);
std::vector<scalar_type *> pointers(Nsimd);
for(int i=0;i<Nsimd;i++) pointers[i] = (scalar_type *)&buf[i];
// extract-modify-merge cycle is easiest way and this is not perf critical
extract(l._odata[odx],pointers);
buf[idx] = s;
for(int i=0;i<Nsimd;i++) pointers[i] = (scalar_type *)&buf[i];
merge(l._odata[odx],pointers);
return;
};
//////////////////////////////////////////////////////////
// Peek a scalar object from the SIMD array
//////////////////////////////////////////////////////////
template<class vobj,class sobj>
void peekSite(sobj &s,Lattice<vobj> &l,std::vector<int> &site){
GridBase *grid=l._grid;
typedef typename vobj::scalar_type scalar_type;
typedef typename vobj::vector_type vector_type;
int Nsimd = grid->Nsimd();
assert( l.checkerboard== l._grid->CheckerBoard(site));
assert( sizeof(sobj)*Nsimd == sizeof(vobj));
int rank,odx,idx;
grid->GlobalCoorToRankIndex(rank,odx,idx,site);
std::vector<sobj> buf(Nsimd);
std::vector<scalar_type *> pointers(Nsimd);
for(int i=0;i<Nsimd;i++) pointers[i] = (scalar_type *)&buf[i];
extract(l._odata[odx],pointers);
s = buf[idx];
grid->Broadcast(rank,s);
return;
};
}
#endif

View File

@ -0,0 +1,52 @@
#ifndef GRID_LATTICE_REALITY_H
#define GRID_LATTICE_REALITY_H
// FIXME .. this is the sector of the code
// I am most worried about the directions
// The choice of burying complex in the SIMD
// is making the use of "real" and "imag" very cumbersome
namespace Grid {
template<class vobj> inline Lattice<vobj> adj(const Lattice<vobj> &lhs){
Lattice<vobj> ret(lhs._grid);
#pragma omp parallel for
for(int ss=0;ss<lhs._grid->oSites();ss++){
ret._odata[ss] = adj(lhs._odata[ss]);
}
return ret;
};
template<class vobj> inline Lattice<vobj> conj(const Lattice<vobj> &lhs){
Lattice<vobj> ret(lhs._grid);
#pragma omp parallel for
for(int ss=0;ss<lhs._grid->oSites();ss++){
ret._odata[ss] = conj(lhs._odata[ss]);
}
return ret;
};
template<class vobj> inline auto real(const Lattice<vobj> &z) -> Lattice<decltype(real(z._odata[0]))>
{
Lattice<decltype(real(z._odata[0]))> ret(z._grid);
#pragma omp parallel for
for(int ss=0;ss<z._grid->oSites();ss++){
ret._odata[ss] = real(z._odata[ss]);
}
return ret;
}
template<class vobj> inline auto imag(const Lattice<vobj> &z) -> Lattice<decltype(imag(z._odata[0]))>
{
Lattice<decltype(imag(z._odata[0]))> ret(z._grid);
#pragma omp parallel for
for(int ss=0;ss<z._grid->oSites();ss++){
ret._odata[ss] = imag(z._odata[ss]);
}
return ret;
}
}
#endif

View File

@ -0,0 +1,45 @@
#ifndef GRID_LATTICE_REDUCTION_H
#define GRID_LATTICE_REDUCTION_H
namespace Grid {
////////////////////////////////////////////////////////////////////////////////////////////////////
// Reduction operations
////////////////////////////////////////////////////////////////////////////////////////////////////
template<class vobj>
inline RealD norm2(const Lattice<vobj> &arg){
typedef typename vobj::scalar_type scalar;
typedef typename vobj::vector_type vector;
decltype(innerProduct(arg._odata[0],arg._odata[0])) vnrm=zero;
scalar nrm;
//FIXME make this loop parallelisable
vnrm=zero;
for(int ss=0;ss<arg._grid->oSites(); ss++){
vnrm = vnrm + innerProduct(arg._odata[ss],arg._odata[ss]);
}
vector vvnrm =TensorRemove(vnrm) ;
nrm = Reduce(vvnrm);
arg._grid->GlobalSum(nrm);
return real(nrm);
}
template<class vobj>
inline auto innerProduct(const Lattice<vobj> &left,const Lattice<vobj> &right) ->decltype(innerProduct(left._odata[0],right._odata[0]))
{
typedef typename vobj::scalar_type scalar;
decltype(innerProduct(left._odata[0],right._odata[0])) vnrm=zero;
scalar nrm;
//FIXME make this loop parallelisable
for(int ss=0;ss<left._grid->oSites(); ss++){
vnrm = vnrm + innerProduct(left._odata[ss],right._odata[ss]);
}
nrm = Reduce(vnrm);
right._grid->GlobalSum(nrm);
return nrm;
}
}
#endif

View File

@ -0,0 +1,32 @@
#ifndef GRID_LATTICE_RNG_H
#define GRID_LATTICE_RNG_H
namespace Grid {
// FIXME Randomise; deprecate this
template <class vobj> inline void random(Lattice<vobj> &l){
Real *v_ptr = (Real *)&l._odata[0];
size_t v_len = l._grid->oSites()*sizeof(vobj);
size_t d_len = v_len/sizeof(Real);
for(int i=0;i<d_len;i++){
v_ptr[i]=drand48();
}
};
// FIXME Implement a consistent seed management strategy
template <class vobj> inline void gaussian(Lattice<vobj> &l){
// Zero mean, unit variance.
std::normal_distribution<double> distribution(0.0,1.0);
Real *v_ptr = (Real *)&l._odata[0];
size_t v_len = l._grid->oSites()*sizeof(vobj);
size_t d_len = v_len/sizeof(Real);
for(int i=0;i<d_len;i++){
v_ptr[i]= drand48();
}
};
}
#endif

View File

@ -0,0 +1,43 @@
#ifndef GRID_LATTICE_TRACE_H
#define GRID_LATTICE_TRACE_H
///////////////////////////////////////////////
// Tracing, transposing, peeking, poking
///////////////////////////////////////////////
namespace Grid {
////////////////////////////////////////////////////////////////////////////////////////////////////
// Trace
////////////////////////////////////////////////////////////////////////////////////////////////////
template<class vobj>
inline auto trace(const Lattice<vobj> &lhs)
-> Lattice<decltype(trace(lhs._odata[0]))>
{
Lattice<decltype(trace(lhs._odata[0]))> ret(lhs._grid);
#pragma omp parallel for
for(int ss=0;ss<lhs._grid->oSites();ss++){
ret._odata[ss] = trace(lhs._odata[ss]);
}
return ret;
};
////////////////////////////////////////////////////////////////////////////////////////////////////
// Trace Index level dependent operation
////////////////////////////////////////////////////////////////////////////////////////////////////
template<int Index,class vobj>
inline auto traceIndex(const Lattice<vobj> &lhs)
-> Lattice<decltype(traceIndex<Index>(lhs._odata[0]))>
{
Lattice<decltype(traceIndex<Index>(lhs._odata[0]))> ret(lhs._grid);
#pragma omp parallel for
for(int ss=0;ss<lhs._grid->oSites();ss++){
ret._odata[ss] = traceIndex<Index>(lhs._odata[ss]);
}
return ret;
};
}
#endif

View File

@ -0,0 +1,46 @@
#ifndef GRID_LATTICE_TRANSFER_H
#define GRID_LATTICE_TRANSFER_H
namespace Grid {
////////////////////////////////////////////////////////////////////////////////////////////
// remove and insert a half checkerboard
////////////////////////////////////////////////////////////////////////////////////////////
template<class vobj> inline void pickCheckerboard(int cb,Lattice<vobj> &half,const Lattice<vobj> &full){
half.checkerboard = cb;
int ssh=0;
#pragma omp parallel for
for(int ss=0;ss<full._grid->oSites();ss++){
std::vector<int> coor;
int cbos;
full._grid->oCoorFromOindex(coor,ss);
cbos=half._grid->CheckerBoard(coor);
if (cbos==cb) {
half._odata[ssh] = full._odata[ss];
ssh++;
}
}
}
template<class vobj> inline void setCheckerboard(Lattice<vobj> &full,const Lattice<vobj> &half){
int cb = half.checkerboard;
int ssh=0;
#pragma omp parallel for
for(int ss=0;ss<full._grid->oSites();ss++){
std::vector<int> coor;
int cbos;
full._grid->oCoorFromOindex(coor,ss);
cbos=half._grid->CheckerBoard(coor);
if (cbos==cb) {
full._odata[ss]=half._odata[ssh];
ssh++;
}
}
}
}
#endif

View File

@ -0,0 +1,39 @@
#ifndef GRID_LATTICE_TRANSPOSE_H
#define GRID_LATTICE_TRANSPOSE_H
///////////////////////////////////////////////
// Transpose
///////////////////////////////////////////////
namespace Grid {
////////////////////////////////////////////////////////////////////////////////////////////////////
// Transpose
////////////////////////////////////////////////////////////////////////////////////////////////////
template<class vobj>
inline Lattice<vobj> transpose(const Lattice<vobj> &lhs){
Lattice<vobj> ret(lhs._grid);
#pragma omp parallel for
for(int ss=0;ss<lhs._grid->oSites();ss++){
ret._odata[ss] = transpose(lhs._odata[ss]);
}
return ret;
};
////////////////////////////////////////////////////////////////////////////////////////////////////
// Index level dependent transpose
////////////////////////////////////////////////////////////////////////////////////////////////////
template<int Index,class vobj>
inline auto transposeIndex(const Lattice<vobj> &lhs)
-> Lattice<decltype(transposeIndex<Index>(lhs._odata[0]))>
{
Lattice<decltype(transposeIndex<Index>(lhs._odata[0]))> ret(lhs._grid);
#pragma omp parallel for
for(int ss=0;ss<lhs._grid->oSites();ss++){
ret._odata[ss] = transposeIndex<Index>(lhs._odata[ss]);
}
return ret;
};
}
#endif