mirror of
https://github.com/paboyle/Grid.git
synced 2025-06-12 20:27:06 +01:00
Global changes to parallel_for structure.
Move the comms flags to more sensible names
This commit is contained in:
@ -39,8 +39,7 @@ namespace Grid {
|
||||
ret.checkerboard = lhs.checkerboard;
|
||||
conformable(ret,rhs);
|
||||
conformable(lhs,rhs);
|
||||
PARALLEL_FOR_LOOP
|
||||
for(int ss=0;ss<lhs._grid->oSites();ss++){
|
||||
parallel_for(int ss=0;ss<lhs._grid->oSites();ss++){
|
||||
#ifdef STREAMING_STORES
|
||||
obj1 tmp;
|
||||
mult(&tmp,&lhs._odata[ss],&rhs._odata[ss]);
|
||||
@ -56,8 +55,7 @@ PARALLEL_FOR_LOOP
|
||||
ret.checkerboard = lhs.checkerboard;
|
||||
conformable(ret,rhs);
|
||||
conformable(lhs,rhs);
|
||||
PARALLEL_FOR_LOOP
|
||||
for(int ss=0;ss<lhs._grid->oSites();ss++){
|
||||
parallel_for(int ss=0;ss<lhs._grid->oSites();ss++){
|
||||
#ifdef STREAMING_STORES
|
||||
obj1 tmp;
|
||||
mac(&tmp,&lhs._odata[ss],&rhs._odata[ss]);
|
||||
@ -73,8 +71,7 @@ PARALLEL_FOR_LOOP
|
||||
ret.checkerboard = lhs.checkerboard;
|
||||
conformable(ret,rhs);
|
||||
conformable(lhs,rhs);
|
||||
PARALLEL_FOR_LOOP
|
||||
for(int ss=0;ss<lhs._grid->oSites();ss++){
|
||||
parallel_for(int ss=0;ss<lhs._grid->oSites();ss++){
|
||||
#ifdef STREAMING_STORES
|
||||
obj1 tmp;
|
||||
sub(&tmp,&lhs._odata[ss],&rhs._odata[ss]);
|
||||
@ -89,8 +86,7 @@ PARALLEL_FOR_LOOP
|
||||
ret.checkerboard = lhs.checkerboard;
|
||||
conformable(ret,rhs);
|
||||
conformable(lhs,rhs);
|
||||
PARALLEL_FOR_LOOP
|
||||
for(int ss=0;ss<lhs._grid->oSites();ss++){
|
||||
parallel_for(int ss=0;ss<lhs._grid->oSites();ss++){
|
||||
#ifdef STREAMING_STORES
|
||||
obj1 tmp;
|
||||
add(&tmp,&lhs._odata[ss],&rhs._odata[ss]);
|
||||
@ -108,8 +104,7 @@ PARALLEL_FOR_LOOP
|
||||
void mult(Lattice<obj1> &ret,const Lattice<obj2> &lhs,const obj3 &rhs){
|
||||
ret.checkerboard = lhs.checkerboard;
|
||||
conformable(lhs,ret);
|
||||
PARALLEL_FOR_LOOP
|
||||
for(int ss=0;ss<lhs._grid->oSites();ss++){
|
||||
parallel_for(int ss=0;ss<lhs._grid->oSites();ss++){
|
||||
obj1 tmp;
|
||||
mult(&tmp,&lhs._odata[ss],&rhs);
|
||||
vstream(ret._odata[ss],tmp);
|
||||
@ -120,8 +115,7 @@ PARALLEL_FOR_LOOP
|
||||
void mac(Lattice<obj1> &ret,const Lattice<obj2> &lhs,const obj3 &rhs){
|
||||
ret.checkerboard = lhs.checkerboard;
|
||||
conformable(ret,lhs);
|
||||
PARALLEL_FOR_LOOP
|
||||
for(int ss=0;ss<lhs._grid->oSites();ss++){
|
||||
parallel_for(int ss=0;ss<lhs._grid->oSites();ss++){
|
||||
obj1 tmp;
|
||||
mac(&tmp,&lhs._odata[ss],&rhs);
|
||||
vstream(ret._odata[ss],tmp);
|
||||
@ -132,8 +126,7 @@ PARALLEL_FOR_LOOP
|
||||
void sub(Lattice<obj1> &ret,const Lattice<obj2> &lhs,const obj3 &rhs){
|
||||
ret.checkerboard = lhs.checkerboard;
|
||||
conformable(ret,lhs);
|
||||
PARALLEL_FOR_LOOP
|
||||
for(int ss=0;ss<lhs._grid->oSites();ss++){
|
||||
parallel_for(int ss=0;ss<lhs._grid->oSites();ss++){
|
||||
#ifdef STREAMING_STORES
|
||||
obj1 tmp;
|
||||
sub(&tmp,&lhs._odata[ss],&rhs);
|
||||
@ -147,8 +140,7 @@ PARALLEL_FOR_LOOP
|
||||
void add(Lattice<obj1> &ret,const Lattice<obj2> &lhs,const obj3 &rhs){
|
||||
ret.checkerboard = lhs.checkerboard;
|
||||
conformable(lhs,ret);
|
||||
PARALLEL_FOR_LOOP
|
||||
for(int ss=0;ss<lhs._grid->oSites();ss++){
|
||||
parallel_for(int ss=0;ss<lhs._grid->oSites();ss++){
|
||||
#ifdef STREAMING_STORES
|
||||
obj1 tmp;
|
||||
add(&tmp,&lhs._odata[ss],&rhs);
|
||||
@ -166,8 +158,7 @@ PARALLEL_FOR_LOOP
|
||||
void mult(Lattice<obj1> &ret,const obj2 &lhs,const Lattice<obj3> &rhs){
|
||||
ret.checkerboard = rhs.checkerboard;
|
||||
conformable(ret,rhs);
|
||||
PARALLEL_FOR_LOOP
|
||||
for(int ss=0;ss<rhs._grid->oSites();ss++){
|
||||
parallel_for(int ss=0;ss<rhs._grid->oSites();ss++){
|
||||
#ifdef STREAMING_STORES
|
||||
obj1 tmp;
|
||||
mult(&tmp,&lhs,&rhs._odata[ss]);
|
||||
@ -182,8 +173,7 @@ PARALLEL_FOR_LOOP
|
||||
void mac(Lattice<obj1> &ret,const obj2 &lhs,const Lattice<obj3> &rhs){
|
||||
ret.checkerboard = rhs.checkerboard;
|
||||
conformable(ret,rhs);
|
||||
PARALLEL_FOR_LOOP
|
||||
for(int ss=0;ss<rhs._grid->oSites();ss++){
|
||||
parallel_for(int ss=0;ss<rhs._grid->oSites();ss++){
|
||||
#ifdef STREAMING_STORES
|
||||
obj1 tmp;
|
||||
mac(&tmp,&lhs,&rhs._odata[ss]);
|
||||
@ -198,8 +188,7 @@ PARALLEL_FOR_LOOP
|
||||
void sub(Lattice<obj1> &ret,const obj2 &lhs,const Lattice<obj3> &rhs){
|
||||
ret.checkerboard = rhs.checkerboard;
|
||||
conformable(ret,rhs);
|
||||
PARALLEL_FOR_LOOP
|
||||
for(int ss=0;ss<rhs._grid->oSites();ss++){
|
||||
parallel_for(int ss=0;ss<rhs._grid->oSites();ss++){
|
||||
#ifdef STREAMING_STORES
|
||||
obj1 tmp;
|
||||
sub(&tmp,&lhs,&rhs._odata[ss]);
|
||||
@ -213,8 +202,7 @@ PARALLEL_FOR_LOOP
|
||||
void add(Lattice<obj1> &ret,const obj2 &lhs,const Lattice<obj3> &rhs){
|
||||
ret.checkerboard = rhs.checkerboard;
|
||||
conformable(ret,rhs);
|
||||
PARALLEL_FOR_LOOP
|
||||
for(int ss=0;ss<rhs._grid->oSites();ss++){
|
||||
parallel_for(int ss=0;ss<rhs._grid->oSites();ss++){
|
||||
#ifdef STREAMING_STORES
|
||||
obj1 tmp;
|
||||
add(&tmp,&lhs,&rhs._odata[ss]);
|
||||
@ -230,8 +218,7 @@ PARALLEL_FOR_LOOP
|
||||
ret.checkerboard = x.checkerboard;
|
||||
conformable(ret,x);
|
||||
conformable(x,y);
|
||||
PARALLEL_FOR_LOOP
|
||||
for(int ss=0;ss<x._grid->oSites();ss++){
|
||||
parallel_for(int ss=0;ss<x._grid->oSites();ss++){
|
||||
#ifdef STREAMING_STORES
|
||||
vobj tmp = a*x._odata[ss]+y._odata[ss];
|
||||
vstream(ret._odata[ss],tmp);
|
||||
@ -245,8 +232,7 @@ PARALLEL_FOR_LOOP
|
||||
ret.checkerboard = x.checkerboard;
|
||||
conformable(ret,x);
|
||||
conformable(x,y);
|
||||
PARALLEL_FOR_LOOP
|
||||
for(int ss=0;ss<x._grid->oSites();ss++){
|
||||
parallel_for(int ss=0;ss<x._grid->oSites();ss++){
|
||||
#ifdef STREAMING_STORES
|
||||
vobj tmp = a*x._odata[ss]+b*y._odata[ss];
|
||||
vstream(ret._odata[ss],tmp);
|
||||
|
@ -121,8 +121,7 @@ public:
|
||||
assert( (cb==Odd) || (cb==Even));
|
||||
checkerboard=cb;
|
||||
|
||||
PARALLEL_FOR_LOOP
|
||||
for(int ss=0;ss<_grid->oSites();ss++){
|
||||
parallel_for(int ss=0;ss<_grid->oSites();ss++){
|
||||
#ifdef STREAMING_STORES
|
||||
vobj tmp = eval(ss,expr);
|
||||
vstream(_odata[ss] ,tmp);
|
||||
@ -144,8 +143,7 @@ PARALLEL_FOR_LOOP
|
||||
assert( (cb==Odd) || (cb==Even));
|
||||
checkerboard=cb;
|
||||
|
||||
PARALLEL_FOR_LOOP
|
||||
for(int ss=0;ss<_grid->oSites();ss++){
|
||||
parallel_for(int ss=0;ss<_grid->oSites();ss++){
|
||||
#ifdef STREAMING_STORES
|
||||
vobj tmp = eval(ss,expr);
|
||||
vstream(_odata[ss] ,tmp);
|
||||
@ -167,8 +165,7 @@ PARALLEL_FOR_LOOP
|
||||
assert( (cb==Odd) || (cb==Even));
|
||||
checkerboard=cb;
|
||||
|
||||
PARALLEL_FOR_LOOP
|
||||
for(int ss=0;ss<_grid->oSites();ss++){
|
||||
parallel_for(int ss=0;ss<_grid->oSites();ss++){
|
||||
#ifdef STREAMING_STORES
|
||||
//vobj tmp = eval(ss,expr);
|
||||
vstream(_odata[ss] ,eval(ss,expr));
|
||||
@ -191,8 +188,7 @@ PARALLEL_FOR_LOOP
|
||||
checkerboard=cb;
|
||||
|
||||
_odata.resize(_grid->oSites());
|
||||
PARALLEL_FOR_LOOP
|
||||
for(int ss=0;ss<_grid->oSites();ss++){
|
||||
parallel_for(int ss=0;ss<_grid->oSites();ss++){
|
||||
#ifdef STREAMING_STORES
|
||||
vobj tmp = eval(ss,expr);
|
||||
vstream(_odata[ss] ,tmp);
|
||||
@ -213,8 +209,7 @@ PARALLEL_FOR_LOOP
|
||||
checkerboard=cb;
|
||||
|
||||
_odata.resize(_grid->oSites());
|
||||
PARALLEL_FOR_LOOP
|
||||
for(int ss=0;ss<_grid->oSites();ss++){
|
||||
parallel_for(int ss=0;ss<_grid->oSites();ss++){
|
||||
#ifdef STREAMING_STORES
|
||||
vobj tmp = eval(ss,expr);
|
||||
vstream(_odata[ss] ,tmp);
|
||||
@ -235,8 +230,7 @@ PARALLEL_FOR_LOOP
|
||||
checkerboard=cb;
|
||||
|
||||
_odata.resize(_grid->oSites());
|
||||
PARALLEL_FOR_LOOP
|
||||
for(int ss=0;ss<_grid->oSites();ss++){
|
||||
parallel_for(int ss=0;ss<_grid->oSites();ss++){
|
||||
vstream(_odata[ss] ,eval(ss,expr));
|
||||
}
|
||||
};
|
||||
@ -258,8 +252,7 @@ PARALLEL_FOR_LOOP
|
||||
_grid = r._grid;
|
||||
checkerboard = r.checkerboard;
|
||||
_odata.resize(_grid->oSites());// essential
|
||||
PARALLEL_FOR_LOOP
|
||||
for(int ss=0;ss<_grid->oSites();ss++){
|
||||
parallel_for(int ss=0;ss<_grid->oSites();ss++){
|
||||
_odata[ss]=r._odata[ss];
|
||||
}
|
||||
}
|
||||
@ -269,8 +262,7 @@ PARALLEL_FOR_LOOP
|
||||
virtual ~Lattice(void) = default;
|
||||
|
||||
template<class sobj> strong_inline Lattice<vobj> & operator = (const sobj & r){
|
||||
PARALLEL_FOR_LOOP
|
||||
for(int ss=0;ss<_grid->oSites();ss++){
|
||||
parallel_for(int ss=0;ss<_grid->oSites();ss++){
|
||||
this->_odata[ss]=r;
|
||||
}
|
||||
return *this;
|
||||
@ -279,8 +271,7 @@ PARALLEL_FOR_LOOP
|
||||
this->checkerboard = r.checkerboard;
|
||||
conformable(*this,r);
|
||||
|
||||
PARALLEL_FOR_LOOP
|
||||
for(int ss=0;ss<_grid->oSites();ss++){
|
||||
parallel_for(int ss=0;ss<_grid->oSites();ss++){
|
||||
this->_odata[ss]=r._odata[ss];
|
||||
}
|
||||
return *this;
|
||||
|
@ -45,90 +45,87 @@ namespace Grid {
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
// Site-by-site comparison of two lattices.
// Applies the functor `op` to lhs._odata[ss] and rhs._odata[ss] for every ss in
// [0, rhs._grid->oSites()) and stores each result in a Lattice<vInteger> built on rhs._grid.
// No conformable() check between lhs and rhs is made inside this function.
// NOTE(review): the lines below carry both the PARALLEL_FOR_LOOP/for form and the
// parallel_for form of the same loop, as rendered from a diff hunk.
template<class vfunctor,class lobj,class robj>
|
||||
inline Lattice<vInteger> LLComparison(vfunctor op,const Lattice<lobj> &lhs,const Lattice<robj> &rhs)
|
||||
{
|
||||
Lattice<vInteger> ret(rhs._grid);
|
||||
PARALLEL_FOR_LOOP
|
||||
for(int ss=0;ss<rhs._grid->oSites(); ss++){
|
||||
ret._odata[ss]=op(lhs._odata[ss],rhs._odata[ss]);
|
||||
}
|
||||
return ret;
|
||||
{
|
||||
Lattice<vInteger> ret(rhs._grid);
|
||||
parallel_for(int ss=0;ss<rhs._grid->oSites(); ss++){
|
||||
ret._odata[ss]=op(lhs._odata[ss],rhs._odata[ss]);
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
// compare lattice to scalar
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
// Compare every outer site of a lattice against a single non-lattice right operand.
// Evaluates op(lhs._odata[ss], rhs) for every ss in [0, lhs._grid->oSites()) and stores
// each result in a Lattice<vInteger> built on lhs._grid; `rhs` is passed unchanged each time.
// NOTE(review): the lines below carry both the PARALLEL_FOR_LOOP/for form and the
// parallel_for form of the same loop, as rendered from a diff hunk.
template<class vfunctor,class lobj,class robj>
|
||||
template<class vfunctor,class lobj,class robj>
|
||||
inline Lattice<vInteger> LSComparison(vfunctor op,const Lattice<lobj> &lhs,const robj &rhs)
|
||||
{
|
||||
Lattice<vInteger> ret(lhs._grid);
|
||||
PARALLEL_FOR_LOOP
|
||||
for(int ss=0;ss<lhs._grid->oSites(); ss++){
|
||||
ret._odata[ss]=op(lhs._odata[ss],rhs);
|
||||
}
|
||||
return ret;
|
||||
{
|
||||
Lattice<vInteger> ret(lhs._grid);
|
||||
parallel_for(int ss=0;ss<lhs._grid->oSites(); ss++){
|
||||
ret._odata[ss]=op(lhs._odata[ss],rhs);
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
// compare scalar to lattice
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
template<class vfunctor,class lobj,class robj>
|
||||
template<class vfunctor,class lobj,class robj>
|
||||
inline Lattice<vInteger> SLComparison(vfunctor op,const lobj &lhs,const Lattice<robj> &rhs)
|
||||
{
|
||||
Lattice<vInteger> ret(rhs._grid);
|
||||
PARALLEL_FOR_LOOP
|
||||
for(int ss=0;ss<rhs._grid->oSites(); ss++){
|
||||
ret._odata[ss]=op(lhs._odata[ss],rhs);
|
||||
}
|
||||
return ret;
|
||||
{
|
||||
Lattice<vInteger> ret(rhs._grid);
|
||||
parallel_for(int ss=0;ss<rhs._grid->oSites(); ss++){
|
||||
ret._odata[ss]=op(lhs._odata[ss],rhs);
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
// Map to functors
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
// Less than
|
||||
template<class lobj,class robj>
|
||||
inline Lattice<vInteger> operator < (const Lattice<lobj> & lhs, const Lattice<robj> & rhs) {
|
||||
return LLComparison(vlt<lobj,robj>(),lhs,rhs);
|
||||
}
|
||||
template<class lobj,class robj>
|
||||
inline Lattice<vInteger> operator < (const Lattice<lobj> & lhs, const robj & rhs) {
|
||||
return LSComparison(vlt<lobj,robj>(),lhs,rhs);
|
||||
}
|
||||
template<class lobj,class robj>
|
||||
inline Lattice<vInteger> operator < (const lobj & lhs, const Lattice<robj> & rhs) {
|
||||
return SLComparison(vlt<lobj,robj>(),lhs,rhs);
|
||||
}
|
||||
|
||||
// Less than equal
|
||||
template<class lobj,class robj>
|
||||
inline Lattice<vInteger> operator <= (const Lattice<lobj> & lhs, const Lattice<robj> & rhs) {
|
||||
return LLComparison(vle<lobj,robj>(),lhs,rhs);
|
||||
}
|
||||
template<class lobj,class robj>
|
||||
inline Lattice<vInteger> operator <= (const Lattice<lobj> & lhs, const robj & rhs) {
|
||||
return LSComparison(vle<lobj,robj>(),lhs,rhs);
|
||||
}
|
||||
template<class lobj,class robj>
|
||||
inline Lattice<vInteger> operator <= (const lobj & lhs, const Lattice<robj> & rhs) {
|
||||
return SLComparison(vle<lobj,robj>(),lhs,rhs);
|
||||
}
|
||||
|
||||
// Greater than
|
||||
template<class lobj,class robj>
|
||||
inline Lattice<vInteger> operator > (const Lattice<lobj> & lhs, const Lattice<robj> & rhs) {
|
||||
return LLComparison(vgt<lobj,robj>(),lhs,rhs);
|
||||
}
|
||||
template<class lobj,class robj>
|
||||
inline Lattice<vInteger> operator > (const Lattice<lobj> & lhs, const robj & rhs) {
|
||||
return LSComparison(vgt<lobj,robj>(),lhs,rhs);
|
||||
}
|
||||
template<class lobj,class robj>
|
||||
inline Lattice<vInteger> operator > (const lobj & lhs, const Lattice<robj> & rhs) {
|
||||
// Less than
|
||||
template<class lobj,class robj>
|
||||
inline Lattice<vInteger> operator < (const Lattice<lobj> & lhs, const Lattice<robj> & rhs) {
|
||||
return LLComparison(vlt<lobj,robj>(),lhs,rhs);
|
||||
}
|
||||
template<class lobj,class robj>
|
||||
inline Lattice<vInteger> operator < (const Lattice<lobj> & lhs, const robj & rhs) {
|
||||
return LSComparison(vlt<lobj,robj>(),lhs,rhs);
|
||||
}
|
||||
template<class lobj,class robj>
|
||||
inline Lattice<vInteger> operator < (const lobj & lhs, const Lattice<robj> & rhs) {
|
||||
return SLComparison(vlt<lobj,robj>(),lhs,rhs);
|
||||
}
|
||||
|
||||
// Less than equal
|
||||
template<class lobj,class robj>
|
||||
inline Lattice<vInteger> operator <= (const Lattice<lobj> & lhs, const Lattice<robj> & rhs) {
|
||||
return LLComparison(vle<lobj,robj>(),lhs,rhs);
|
||||
}
|
||||
template<class lobj,class robj>
|
||||
inline Lattice<vInteger> operator <= (const Lattice<lobj> & lhs, const robj & rhs) {
|
||||
return LSComparison(vle<lobj,robj>(),lhs,rhs);
|
||||
}
|
||||
template<class lobj,class robj>
|
||||
inline Lattice<vInteger> operator <= (const lobj & lhs, const Lattice<robj> & rhs) {
|
||||
return SLComparison(vle<lobj,robj>(),lhs,rhs);
|
||||
}
|
||||
|
||||
// Greater than
|
||||
template<class lobj,class robj>
|
||||
inline Lattice<vInteger> operator > (const Lattice<lobj> & lhs, const Lattice<robj> & rhs) {
|
||||
return LLComparison(vgt<lobj,robj>(),lhs,rhs);
|
||||
}
|
||||
template<class lobj,class robj>
|
||||
inline Lattice<vInteger> operator > (const Lattice<lobj> & lhs, const robj & rhs) {
|
||||
return LSComparison(vgt<lobj,robj>(),lhs,rhs);
|
||||
}
|
||||
template<class lobj,class robj>
|
||||
inline Lattice<vInteger> operator > (const lobj & lhs, const Lattice<robj> & rhs) {
|
||||
return SLComparison(vgt<lobj,robj>(),lhs,rhs);
|
||||
}
|
||||
|
||||
|
||||
// Greater than equal
|
||||
}
|
||||
|
||||
|
||||
// Greater than equal
|
||||
// Lattice >= lattice: forwards to LLComparison with a vge<lobj,robj> functor and
// returns the resulting Lattice<vInteger>.
template<class lobj,class robj>
|
||||
inline Lattice<vInteger> operator >= (const Lattice<lobj> & lhs, const Lattice<robj> & rhs) {
|
||||
inline Lattice<vInteger> operator >= (const Lattice<lobj> & lhs, const Lattice<robj> & rhs) {
|
||||
return LLComparison(vge<lobj,robj>(),lhs,rhs);
|
||||
}
|
||||
template<class lobj,class robj>
|
||||
@ -136,38 +133,37 @@ PARALLEL_FOR_LOOP
|
||||
return LSComparison(vge<lobj,robj>(),lhs,rhs);
|
||||
}
|
||||
// Non-lattice >= lattice: forwards to SLComparison with a vge<lobj,robj> functor and
// returns the resulting Lattice<vInteger>.
template<class lobj,class robj>
|
||||
inline Lattice<vInteger> operator >= (const lobj & lhs, const Lattice<robj> & rhs) {
|
||||
inline Lattice<vInteger> operator >= (const lobj & lhs, const Lattice<robj> & rhs) {
|
||||
return SLComparison(vge<lobj,robj>(),lhs,rhs);
|
||||
}
|
||||
|
||||
|
||||
// equal
|
||||
// Lattice == lattice: forwards to LLComparison with a veq<lobj,robj> functor and
// returns the resulting Lattice<vInteger> (one entry per outer site, not a single bool).
template<class lobj,class robj>
|
||||
inline Lattice<vInteger> operator == (const Lattice<lobj> & lhs, const Lattice<robj> & rhs) {
|
||||
inline Lattice<vInteger> operator == (const Lattice<lobj> & lhs, const Lattice<robj> & rhs) {
|
||||
return LLComparison(veq<lobj,robj>(),lhs,rhs);
|
||||
}
|
||||
// Lattice == non-lattice: forwards to LSComparison with a veq<lobj,robj> functor and
// returns the resulting Lattice<vInteger>.
template<class lobj,class robj>
|
||||
inline Lattice<vInteger> operator == (const Lattice<lobj> & lhs, const robj & rhs) {
|
||||
inline Lattice<vInteger> operator == (const Lattice<lobj> & lhs, const robj & rhs) {
|
||||
return LSComparison(veq<lobj,robj>(),lhs,rhs);
|
||||
}
|
||||
// Non-lattice == lattice: forwards to SLComparison with a veq<lobj,robj> functor and
// returns the resulting Lattice<vInteger>.
template<class lobj,class robj>
|
||||
inline Lattice<vInteger> operator == (const lobj & lhs, const Lattice<robj> & rhs) {
|
||||
inline Lattice<vInteger> operator == (const lobj & lhs, const Lattice<robj> & rhs) {
|
||||
return SLComparison(veq<lobj,robj>(),lhs,rhs);
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
// not equal
|
||||
// Lattice != lattice: forwards to LLComparison with a vne<lobj,robj> functor and
// returns the resulting Lattice<vInteger> (one entry per outer site, not a single bool).
template<class lobj,class robj>
|
||||
inline Lattice<vInteger> operator != (const Lattice<lobj> & lhs, const Lattice<robj> & rhs) {
|
||||
inline Lattice<vInteger> operator != (const Lattice<lobj> & lhs, const Lattice<robj> & rhs) {
|
||||
return LLComparison(vne<lobj,robj>(),lhs,rhs);
|
||||
}
|
||||
// Lattice != non-lattice: forwards to LSComparison with a vne<lobj,robj> functor and
// returns the resulting Lattice<vInteger>.
template<class lobj,class robj>
|
||||
inline Lattice<vInteger> operator != (const Lattice<lobj> & lhs, const robj & rhs) {
|
||||
inline Lattice<vInteger> operator != (const Lattice<lobj> & lhs, const robj & rhs) {
|
||||
return LSComparison(vne<lobj,robj>(),lhs,rhs);
|
||||
}
|
||||
// Non-lattice != lattice: forwards to SLComparison with a vne<lobj,robj> functor and
// returns the resulting Lattice<vInteger>.
template<class lobj,class robj>
|
||||
inline Lattice<vInteger> operator != (const lobj & lhs, const Lattice<robj> & rhs) {
|
||||
inline Lattice<vInteger> operator != (const lobj & lhs, const Lattice<robj> & rhs) {
|
||||
return SLComparison(vne<lobj,robj>(),lhs,rhs);
|
||||
}
|
||||
|
||||
}
|
||||
#endif
|
||||
|
@ -34,47 +34,42 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||
|
||||
namespace Grid {
|
||||
|
||||
/////////////////////////////////////////////////////
|
||||
// Non site, reduced locally reduced routines
|
||||
/////////////////////////////////////////////////////
|
||||
|
||||
// localNorm2,
|
||||
// localNorm2: per-site squared norm.
// Builds a Lattice<typename vobj::tensor_reduced> on rhs._grid and sets element ss to
// innerProduct(rhs._odata[ss], rhs._odata[ss]) for every ss in [0, rhs._grid->oSites()).
// Nothing is accumulated across sites inside this function.
// NOTE(review): the lines below carry both the PARALLEL_FOR_LOOP/for form and the
// parallel_for form of the same loop, as rendered from a diff hunk.
template<class vobj>
|
||||
/////////////////////////////////////////////////////
|
||||
// Non site, reduced locally reduced routines
|
||||
/////////////////////////////////////////////////////
|
||||
|
||||
// localNorm2,
|
||||
template<class vobj>
|
||||
inline auto localNorm2 (const Lattice<vobj> &rhs)-> Lattice<typename vobj::tensor_reduced>
|
||||
{
|
||||
Lattice<typename vobj::tensor_reduced> ret(rhs._grid);
|
||||
PARALLEL_FOR_LOOP
|
||||
for(int ss=0;ss<rhs._grid->oSites(); ss++){
|
||||
ret._odata[ss]=innerProduct(rhs._odata[ss],rhs._odata[ss]);
|
||||
}
|
||||
return ret;
|
||||
parallel_for(int ss=0;ss<rhs._grid->oSites(); ss++){
|
||||
ret._odata[ss]=innerProduct(rhs._odata[ss],rhs._odata[ss]);
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
// localInnerProduct
|
||||
// Per-site inner product of two lattices of the same element type.
// Builds a Lattice<typename vobj::tensor_reduced> on rhs._grid and sets element ss to
// innerProduct(lhs._odata[ss], rhs._odata[ss]) for every ss in [0, rhs._grid->oSites()).
// Nothing is accumulated across sites, and no conformable() check between lhs and rhs
// is made inside this function.
// NOTE(review): the lines below carry both the PARALLEL_FOR_LOOP/for form and the
// parallel_for form of the same loop header, as rendered from a diff hunk.
template<class vobj>
|
||||
|
||||
// localInnerProduct
|
||||
template<class vobj>
|
||||
inline auto localInnerProduct (const Lattice<vobj> &lhs,const Lattice<vobj> &rhs) -> Lattice<typename vobj::tensor_reduced>
|
||||
{
|
||||
Lattice<typename vobj::tensor_reduced> ret(rhs._grid);
|
||||
PARALLEL_FOR_LOOP
|
||||
for(int ss=0;ss<rhs._grid->oSites(); ss++){
|
||||
parallel_for(int ss=0;ss<rhs._grid->oSites(); ss++){
|
||||
ret._odata[ss]=innerProduct(lhs._odata[ss],rhs._odata[ss]);
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
// outerProduct Scalar x Scalar -> Scalar
|
||||
// Vector x Vector -> Matrix
|
||||
// Per-site outer product of two lattices.
// The element type of the result is whatever outerProduct(lhs._odata[0], rhs._odata[0])
// yields (deduced with decltype). The result is built on rhs._grid and element ss is set to
// outerProduct(lhs._odata[ss], rhs._odata[ss]) for every ss in [0, rhs._grid->oSites()).
// No conformable() check between lhs and rhs is made inside this function.
// NOTE(review): the lines below carry both the PARALLEL_FOR_LOOP/for form and the
// parallel_for form of the same function body, as rendered from a diff hunk.
template<class ll,class rr>
|
||||
|
||||
// outerProduct Scalar x Scalar -> Scalar
|
||||
// Vector x Vector -> Matrix
|
||||
template<class ll,class rr>
|
||||
inline auto outerProduct (const Lattice<ll> &lhs,const Lattice<rr> &rhs) -> Lattice<decltype(outerProduct(lhs._odata[0],rhs._odata[0]))>
|
||||
{
|
||||
Lattice<decltype(outerProduct(lhs._odata[0],rhs._odata[0]))> ret(rhs._grid);
|
||||
PARALLEL_FOR_LOOP
|
||||
for(int ss=0;ss<rhs._grid->oSites(); ss++){
|
||||
ret._odata[ss]=outerProduct(lhs._odata[ss],rhs._odata[ss]);
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
{
|
||||
Lattice<decltype(outerProduct(lhs._odata[0],rhs._odata[0]))> ret(rhs._grid);
|
||||
parallel_for(int ss=0;ss<rhs._grid->oSites(); ss++){
|
||||
ret._odata[ss]=outerProduct(lhs._odata[ss],rhs._odata[ss]);
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
}
|
||||
|
||||
#endif
|
||||
|
@ -37,8 +37,7 @@ namespace Grid {
|
||||
inline Lattice<vobj> operator -(const Lattice<vobj> &r)
|
||||
{
|
||||
Lattice<vobj> ret(r._grid);
|
||||
PARALLEL_FOR_LOOP
|
||||
for(int ss=0;ss<r._grid->oSites();ss++){
|
||||
parallel_for(int ss=0;ss<r._grid->oSites();ss++){
|
||||
vstream(ret._odata[ss], -r._odata[ss]);
|
||||
}
|
||||
return ret;
|
||||
@ -74,8 +73,7 @@ PARALLEL_FOR_LOOP
|
||||
inline auto operator * (const left &lhs,const Lattice<right> &rhs) -> Lattice<decltype(lhs*rhs._odata[0])>
|
||||
{
|
||||
Lattice<decltype(lhs*rhs._odata[0])> ret(rhs._grid);
|
||||
PARALLEL_FOR_LOOP
|
||||
for(int ss=0;ss<rhs._grid->oSites(); ss++){
|
||||
parallel_for(int ss=0;ss<rhs._grid->oSites(); ss++){
|
||||
decltype(lhs*rhs._odata[0]) tmp=lhs*rhs._odata[ss];
|
||||
vstream(ret._odata[ss],tmp);
|
||||
// ret._odata[ss]=lhs*rhs._odata[ss];
|
||||
@ -86,8 +84,7 @@ PARALLEL_FOR_LOOP
|
||||
inline auto operator + (const left &lhs,const Lattice<right> &rhs) -> Lattice<decltype(lhs+rhs._odata[0])>
|
||||
{
|
||||
Lattice<decltype(lhs+rhs._odata[0])> ret(rhs._grid);
|
||||
PARALLEL_FOR_LOOP
|
||||
for(int ss=0;ss<rhs._grid->oSites(); ss++){
|
||||
parallel_for(int ss=0;ss<rhs._grid->oSites(); ss++){
|
||||
decltype(lhs+rhs._odata[0]) tmp =lhs-rhs._odata[ss];
|
||||
vstream(ret._odata[ss],tmp);
|
||||
// ret._odata[ss]=lhs+rhs._odata[ss];
|
||||
@ -98,11 +95,9 @@ PARALLEL_FOR_LOOP
|
||||
// Non-lattice minus lattice.
// Builds the result on rhs._grid with element type decltype(lhs-rhs._odata[0]); for every
// ss in [0, rhs._grid->oSites()) computes lhs-rhs._odata[ss] into a temporary and writes it
// to ret._odata[ss] through vstream(). The commented-out line is the plain-assignment form.
// NOTE(review): the lines below carry both the PARALLEL_FOR_LOOP/for form and the
// parallel_for form of the same loop header, as rendered from a diff hunk.
inline auto operator - (const left &lhs,const Lattice<right> &rhs) -> Lattice<decltype(lhs-rhs._odata[0])>
|
||||
{
|
||||
Lattice<decltype(lhs-rhs._odata[0])> ret(rhs._grid);
|
||||
PARALLEL_FOR_LOOP
|
||||
for(int ss=0;ss<rhs._grid->oSites(); ss++){
|
||||
parallel_for(int ss=0;ss<rhs._grid->oSites(); ss++){
|
||||
decltype(lhs-rhs._odata[0]) tmp=lhs-rhs._odata[ss];
|
||||
vstream(ret._odata[ss],tmp);
|
||||
// ret._odata[ss]=lhs-rhs._odata[ss];
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
@ -110,8 +105,7 @@ PARALLEL_FOR_LOOP
|
||||
inline auto operator * (const Lattice<left> &lhs,const right &rhs) -> Lattice<decltype(lhs._odata[0]*rhs)>
|
||||
{
|
||||
Lattice<decltype(lhs._odata[0]*rhs)> ret(lhs._grid);
|
||||
PARALLEL_FOR_LOOP
|
||||
for(int ss=0;ss<lhs._grid->oSites(); ss++){
|
||||
parallel_for(int ss=0;ss<lhs._grid->oSites(); ss++){
|
||||
decltype(lhs._odata[0]*rhs) tmp =lhs._odata[ss]*rhs;
|
||||
vstream(ret._odata[ss],tmp);
|
||||
// ret._odata[ss]=lhs._odata[ss]*rhs;
|
||||
@ -122,8 +116,7 @@ PARALLEL_FOR_LOOP
|
||||
inline auto operator + (const Lattice<left> &lhs,const right &rhs) -> Lattice<decltype(lhs._odata[0]+rhs)>
|
||||
{
|
||||
Lattice<decltype(lhs._odata[0]+rhs)> ret(lhs._grid);
|
||||
PARALLEL_FOR_LOOP
|
||||
for(int ss=0;ss<rhs._grid->oSites(); ss++){
|
||||
parallel_for(int ss=0;ss<rhs._grid->oSites(); ss++){
|
||||
decltype(lhs._odata[0]+rhs) tmp=lhs._odata[ss]+rhs;
|
||||
vstream(ret._odata[ss],tmp);
|
||||
// ret._odata[ss]=lhs._odata[ss]+rhs;
|
||||
@ -134,15 +127,12 @@ PARALLEL_FOR_LOOP
|
||||
// Lattice minus non-lattice.
//   lhs : lattice supplying the grid and the per-site left operands
//   rhs : right operand, subtracted unchanged at every site
// Returns a lattice on lhs._grid with element type decltype(lhs._odata[0]-rhs); element ss
// is lhs._odata[ss]-rhs, written through vstream().
// The loop bound is taken from lhs._grid: `rhs` is the non-lattice operand, so the previous
// bound rhs._grid->oSites() referred to a member the operand type is not required to have.
// This matches the result lattice (built on lhs._grid) and the sibling
// operator * (const Lattice<left>&, const right&).
inline auto operator - (const Lattice<left> &lhs,const right &rhs) -> Lattice<decltype(lhs._odata[0]-rhs)>
{
  Lattice<decltype(lhs._odata[0]-rhs)> ret(lhs._grid);
  parallel_for(int ss=0;ss<lhs._grid->oSites(); ss++){
    decltype(lhs._odata[0]-rhs) tmp=lhs._odata[ss]-rhs;
    vstream(ret._odata[ss],tmp);
    // ret._odata[ss]=lhs._odata[ss]-rhs;
  }
  return ret;
}
|
||||
|
||||
|
||||
}
|
||||
#endif
|
||||
|
@ -44,22 +44,20 @@ namespace Grid {
|
||||
{
|
||||
Lattice<decltype(peekIndex<Index>(lhs._odata[0],i))> ret(lhs._grid);
|
||||
ret.checkerboard=lhs.checkerboard;
|
||||
PARALLEL_FOR_LOOP
|
||||
for(int ss=0;ss<lhs._grid->oSites();ss++){
|
||||
ret._odata[ss] = peekIndex<Index>(lhs._odata[ss],i);
|
||||
}
|
||||
return ret;
|
||||
parallel_for(int ss=0;ss<lhs._grid->oSites();ss++){
|
||||
ret._odata[ss] = peekIndex<Index>(lhs._odata[ss],i);
|
||||
}
|
||||
return ret;
|
||||
};
|
||||
// Two-index peek over a whole lattice.
// Builds the result on lhs._grid with element type decltype(peekIndex<Index>(lhs._odata[0],i,j)),
// copies lhs.checkerboard into it, and sets element ss to peekIndex<Index>(lhs._odata[ss],i,j)
// for every ss in [0, lhs._grid->oSites()). The same i and j are used at every site.
// NOTE(review): the lines below carry both the PARALLEL_FOR_LOOP/for form and the
// parallel_for form of the same loop, as rendered from a diff hunk.
template<int Index,class vobj>
|
||||
auto PeekIndex(const Lattice<vobj> &lhs,int i,int j) -> Lattice<decltype(peekIndex<Index>(lhs._odata[0],i,j))>
|
||||
auto PeekIndex(const Lattice<vobj> &lhs,int i,int j) -> Lattice<decltype(peekIndex<Index>(lhs._odata[0],i,j))>
|
||||
{
|
||||
Lattice<decltype(peekIndex<Index>(lhs._odata[0],i,j))> ret(lhs._grid);
|
||||
ret.checkerboard=lhs.checkerboard;
|
||||
PARALLEL_FOR_LOOP
|
||||
for(int ss=0;ss<lhs._grid->oSites();ss++){
|
||||
ret._odata[ss] = peekIndex<Index>(lhs._odata[ss],i,j);
|
||||
}
|
||||
return ret;
|
||||
parallel_for(int ss=0;ss<lhs._grid->oSites();ss++){
|
||||
ret._odata[ss] = peekIndex<Index>(lhs._odata[ss],i,j);
|
||||
}
|
||||
return ret;
|
||||
};
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
@ -68,18 +66,16 @@ PARALLEL_FOR_LOOP
|
||||
// One-index poke over a whole lattice; modifies lhs in place and returns nothing.
// For every ss in [0, lhs._grid->oSites()) calls pokeIndex<Index>(lhs._odata[ss], rhs._odata[ss], i).
// rhs must have the element type that peekIndex<Index>(lhs._odata[0],0) yields (see signature).
// No conformable() check between lhs and rhs is made inside this function.
// NOTE(review): the lines below carry both the PARALLEL_FOR_LOOP/for form and the
// parallel_for form of the same loop, as rendered from a diff hunk.
template<int Index,class vobj>
|
||||
void PokeIndex(Lattice<vobj> &lhs,const Lattice<decltype(peekIndex<Index>(lhs._odata[0],0))> & rhs,int i)
|
||||
{
|
||||
PARALLEL_FOR_LOOP
|
||||
for(int ss=0;ss<lhs._grid->oSites();ss++){
|
||||
pokeIndex<Index>(lhs._odata[ss],rhs._odata[ss],i);
|
||||
}
|
||||
parallel_for(int ss=0;ss<lhs._grid->oSites();ss++){
|
||||
pokeIndex<Index>(lhs._odata[ss],rhs._odata[ss],i);
|
||||
}
|
||||
}
|
||||
// Two-index poke over a whole lattice; modifies lhs in place and returns nothing.
// For every ss in [0, lhs._grid->oSites()) calls pokeIndex<Index>(lhs._odata[ss], rhs._odata[ss], i, j).
// rhs must have the element type that peekIndex<Index>(lhs._odata[0],0,0) yields (see signature).
// No conformable() check between lhs and rhs is made inside this function.
// NOTE(review): the lines below carry both the PARALLEL_FOR_LOOP/for form and the
// parallel_for form of the same loop, as rendered from a diff hunk.
template<int Index,class vobj>
|
||||
void PokeIndex(Lattice<vobj> &lhs,const Lattice<decltype(peekIndex<Index>(lhs._odata[0],0,0))> & rhs,int i,int j)
|
||||
{
|
||||
PARALLEL_FOR_LOOP
|
||||
for(int ss=0;ss<lhs._grid->oSites();ss++){
|
||||
pokeIndex<Index>(lhs._odata[ss],rhs._odata[ss],i,j);
|
||||
}
|
||||
parallel_for(int ss=0;ss<lhs._grid->oSites();ss++){
|
||||
pokeIndex<Index>(lhs._odata[ss],rhs._odata[ss],i,j);
|
||||
}
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////
|
||||
@ -131,9 +127,6 @@ PARALLEL_FOR_LOOP
|
||||
|
||||
assert( l.checkerboard == l._grid->CheckerBoard(site));
|
||||
|
||||
// FIXME
|
||||
// assert( sizeof(sobj)*Nsimd == sizeof(vobj));
|
||||
|
||||
int rank,odx,idx;
|
||||
grid->GlobalCoorToRankIndex(rank,odx,idx,site);
|
||||
|
||||
|
@ -40,8 +40,7 @@ namespace Grid {
|
||||
|
||||
template<class vobj> inline Lattice<vobj> adj(const Lattice<vobj> &lhs){
|
||||
Lattice<vobj> ret(lhs._grid);
|
||||
PARALLEL_FOR_LOOP
|
||||
for(int ss=0;ss<lhs._grid->oSites();ss++){
|
||||
parallel_for(int ss=0;ss<lhs._grid->oSites();ss++){
|
||||
ret._odata[ss] = adj(lhs._odata[ss]);
|
||||
}
|
||||
return ret;
|
||||
@ -49,13 +48,10 @@ PARALLEL_FOR_LOOP
|
||||
|
||||
// Per-site conjugate of a lattice.
// Builds a Lattice<vobj> on lhs._grid and sets element ss to conjugate(lhs._odata[ss]) for
// every ss in [0, lhs._grid->oSites()); lhs is not modified.
// NOTE(review): ret.checkerboard is not assigned in this function — confirm the constructor
// default is what callers expect.
// NOTE(review): the lines below carry both the PARALLEL_FOR_LOOP/for form and the
// parallel_for form of the same loop, as rendered from a diff hunk.
template<class vobj> inline Lattice<vobj> conjugate(const Lattice<vobj> &lhs){
|
||||
Lattice<vobj> ret(lhs._grid);
|
||||
PARALLEL_FOR_LOOP
|
||||
for(int ss=0;ss<lhs._grid->oSites();ss++){
|
||||
ret._odata[ss] = conjugate(lhs._odata[ss]);
|
||||
parallel_for(int ss=0;ss<lhs._grid->oSites();ss++){
|
||||
ret._odata[ss] = conjugate(lhs._odata[ss]);
|
||||
}
|
||||
return ret;
|
||||
};
|
||||
|
||||
|
||||
}
|
||||
#endif
|
||||
|
@ -57,8 +57,7 @@ namespace Grid {
|
||||
sumarray[i]=zero;
|
||||
}
|
||||
|
||||
PARALLEL_FOR_LOOP
|
||||
for(int thr=0;thr<grid->SumArraySize();thr++){
|
||||
parallel_for(int thr=0;thr<grid->SumArraySize();thr++){
|
||||
int nwork, mywork, myoff;
|
||||
GridThread::GetWork(left._grid->oSites(),thr,mywork,myoff);
|
||||
|
||||
@ -68,7 +67,7 @@ PARALLEL_FOR_LOOP
|
||||
}
|
||||
sumarray[thr]=TensorRemove(vnrm) ;
|
||||
}
|
||||
|
||||
|
||||
vector_type vvnrm; vvnrm=zero; // sum across threads
|
||||
for(int i=0;i<grid->SumArraySize();i++){
|
||||
vvnrm = vvnrm+sumarray[i];
|
||||
@ -114,18 +113,17 @@ PARALLEL_FOR_LOOP
|
||||
sumarray[i]=zero;
|
||||
}
|
||||
|
||||
PARALLEL_FOR_LOOP
|
||||
for(int thr=0;thr<grid->SumArraySize();thr++){
|
||||
parallel_for(int thr=0;thr<grid->SumArraySize();thr++){
|
||||
int nwork, mywork, myoff;
|
||||
GridThread::GetWork(grid->oSites(),thr,mywork,myoff);
|
||||
|
||||
|
||||
vobj vvsum=zero;
|
||||
for(int ss=myoff;ss<mywork+myoff; ss++){
|
||||
vvsum = vvsum + arg._odata[ss];
|
||||
}
|
||||
sumarray[thr]=vvsum;
|
||||
}
|
||||
|
||||
|
||||
vobj vsum=zero; // sum across threads
|
||||
for(int i=0;i<grid->SumArraySize();i++){
|
||||
vsum = vsum+sumarray[i];
|
||||
|
@ -302,8 +302,7 @@ namespace Grid {
|
||||
int words=sizeof(scalar_object)/sizeof(scalar_type);
|
||||
|
||||
|
||||
PARALLEL_FOR_LOOP
|
||||
for(int ss=0;ss<osites;ss++){
|
||||
parallel_for(int ss=0;ss<osites;ss++){
|
||||
|
||||
std::vector<scalar_object> buf(Nsimd);
|
||||
for(int m=0;m<multiplicity;m++) {// Draw from same generator multiplicity times
|
||||
|
@ -42,8 +42,7 @@ namespace Grid {
|
||||
-> Lattice<decltype(trace(lhs._odata[0]))>
|
||||
{
|
||||
Lattice<decltype(trace(lhs._odata[0]))> ret(lhs._grid);
|
||||
PARALLEL_FOR_LOOP
|
||||
for(int ss=0;ss<lhs._grid->oSites();ss++){
|
||||
parallel_for(int ss=0;ss<lhs._grid->oSites();ss++){
|
||||
ret._odata[ss] = trace(lhs._odata[ss]);
|
||||
}
|
||||
return ret;
|
||||
@ -56,8 +55,7 @@ PARALLEL_FOR_LOOP
|
||||
inline auto TraceIndex(const Lattice<vobj> &lhs) -> Lattice<decltype(traceIndex<Index>(lhs._odata[0]))>
|
||||
{
|
||||
Lattice<decltype(traceIndex<Index>(lhs._odata[0]))> ret(lhs._grid);
|
||||
PARALLEL_FOR_LOOP
|
||||
for(int ss=0;ss<lhs._grid->oSites();ss++){
|
||||
parallel_for(int ss=0;ss<lhs._grid->oSites();ss++){
|
||||
ret._odata[ss] = traceIndex<Index>(lhs._odata[ss]);
|
||||
}
|
||||
return ret;
|
||||
|
@ -51,7 +51,7 @@ inline void subdivides(GridBase *coarse,GridBase *fine)
|
||||
template<class vobj> inline void pickCheckerboard(int cb,Lattice<vobj> &half,const Lattice<vobj> &full){
|
||||
half.checkerboard = cb;
|
||||
int ssh=0;
|
||||
//PARALLEL_FOR_LOOP
|
||||
//parallel_for
|
||||
for(int ss=0;ss<full._grid->oSites();ss++){
|
||||
std::vector<int> coor;
|
||||
int cbos;
|
||||
@ -68,7 +68,7 @@ inline void subdivides(GridBase *coarse,GridBase *fine)
|
||||
template<class vobj> inline void setCheckerboard(Lattice<vobj> &full,const Lattice<vobj> &half){
|
||||
int cb = half.checkerboard;
|
||||
int ssh=0;
|
||||
//PARALLEL_FOR_LOOP
|
||||
//parallel_for
|
||||
for(int ss=0;ss<full._grid->oSites();ss++){
|
||||
std::vector<int> coor;
|
||||
int cbos;
|
||||
@ -153,8 +153,7 @@ inline void blockZAXPY(Lattice<vobj> &fineZ,
|
||||
assert(block_r[d]*coarse->_rdimensions[d]==fine->_rdimensions[d]);
|
||||
}
|
||||
|
||||
PARALLEL_FOR_LOOP
|
||||
for(int sf=0;sf<fine->oSites();sf++){
|
||||
parallel_for(int sf=0;sf<fine->oSites();sf++){
|
||||
|
||||
int sc;
|
||||
std::vector<int> coor_c(_ndimension);
|
||||
@ -186,8 +185,7 @@ template<class vobj,class CComplex>
|
||||
|
||||
fine_inner = localInnerProduct(fineX,fineY);
|
||||
blockSum(coarse_inner,fine_inner);
|
||||
PARALLEL_FOR_LOOP
|
||||
for(int ss=0;ss<coarse->oSites();ss++){
|
||||
parallel_for(int ss=0;ss<coarse->oSites();ss++){
|
||||
CoarseInner._odata[ss] = coarse_inner._odata[ss];
|
||||
}
|
||||
}
|
||||
@ -347,8 +345,7 @@ void localConvert(const Lattice<vobj> &in,Lattice<vvobj> &out)
|
||||
assert(ig->lSites() == og->lSites());
|
||||
}
|
||||
|
||||
PARALLEL_FOR_LOOP
|
||||
for(int idx=0;idx<ig->lSites();idx++){
|
||||
parallel_for(int idx=0;idx<ig->lSites();idx++){
|
||||
sobj s;
|
||||
ssobj ss;
|
||||
|
||||
@ -386,8 +383,7 @@ void InsertSlice(Lattice<vobj> &lowDim,Lattice<vobj> & higherDim,int slice, int
|
||||
}
|
||||
|
||||
// the above should guarantee that the operations are local
|
||||
PARALLEL_FOR_LOOP
|
||||
for(int idx=0;idx<lg->lSites();idx++){
|
||||
parallel_for(int idx=0;idx<lg->lSites();idx++){
|
||||
sobj s;
|
||||
std::vector<int> lcoor(nl);
|
||||
std::vector<int> hcoor(nh);
|
||||
@ -428,8 +424,7 @@ void ExtractSlice(Lattice<vobj> &lowDim, Lattice<vobj> & higherDim,int slice, in
|
||||
}
|
||||
}
|
||||
// the above should guarantee that the operations are local
|
||||
PARALLEL_FOR_LOOP
|
||||
for(int idx=0;idx<lg->lSites();idx++){
|
||||
parallel_for(int idx=0;idx<lg->lSites();idx++){
|
||||
sobj s;
|
||||
std::vector<int> lcoor(nl);
|
||||
std::vector<int> hcoor(nh);
|
||||
@ -468,8 +463,7 @@ void InsertSliceLocal(Lattice<vobj> &lowDim, Lattice<vobj> & higherDim,int slice
|
||||
}
|
||||
|
||||
// the above should guarantee that the operations are local
|
||||
PARALLEL_FOR_LOOP
|
||||
for(int idx=0;idx<lg->lSites();idx++){
|
||||
parallel_for(int idx=0;idx<lg->lSites();idx++){
|
||||
sobj s;
|
||||
std::vector<int> lcoor(nl);
|
||||
std::vector<int> hcoor(nh);
|
||||
@ -504,8 +498,7 @@ void ExtractSliceLocal(Lattice<vobj> &lowDim, Lattice<vobj> & higherDim,int slic
|
||||
}
|
||||
|
||||
// the above should guarantee that the operations are local
|
||||
PARALLEL_FOR_LOOP
|
||||
for(int idx=0;idx<lg->lSites();idx++){
|
||||
parallel_for(int idx=0;idx<lg->lSites();idx++){
|
||||
sobj s;
|
||||
std::vector<int> lcoor(nl);
|
||||
std::vector<int> hcoor(nh);
|
||||
@ -574,8 +567,7 @@ typename std::enable_if<isSIMDvectorized<vobj>::value && !isSIMDvectorized<sobj>
|
||||
in_grid->iCoorFromIindex(in_icoor[lane], lane);
|
||||
}
|
||||
|
||||
PARALLEL_FOR_LOOP
|
||||
for(int in_oidx = 0; in_oidx < in_grid->oSites(); in_oidx++){ //loop over outer index
|
||||
parallel_for(int in_oidx = 0; in_oidx < in_grid->oSites(); in_oidx++){ //loop over outer index
|
||||
//Assemble vector of pointers to output elements
|
||||
std::vector<sobj*> out_ptrs(in_nsimd);
|
||||
|
||||
@ -623,8 +615,7 @@ void precisionChange(Lattice<VobjOut> &out, const Lattice<VobjIn> &in){
|
||||
std::vector<SobjOut> in_slex_conv(in_grid->lSites());
|
||||
unvectorizeToLexOrdArray(in_slex_conv, in);
|
||||
|
||||
PARALLEL_FOR_LOOP
|
||||
for(int out_oidx=0;out_oidx<out_grid->oSites();out_oidx++){
|
||||
parallel_for(int out_oidx=0;out_oidx<out_grid->oSites();out_oidx++){
|
||||
std::vector<int> out_ocoor(ndim);
|
||||
out_grid->oCoorFromOindex(out_ocoor, out_oidx);
|
||||
|
||||
@ -642,10 +633,6 @@ void precisionChange(Lattice<VobjOut> &out, const Lattice<VobjIn> &in){
|
||||
merge(out._odata[out_oidx], ptrs, 0);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
}
|
||||
#endif
|
||||
|
@ -40,27 +40,24 @@ namespace Grid {
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
template<class vobj>
|
||||
inline Lattice<vobj> transpose(const Lattice<vobj> &lhs){
|
||||
Lattice<vobj> ret(lhs._grid);
|
||||
PARALLEL_FOR_LOOP
|
||||
for(int ss=0;ss<lhs._grid->oSites();ss++){
|
||||
ret._odata[ss] = transpose(lhs._odata[ss]);
|
||||
}
|
||||
return ret;
|
||||
};
|
||||
Lattice<vobj> ret(lhs._grid);
|
||||
parallel_for(int ss=0;ss<lhs._grid->oSites();ss++){
|
||||
ret._odata[ss] = transpose(lhs._odata[ss]);
|
||||
}
|
||||
return ret;
|
||||
};
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// Index level dependent transpose
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
template<int Index,class vobj>
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// Index level dependent transpose
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
template<int Index,class vobj>
|
||||
inline auto TransposeIndex(const Lattice<vobj> &lhs) -> Lattice<decltype(transposeIndex<Index>(lhs._odata[0]))>
|
||||
{
|
||||
Lattice<decltype(transposeIndex<Index>(lhs._odata[0]))> ret(lhs._grid);
|
||||
PARALLEL_FOR_LOOP
|
||||
for(int ss=0;ss<lhs._grid->oSites();ss++){
|
||||
ret._odata[ss] = transposeIndex<Index>(lhs._odata[ss]);
|
||||
}
|
||||
return ret;
|
||||
};
|
||||
|
||||
{
|
||||
Lattice<decltype(transposeIndex<Index>(lhs._odata[0]))> ret(lhs._grid);
|
||||
parallel_for(int ss=0;ss<lhs._grid->oSites();ss++){
|
||||
ret._odata[ss] = transposeIndex<Index>(lhs._odata[ss]);
|
||||
}
|
||||
return ret;
|
||||
};
|
||||
}
|
||||
#endif
|
||||
|
@ -37,8 +37,7 @@ namespace Grid {
|
||||
Lattice<obj> ret(rhs._grid);
|
||||
ret.checkerboard = rhs.checkerboard;
|
||||
conformable(ret,rhs);
|
||||
PARALLEL_FOR_LOOP
|
||||
for(int ss=0;ss<rhs._grid->oSites();ss++){
|
||||
parallel_for(int ss=0;ss<rhs._grid->oSites();ss++){
|
||||
ret._odata[ss]=pow(rhs._odata[ss],y);
|
||||
}
|
||||
return ret;
|
||||
@ -47,8 +46,7 @@ PARALLEL_FOR_LOOP
|
||||
Lattice<obj> ret(rhs._grid);
|
||||
ret.checkerboard = rhs.checkerboard;
|
||||
conformable(ret,rhs);
|
||||
PARALLEL_FOR_LOOP
|
||||
for(int ss=0;ss<rhs._grid->oSites();ss++){
|
||||
parallel_for(int ss=0;ss<rhs._grid->oSites();ss++){
|
||||
ret._odata[ss]=mod(rhs._odata[ss],y);
|
||||
}
|
||||
return ret;
|
||||
@ -58,8 +56,7 @@ PARALLEL_FOR_LOOP
|
||||
Lattice<obj> ret(rhs._grid);
|
||||
ret.checkerboard = rhs.checkerboard;
|
||||
conformable(ret,rhs);
|
||||
PARALLEL_FOR_LOOP
|
||||
for(int ss=0;ss<rhs._grid->oSites();ss++){
|
||||
parallel_for(int ss=0;ss<rhs._grid->oSites();ss++){
|
||||
ret._odata[ss]=div(rhs._odata[ss],y);
|
||||
}
|
||||
return ret;
|
||||
@ -69,8 +66,7 @@ PARALLEL_FOR_LOOP
|
||||
Lattice<obj> ret(rhs._grid);
|
||||
ret.checkerboard = rhs.checkerboard;
|
||||
conformable(ret,rhs);
|
||||
PARALLEL_FOR_LOOP
|
||||
for(int ss=0;ss<rhs._grid->oSites();ss++){
|
||||
parallel_for(int ss=0;ss<rhs._grid->oSites();ss++){
|
||||
ret._odata[ss]=Exponentiate(rhs._odata[ss],alpha, Nexp);
|
||||
}
|
||||
return ret;
|
||||
|
@ -56,8 +56,7 @@ inline void whereWolf(Lattice<vobj> &ret,const Lattice<iobj> &predicate,Lattice<
|
||||
std::vector<scalar_object> truevals (Nsimd);
|
||||
std::vector<scalar_object> falsevals(Nsimd);
|
||||
|
||||
PARALLEL_FOR_LOOP
|
||||
for(int ss=0;ss<iftrue._grid->oSites(); ss++){
|
||||
parallel_for(int ss=0;ss<iftrue._grid->oSites(); ss++){
|
||||
|
||||
extract(iftrue._odata[ss] ,truevals);
|
||||
extract(iffalse._odata[ss] ,falsevals);
|
||||
|
Reference in New Issue
Block a user