mirror of
https://github.com/paboyle/Grid.git
synced 2025-04-04 19:25:56 +01:00
Merge branch 'feature/hdcr' into develop
This commit is contained in:
commit
f3a8d039a2
@ -46,16 +46,14 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
|
|||||||
NAMESPACE_BEGIN(Grid);
|
NAMESPACE_BEGIN(Grid);
|
||||||
|
|
||||||
template<class vobj,class CComplex>
|
template<class vobj,class CComplex>
|
||||||
inline void blockMaskedInnerProduct(Lattice<CComplex> &CoarseInner1,
|
inline void blockMaskedInnerProduct(Lattice<CComplex> &CoarseInner,
|
||||||
Lattice<CComplex> &CoarseInner2,
|
const Lattice<decltype(innerProduct(vobj(),vobj()))> &FineMask,
|
||||||
const Lattice<decltype(innerProduct(vobj(),vobj()))> &FineMask1,
|
|
||||||
const Lattice<decltype(innerProduct(vobj(),vobj()))> &FineMask2,
|
|
||||||
const Lattice<vobj> &fineX,
|
const Lattice<vobj> &fineX,
|
||||||
const Lattice<vobj> &fineY)
|
const Lattice<vobj> &fineY)
|
||||||
{
|
{
|
||||||
typedef decltype(innerProduct(vobj(),vobj())) dotp;
|
typedef decltype(innerProduct(vobj(),vobj())) dotp;
|
||||||
|
|
||||||
GridBase *coarse(CoarseInner1.Grid());
|
GridBase *coarse(CoarseInner.Grid());
|
||||||
GridBase *fine (fineX.Grid());
|
GridBase *fine (fineX.Grid());
|
||||||
|
|
||||||
Lattice<dotp> fine_inner(fine); fine_inner.Checkerboard() = fineX.Checkerboard();
|
Lattice<dotp> fine_inner(fine); fine_inner.Checkerboard() = fineX.Checkerboard();
|
||||||
@ -64,12 +62,8 @@ inline void blockMaskedInnerProduct(Lattice<CComplex> &CoarseInner1,
|
|||||||
// Multiply could be fused with innerProduct
|
// Multiply could be fused with innerProduct
|
||||||
// Single block sum kernel could do both masks.
|
// Single block sum kernel could do both masks.
|
||||||
fine_inner = localInnerProduct(fineX,fineY);
|
fine_inner = localInnerProduct(fineX,fineY);
|
||||||
|
mult(fine_inner_msk, fine_inner,FineMask);
|
||||||
mult(fine_inner_msk, fine_inner,FineMask1);
|
blockSum(CoarseInner,fine_inner_msk);
|
||||||
blockSum(CoarseInner1,fine_inner_msk);
|
|
||||||
|
|
||||||
mult(fine_inner_msk, fine_inner,FineMask2);
|
|
||||||
blockSum(CoarseInner2,fine_inner_msk);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@ -794,7 +788,7 @@ public:
|
|||||||
|
|
||||||
Lattice<iScalar<vInteger> > coor (FineGrid);
|
Lattice<iScalar<vInteger> > coor (FineGrid);
|
||||||
Lattice<iScalar<vInteger> > bcoor(FineGrid);
|
Lattice<iScalar<vInteger> > bcoor(FineGrid);
|
||||||
Lattice<iScalar<vInteger> > bcb (FineGrid);
|
Lattice<iScalar<vInteger> > bcb (FineGrid); bcb = Zero();
|
||||||
|
|
||||||
CoarseVector iProj(Grid());
|
CoarseVector iProj(Grid());
|
||||||
CoarseVector oProj(Grid());
|
CoarseVector oProj(Grid());
|
||||||
@ -868,7 +862,7 @@ public:
|
|||||||
|
|
||||||
for(int j=0;j<nbasis;j++){
|
for(int j=0;j<nbasis;j++){
|
||||||
|
|
||||||
blockMaskedInnerProduct(iZProj,oZProj,imask,omask,Subspace.subspace[j],Mphi);
|
blockMaskedInnerProduct(oZProj,omask,Subspace.subspace[j],Mphi);
|
||||||
|
|
||||||
auto iZProj_v = iZProj.View() ;
|
auto iZProj_v = iZProj.View() ;
|
||||||
auto oZProj_v = oZProj.View() ;
|
auto oZProj_v = oZProj.View() ;
|
||||||
@ -876,6 +870,8 @@ public:
|
|||||||
auto A_self = A[self_stencil].View();
|
auto A_self = A[self_stencil].View();
|
||||||
|
|
||||||
accelerator_for(ss, Grid()->oSites(), Fobj::Nsimd(),{ coalescedWrite(A_p[ss](j,i),oZProj_v(ss)); });
|
accelerator_for(ss, Grid()->oSites(), Fobj::Nsimd(),{ coalescedWrite(A_p[ss](j,i),oZProj_v(ss)); });
|
||||||
|
// if( disp!= 0 ) { accelerator_for(ss, Grid()->oSites(), Fobj::Nsimd(),{ coalescedWrite(A_p[ss](j,i),oZProj_v(ss)); });}
|
||||||
|
// accelerator_for(ss, Grid()->oSites(), Fobj::Nsimd(),{ coalescedWrite(A_self[ss](j,i),A_self(ss)(j,i)+iZProj_v(ss)); });
|
||||||
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -886,9 +882,8 @@ public:
|
|||||||
///////////////////////////////////////////
|
///////////////////////////////////////////
|
||||||
{
|
{
|
||||||
mult(tmp,phi,evenmask); linop.Op(tmp,Mphie);
|
mult(tmp,phi,evenmask); linop.Op(tmp,Mphie);
|
||||||
mult(tmp,phi,oddmask ); linop.Op(tmp,Mphio);
|
mult(tmp,phi,oddmask ); linop.Op(tmp,Mphio);
|
||||||
|
|
||||||
// tmp = Mphie*evenmask + Mphio*oddmask;
|
|
||||||
{
|
{
|
||||||
auto tmp_ = tmp.View();
|
auto tmp_ = tmp.View();
|
||||||
auto evenmask_ = evenmask.View();
|
auto evenmask_ = evenmask.View();
|
||||||
@ -904,15 +899,17 @@ public:
|
|||||||
|
|
||||||
auto SelfProj_ = SelfProj.View();
|
auto SelfProj_ = SelfProj.View();
|
||||||
auto A_self = A[self_stencil].View();
|
auto A_self = A[self_stencil].View();
|
||||||
|
|
||||||
accelerator_for(ss, Grid()->oSites(), Fobj::Nsimd(),{
|
accelerator_for(ss, Grid()->oSites(), Fobj::Nsimd(),{
|
||||||
for(int j=0;j<nbasis;j++){
|
for(int j=0;j<nbasis;j++){
|
||||||
coalescedWrite(A_self[ss](j,i), SelfProj_(ss)(j));
|
coalescedWrite(A_self[ss](j,i), SelfProj_(ss)(j));
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if(hermitian) {
|
if(hermitian) {
|
||||||
std::cout << GridLogMessage << " ForceHermitian "<<std::endl;
|
std::cout << GridLogMessage << " ForceHermitian, new code "<<std::endl;
|
||||||
ForceHermitian();
|
ForceHermitian();
|
||||||
}
|
}
|
||||||
// AssertHermitian();
|
// AssertHermitian();
|
||||||
|
@ -336,7 +336,7 @@ public:
|
|||||||
};
|
};
|
||||||
///////////////////////////////////////////////////////////////////////////////////////////////////
|
///////////////////////////////////////////////////////////////////////////////////////////////////
|
||||||
// Left handed Moo^-1 ; (Moo - Moe Mee^-1 Meo) psi = eta --> ( 1 - Moo^-1 Moe Mee^-1 Meo ) psi = Moo^-1 eta
|
// Left handed Moo^-1 ; (Moo - Moe Mee^-1 Meo) psi = eta --> ( 1 - Moo^-1 Moe Mee^-1 Meo ) psi = Moo^-1 eta
|
||||||
// Right handed Moo^-1 ; (Moo - Moe Mee^-1 Meo) Moo^-1 Moo psi = eta --> ( 1 - Moe Mee^-1 Meo ) Moo^-1 phi=eta ; psi = Moo^-1 phi
|
// Right handed Moo^-1 ; (Moo - Moe Mee^-1 Meo) Moo^-1 Moo psi = eta --> ( 1 - Moe Mee^-1 Meo Moo^-1) phi=eta ; psi = Moo^-1 phi
|
||||||
///////////////////////////////////////////////////////////////////////////////////////////////////
|
///////////////////////////////////////////////////////////////////////////////////////////////////
|
||||||
template<class Matrix,class Field> using SchurDiagOneRH = SchurDiagTwoOperator<Matrix,Field> ;
|
template<class Matrix,class Field> using SchurDiagOneRH = SchurDiagTwoOperator<Matrix,Field> ;
|
||||||
template<class Matrix,class Field> using SchurDiagOneLH = SchurDiagOneOperator<Matrix,Field> ;
|
template<class Matrix,class Field> using SchurDiagOneLH = SchurDiagOneOperator<Matrix,Field> ;
|
||||||
|
@ -59,16 +59,15 @@ void basisRotate(std::vector<Field> &basis,Eigen::MatrixXd& Qt,int j0, int j1, i
|
|||||||
{
|
{
|
||||||
typedef decltype(basis[0].View()) View;
|
typedef decltype(basis[0].View()) View;
|
||||||
auto tmp_v = basis[0].View();
|
auto tmp_v = basis[0].View();
|
||||||
std::vector<View> basis_v(basis.size(),tmp_v);
|
Vector<View> basis_v(basis.size(),tmp_v);
|
||||||
typedef typename Field::vector_object vobj;
|
typedef typename Field::vector_object vobj;
|
||||||
GridBase* grid = basis[0].Grid();
|
GridBase* grid = basis[0].Grid();
|
||||||
|
|
||||||
for(int k=0;k<basis.size();k++){
|
for(int k=0;k<basis.size();k++){
|
||||||
basis_v[k] = basis[k].View();
|
basis_v[k] = basis[k].View();
|
||||||
}
|
}
|
||||||
|
#if 0
|
||||||
std::vector < vobj , commAllocator<vobj> > Bt(thread_max() * Nm); // Thread private
|
std::vector < vobj , commAllocator<vobj> > Bt(thread_max() * Nm); // Thread private
|
||||||
|
|
||||||
thread_region
|
thread_region
|
||||||
{
|
{
|
||||||
vobj* B = Bt.data() + Nm * thread_num();
|
vobj* B = Bt.data() + Nm * thread_num();
|
||||||
@ -86,24 +85,89 @@ void basisRotate(std::vector<Field> &basis,Eigen::MatrixXd& Qt,int j0, int j1, i
|
|||||||
}
|
}
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
#else
|
||||||
|
|
||||||
|
int nrot = j1-j0;
|
||||||
|
|
||||||
|
|
||||||
|
uint64_t oSites =grid->oSites();
|
||||||
|
uint64_t siteBlock=(grid->oSites()+nrot-1)/nrot; // Maximum 1 additional vector overhead
|
||||||
|
|
||||||
|
// printf("BasisRotate %d %d nrot %d siteBlock %d\n",j0,j1,nrot,siteBlock);
|
||||||
|
|
||||||
|
Vector <vobj> Bt(siteBlock * nrot);
|
||||||
|
auto Bp=&Bt[0];
|
||||||
|
|
||||||
|
// GPU readable copy of Eigen matrix
|
||||||
|
Vector<double> Qt_jv(Nm*Nm);
|
||||||
|
double *Qt_p = & Qt_jv[0];
|
||||||
|
for(int k=0;k<Nm;++k){
|
||||||
|
for(int j=0;j<Nm;++j){
|
||||||
|
Qt_p[j*Nm+k]=Qt(j,k);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Block the loop to keep storage footprint down
|
||||||
|
vobj zz=Zero();
|
||||||
|
for(uint64_t s=0;s<oSites;s+=siteBlock){
|
||||||
|
|
||||||
|
// remaining work in this block
|
||||||
|
int ssites=MIN(siteBlock,oSites-s);
|
||||||
|
|
||||||
|
// zero out the accumulators
|
||||||
|
accelerator_for(ss,siteBlock*nrot,vobj::Nsimd(),{
|
||||||
|
auto z=coalescedRead(zz);
|
||||||
|
coalescedWrite(Bp[ss],z);
|
||||||
|
});
|
||||||
|
|
||||||
|
accelerator_for(sj,ssites*nrot,vobj::Nsimd(),{
|
||||||
|
|
||||||
|
int j =sj%nrot;
|
||||||
|
int jj =j0+j;
|
||||||
|
int ss =sj/nrot;
|
||||||
|
int sss=ss+s;
|
||||||
|
|
||||||
|
for(int k=k0; k<k1; ++k){
|
||||||
|
auto tmp = coalescedRead(Bp[ss*nrot+j]);
|
||||||
|
coalescedWrite(Bp[ss*nrot+j],tmp+ Qt_p[jj*Nm+k] * coalescedRead(basis_v[k][sss]));
|
||||||
|
}
|
||||||
|
});
|
||||||
|
|
||||||
|
accelerator_for(sj,ssites*nrot,vobj::Nsimd(),{
|
||||||
|
int j =sj%nrot;
|
||||||
|
int jj =j0+j;
|
||||||
|
int ss =sj/nrot;
|
||||||
|
int sss=ss+s;
|
||||||
|
coalescedWrite(basis_v[jj][sss],coalescedRead(Bp[ss*nrot+j]));
|
||||||
|
});
|
||||||
|
}
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
// Extract a single rotated vector
|
// Extract a single rotated vector
|
||||||
template<class Field>
|
template<class Field>
|
||||||
void basisRotateJ(Field &result,std::vector<Field> &basis,Eigen::MatrixXd& Qt,int j, int k0,int k1,int Nm)
|
void basisRotateJ(Field &result,std::vector<Field> &basis,Eigen::MatrixXd& Qt,int j, int k0,int k1,int Nm)
|
||||||
{
|
{
|
||||||
|
typedef decltype(basis[0].View()) View;
|
||||||
typedef typename Field::vector_object vobj;
|
typedef typename Field::vector_object vobj;
|
||||||
GridBase* grid = basis[0].Grid();
|
GridBase* grid = basis[0].Grid();
|
||||||
|
|
||||||
result.Checkerboard() = basis[0].Checkerboard();
|
result.Checkerboard() = basis[0].Checkerboard();
|
||||||
auto result_v=result.View();
|
auto result_v=result.View();
|
||||||
thread_for(ss, grid->oSites(),{
|
Vector<View> basis_v(basis.size(),result_v);
|
||||||
vobj B = Zero();
|
for(int k=0;k<basis.size();k++){
|
||||||
|
basis_v[k] = basis[k].View();
|
||||||
|
}
|
||||||
|
vobj zz=Zero();
|
||||||
|
Vector<double> Qt_jv(Nm);
|
||||||
|
double * Qt_j = & Qt_jv[0];
|
||||||
|
for(int k=0;k<Nm;++k) Qt_j[k]=Qt(j,k);
|
||||||
|
accelerator_for(ss, grid->oSites(),vobj::Nsimd(),{
|
||||||
|
auto B=coalescedRead(zz);
|
||||||
for(int k=k0; k<k1; ++k){
|
for(int k=k0; k<k1; ++k){
|
||||||
auto basis_k = basis[k].View();
|
B +=Qt_j[k] * coalescedRead(basis_v[k][ss]);
|
||||||
B +=Qt(j,k) * basis_k[ss];
|
|
||||||
}
|
}
|
||||||
result_v[ss] = B;
|
coalescedWrite(result_v[ss], B);
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -303,7 +367,7 @@ public:
|
|||||||
RealD _eresid, // resid in lmdue deficit
|
RealD _eresid, // resid in lmdue deficit
|
||||||
int _MaxIter, // Max iterations
|
int _MaxIter, // Max iterations
|
||||||
RealD _betastp=0.0, // if beta(k) < betastp: converged
|
RealD _betastp=0.0, // if beta(k) < betastp: converged
|
||||||
int _MinRestart=1, int _orth_period = 1,
|
int _MinRestart=0, int _orth_period = 1,
|
||||||
IRLdiagonalisation _diagonalisation= IRLdiagonaliseWithEigen) :
|
IRLdiagonalisation _diagonalisation= IRLdiagonaliseWithEigen) :
|
||||||
SimpleTester(HermOp), _PolyOp(PolyOp), _HermOp(HermOp), _Tester(SimpleTester),
|
SimpleTester(HermOp), _PolyOp(PolyOp), _HermOp(HermOp), _Tester(SimpleTester),
|
||||||
Nstop(_Nstop) , Nk(_Nk), Nm(_Nm),
|
Nstop(_Nstop) , Nk(_Nk), Nm(_Nm),
|
||||||
@ -377,14 +441,17 @@ until convergence
|
|||||||
{
|
{
|
||||||
auto src_n = src;
|
auto src_n = src;
|
||||||
auto tmp = src;
|
auto tmp = src;
|
||||||
|
std::cout << GridLogIRL << " IRL source norm " << norm2(src) << std::endl;
|
||||||
const int _MAX_ITER_IRL_MEVAPP_ = 50;
|
const int _MAX_ITER_IRL_MEVAPP_ = 50;
|
||||||
for (int i=0;i<_MAX_ITER_IRL_MEVAPP_;i++) {
|
for (int i=0;i<_MAX_ITER_IRL_MEVAPP_;i++) {
|
||||||
normalise(src_n);
|
normalise(src_n);
|
||||||
_HermOp(src_n,tmp);
|
_HermOp(src_n,tmp);
|
||||||
|
// std::cout << GridLogMessage<< tmp<<std::endl; exit(0);
|
||||||
|
// std::cout << GridLogIRL << " _HermOp " << norm2(tmp) << std::endl;
|
||||||
RealD vnum = real(innerProduct(src_n,tmp)); // HermOp.
|
RealD vnum = real(innerProduct(src_n,tmp)); // HermOp.
|
||||||
RealD vden = norm2(src_n);
|
RealD vden = norm2(src_n);
|
||||||
RealD na = vnum/vden;
|
RealD na = vnum/vden;
|
||||||
if (fabs(evalMaxApprox/na - 1.0) < 0.05)
|
if (fabs(evalMaxApprox/na - 1.0) < 0.0001)
|
||||||
i=_MAX_ITER_IRL_MEVAPP_;
|
i=_MAX_ITER_IRL_MEVAPP_;
|
||||||
evalMaxApprox = na;
|
evalMaxApprox = na;
|
||||||
std::cout << GridLogIRL << " Approximation of largest eigenvalue: " << evalMaxApprox << std::endl;
|
std::cout << GridLogIRL << " Approximation of largest eigenvalue: " << evalMaxApprox << std::endl;
|
||||||
|
@ -60,5 +60,53 @@ public:
|
|||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
// HPDSolver: callable wrapper that stores references to a linear operator (_Matrix),
// a solver (_HermitianSolver) and an initial-guess functor (_Guess).
// operator()(in,out) calls _Guess(in,out) and then _HermitianSolver(_Matrix,in,out).
// Only references are held, so the three constructor arguments must outlive this object.
// NOTE(review): "HPD" presumably stands for Hermitian positive definite - confirm.
template<class Field> class HPDSolver {
|
||||||
|
private:
|
||||||
|
// operator handed unchanged to the solver in operator()
LinearOperatorBase<Field> & _Matrix;
|
||||||
|
// solver invoked as _HermitianSolver(op,in,out)
OperatorFunction<Field> & _HermitianSolver;
|
||||||
|
// functor invoked as _Guess(in,out) before the solve
LinearFunction<Field> & _Guess;
|
||||||
|
public:
|
||||||
|
|
||||||
|
/////////////////////////////////////////////////////
|
||||||
|
// Wrap the usual normal equations trick
// NOTE(review): no MdagM operator is formed anywhere in this class; _Matrix is passed
// to the solver as-is, so the line above may be a copy/paste leftover - confirm.
|
||||||
|
/////////////////////////////////////////////////////
|
||||||
|
// Constructor: binds the three references; nothing is copied.
HPDSolver(LinearOperatorBase<Field> &Matrix,
|
||||||
|
OperatorFunction<Field> &HermitianSolver,
|
||||||
|
LinearFunction<Field> &Guess)
|
||||||
|
: _Matrix(Matrix), _HermitianSolver(HermitianSolver), _Guess(Guess) {};
|
||||||
|
|
||||||
|
// Apply: guess first, then solve. 'in' is read-only, 'out' is passed by non-const reference.
void operator() (const Field &in, Field &out){
|
||||||
|
|
||||||
|
// presumably seeds 'out' with a starting guess (going by the member name) - confirm
_Guess(in,out);
|
||||||
|
// NOTE(review): the trailing "Mdag M" remark is not reflected in this call, which
// uses _Matrix directly and forwards 'in' unchanged - confirm the intended equation.
_HermitianSolver(_Matrix,in,out); // Mdag M out = Mdag in
|
||||||
|
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
|
// MdagMSolver: callable wrapper that stores references to a sparse matrix (_Matrix),
// a solver (_HermitianSolver) and an initial-guess functor (_Guess).
// operator()(in,out) builds an MdagMLinearOperator around _Matrix, calls _Guess(in,out)
// and then _HermitianSolver(MdagMOp,in,out).
// Only references are held, so the three constructor arguments must outlive this object.
template<class Field> class MdagMSolver {
|
||||||
|
private:
|
||||||
|
// matrix wrapped into an MdagMLinearOperator on every call of operator()
SparseMatrixBase<Field> & _Matrix;
|
||||||
|
// solver invoked as _HermitianSolver(op,in,out)
OperatorFunction<Field> & _HermitianSolver;
|
||||||
|
// functor invoked as _Guess(in,out) before the solve
LinearFunction<Field> & _Guess;
|
||||||
|
public:
|
||||||
|
|
||||||
|
/////////////////////////////////////////////////////
|
||||||
|
// Wrap the usual normal equations trick
|
||||||
|
/////////////////////////////////////////////////////
|
||||||
|
// Constructor: binds the three references; nothing is copied.
MdagMSolver(SparseMatrixBase<Field> &Matrix, OperatorFunction<Field> &HermitianSolver,
|
||||||
|
LinearFunction<Field> &Guess)
|
||||||
|
: _Matrix(Matrix), _HermitianSolver(HermitianSolver), _Guess(Guess) {};
|
||||||
|
|
||||||
|
// Apply: wrap the matrix, guess, then solve. 'in' is read-only, 'out' is passed by non-const reference.
void operator() (const Field &in, Field &out){
|
||||||
|
|
||||||
|
// operator object is a local, rebuilt on each call
MdagMLinearOperator<SparseMatrixBase<Field>,Field> MdagMOp(_Matrix);
|
||||||
|
// presumably seeds 'out' with a starting guess (going by the member name) - confirm
_Guess(in,out);
|
||||||
|
|
||||||
|
// NOTE(review): 'in' is forwarded unchanged; if the right-hand side is meant to be
// Mdag*in as the trailing remark says, that multiplication is not visible here - confirm.
_HermitianSolver(MdagMOp,in,out); // Mdag M out = Mdag in
|
||||||
|
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
NAMESPACE_END(Grid);
|
NAMESPACE_END(Grid);
|
||||||
#endif
|
#endif
|
||||||
|
@ -118,7 +118,7 @@ public:
|
|||||||
|
|
||||||
}
|
}
|
||||||
GCRLogLevel<<"Variable Preconditioned GCR did not converge"<<std::endl;
|
GCRLogLevel<<"Variable Preconditioned GCR did not converge"<<std::endl;
|
||||||
assert(0);
|
// assert(0);
|
||||||
}
|
}
|
||||||
|
|
||||||
RealD GCRnStep(const Field &src, Field &psi,RealD rsq){
|
RealD GCRnStep(const Field &src, Field &psi,RealD rsq){
|
||||||
|
@ -47,20 +47,19 @@ public:
|
|||||||
// Give Lattice access
|
// Give Lattice access
|
||||||
template<class object> friend class Lattice;
|
template<class object> friend class Lattice;
|
||||||
|
|
||||||
GridBase(const Coordinate & processor_grid) : CartesianCommunicator(processor_grid) {};
|
GridBase(const Coordinate & processor_grid) : CartesianCommunicator(processor_grid) { LocallyPeriodic=0;};
|
||||||
|
|
||||||
GridBase(const Coordinate & processor_grid,
|
GridBase(const Coordinate & processor_grid,
|
||||||
const CartesianCommunicator &parent,
|
const CartesianCommunicator &parent,
|
||||||
int &split_rank)
|
int &split_rank)
|
||||||
: CartesianCommunicator(processor_grid,parent,split_rank) {};
|
: CartesianCommunicator(processor_grid,parent,split_rank) {LocallyPeriodic=0;};
|
||||||
|
|
||||||
GridBase(const Coordinate & processor_grid,
|
GridBase(const Coordinate & processor_grid,
|
||||||
const CartesianCommunicator &parent)
|
const CartesianCommunicator &parent)
|
||||||
: CartesianCommunicator(processor_grid,parent,dummy) {};
|
: CartesianCommunicator(processor_grid,parent,dummy) {LocallyPeriodic=0;};
|
||||||
|
|
||||||
virtual ~GridBase() = default;
|
virtual ~GridBase() = default;
|
||||||
|
|
||||||
|
|
||||||
// Physics Grid information.
|
// Physics Grid information.
|
||||||
Coordinate _simd_layout;// Which dimensions get relayed out over simd lanes.
|
Coordinate _simd_layout;// Which dimensions get relayed out over simd lanes.
|
||||||
Coordinate _fdimensions;// (full) Global dimensions of array prior to cb removal
|
Coordinate _fdimensions;// (full) Global dimensions of array prior to cb removal
|
||||||
@ -80,7 +79,8 @@ public:
|
|||||||
Coordinate _lstart; // local start of array in gcoors _processor_coor[d]*_ldimensions[d]
|
Coordinate _lstart; // local start of array in gcoors _processor_coor[d]*_ldimensions[d]
|
||||||
Coordinate _lend ; // local end of array in gcoors _processor_coor[d]*_ldimensions[d]+_ldimensions_[d]-1
|
Coordinate _lend ; // local end of array in gcoors _processor_coor[d]*_ldimensions[d]+_ldimensions_[d]-1
|
||||||
|
|
||||||
bool _isCheckerBoarded;
|
bool _isCheckerBoarded;
|
||||||
|
int LocallyPeriodic;
|
||||||
|
|
||||||
public:
|
public:
|
||||||
|
|
||||||
|
@ -173,6 +173,7 @@ public:
|
|||||||
///////////////////////////////////////////////////
|
///////////////////////////////////////////////////
|
||||||
typedef typename vobj::scalar_type scalar_type;
|
typedef typename vobj::scalar_type scalar_type;
|
||||||
typedef typename vobj::vector_type vector_type;
|
typedef typename vobj::vector_type vector_type;
|
||||||
|
typedef typename vobj::scalar_object scalar_object;
|
||||||
typedef vobj vector_object;
|
typedef vobj vector_object;
|
||||||
|
|
||||||
private:
|
private:
|
||||||
|
@ -156,7 +156,7 @@ void peekSite(sobj &s,const Lattice<vobj> &l,const Coordinate &site){
|
|||||||
// Peek a scalar object from the SIMD array
|
// Peek a scalar object from the SIMD array
|
||||||
//////////////////////////////////////////////////////////
|
//////////////////////////////////////////////////////////
|
||||||
template<class vobj,class sobj>
|
template<class vobj,class sobj>
|
||||||
void peekLocalSite(sobj &s,const Lattice<vobj> &l,Coordinate &site){
|
accelerator_inline void peekLocalSite(sobj &s,const Lattice<vobj> &l,Coordinate &site){
|
||||||
|
|
||||||
GridBase *grid = l.Grid();
|
GridBase *grid = l.Grid();
|
||||||
|
|
||||||
@ -185,7 +185,7 @@ void peekLocalSite(sobj &s,const Lattice<vobj> &l,Coordinate &site){
|
|||||||
};
|
};
|
||||||
|
|
||||||
template<class vobj,class sobj>
|
template<class vobj,class sobj>
|
||||||
void pokeLocalSite(const sobj &s,Lattice<vobj> &l,Coordinate &site){
|
accelerator_inline void pokeLocalSite(const sobj &s,Lattice<vobj> &l,Coordinate &site){
|
||||||
|
|
||||||
GridBase *grid=l.Grid();
|
GridBase *grid=l.Grid();
|
||||||
|
|
||||||
|
@ -439,6 +439,67 @@ void localConvert(const Lattice<vobj> &in,Lattice<vvobj> &out)
|
|||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// localCopyRegion: copies a box of sites from lattice From into lattice To.
//   FromLowerLeft : lowest corner of the box in From, in the coordinates produced by
//                   Lexicographic::CoorFromIndex over Fg->_ldimensions
//   ToLowerLeft   : corner in To onto which FromLowerLeft is mapped
//   RegionSize    : extent of the box in each dimension
// Sites of From outside [FromLowerLeft, FromLowerLeft+RegionSize) are skipped.
// Enforced by assert: neither grid is checkerboarded, both grids have the same number
// of dimensions and the same _processors[d] in every dimension.
// NOTE(review): nothing here checks that the shifted coordinate lies inside To's local
// volume, nor that To's grid has the same Nsimd as From's - callers presumably guarantee both.
template<class vobj>
|
||||||
|
void localCopyRegion(const Lattice<vobj> &From,Lattice<vobj> & To,Coordinate FromLowerLeft, Coordinate ToLowerLeft, Coordinate RegionSize)
|
||||||
|
{
|
||||||
|
typedef typename vobj::scalar_object sobj;
|
||||||
|
typedef typename vobj::scalar_type scalar_type;
|
||||||
|
typedef typename vobj::vector_type vector_type;
|
||||||
|
|
||||||
|
// number of vector_type words that make up one vobj
static const int words=sizeof(vobj)/sizeof(vector_type);
|
||||||
|
|
||||||
|
GridBase *Fg = From.Grid();
|
||||||
|
GridBase *Tg = To.Grid();
|
||||||
|
assert(!Fg->_isCheckerBoarded);
|
||||||
|
assert(!Tg->_isCheckerBoarded);
|
||||||
|
// SIMD width is read from the source grid only
int Nsimd = Fg->Nsimd();
|
||||||
|
int nF = Fg->_ndimension;
|
||||||
|
int nT = Tg->_ndimension;
|
||||||
|
int nd = nF;
|
||||||
|
assert(nF == nT);
|
||||||
|
|
||||||
|
for(int d=0;d<nd;d++){
|
||||||
|
assert(Fg->_processors[d] == Tg->_processors[d]);
|
||||||
|
}
|
||||||
|
|
||||||
|
// the above should guarantee that the operations are local
|
||||||
|
// Grid geometry is copied into local Coordinate objects by value; the loop body below
// uses these copies and never dereferences Fg or Tg.
Coordinate ldf = Fg->_ldimensions;
|
||||||
|
Coordinate rdf = Fg->_rdimensions;
|
||||||
|
Coordinate isf = Fg->_istride;
|
||||||
|
Coordinate osf = Fg->_ostride;
|
||||||
|
Coordinate rdt = Tg->_rdimensions;
|
||||||
|
Coordinate ist = Tg->_istride;
|
||||||
|
Coordinate ost = Tg->_ostride;
|
||||||
|
auto t_v = To.View();
|
||||||
|
auto f_v = From.View();
|
||||||
|
// one iteration per site counted by Fg->lSites(); the third macro argument is 1
accelerator_for(idx,Fg->lSites(),1,{
|
||||||
|
// only referenced by the commented-out peek/poke calls further down
sobj s;
|
||||||
|
Coordinate Fcoor(nd);
|
||||||
|
Coordinate Tcoor(nd);
|
||||||
|
// turn the flat index into a coordinate using From's _ldimensions
Lexicographic::CoorFromIndex(Fcoor,idx,ldf);
|
||||||
|
int in_region=1;
|
||||||
|
// box-membership test per dimension; Tcoor gets the same offset relative to ToLowerLeft
for(int d=0;d<nd;d++){
|
||||||
|
if ( (Fcoor[d] < FromLowerLeft[d]) || (Fcoor[d]>=FromLowerLeft[d]+RegionSize[d]) ){
|
||||||
|
in_region=0;
|
||||||
|
}
|
||||||
|
Tcoor[d] = ToLowerLeft[d]+ Fcoor[d]-FromLowerLeft[d];
|
||||||
|
}
|
||||||
|
if (in_region) {
|
||||||
|
// idx_*: sum over d of _istride[d]*(coor[d]/_rdimensions[d])
// odx_*: sum over d of _ostride[d]*(coor[d]%_rdimensions[d])
// presumably the SIMD-lane index and the outer-site index respectively - confirm
Integer idx_f = 0; for(int d=0;d<nd;d++) idx_f+=isf[d]*(Fcoor[d]/rdf[d]);
|
||||||
|
Integer idx_t = 0; for(int d=0;d<nd;d++) idx_t+=ist[d]*(Tcoor[d]/rdt[d]);
|
||||||
|
Integer odx_f = 0; for(int d=0;d<nd;d++) odx_f+=osf[d]*(Fcoor[d]%rdf[d]);
|
||||||
|
Integer odx_t = 0; for(int d=0;d<nd;d++) odx_t+=ost[d]*(Tcoor[d]%rdt[d]);
|
||||||
|
// reinterpret the selected vobj of each view as a flat array of scalar_type
scalar_type * fp = (scalar_type *)&f_v[odx_f];
|
||||||
|
scalar_type * tp = (scalar_type *)&t_v[odx_t];
|
||||||
|
// copy 'words' scalars, stepping Nsimd elements between successive words
for(int w=0;w<words;w++){
|
||||||
|
tp[idx_t+w*Nsimd] = fp[idx_f+w*Nsimd]; // FIXME: with an RRII layout this type pun will not work
|
||||||
|
}
|
||||||
|
// peekLocalSite(s,From,Fcoor);
|
||||||
|
// pokeLocalSite(s,To ,Tcoor);
|
||||||
|
}
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
template<class vobj>
|
template<class vobj>
|
||||||
void InsertSlice(const Lattice<vobj> &lowDim,Lattice<vobj> & higherDim,int slice, int orthog)
|
void InsertSlice(const Lattice<vobj> &lowDim,Lattice<vobj> & higherDim,int slice, int orthog)
|
||||||
|
Loading…
x
Reference in New Issue
Block a user