Merge branch 'feature/dirichlet' of https://github.com/paboyle/Grid into feature/dirichlet
commit 7db8dd7a95
@@ -81,6 +81,7 @@ public:
   using OperatorFunction<FieldD>::operator();
 
   RealD Tolerance;
+  Integer MaxIterationsMshift;
   Integer MaxIterations;
   Integer IterationsToComplete; //Number of iterations the CG took to finish. Filled in upon completion
   std::vector<int> IterationsToCompleteShift; // Iterations for this shift
@@ -95,9 +96,9 @@ public:
 
   ConjugateGradientMultiShiftMixedPrec(Integer maxit, const MultiShiftFunction &_shifts,
                                        GridBase* _SinglePrecGrid, LinearOperatorBase<FieldF> &_Linop_f,
-                                       int _ReliableUpdateFreq
-                                       ) :
-    MaxIterations(maxit), shifts(_shifts), SinglePrecGrid(_SinglePrecGrid), Linop_f(_Linop_f), ReliableUpdateFreq(_ReliableUpdateFreq)
+                                       int _ReliableUpdateFreq) :
+    MaxIterationsMshift(maxit), shifts(_shifts), SinglePrecGrid(_SinglePrecGrid), Linop_f(_Linop_f), ReliableUpdateFreq(_ReliableUpdateFreq),
+    MaxIterations(20000)
   {
     verbose=1;
     IterationsToCompleteShift.resize(_shifts.order);
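Note: with this change the constructor argument caps only the multishift stage (MaxIterationsMshift), while the inherited MaxIterations is pinned to 20000. A minimal standalone sketch of the two-cap pattern (plain C++, not Grid code; names mirror the hunk above):

    #include <cstdio>

    struct SolverSketch {
      int MaxIterationsMshift; // bounds the expensive multishift loop
      int MaxIterations;       // inherited cap, now pinned to 20000
      SolverSketch(int maxit) : MaxIterationsMshift(maxit), MaxIterations(20000) {}
    };

    int main() {
      SolverSketch s(50000); // maxit flows only into the multishift cap
      std::printf("mshift cap %d, base cap %d\n", s.MaxIterationsMshift, s.MaxIterations);
    }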
@@ -244,7 +245,7 @@ public:
   // Iteration loop
   int k;
 
-  for (k=1;k<=MaxIterations;k++){
+  for (k=1;k<=MaxIterationsMshift;k++){
 
     a = c /cp;
     AXPYTimer.Start();
@@ -350,12 +351,17 @@ public:
       }
     }
 
-    if ( all_converged ){
+    if ( all_converged || k == MaxIterationsMshift-1){
 
       SolverTimer.Stop();
-      std::cout<<GridLogMessage<< "ConjugateGradientMultiShiftMixedPrec: All shifts have converged iteration "<<k<<std::endl;
-      std::cout<<GridLogMessage<< "ConjugateGradientMultiShiftMixedPrec: Checking solutions"<<std::endl;
+
+      if ( all_converged ){
+        std::cout<<GridLogMessage<< "ConjugateGradientMultiShiftMixedPrec: All shifts have converged iteration "<<k<<std::endl;
+        std::cout<<GridLogMessage<< "ConjugateGradientMultiShiftMixedPrec: Checking solutions"<<std::endl;
+      } else {
+        std::cout<<GridLogMessage<< "ConjugateGradientMultiShiftMixedPrec: Not all shifts have converged iteration "<<k<<std::endl;
+      }
 
       // Check answers
       for(int s=0; s < nshift; s++) {
        Linop_d.HermOpAndNorm(psi_d[s],mmp_d,d,qq);
@@ -396,12 +402,10 @@ public:
 
        return;
       }
 
 
     }
-    // ugly hack
     std::cout<<GridLogMessage<<"CG multi shift did not converge"<<std::endl;
-    //    assert(0);
+    assert(0);
   }
 
 };
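Note: the convergence block now also runs on the final iteration, so a stalled solve reports its status and still checks the true residual for every shift before the assert(0) fires. A compilable miniature of the control flow (illustrative only, not the Grid class):

    #include <cassert>
    #include <iostream>

    int main() {
      const int  MaxIterationsMshift = 8;
      const bool all_converged = false; // pretend the shifts never converge
      for (int k = 1; k <= MaxIterationsMshift; k++) {
        // ... CG updates would happen here ...
        if (all_converged || k == MaxIterationsMshift-1) {
          std::cout << (all_converged ? "all" : "not all")
                    << " shifts converged at iteration " << k << "\n";
          // per-shift residual checks run here; a converged solve returns
          if (all_converged) return 0;
        }
      }
      assert(0); // failure to converge is now fatal rather than silent
    }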
@@ -29,6 +29,7 @@ Author: Christoph Lehner <christoph@lhnr.de>
 
 #include <Grid/GridCore.h>
 #include <pwd.h>
+#include <syscall.h>
 
 #ifdef GRID_CUDA
 #include <cuda_runtime_api.h>
@@ -484,24 +484,26 @@ public:
 
     int dag = compress.dag;
     int face_idx=0;
+#define vet_same_node(a,b) \
+    { auto tmp = b; }
     if ( dag ) {
-      assert(this->same_node[Xp]==this->HaloGatherDir(source,XpCompress,Xp,face_idx));
-      assert(this->same_node[Yp]==this->HaloGatherDir(source,YpCompress,Yp,face_idx));
-      assert(this->same_node[Zp]==this->HaloGatherDir(source,ZpCompress,Zp,face_idx));
-      assert(this->same_node[Tp]==this->HaloGatherDir(source,TpCompress,Tp,face_idx));
-      assert(this->same_node[Xm]==this->HaloGatherDir(source,XmCompress,Xm,face_idx));
-      assert(this->same_node[Ym]==this->HaloGatherDir(source,YmCompress,Ym,face_idx));
-      assert(this->same_node[Zm]==this->HaloGatherDir(source,ZmCompress,Zm,face_idx));
-      assert(this->same_node[Tm]==this->HaloGatherDir(source,TmCompress,Tm,face_idx));
+      vet_same_node(this->same_node[Xp],this->HaloGatherDir(source,XpCompress,Xp,face_idx));
+      vet_same_node(this->same_node[Yp],this->HaloGatherDir(source,YpCompress,Yp,face_idx));
+      vet_same_node(this->same_node[Zp],this->HaloGatherDir(source,ZpCompress,Zp,face_idx));
+      vet_same_node(this->same_node[Tp],this->HaloGatherDir(source,TpCompress,Tp,face_idx));
+      vet_same_node(this->same_node[Xm],this->HaloGatherDir(source,XmCompress,Xm,face_idx));
+      vet_same_node(this->same_node[Ym],this->HaloGatherDir(source,YmCompress,Ym,face_idx));
+      vet_same_node(this->same_node[Zm],this->HaloGatherDir(source,ZmCompress,Zm,face_idx));
+      vet_same_node(this->same_node[Tm],this->HaloGatherDir(source,TmCompress,Tm,face_idx));
     } else {
-      assert(this->same_node[Xp]==this->HaloGatherDir(source,XmCompress,Xp,face_idx));
-      assert(this->same_node[Yp]==this->HaloGatherDir(source,YmCompress,Yp,face_idx));
-      assert(this->same_node[Zp]==this->HaloGatherDir(source,ZmCompress,Zp,face_idx));
-      assert(this->same_node[Tp]==this->HaloGatherDir(source,TmCompress,Tp,face_idx));
-      assert(this->same_node[Xm]==this->HaloGatherDir(source,XpCompress,Xm,face_idx));
-      assert(this->same_node[Ym]==this->HaloGatherDir(source,YpCompress,Ym,face_idx));
-      assert(this->same_node[Zm]==this->HaloGatherDir(source,ZpCompress,Zm,face_idx));
-      assert(this->same_node[Tm]==this->HaloGatherDir(source,TpCompress,Tm,face_idx));
+      vet_same_node(this->same_node[Xp],this->HaloGatherDir(source,XmCompress,Xp,face_idx));
+      vet_same_node(this->same_node[Yp],this->HaloGatherDir(source,YmCompress,Yp,face_idx));
+      vet_same_node(this->same_node[Zp],this->HaloGatherDir(source,ZmCompress,Zp,face_idx));
+      vet_same_node(this->same_node[Tp],this->HaloGatherDir(source,TmCompress,Tp,face_idx));
+      vet_same_node(this->same_node[Xm],this->HaloGatherDir(source,XpCompress,Xm,face_idx));
+      vet_same_node(this->same_node[Ym],this->HaloGatherDir(source,YpCompress,Ym,face_idx));
+      vet_same_node(this->same_node[Zm],this->HaloGatherDir(source,ZpCompress,Zm,face_idx));
+      vet_same_node(this->same_node[Tm],this->HaloGatherDir(source,TpCompress,Tm,face_idx));
     }
     this->face_table_computed=1;
     assert(this->u_comm_offset==this->_unified_buffer_size);
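Note: vet_same_node keeps evaluating its second argument purely for the side effects (each HaloGatherDir call must still run and advance face_idx) while dropping the old same-node assertion. A standalone illustration of the idiom (stand-in functions, not the Grid API):

    #include <iostream>

    #define vet_same_node(a,b) { auto tmp = b; (void)tmp; (void)(a); }

    static int face_idx = 0;
    static int HaloGatherDir(int dir) { ++face_idx; return dir & 1; } // stand-in

    int main() {
      int same_node = 1;
      vet_same_node(same_node, HaloGatherDir(3)); // gather still executes
      vet_same_node(same_node, HaloGatherDir(4));
      std::cout << "face_idx advanced to " << face_idx << "\n"; // 2
    }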
@@ -52,13 +52,6 @@ public:
   typedef AcceleratorVector<int,STENCIL_MAX> StencilVector;
 public:
 
-#ifdef GRID_SYCL
-#define SYCL_HACK
-#endif
-#ifdef SYCL_HACK
-  static void HandDhopSiteSycl(StencilVector st_perm,StencilEntry *st_p, SiteDoubledGaugeField *U,SiteHalfSpinor *buf,
-                               int ss,int sU,const SiteSpinor *in, SiteSpinor *out);
-#endif
 
   static void DhopKernel(int Opt,StencilImpl &st, DoubledGaugeField &U, SiteHalfSpinor * buf,
                          int Ls, int Nsite, const FermionField &in, FermionField &out,
@@ -63,6 +63,10 @@ WilsonFermion5D<Impl>::WilsonFermion5D(GaugeField &_Umu,
   _tmp(&FiveDimRedBlackGrid),
   Dirichlet(0)
 {
+  Stencil.lo = &Lebesgue;
+  StencilEven.lo = &LebesgueEvenOdd;
+  StencilOdd.lo = &LebesgueEvenOdd;
+
   // some assertions
   assert(FiveDimGrid._ndimension==5);
   assert(FourDimGrid._ndimension==4);
@@ -60,6 +60,9 @@ WilsonFermion<Impl>::WilsonFermion(GaugeField &_Umu, GridCartesian &Fgrid,
   _tmp(&Hgrid),
   anisotropyCoeff(anis)
 {
+  Stencil.lo = &Lebesgue;
+  StencilEven.lo = &LebesgueEvenOdd;
+  StencilOdd.lo = &LebesgueEvenOdd;
   // Allocate the required comms buffer
   ImportGauge(_Umu);
   if (anisotropyCoeff.isAnisotropic){
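Note: both Wilson constructors now hang a LebesgueOrder pointer off each stencil, which is what lets ASM_CALL (below) remap the site loop. A stand-in sketch of the wiring and its kernel-side use (types are illustrative, not the Grid classes):

    #include <vector>

    struct LebesgueOrderSketch {           // stand-in for LebesgueOrder
      std::vector<int> perm;
      int Reorder(int ss) const { return perm[ss]; }
    };
    struct StencilSketch { LebesgueOrderSketch *lo = nullptr; };

    int main() {
      LebesgueOrderSketch lebesgue{{2,0,3,1}}; // cache-friendly visit order
      StencilSketch Stencil;
      Stencil.lo = &lebesgue;                  // as in the constructor hunks
      int ss = Stencil.lo->Reorder(0);         // kernel-side use, cf. ASM_CALL
      return ss == 2 ? 0 : 1;
    }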
@@ -433,11 +433,23 @@ void WilsonKernels<Impl>::DhopDirKernel( StencilImpl &st, DoubledGaugeField &U,S
   });
 
 #define ASM_CALL(A)                                               \
-  thread_for( ss, Nsite, {                                        \
+  thread_for( sss, Nsite, {                                       \
+    int ss = st.lo->Reorder(sss);                                 \
     int sU = ss;                                                  \
     int sF = ss*Ls;                                               \
     WilsonKernels<Impl>::A(st_v,U_v,buf,sF,sU,Ls,1,in_v,out_v);   \
   });
+#define ASM_CALL_SLICE(A)                                         \
+  auto grid = in.Grid() ;                                         \
+  int nt = grid->LocalDimensions()[4];                            \
+  int nxyz = Nsite/nt ;                                           \
+  for(int t=0;t<nt;t++){                                          \
+  thread_for( sss, nxyz, {                                        \
+    int ss = t*nxyz+sss;                                          \
+    int sU = ss;                                                  \
+    int sF = ss*Ls;                                               \
+    WilsonKernels<Impl>::A(st_v,U_v,buf,sF,sU,Ls,1,in_v,out_v);   \
+  });}
 
 template <class Impl>
 void WilsonKernels<Impl>::DhopKernel(int Opt,StencilImpl &st, DoubledGaugeField &U, SiteHalfSpinor * buf,
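Note: ASM_CALL now iterates a fresh index sss and maps it through st.lo->Reorder, while ASM_CALL_SLICE walks the local time slices explicitly. A self-contained check that the slice indexing (ss = t*nxyz+sss) still visits every site exactly once:

    #include <cassert>
    #include <vector>

    int main() {
      const int Nsite = 16, nt = 4;
      const int nxyz  = Nsite/nt;
      std::vector<int> visits(Nsite, 0);
      for (int t = 0; t < nt; t++) {
        for (int sss = 0; sss < nxyz; sss++) {
          int ss = t*nxyz + sss; // matches the macro body
          visits[ss]++;          // sU = ss, sF = ss*Ls in the kernel call
        }
      }
      for (int v : visits) assert(v == 1);
    }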
@@ -127,6 +127,8 @@ NAMESPACE_BEGIN(Grid);
       ApproxNegPowerAction.tolerances[i]  = action_tolerance[i];
       ApproxHalfPowerAction.tolerances[i] = action_tolerance[i];
       ApproxNegHalfPowerAction.tolerances[i]= action_tolerance[i];
+    }
+    for(int i=0;i<ApproxPowerMD.tolerances.size();i++){
       ApproxPowerMD.tolerances[i]    = md_tolerance[i];
       ApproxNegPowerMD.tolerances[i] = md_tolerance[i];
       ApproxHalfPowerMD.tolerances[i] = md_tolerance[i];
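Note: the fix closes the action-tolerance loop and opens a second loop sized by the MD approximation, so the two tolerance arrays can have different degrees. In miniature:

    #include <cstddef>
    #include <vector>

    int main() {
      std::vector<double> action_tol(18, 1.0e-8), md_tol(14, 1.0e-5);
      std::vector<double> approxAction(18), approxMD(14); // different degrees
      for (std::size_t i = 0; i < approxAction.size(); i++)
        approxAction[i] = action_tol[i];
      for (std::size_t i = 0; i < approxMD.size(); i++)   // separate bound
        approxMD[i] = md_tol[i];
    }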
@@ -29,6 +29,8 @@
 #ifndef QCD_PSEUDOFERMION_GENERAL_EVEN_ODD_RATIONAL_RATIO_MIXED_PREC_H
 #define QCD_PSEUDOFERMION_GENERAL_EVEN_ODD_RATIONAL_RATIO_MIXED_PREC_H
 
+#include <Grid/algorithms/iterative/ConjugateGradientMultiShiftCleanup.h>
+
 NAMESPACE_BEGIN(Grid);
 
 /////////////////////////////////////////////////////////////////////////////////////////////////////////////
@@ -58,7 +60,7 @@ NAMESPACE_BEGIN(Grid);
     //Allow derived classes to override the multishift CG
     virtual void multiShiftInverse(bool numerator, const MultiShiftFunction &approx, const Integer MaxIter, const FermionFieldD &in, FermionFieldD &out){
 #if 0
-      SchurDifferentiableOperator<ImplD> schurOp(numerator ? NumOp : DenOp);
+      SchurDifferentiableOperator<ImplD> schurOp(numerator ? NumOpD : DenOpD);
       ConjugateGradientMultiShift<FermionFieldD> msCG(MaxIter, approx);
       msCG(schurOp,in, out);
 #else
@@ -66,7 +68,8 @@ NAMESPACE_BEGIN(Grid);
       SchurDifferentiableOperator<ImplF> schurOpF(numerator ? NumOpF : DenOpF);
       FermionFieldD2 inD2(NumOpD2.FermionRedBlackGrid());
       FermionFieldD2 outD2(NumOpD2.FermionRedBlackGrid());
 
+      // Action better with higher precision?
       ConjugateGradientMultiShiftMixedPrec<FermionFieldD2, FermionFieldF> msCG(MaxIter, approx, NumOpF.FermionRedBlackGrid(), schurOpF, ReliableUpdateFreq);
       precisionChange(inD2,in);
       std::cout << "msCG single solve "<<norm2(inD2)<<" " <<norm2(in)<<std::endl;
@@ -76,12 +79,12 @@ NAMESPACE_BEGIN(Grid);
     }
     virtual void multiShiftInverse(bool numerator, const MultiShiftFunction &approx, const Integer MaxIter, const FermionFieldD &in, std::vector<FermionFieldD> &out_elems, FermionFieldD &out){
       SchurDifferentiableOperator<ImplD2> schurOpD2(numerator ? NumOpD2 : DenOpD2);
-      SchurDifferentiableOperator<ImplF> schurOpF(numerator ? NumOpF : DenOpF);
+      SchurDifferentiableOperator<ImplF> schurOpF (numerator ? NumOpF : DenOpF);
 
       FermionFieldD2 inD2(NumOpD2.FermionRedBlackGrid());
       FermionFieldD2 outD2(NumOpD2.FermionRedBlackGrid());
       std::vector<FermionFieldD2> out_elemsD2(out_elems.size(),NumOpD2.FermionRedBlackGrid());
-      ConjugateGradientMultiShiftMixedPrec<FermionFieldD2, FermionFieldF> msCG(MaxIter, approx, NumOpF.FermionRedBlackGrid(), schurOpF, ReliableUpdateFreq);
+      ConjugateGradientMultiShiftMixedPrecCleanup<FermionFieldD2, FermionFieldF> msCG(MaxIter, approx, NumOpF.FermionRedBlackGrid(), schurOpF, ReliableUpdateFreq);
       precisionChange(inD2,in);
       std::cout << "msCG in "<<norm2(inD2)<<" " <<norm2(in)<<std::endl;
       msCG(schurOpD2, inD2, out_elemsD2, outD2);
@@ -300,9 +300,9 @@ public:
 
 protected:
   GridBase * _grid;
 
 public:
   GridBase *Grid(void) const { return _grid; }
+  LebesgueOrder *lo;
 
   ////////////////////////////////////////////////////////////////////////
   // Needed to conveniently communicate gparity parameters into GPU memory
@@ -348,6 +348,7 @@ public:
   ////////////////////////////////////////
   // Stencil query
   ////////////////////////////////////////
+#ifdef SHM_FAST_PATH
   inline int SameNode(int point) {
 
     int dimension = this->_directions[point];
@@ -367,7 +368,40 @@ public:
     if ( displacement == 0 ) return 1;
     return 0;
   }
+#else
+  // fancy calculation for shm code
+  inline int SameNode(int point) {
+
+    int dimension    = this->_directions[point];
+    int displacement = this->_distances[point];
+
+    int pd = _grid->_processors[dimension];
+    int fd = _grid->_fdimensions[dimension];
+    int ld = _grid->_ldimensions[dimension];
+    int rd = _grid->_rdimensions[dimension];
+    int simd_layout = _grid->_simd_layout[dimension];
+    int comm_dim    = _grid->_processors[dimension] >1 ;
+
+    int recv_from_rank;
+    int xmit_to_rank;
+
+    if ( ! comm_dim ) return 1;
+
+    int nbr_proc;
+    if (displacement>0) nbr_proc = 1;
+    else                nbr_proc = pd-1;
+
+    // FIXME this logic needs to be sorted for three link term
+    //    assert( (displacement==1) || (displacement==-1));
+    // Present hack only works for >= 4^4 subvol per node
+    _grid->ShiftedRanks(dimension,nbr_proc,xmit_to_rank,recv_from_rank);
+
+    void *shm = (void *) _grid->ShmBufferTranslate(recv_from_rank,this->u_recv_buf_p);
+
+    if ( shm==NULL ) return 0;
+    return 1;
+  }
+#endif
   //////////////////////////////////////////
   // Comms packet queue for asynch thread
   // Use OpenMP Tasks for cleaner ???
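Note: the non-fast-path SameNode probes whether the neighbour's receive buffer is reachable through shared memory: no communication in that dimension means same node, and a NULL translation means the neighbour is off node. A stand-in sketch of the decision (ShmBufferTranslate is mocked, not the Grid call):

    #include <iostream>

    static void* ShmBufferTranslate(int rank, void* p) {
      return (rank % 2 == 0) ? p : nullptr; // mock: even ranks share our node
    }

    static int SameNode(int displacement, int pd, void* recv_buf) {
      if (pd <= 1) return 1;                        // dimension not split
      int nbr_proc = (displacement > 0) ? 1 : pd-1; // as in the hunk
      void* shm = ShmBufferTranslate(nbr_proc, recv_buf);
      return (shm == nullptr) ? 0 : 1;
    }

    int main() {
      int buf;
      std::cout << SameNode(+1, 1, &buf) << " "   // 1: no comms in this dim
                << SameNode(+1, 4, &buf) << "\n"; // 0: neighbour off node
    }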
@@ -1075,7 +1109,7 @@ public:
     int comms_recv = this->_comms_recv[point];
     int comms_partial_send = this->_comms_partial_send[point] ;
     int comms_partial_recv = this->_comms_partial_recv[point] ;
 
     assert(rhs.Grid()==_grid);
     //	  conformable(_grid,rhs.Grid());
 
@@ -1146,11 +1180,32 @@ public:
       recv_buf=this->u_recv_buf_p;
     }
 
+    // potential SHM fast path for intranode
+    int shm_send=0;
+    int shm_recv=0;
+#ifdef SHM_FAST_PATH
+    // Put directly in place if we can
+    send_buf = (cobj *)_grid->ShmBufferTranslate(xmit_to_rank,recv_buf);
+    if ( (send_buf==NULL) ) {
+      shm_send=0;
+      send_buf = this->u_send_buf_p;
+    } else {
+      shm_send=1;
+    }
+    void *test_ptr = _grid->ShmBufferTranslate(recv_from_rank,recv_buf);
+    if ( test_ptr != NULL ) shm_recv = 1;
+    //    static int printed;
+    //    if (!printed){
+    //      std::cout << " GATHER FAST PATH SHM "<<shm_send<< " "<<shm_recv<<std::endl;
+    //      printed = 1;
+    //    }
+#else
     ////////////////////////////////////////////////////////
     // Gather locally
     ////////////////////////////////////////////////////////
     send_buf = this->u_send_buf_p; // Gather locally, must send
     assert(send_buf!=NULL);
+#endif
 
     //    std::cout << " GatherPlaneSimple partial send "<< comms_partial_send<<std::endl;
     compressor::Gather_plane_simple(face_table[face_idx],rhs,send_buf,compress,comm_off,so,comms_partial_send);
@@ -1162,10 +1217,13 @@ public:
     // Build a list of things to do after we synchronise GPUs
     // Start comms now???
     ///////////////////////////////////////////////////////////
+    int do_send = (comms_send|comms_partial_send) && (!shm_send );
+    int do_recv = (comms_send|comms_partial_send) && (!shm_recv );
+
     AddPacket((void *)&send_buf[comm_off],
               (void *)&recv_buf[comm_off],
-              xmit_to_rank, comms_send|comms_partial_send,
-              recv_from_rank, comms_recv|comms_partial_recv,
+              xmit_to_rank, do_send,
+              recv_from_rank, do_recv,
               xbytes,rbytes);
   }
 
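Note: the gating added here drops the MPI packet for a leg whenever the SHM fast path already covers it; note that in this commit both do_send and do_recv are derived from the send flags. A tiny illustration of the gate:

    #include <iostream>

    int main() {
      int comms_send = 1, comms_partial_send = 0;
      int shm_send = 1; // fast path wrote straight into the neighbour's buffer
      int do_send = (comms_send|comms_partial_send) && (!shm_send);
      std::cout << "queue MPI send? " << do_send << "\n"; // 0: already delivered
    }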
@@ -1307,19 +1365,47 @@ public:
 
       int recv_from_rank;
       int xmit_to_rank;
+      int shm_send=0;
+      int shm_recv=0;
       _grid->ShiftedRanks(dimension,nbr_proc,xmit_to_rank,recv_from_rank);
+#ifdef SHM_FAST_PATH
+#warning STENCIL SHM FAST PATH SELECTED
+      // shm == receive pointer         if offnode
+      // shm == Translate[send pointer] if on node -- my view of his send pointer
+      cobj *shm = (cobj *) _grid->ShmBufferTranslate(recv_from_rank,sp);
+      if (shm==NULL) {
+        shm = rp;
+        // we found a packet that comes from MPI and contributes to this shift.
+        // is_same_node is only used in the WilsonStencil, and gets set for this point in the stencil.
+        // Kernel will add the exterior_terms except if is_same_node.
+        // leg of stencil
+        shm_recv=0;
+      } else {
+        shm_recv=1;
+      }
+      rpointers[i] = shm;
+      // Test send side
+      void *test_ptr = (void *) _grid->ShmBufferTranslate(xmit_to_rank,sp);
+      if ( test_ptr != NULL ) shm_send = 1;
+      //      static int printed;
+      //      if (!printed){
+      //        std::cout << " GATHERSIMD FAST PATH SHM "<<shm_send<< " "<<shm_recv<<std::endl;
+      //        printed = 1;
+      //      }
+#else
       rpointers[i] = rp;
+#endif
 
       int duplicate = CheckForDuplicate(dimension,sx,nbr_proc,(void *)rp,i,xbytes,rbytes,cbmask);
       if ( !duplicate ) {
        if ( (bytes != rbytes) && (rbytes!=0) ){
          acceleratorMemSet(rp,0,bytes); // Zero prefill comms buffer to zero
        }
+       int do_send = (comms_send|comms_partial_send) && (!shm_send );
+       int do_recv = (comms_send|comms_partial_send) && (!shm_recv );
        AddPacket((void *)sp,(void *)rp,
-                 xmit_to_rank,comms_send|comms_partial_send,
-                 recv_from_rank,comms_recv|comms_partial_recv,
+                 xmit_to_rank,do_send,
+                 recv_from_rank,do_send,
                  xbytes,rbytes);
       }
 
@@ -1329,7 +1415,7 @@ public:
 
       }
     }
+    // rpointer may be doing a remote read in the gather over SHM
     if ( comms_recv|comms_partial_recv ) {
       AddMerge(&this->u_recv_buf_p[comm_off],rpointers,reduced_buffer_size,permute_type,Mergers);
     }
@@ -248,17 +248,23 @@ inline int acceleratorIsCommunicable(void *ptr)
 //////////////////////////////////////////////
 // SyCL acceleration
 //////////////////////////////////////////////
-#ifdef GRID_SYCL
-NAMESPACE_END(Grid);
-#include <CL/sycl.hpp>
-#include <CL/sycl/usm.hpp>
 
+#ifdef GRID_SYCL
 #define GRID_SYCL_LEVEL_ZERO_IPC
 
-#ifdef GRID_SYCL_LEVEL_ZERO_IPC
+NAMESPACE_END(Grid);
+#if 0
+#include <CL/sycl.hpp>
+#include <CL/sycl/usm.hpp>
 #include <level_zero/ze_api.h>
 #include <CL/sycl/backend/level_zero.hpp>
+#else
+#include <sycl/CL/sycl.hpp>
+#include <sycl/usm.hpp>
+#include <level_zero/ze_api.h>
+#include <sycl/ext/oneapi/backend/level_zero.hpp>
 #endif
 
 NAMESPACE_BEGIN(Grid);
 
 extern cl::sycl::queue *theGridAccelerator;
@@ -232,31 +232,34 @@ int main(int argc, char **argv) {
   //  std::vector<Real> hasenbusch({ light_mass, 0.005, 0.0145, 0.045, 0.108, 0.25, 0.51 , pv_mass }); // Updated
   //  std::vector<Real> hasenbusch({ light_mass, 0.0145, 0.045, 0.108, 0.25, 0.51 , 0.75 , pv_mass });
 
-  OneFlavourRationalParams OFRp; // Up/down
-  OFRp.lo       = 4.0e-5;
+  int SP_iters=10000;
+
+  RationalActionParams OFRp; // Up/down
+  OFRp.lo       = 6.0e-5;
   OFRp.hi       = 90.0;
-  OFRp.MaxIter  = 60000;
-  OFRp.tolerance= 1.0e-5;
-  OFRp.mdtolerance= 1.0e-3;
+  OFRp.inv_pow  = 2;
+  OFRp.MaxIter  = SP_iters; // get most shifts by 2000, stop sharing space
+  OFRp.action_tolerance= 1.0e-8;
+  OFRp.action_degree   = 18;
+  OFRp.md_tolerance= 1.0e-5;
+  OFRp.md_degree   = 14;
   //  OFRp.degree   = 20; converges
   //  OFRp.degree   = 16;
-  OFRp.degree   = 18;
   OFRp.precision= 80;
   OFRp.BoundsCheckFreq=0;
   std::vector<RealD> ActionTolByPole({
-      1.0e-8,1.0e-8,1.0e-8,1.0e-8,
+      1.0e-7,1.0e-8,1.0e-8,1.0e-8,
       1.0e-8,1.0e-8,1.0e-8,1.0e-8,
       1.0e-8,1.0e-8,1.0e-8,1.0e-8,
       1.0e-8,1.0e-8,1.0e-8,1.0e-8,
       1.0e-8,1.0e-8
     });
   std::vector<RealD> MDTolByPole({
-      1.0e-5,5.0e-6,1.0e-6,1.0e-7, // soften convergence more more
+      1.6e-5,5.0e-6,1.0e-6,3.0e-7, // soften convergence more more
       //      1.0e-6,3.0e-7,1.0e-7,1.0e-7,
       //      3.0e-6,1.0e-6,1.0e-7,1.0e-7, // soften convergence
       1.0e-8,1.0e-8,1.0e-8,1.0e-8,
       1.0e-8,1.0e-8,1.0e-8,1.0e-8,
-      1.0e-8,1.0e-8,1.0e-8,1.0e-8,
       1.0e-8,1.0e-8
     });
 
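Note: RationalActionParams splits what OneFlavourRationalParams shared: the accept/reject (action) and MD-force approximations now carry independent tolerances and degrees. A stand-in struct showing the shape of the split (field names follow the hunk; the real type lives in Grid):

    #include <cstdio>

    struct RationalParamsSketch {   // stand-in for RationalActionParams
      double lo, hi;
      int    inv_pow;
      double action_tolerance, md_tolerance;
      int    action_degree,    md_degree;
    };

    int main() {
      RationalParamsSketch p{6.0e-5, 90.0, 2, 1.0e-8, 1.0e-5, 18, 14};
      std::printf("action: degree %d tol %g | md: degree %d tol %g\n",
                  p.action_degree, p.action_tolerance,
                  p.md_degree,     p.md_tolerance);
    }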
@@ -340,6 +343,7 @@ int main(int argc, char **argv) {
   ParamsDirF.dirichlet=Dirichlet;
   ParamsDir.partialDirichlet=1;
   ParamsDirF.partialDirichlet=1;
+  std::cout << GridLogMessage<< "Partial Dirichlet depth is "<<dwf_compressor_depth<<std::endl;
 
   //  double StoppingCondition = 1e-14;
   //  double MDStoppingCondition = 1e-9;
@@ -457,9 +461,9 @@ int main(int argc, char **argv) {
 
 #define MIXED_PRECISION
 #ifdef MIXED_PRECISION
-  std::vector<OneFlavourEvenOddRatioRationalMixedPrecPseudoFermionAction<FermionImplPolicy,FermionImplPolicyF,FermionImplPolicyD2> *> Bdys;
+  std::vector<GeneralEvenOddRatioRationalMixedPrecPseudoFermionAction<FermionImplPolicy,FermionImplPolicyF,FermionImplPolicyD2> *> Bdys;
 #else
-  std::vector<OneFlavourEvenOddRatioRationalPseudoFermionAction<FermionImplPolicy> *> Bdys;
+  std::vector<GeneralEvenOddRatioRationalPseudoFermionAction<FermionImplPolicy> *> Bdys;
 #endif
 
   typedef SchurDiagMooeeOperator<FermionActionF,FermionFieldF> LinearOperatorF;
@@ -544,19 +548,19 @@ int main(int argc, char **argv) {
        ParamsNumD2.partialDirichlet = ParamsNum.partialDirichlet;
        NumeratorsD2.push_back (new FermionActionD2(UD2,*FGridF,*FrbGridF,*GridPtrF,*GridRBPtrF,light_num[h],M5,b,c, ParamsNumD2));
 
-       Bdys.push_back( new OneFlavourEvenOddRatioRationalMixedPrecPseudoFermionAction<FermionImplPolicy,FermionImplPolicyF,FermionImplPolicyD2>(
+       Bdys.push_back( new GeneralEvenOddRatioRationalMixedPrecPseudoFermionAction<FermionImplPolicy,FermionImplPolicyF,FermionImplPolicyD2>(
                         *Numerators[h],*Denominators[h],
                         *NumeratorsF[h],*DenominatorsF[h],
                         *NumeratorsD2[h],*DenominatorsD2[h],
-                        OFRp, 400) );
-       Bdys.push_back( new OneFlavourEvenOddRatioRationalMixedPrecPseudoFermionAction<FermionImplPolicy,FermionImplPolicyF,FermionImplPolicyD2>(
+                        OFRp, SP_iters) );
+       Bdys.push_back( new GeneralEvenOddRatioRationalMixedPrecPseudoFermionAction<FermionImplPolicy,FermionImplPolicyF,FermionImplPolicyD2>(
                         *Numerators[h],*Denominators[h],
                         *NumeratorsF[h],*DenominatorsF[h],
                         *NumeratorsD2[h],*DenominatorsD2[h],
-                        OFRp, 400) );
+                        OFRp, SP_iters) );
 #else
-       Bdys.push_back( new OneFlavourEvenOddRatioRationalPseudoFermionAction<FermionImplPolicy>(*Numerators[h],*Denominators[h],OFRp));
-       Bdys.push_back( new OneFlavourEvenOddRatioRationalPseudoFermionAction<FermionImplPolicy>(*Numerators[h],*Denominators[h],OFRp));
+       Bdys.push_back( new GeneralEvenOddRatioRationalPseudoFermionAction<FermionImplPolicy>(*Numerators[h],*Denominators[h],OFRp));
+       Bdys.push_back( new GeneralEvenOddRatioRationalPseudoFermionAction<FermionImplPolicy>(*Numerators[h],*Denominators[h],OFRp));
 #endif
       }
     }
@@ -183,7 +183,7 @@ int main(int argc, char **argv) {
   //  4/2 => 0.6 dH
   //  3/3 => 0.8 dH .. depth 3, slower
   //MD.MDsteps =  4;
-  MD.MDsteps =  3;
+  MD.MDsteps =  12;
   MD.trajL   = 0.5;
 
   HMCparameters HMCparams;
@@ -200,8 +200,8 @@ int main(int argc, char **argv) {
   TheHMC.Resources.AddFourDimGrid("gauge"); // use default simd lanes decomposition
 
   CheckpointerParameters CPparams;
-  CPparams.config_prefix = "ckpoint_DDHMC_lat";
-  CPparams.rng_prefix    = "ckpoint_DDHMC_rng";
+  CPparams.config_prefix = "ckpoint_HMC_lat";
+  CPparams.rng_prefix    = "ckpoint_HMC_rng";
   CPparams.saveInterval  = 1;
   CPparams.format        = "IEEE64BIG";
   TheHMC.Resources.LoadNerscCheckpointer(CPparams);
@@ -228,7 +228,7 @@ int main(int argc, char **argv) {
   Real pv_mass = 1.0;
   //  std::vector<Real> hasenbusch({ 0.01, 0.045, 0.108, 0.25, 0.51 , pv_mass });
   //  std::vector<Real> hasenbusch({ light_mass, 0.01, 0.045, 0.108, 0.25, 0.51 , pv_mass });
-  std::vector<Real> hasenbusch({ light_mass, 0.005, 0.0145, 0.045, 0.108, 0.25, 0.51 , pv_mass }); // Updated
+  std::vector<Real> hasenbusch({ 0.005, 0.0145, 0.045, 0.108, 0.25, 0.51 , pv_mass }); // Updated
   //  std::vector<Real> hasenbusch({ light_mass, 0.0145, 0.045, 0.108, 0.25, 0.51 , 0.75 , pv_mass });
 
   auto GridPtr = TheHMC.Resources.GetCartesian();
@@ -299,8 +299,8 @@ int main(int argc, char **argv) {
   ////////////////////////////////////
   // Collect actions
   ////////////////////////////////////
-  ActionLevel<HMCWrapper::Field> Level1(1);
-  ActionLevel<HMCWrapper::Field> Level2(3);
+  //  ActionLevel<HMCWrapper::Field> Level1(1);
+  ActionLevel<HMCWrapper::Field> Level2(1);
   ActionLevel<HMCWrapper::Field> Level3(15);
 
   ////////////////////////////////////
@@ -369,7 +369,7 @@ int main(int argc, char **argv) {
                     ActionCGL, ActionCGR,
                     DerivativeCGL, DerivativeCGR,
                     SFRp, true);
-  //  Level2.push_back(&EOFA);
+  Level2.push_back(&EOFA);
 
   ////////////////////////////////////
   // up down action
@@ -477,7 +477,7 @@ int main(int argc, char **argv) {
   // Gauge action
   /////////////////////////////////////////////////////////////
   Level3.push_back(&GaugeAction);
-  TheHMC.TheAction.push_back(Level1);
+  //  TheHMC.TheAction.push_back(Level1);
   TheHMC.TheAction.push_back(Level2);
   TheHMC.TheAction.push_back(Level3);
   std::cout << GridLogMessage << " Action complete "<< std::endl;
@@ -646,6 +646,14 @@ case ${ac_SHM_FORCE_MPI} in
   ;;
   *) ;;
 esac
+############### force MPI in SMP
+AC_ARG_ENABLE([shm-fast-path],[AS_HELP_STRING([--enable-shm-fast-path],[Allow kernels to remote copy over intranode])],[ac_SHM_FAST_PATH=${enable_shm_fast_path}],[ac_SHM_FAST_PATH=no])
+case ${ac_SHM_FAST_PATH} in
+  yes)
+    AC_DEFINE([SHM_FAST_PATH],[1],[SHM_FAST_PATH] )
+  ;;
+  *) ;;
+esac
 
 ############### communication type selection
 AC_ARG_ENABLE([comms-threads],[AS_HELP_STRING([--enable-comms-threads | --disable-comms-threads],[Use multiple threads in MPI calls])],[ac_COMMS_THREADS=${enable_comms_threads}],[ac_COMMS_THREADS=yes])
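Note: the new configure switch only toggles a preprocessor symbol; passing --enable-shm-fast-path defines SHM_FAST_PATH=1, which selects the SHM branches guarded in the stencil hunks above. A sketch of the compile-time effect:

    #include <iostream>

    int main() {
    #ifdef SHM_FAST_PATH
      std::cout << "kernels may remote-copy over intranode shared memory\n";
    #else
      std::cout << "default path: gather locally, then send over MPI\n";
    #endif
    }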
@@ -23,12 +23,7 @@ export SYCL_PI_LEVEL_ZERO_DEVICE_SCOPE_EVENTS=1
 export SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1
 export SYCL_PI_LEVEL_ZERO_USE_COPY_ENGINE=0
 
-for i in 0
-do
-mpiexec -launcher ssh -n 2 -host localhost ./wrap4gpu.sh ./Benchmark_dwf_fp32 --mpi 1.1.1.2 --grid 32.32.32.64 --accelerator-threads $NT --shm-mpi 1 --device-mem 32768
-mpiexec -launcher ssh -n 2 -host localhost ./wrap4gpu.sh ./Benchmark_dwf_fp32 --mpi 2.1.1.1 --grid 64.32.32.32 --accelerator-threads $NT --shm-mpi 1 --device-mem 32768
-done
-#mpiexec -launcher ssh -n 2 -host localhost ./wrap4gpu.sh ./Benchmark_halo --mpi 1.1.1.2 --grid 32.32.32.64 --accelerator-threads $NT --shm-mpi 1 > halo.2tile.1x2.log
-#mpiexec -launcher ssh -n 2 -host localhost ./wrap4gpu.sh ./Benchmark_halo --mpi 2.1.1.1 --grid 64.32.32.32 --accelerator-threads $NT --shm-mpi 1 > halo.2tile.2x1.log
+#mpiexec -launcher ssh -n 1 -host localhost ./wrap.sh ./Benchmark_dwf_fp32 --mpi 1.1.1.1 --grid 32.32.32.32 --accelerator-threads $NT --comms-sequential --shm-mpi 0 > 1tile.log
 
+mpiexec -launcher ssh -n 2 -host localhost ./wrap.sh ./Benchmark_dwf_fp32 --mpi 2.1.1.1 --grid 64.32.32.32 --accelerator-threads $NT --comms-sequential --shm-mpi 0
 
@@ -5,10 +5,10 @@ export ZE_AFFINITY_MASK=0.$MPI_LOCALRANKID
 echo Ranke $MPI_LOCALRANKID ZE_AFFINITY_MASK is $ZE_AFFINITY_MASK
 
 
-if [ $MPI_LOCALRANKID = "0" ]
-then
-#  ~psteinbr/build_pti/ze_tracer -h $@
-  onetrace --chrome-device-timeline $@
-else
+#if [ $MPI_LOCALRANKID = "0" ]
+#then
+#  ~psteinbr/build_pti/ze_tracer -c $@
+#  onetrace --chrome-kernel-timeline $@
+#else
   $@
-fi
+#fi
@@ -1,4 +1,4 @@
-INSTALL=/nfs/site/home/azusayax/install
+INSTALL=/nfs/site/home/paboylx/prereqs/
 ../../configure \
 	--enable-simd=GPU \
 	--enable-gen-simd-width=64 \
@@ -1,5 +1,6 @@
 export https_proxy=http://proxy-chain.intel.com:911
-export LD_LIBRARY_PATH=/nfs/site/home/azusayax/install/lib:$LD_LIBRARY_PATH
+#export LD_LIBRARY_PATH=/nfs/site/home/azusayax/install/lib:$LD_LIBRARY_PATH
+export LD_LIBRARY_PATH=$HOME/prereqs/lib/:$LD_LIBRARY_PATH
 
 module load intel-release
 source /opt/intel/oneapi/PVC_setup.sh
tests/core/Test_fft_matt.cc (new file, 160 lines)
@@ -0,0 +1,160 @@
+/*************************************************************************************
+
+    Grid physics library, www.github.com/paboyle/Grid
+
+    Source file: ./tests/Test_cshift.cc
+
+    Copyright (C) 2015
+
+    Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk>
+    Author: Peter Boyle <paboyle@ph.ed.ac.uk>
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License along
+    with this program; if not, write to the Free Software Foundation, Inc.,
+    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+
+    See the full license in the file "LICENSE" in the top level distribution directory
+*************************************************************************************/
+/*  END LEGAL */
+#include <Grid/Grid.h>
+
+using namespace Grid;
+
+int main (int argc, char ** argv)
+{
+  Grid_init(&argc,&argv);
+
+  int threads = GridThread::GetThreads();
+  std::cout<<GridLogMessage << "Grid is setup to use "<<threads<<" threads"<<std::endl;
+
+  Coordinate latt_size   = GridDefaultLatt();
+  Coordinate simd_layout = GridDefaultSimd(Nd,vComplexD::Nsimd());
+  Coordinate mpi_layout  = GridDefaultMpi();
+
+  int vol = 1;
+  for(int d=0;d<latt_size.size();d++){
+    vol = vol * latt_size[d];
+  }
+  GridCartesian         GRID(latt_size,simd_layout,mpi_layout);
+  GridRedBlackCartesian RBGRID(&GRID);
+
+  LatticeComplexD    one(&GRID);
+  LatticeComplexD     zz(&GRID);
+  LatticeComplexD      C(&GRID);
+  LatticeComplexD Ctilde(&GRID);
+  LatticeComplexD Cref  (&GRID);
+  LatticeComplexD Csav  (&GRID);
+  LatticeComplexD   coor(&GRID);
+
+  LatticeSpinMatrixD    S(&GRID);
+  LatticeSpinMatrixD    Stilde(&GRID);
+
+  Coordinate p({1,3,2,3});
+
+  one = ComplexD(1.0,0.0);
+  zz  = ComplexD(0.0,0.0);
+
+  ComplexD ci(0.0,1.0);
+
+  std::vector<int> seeds({1,2,3,4});
+  GridSerialRNG   sRNG;  sRNG.SeedFixedIntegers(seeds); // naughty seeding
+  GridParallelRNG pRNG(&GRID);
+  pRNG.SeedFixedIntegers(seeds);
+
+  LatticeGaugeFieldD Umu(&GRID);
+
+  SU<Nc>::ColdConfiguration(pRNG,Umu); // Unit gauge
+
+  ////////////////////////////////////////////////////
+  // Wilson test
+  ////////////////////////////////////////////////////
+  {
+    LatticeFermionD src(&GRID); gaussian(pRNG,src);
+    LatticeFermionD tmp(&GRID);
+    LatticeFermionD ref(&GRID);
+
+    RealD mass=0.01;
+    WilsonFermionD Dw(Umu,GRID,RBGRID,mass);
+
+    Dw.M(src,tmp);
+
+    std::cout << "Dw src = " <<norm2(src)<<std::endl;
+    std::cout << "Dw tmp = " <<norm2(tmp)<<std::endl;
+
+    Dw.FreePropagator(tmp,ref,mass);
+
+    std::cout << "Dw ref = " <<norm2(ref)<<std::endl;
+
+    ref = ref - src;
+
+    std::cout << "Dw ref-src = " <<norm2(ref)<<std::endl;
+  }
+
+
+  ////////////////////////////////////////////////////
+  // Wilson prop
+  ////////////////////////////////////////////////////
+  {
+    std::cout<<"****************************************"<<std::endl;
+    std::cout << "Wilson Mom space 4d propagator \n";
+    std::cout<<"****************************************"<<std::endl;
+
+    LatticeFermionD src(&GRID); gaussian(pRNG,src);
+    LatticeFermionD tmp(&GRID);
+    LatticeFermionD ref(&GRID);
+    LatticeFermionD diff(&GRID);
+
+    src=Zero();
+    Coordinate point(4,0); // 0,0,0,0
+    SpinColourVectorD ferm;
+    ferm=Zero();
+    ferm()(0)(0) = ComplexD(1.0);
+    pokeSite(ferm,src,point);
+
+    RealD mass=0.01;
+    WilsonFermionD Dw(Umu,GRID,RBGRID,mass);
+
+    // Momentum space prop
+    std::cout << " Solving by FFT and Feynman rules" <<std::endl;
+    Dw.FreePropagator(src,ref,mass) ;
+
+    Gamma G5(Gamma::Algebra::Gamma5);
+
+    LatticeFermionD result(&GRID);
+    const int sdir=0;
+
+    ////////////////////////////////////////////////////////////////////////
+    // Conjugate gradient on normal equations system
+    ////////////////////////////////////////////////////////////////////////
+    std::cout << " Solving by Conjugate Gradient (CGNE)" <<std::endl;
+    Dw.Mdag(src,tmp);
+    src=tmp;
+    MdagMLinearOperator<WilsonFermionD,LatticeFermionD> HermOp(Dw);
+    ConjugateGradient<LatticeFermionD> CG(1.0e-10,10000);
+    CG(HermOp,src,result);
+
+    ////////////////////////////////////////////////////////////////////////
+    std::cout << " Taking difference" <<std::endl;
+    std::cout << "Dw result "<<norm2(result)<<std::endl;
+    std::cout << "Dw ref    "<<norm2(ref)<<std::endl;
+
+    diff = ref - result;
+    std::cout << "result - ref     "<<norm2(diff)<<std::endl;
+
+    DumpSliceNorm("Slice Norm Solution ",result,Nd-1);
+  }
+
+
+  Grid_finalize();
+}
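Note: the new test cross-checks two routes to the free Wilson propagator: FFT/Feynman rules versus a CGNE solve of the normal equations, then prints norm2(result - ref). The same check in miniature, for a scalar "operator" so it stays self-contained:

    #include <cassert>
    #include <cmath>

    int main() {
      const double M = 2.0, src = 1.0;
      const double ref = src / M;      // exact inverse ("momentum space")
      const double rhs = M * src;      // Mdag src (M real, so Mdag == M)
      const double psi = rhs / (M*M);  // CGNE: solve MdagM psi = Mdag src
      assert(std::fabs(psi - ref) < 1e-12); // result - ref ~ 0
    }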