1
0
mirror of https://github.com/paboyle/Grid.git synced 2025-06-23 02:02:02 +01:00

Compare commits

...

9 Commits

13 changed files with 36 additions and 75 deletions

View File

@ -519,7 +519,6 @@ void MemoryManager::Audit(std::string s)
uint64_t LruBytes1=0;
uint64_t LruBytes2=0;
uint64_t LruCnt=0;
uint64_t LockedBytes=0;
std::cout << " Memory Manager::Audit() from "<<s<<std::endl;
for(auto it=LRU.begin();it!=LRU.end();it++){

View File

@ -170,10 +170,7 @@ void GlobalSharedMemory::OptimalCommunicator(const Coordinate &processors,Grid_M
if(nscan==3 && HPEhypercube ) OptimalCommunicatorHypercube(processors,optimal_comm,SHM);
else OptimalCommunicatorSharedMemory(processors,optimal_comm,SHM);
}
static inline int divides(int a,int b)
{
return ( b == ( (b/a)*a ) );
}
void GlobalSharedMemory::OptimalCommunicatorHypercube(const Coordinate &processors,Grid_MPI_Comm & optimal_comm,Coordinate &SHM)
{
////////////////////////////////////////////////////////////////

View File

@ -38,19 +38,15 @@ NAMESPACE_BEGIN(Grid);
// cf. GeneralEvenOddRational.h for details
/////////////////////////////////////////////////////////////////////////////////////////////////////////////
template<class ImplD, class ImplF, class ImplD2>
template<class ImplD, class ImplF>
class GeneralEvenOddRatioRationalMixedPrecPseudoFermionAction : public GeneralEvenOddRatioRationalPseudoFermionAction<ImplD> {
private:
typedef typename ImplD2::FermionField FermionFieldD2;
typedef typename ImplD::FermionField FermionFieldD;
typedef typename ImplF::FermionField FermionFieldF;
FermionOperator<ImplD> & NumOpD;
FermionOperator<ImplD> & DenOpD;
FermionOperator<ImplD2> & NumOpD2;
FermionOperator<ImplD2> & DenOpD2;
FermionOperator<ImplF> & NumOpF;
FermionOperator<ImplF> & DenOpF;
@ -64,40 +60,31 @@ NAMESPACE_BEGIN(Grid);
ConjugateGradientMultiShift<FermionFieldD> msCG(MaxIter, approx);
msCG(schurOp,in, out);
#else
SchurDifferentiableOperator<ImplD2> schurOpD2(numerator ? NumOpD2 : DenOpD2);
SchurDifferentiableOperator<ImplD> schurOpD(numerator ? NumOpD : DenOpD);
SchurDifferentiableOperator<ImplF> schurOpF(numerator ? NumOpF : DenOpF);
FermionFieldD2 inD2(NumOpD2.FermionRedBlackGrid());
FermionFieldD2 outD2(NumOpD2.FermionRedBlackGrid());
FermionFieldD inD(NumOpD.FermionRedBlackGrid());
FermionFieldD outD(NumOpD.FermionRedBlackGrid());
// Action better with higher precision?
ConjugateGradientMultiShiftMixedPrec<FermionFieldD2, FermionFieldF> msCG(MaxIter, approx, NumOpF.FermionRedBlackGrid(), schurOpF, ReliableUpdateFreq);
precisionChange(inD2,in);
std::cout << "msCG single solve "<<norm2(inD2)<<" " <<norm2(in)<<std::endl;
msCG(schurOpD2, inD2, outD2);
precisionChange(out,outD2);
ConjugateGradientMultiShiftMixedPrec<FermionFieldD, FermionFieldF> msCG(MaxIter, approx, NumOpF.FermionRedBlackGrid(), schurOpF, ReliableUpdateFreq);
msCG(schurOpD, in, out);
#endif
}
virtual void multiShiftInverse(bool numerator, const MultiShiftFunction &approx, const Integer MaxIter, const FermionFieldD &in, std::vector<FermionFieldD> &out_elems, FermionFieldD &out){
SchurDifferentiableOperator<ImplD2> schurOpD2(numerator ? NumOpD2 : DenOpD2);
SchurDifferentiableOperator<ImplD> schurOpD(numerator ? NumOpD : DenOpD);
SchurDifferentiableOperator<ImplF> schurOpF (numerator ? NumOpF : DenOpF);
FermionFieldD2 inD2(NumOpD2.FermionRedBlackGrid());
FermionFieldD2 outD2(NumOpD2.FermionRedBlackGrid());
std::vector<FermionFieldD2> out_elemsD2(out_elems.size(),NumOpD2.FermionRedBlackGrid());
ConjugateGradientMultiShiftMixedPrecCleanup<FermionFieldD2, FermionFieldF> msCG(MaxIter, approx, NumOpF.FermionRedBlackGrid(), schurOpF, ReliableUpdateFreq);
precisionChange(inD2,in);
std::cout << "msCG in "<<norm2(inD2)<<" " <<norm2(in)<<std::endl;
msCG(schurOpD2, inD2, out_elemsD2, outD2);
precisionChange(out,outD2);
for(int i=0;i<out_elems.size();i++){
precisionChange(out_elems[i],out_elemsD2[i]);
}
FermionFieldD inD(NumOpD.FermionRedBlackGrid());
FermionFieldD outD(NumOpD.FermionRedBlackGrid());
std::vector<FermionFieldD> out_elemsD(out_elems.size(),NumOpD.FermionRedBlackGrid());
ConjugateGradientMultiShiftMixedPrecCleanup<FermionFieldD, FermionFieldF> msCG(MaxIter, approx, NumOpF.FermionRedBlackGrid(), schurOpF, ReliableUpdateFreq);
msCG(schurOpD, in, out_elems, out);
}
//Allow derived classes to override the gauge import
virtual void ImportGauge(const typename ImplD::GaugeField &Ud){
typename ImplF::GaugeField Uf(NumOpF.GaugeGrid());
typename ImplD2::GaugeField Ud2(NumOpD2.GaugeGrid());
typename ImplD::GaugeField Ud2(NumOpD.GaugeGrid());
precisionChange(Uf, Ud);
precisionChange(Ud2, Ud);
@ -109,20 +96,18 @@ NAMESPACE_BEGIN(Grid);
NumOpF.ImportGauge(Uf);
DenOpF.ImportGauge(Uf);
NumOpD2.ImportGauge(Ud2);
DenOpD2.ImportGauge(Ud2);
NumOpD.ImportGauge(Ud2);
DenOpD.ImportGauge(Ud2);
}
public:
GeneralEvenOddRatioRationalMixedPrecPseudoFermionAction(FermionOperator<ImplD> &_NumOpD, FermionOperator<ImplD> &_DenOpD,
FermionOperator<ImplF> &_NumOpF, FermionOperator<ImplF> &_DenOpF,
FermionOperator<ImplD2> &_NumOpD2, FermionOperator<ImplD2> &_DenOpD2,
const RationalActionParams & p, Integer _ReliableUpdateFreq
) : GeneralEvenOddRatioRationalPseudoFermionAction<ImplD>(_NumOpD, _DenOpD, p),
ReliableUpdateFreq(_ReliableUpdateFreq),
NumOpD(_NumOpD), DenOpD(_DenOpD),
NumOpF(_NumOpF), DenOpF(_DenOpF),
NumOpD2(_NumOpD2), DenOpD2(_DenOpD2)
NumOpF(_NumOpF), DenOpF(_DenOpF)
{}
virtual std::string action_name(){return "GeneralEvenOddRatioRationalMixedPrecPseudoFermionAction";}

View File

@ -67,9 +67,9 @@ NAMESPACE_BEGIN(Grid);
virtual std::string action_name(){return "OneFlavourEvenOddRatioRationalPseudoFermionAction";}
};
template<class Impl,class ImplF,class ImplD2>
template<class Impl,class ImplF>
class OneFlavourEvenOddRatioRationalMixedPrecPseudoFermionAction
: public GeneralEvenOddRatioRationalMixedPrecPseudoFermionAction<Impl,ImplF,ImplD2> {
: public GeneralEvenOddRatioRationalMixedPrecPseudoFermionAction<Impl,ImplF> {
public:
typedef OneFlavourRationalParams Params;
private:
@ -91,11 +91,9 @@ NAMESPACE_BEGIN(Grid);
FermionOperator<Impl> &_DenOp,
FermionOperator<ImplF> &_NumOpF,
FermionOperator<ImplF> &_DenOpF,
FermionOperator<ImplD2> &_NumOpD2,
FermionOperator<ImplD2> &_DenOpD2,
const Params & p, Integer ReliableUpdateFreq
) :
GeneralEvenOddRatioRationalMixedPrecPseudoFermionAction<Impl,ImplF,ImplD2>(_NumOp, _DenOp,_NumOpF, _DenOpF,_NumOpD2, _DenOpD2, transcribe(p),ReliableUpdateFreq){}
GeneralEvenOddRatioRationalMixedPrecPseudoFermionAction<Impl,ImplF>(_NumOp, _DenOp,_NumOpF, _DenOpF, transcribe(p),ReliableUpdateFreq){}
virtual std::string action_name(){return "OneFlavourEvenOddRatioRationalPseudoFermionAction";}
};

View File

@ -320,7 +320,7 @@ struct Conj{
struct TimesMinusI{
//Complex single
inline float32x4_t operator()(float32x4_t in, float32x4_t ret){
inline float32x4_t operator()(float32x4_t in){
// ar ai br bi -> ai -ar ai -br
float32x4_t r0, r1;
r0 = vnegq_f32(in); // -ar -ai -br -bi
@ -328,7 +328,7 @@ struct TimesMinusI{
return vtrn1q_f32(r1, r0); // ar -ai br -bi
}
//Complex double
inline float64x2_t operator()(float64x2_t in, float64x2_t ret){
inline float64x2_t operator()(float64x2_t in){
// a ib -> b -ia
float64x2_t tmp;
tmp = vnegq_f64(in);
@ -338,7 +338,7 @@ struct TimesMinusI{
struct TimesI{
//Complex single
inline float32x4_t operator()(float32x4_t in, float32x4_t ret){
inline float32x4_t operator()(float32x4_t in){
// ar ai br bi -> -ai ar -bi br
float32x4_t r0, r1;
r0 = vnegq_f32(in); // -ar -ai -br -bi
@ -346,7 +346,7 @@ struct TimesI{
return vtrn1q_f32(r1, in); // -ai ar -bi br
}
//Complex double
inline float64x2_t operator()(float64x2_t in, float64x2_t ret){
inline float64x2_t operator()(float64x2_t in){
// a ib -> -b ia
float64x2_t tmp;
tmp = vnegq_f64(in);

View File

@ -1369,10 +1369,11 @@ public:
int recv_from_rank;
int xmit_to_rank;
int shm_send=0;
int shm_recv=0;
_grid->ShiftedRanks(dimension,nbr_proc,xmit_to_rank,recv_from_rank);
#ifdef SHM_FAST_PATH
#warning STENCIL SHM FAST PATH SELECTED
int shm_recv=0;
// shm == receive pointer if offnode
// shm == Translate[send pointer] if on node -- my view of his send pointer
cobj *shm = (cobj *) _grid->ShmBufferTranslate(recv_from_rank,sp);
@ -1405,7 +1406,6 @@ public:
acceleratorMemSet(rp,0,bytes); // Zero prefill comms buffer to zero
}
int do_send = (comms_send|comms_partial_send) && (!shm_send );
int do_recv = (comms_send|comms_partial_send) && (!shm_recv );
AddPacket((void *)sp,(void *)rp,
xmit_to_rank,do_send,
recv_from_rank,do_send,

View File

@ -133,7 +133,6 @@ typename vobj::scalar_object extractLane(int lane, const vobj & __restrict__ vec
typedef scalar_type * pointer;
constexpr int words=sizeof(vobj)/sizeof(vector_type);
constexpr int Nsimd=vector_type::Nsimd();
scalar_object extracted;
pointer __restrict__ sp = (pointer)&extracted; // Type pun
@ -153,7 +152,6 @@ void insertLane(int lane, vobj & __restrict__ vec,const typename vobj::scalar_ob
typedef scalar_type * pointer;
constexpr int words=sizeof(vobj)/sizeof(vector_type);
constexpr int Nsimd=vector_type::Nsimd();
pointer __restrict__ sp = (pointer)&extracted;
vector_type *vp = (vector_type *)&vec;
@ -178,8 +176,6 @@ void extract(const vobj &vec,const ExtractPointerArray<sobj> &extracted, int off
const int s = Nsimd/Nextr;
vector_type * vp = (vector_type *)&vec;
scalar_type vtmp;
sobj_scalar_type stmp;
for(int w=0;w<words;w++){
for(int i=0;i<Nextr;i++){
sobj_scalar_type * pointer = (sobj_scalar_type *)& extracted[i][offset];
@ -205,7 +201,6 @@ void merge(vobj &vec,const ExtractPointerArray<sobj> &extracted, int offset)
vector_type * vp = (vector_type *)&vec;
scalar_type vtmp;
sobj_scalar_type stmp;
for(int w=0;w<words;w++){
for(int i=0;i<Nextr;i++){
sobj_scalar_type * pointer = (sobj_scalar_type *)& extracted[i][offset];
@ -242,9 +237,6 @@ void copyLane(vobjOut & __restrict__ vecOut, int lane_out, const vobjIn & __rest
typedef oextract_type * opointer;
typedef iextract_type * ipointer;
constexpr int oNsimd=ovector_type::Nsimd();
constexpr int iNsimd=ivector_type::Nsimd();
iscalar_type itmp;
oscalar_type otmp;

View File

@ -451,7 +451,7 @@ int main(int argc, char **argv) {
#define MIXED_PRECISION
#ifdef MIXED_PRECISION
std::vector<GeneralEvenOddRatioRationalMixedPrecPseudoFermionAction<FermionImplPolicy,FermionImplPolicyF,FermionImplPolicy> *> Bdys;
std::vector<GeneralEvenOddRatioRationalMixedPrecPseudoFermionAction<FermionImplPolicy,FermionImplPolicyF> *> Bdys;
#else
std::vector<GeneralEvenOddRatioRationalPseudoFermionAction<FermionImplPolicy> *> Bdys;
#endif
@ -526,15 +526,13 @@ int main(int argc, char **argv) {
Quotients.push_back (new TwoFlavourEvenOddRatioPseudoFermionAction<FermionImplPolicy>(*Numerators[h],*Denominators[h],*MPCG[h],*ActionMPCG[h],CG));
} else {
#ifdef MIXED_PRECISION
Bdys.push_back( new GeneralEvenOddRatioRationalMixedPrecPseudoFermionAction<FermionImplPolicy,FermionImplPolicyF,FermionImplPolicy>(
Bdys.push_back( new GeneralEvenOddRatioRationalMixedPrecPseudoFermionAction<FermionImplPolicy,FermionImplPolicyF>(
*Numerators[h],*Denominators[h],
*NumeratorsF[h],*DenominatorsF[h],
*Numerators[h],*Denominators[h],
OFRp, SP_iters) );
Bdys.push_back( new GeneralEvenOddRatioRationalMixedPrecPseudoFermionAction<FermionImplPolicy,FermionImplPolicyF,FermionImplPolicy>(
Bdys.push_back( new GeneralEvenOddRatioRationalMixedPrecPseudoFermionAction<FermionImplPolicy,FermionImplPolicyF>(
*Numerators[h],*Denominators[h],
*NumeratorsF[h],*DenominatorsF[h],
*Numerators[h],*Denominators[h],
OFRp, SP_iters) );
#else
Bdys.push_back( new GeneralEvenOddRatioRationalPseudoFermionAction<FermionImplPolicy>(*Numerators[h],*Denominators[h],OFRp));

View File

@ -329,7 +329,6 @@ int main(int argc, char **argv) {
auto grid4= GridPtr;
auto rbgrid4= GridRBPtr;
auto rbgrid = StrangeOp.FermionRedBlackGrid();
auto grid = StrangeOp.FermionGrid();
if(1){

View File

@ -164,11 +164,6 @@ int main(int argc, char **argv) {
typedef MobiusEOFAFermionF FermionEOFAActionF;
typedef typename FermionActionF::FermionField FermionFieldF;
typedef WilsonImplD2 FermionImplPolicyD2;
typedef MobiusFermionD2 FermionActionD2;
typedef MobiusEOFAFermionD2 FermionEOFAActionD2;
typedef typename FermionActionD2::FermionField FermionFieldD2;
typedef Grid::XmlReader Serialiser;
//::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
@ -272,7 +267,6 @@ int main(int argc, char **argv) {
// temporarily need a gauge field
LatticeGaugeFieldD U(GridPtr); U=Zero();
LatticeGaugeFieldF UF(GridPtrF); UF=Zero();
LatticeGaugeFieldD2 UD2(GridPtrF); UD2=Zero();
std::cout << GridLogMessage << " Running the HMC "<< std::endl;
TheHMC.ReadCommandLine(argc,argv); // params on CML or from param file
@ -394,15 +388,13 @@ int main(int argc, char **argv) {
std::vector<FermionAction *> Denominators;
std::vector<FermionActionF *> NumeratorsF;
std::vector<FermionActionF *> DenominatorsF;
std::vector<FermionActionD2 *> NumeratorsD2;
std::vector<FermionActionD2 *> DenominatorsD2;
std::vector<TwoFlavourEvenOddRatioPseudoFermionAction<FermionImplPolicy> *> Quotients;
std::vector<MxPCG *> ActionMPCG;
std::vector<MxPCG *> MPCG;
#define MIXED_PRECISION
#ifdef MIXED_PRECISION
std::vector<OneFlavourEvenOddRatioRationalMixedPrecPseudoFermionAction<FermionImplPolicy,FermionImplPolicyF,FermionImplPolicyD2> *> Bdys;
std::vector<OneFlavourEvenOddRatioRationalMixedPrecPseudoFermionAction<FermionImplPolicy,FermionImplPolicyF> *> Bdys;
#else
std::vector<OneFlavourEvenOddRatioRationalPseudoFermionAction<FermionImplPolicy> *> Bdys;
#endif

View File

@ -1,7 +1,8 @@
# Grid [![Teamcity status](http://ci.cliath.ph.ed.ac.uk/app/rest/builds/aggregated/strob:(buildType:(affectedProject(id:GridBasedSoftware_Grid)),branch:name:develop)/statusIcon.svg)](http://ci.cliath.ph.ed.ac.uk/project.html?projectId=GridBasedSoftware_Grid&tab=projectOverview)
# Grid
**Data parallel C++ mathematical object library.**
[![Teamcity status](https://ci.dev.dirac.ed.ac.uk/guestAuth/app/rest/builds/aggregated/strob:(buildType:(affectedProject(id:GridBasedSoftware_Grid)),branch:default:true)/statusIcon.svg)](https://ci.dev.dirac.ed.ac.uk/project/GridBasedSoftware_Grid?mode=builds)
License: GPL v2.
Last update June 2017.

View File

@ -1,7 +1,7 @@
CLIME=`spack find --paths c-lime@2-3-9 | grep c-lime| cut -c 15-`
../../configure --enable-comms=mpi-auto \
--with-lime=$CLIME \
--enable-unified=yes \
--enable-unified=no \
--enable-shm=nvlink \
--enable-tracing=timer \
--enable-accelerator=hip \

View File

@ -5,8 +5,8 @@ module load emacs
#module load gperftools
module load PrgEnv-gnu
module load rocm/5.3.0
module load cray-mpich/8.1.16
#module load cray-mpich/8.1.17
#module load cray-mpich/8.1.16
module load cray-mpich/8.1.17
module load gmp
module load cray-fftw
module load craype-accel-amd-gfx90a