mirror of
https://github.com/paboyle/Grid.git
synced 2025-04-06 04:05:55 +01:00
Merge branch 'develop' of github.com:fionnoh/Grid into feature/A2A_current_insertion
This commit is contained in:
commit
d566637cec
@ -26,7 +26,7 @@ See the full license in the file "LICENSE" in the top level distribution
|
||||
directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
#include <Grid.h>
|
||||
#include <Grid/Grid.h>
|
||||
|
||||
namespace Grid {
|
||||
namespace QCD {
|
||||
|
@ -26,11 +26,11 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
|
||||
See the full license in the file "LICENSE" in the top level distribution directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
#include <Grid.h>
|
||||
#include <Grid/Grid.h>
|
||||
|
||||
#ifdef AVX512
|
||||
#include <simd/Intel512common.h>
|
||||
#include <simd/Intel512avx.h>
|
||||
#include <Grid/simd/Intel512common.h>
|
||||
#include <Grid/simd/Intel512avx.h>
|
||||
#endif
|
||||
|
||||
// Interleave operations from two directions
|
||||
@ -679,7 +679,7 @@ void StaggeredKernels<Impl>::DhopSiteAsm(StencilImpl &st, LebesgueOrder &lo,
|
||||
gauge3 =(uint64_t)&UU._odata[sU]( T );
|
||||
|
||||
// This is the single precision 5th direction vectorised kernel
|
||||
#include <simd/Intel512single.h>
|
||||
#include <Grid/simd/Intel512single.h>
|
||||
template <> void StaggeredKernels<StaggeredVec5dImplF>::DhopSiteAsm(StencilImpl &st, LebesgueOrder &lo,
|
||||
DoubledGaugeField &U, DoubledGaugeField &UUU,
|
||||
SiteSpinor *buf, int LLs, int sU,
|
||||
@ -732,7 +732,7 @@ template <> void StaggeredKernels<StaggeredVec5dImplF>::DhopSiteAsm(StencilImpl
|
||||
|
||||
}
|
||||
|
||||
#include <simd/Intel512double.h>
|
||||
#include <Grid/simd/Intel512double.h>
|
||||
template <> void StaggeredKernels<StaggeredVec5dImplD>::DhopSiteAsm(StencilImpl &st, LebesgueOrder &lo,
|
||||
DoubledGaugeField &U, DoubledGaugeField &UUU,
|
||||
SiteSpinor *buf, int LLs, int sU,
|
||||
@ -816,7 +816,7 @@ template <> void StaggeredKernels<StaggeredVec5dImplD>::DhopSiteAsm(StencilImpl
|
||||
|
||||
// This is the single precision 5th direction vectorised kernel
|
||||
|
||||
#include <simd/Intel512single.h>
|
||||
#include <Grid/simd/Intel512single.h>
|
||||
template <> void StaggeredKernels<StaggeredImplF>::DhopSiteAsm(StencilImpl &st, LebesgueOrder &lo,
|
||||
DoubledGaugeField &U, DoubledGaugeField &UUU,
|
||||
SiteSpinor *buf, int LLs, int sU,
|
||||
@ -884,7 +884,7 @@ template <> void StaggeredKernels<StaggeredImplF>::DhopSiteAsm(StencilImpl &st,
|
||||
#endif
|
||||
}
|
||||
|
||||
#include <simd/Intel512double.h>
|
||||
#include <Grid/simd/Intel512double.h>
|
||||
template <> void StaggeredKernels<StaggeredImplD>::DhopSiteAsm(StencilImpl &st, LebesgueOrder &lo,
|
||||
DoubledGaugeField &U, DoubledGaugeField &UUU,
|
||||
SiteSpinor *buf, int LLs, int sU,
|
||||
|
@ -26,7 +26,7 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
|
||||
See the full license in the file "LICENSE" in the top level distribution directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
#include <Grid.h>
|
||||
#include <Grid/Grid.h>
|
||||
|
||||
|
||||
#define LOAD_CHI(b) \
|
||||
|
@ -81,8 +81,8 @@ WilsonKernels<Impl >::AsmDhopSiteDagExt(StencilImpl &st,LebesgueOrder & lo,Doubl
|
||||
assert(0);
|
||||
}
|
||||
|
||||
#include <qcd/action/fermion/WilsonKernelsAsmAvx512.h>
|
||||
#include <qcd/action/fermion/WilsonKernelsAsmQPX.h>
|
||||
#include <Grid/qcd/action/fermion/WilsonKernelsAsmAvx512.h>
|
||||
#include <Grid/qcd/action/fermion/WilsonKernelsAsmQPX.h>
|
||||
|
||||
#define INSTANTIATE_ASM(A)\
|
||||
template void WilsonKernels<A>::AsmDhopSite(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf,\
|
||||
|
@ -66,6 +66,7 @@ namespace QCD{
|
||||
FermionField Phi; // the pseudofermion field for this trajectory
|
||||
|
||||
public:
|
||||
|
||||
ExactOneFlavourRatioPseudoFermionAction(AbstractEOFAFermion<Impl>& _Lop,
|
||||
AbstractEOFAFermion<Impl>& _Rop,
|
||||
OperatorFunction<FermionField>& HeatbathCG,
|
||||
|
@ -485,83 +485,6 @@ namespace Optimization {
|
||||
// Some Template specialization
|
||||
|
||||
// Hack for CLANG until mm512_reduce_add_ps etc... are implemented in GCC and Clang releases
|
||||
#ifndef __INTEL_COMPILER
|
||||
#warning "Slow reduction due to incomplete reduce intrinsics"
|
||||
//Complex float Reduce
|
||||
template<>
|
||||
inline Grid::ComplexF Reduce<Grid::ComplexF, __m512>::operator()(__m512 in){
|
||||
__m512 v1,v2;
|
||||
v1=Optimization::Permute::Permute0(in); // avx 512; quad complex single
|
||||
v1= _mm512_add_ps(v1,in);
|
||||
v2=Optimization::Permute::Permute1(v1);
|
||||
v1 = _mm512_add_ps(v1,v2);
|
||||
v2=Optimization::Permute::Permute2(v1);
|
||||
v1 = _mm512_add_ps(v1,v2);
|
||||
u512f conv; conv.v = v1;
|
||||
return Grid::ComplexF(conv.f[0],conv.f[1]);
|
||||
}
|
||||
|
||||
//Real float Reduce
|
||||
template<>
|
||||
inline Grid::RealF Reduce<Grid::RealF, __m512>::operator()(__m512 in){
|
||||
__m512 v1,v2;
|
||||
v1 = Optimization::Permute::Permute0(in); // avx 512; octo-double
|
||||
v1 = _mm512_add_ps(v1,in);
|
||||
v2 = Optimization::Permute::Permute1(v1);
|
||||
v1 = _mm512_add_ps(v1,v2);
|
||||
v2 = Optimization::Permute::Permute2(v1);
|
||||
v1 = _mm512_add_ps(v1,v2);
|
||||
v2 = Optimization::Permute::Permute3(v1);
|
||||
v1 = _mm512_add_ps(v1,v2);
|
||||
u512f conv; conv.v=v1;
|
||||
return conv.f[0];
|
||||
}
|
||||
|
||||
|
||||
//Complex double Reduce
|
||||
template<>
|
||||
inline Grid::ComplexD Reduce<Grid::ComplexD, __m512d>::operator()(__m512d in){
|
||||
__m512d v1;
|
||||
v1 = Optimization::Permute::Permute0(in); // sse 128; paired complex single
|
||||
v1 = _mm512_add_pd(v1,in);
|
||||
v1 = Optimization::Permute::Permute1(in); // sse 128; paired complex single
|
||||
v1 = _mm512_add_pd(v1,in);
|
||||
u512d conv; conv.v = v1;
|
||||
return Grid::ComplexD(conv.f[0],conv.f[1]);
|
||||
}
|
||||
|
||||
//Real double Reduce
|
||||
template<>
|
||||
inline Grid::RealD Reduce<Grid::RealD, __m512d>::operator()(__m512d in){
|
||||
__m512d v1,v2;
|
||||
v1 = Optimization::Permute::Permute0(in); // avx 512; quad double
|
||||
v1 = _mm512_add_pd(v1,in);
|
||||
v2 = Optimization::Permute::Permute1(v1);
|
||||
v1 = _mm512_add_pd(v1,v2);
|
||||
v2 = Optimization::Permute::Permute2(v1);
|
||||
v1 = _mm512_add_pd(v1,v2);
|
||||
u512d conv; conv.v = v1;
|
||||
return conv.f[0];
|
||||
}
|
||||
|
||||
//Integer Reduce
|
||||
template<>
|
||||
inline Integer Reduce<Integer, __m512i>::operator()(__m512i in){
|
||||
// No full vector reduce, use AVX to add upper and lower halves of register
|
||||
// and perform AVX reduction.
|
||||
__m256i v1, v2, v3;
|
||||
__m128i u1, u2, ret;
|
||||
v1 = _mm512_castsi512_si256(in); // upper half
|
||||
v2 = _mm512_extracti32x8_epi32(in, 1); // lower half
|
||||
v3 = _mm256_add_epi32(v1, v2);
|
||||
v1 = _mm256_hadd_epi32(v3, v3);
|
||||
v2 = _mm256_hadd_epi32(v1, v1);
|
||||
u1 = _mm256_castsi256_si128(v2); // upper half
|
||||
u2 = _mm256_extracti128_si256(v2, 1); // lower half
|
||||
ret = _mm_add_epi32(u1, u2);
|
||||
return _mm_cvtsi128_si32(ret);
|
||||
}
|
||||
#else
|
||||
//Complex float Reduce
|
||||
template<>
|
||||
inline Grid::ComplexF Reduce<Grid::ComplexF, __m512>::operator()(__m512 in){
|
||||
@ -590,8 +513,6 @@ namespace Optimization {
|
||||
inline Integer Reduce<Integer, __m512i>::operator()(__m512i in){
|
||||
return _mm512_reduce_add_epi32(in);
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
}
|
||||
|
||||
|
@ -30,7 +30,9 @@ directory
|
||||
/* END LEGAL */
|
||||
#include <Grid/Grid.h>
|
||||
|
||||
#ifdef GRID_DEFAULT_PRECISION_DOUBLE
|
||||
#define MIXED_PRECISION
|
||||
#endif
|
||||
|
||||
namespace Grid{
|
||||
namespace QCD{
|
||||
@ -346,6 +348,7 @@ int main(int argc, char **argv) {
|
||||
#else
|
||||
ExactOneFlavourRatioPseudoFermionAction<FermionImplPolicy>
|
||||
EOFA(Strange_Op_L, Strange_Op_R,
|
||||
ActionCG,
|
||||
ActionCG, ActionCG,
|
||||
DerivativeCG, DerivativeCG,
|
||||
OFRp, true);
|
||||
|
@ -109,10 +109,11 @@ typedef std::vector<typename ComplexField##suffix::vector_object::scalar_object>
|
||||
|
||||
#define FERM_TYPE_ALIASES(FImpl, suffix)\
|
||||
BASIC_TYPE_ALIASES(FImpl, suffix);\
|
||||
typedef FermionOperator<FImpl> FMat##suffix;\
|
||||
typedef typename FImpl::FermionField FermionField##suffix;\
|
||||
typedef typename FImpl::GaugeField GaugeField##suffix;\
|
||||
typedef typename FImpl::DoubledGaugeField DoubledGaugeField##suffix;
|
||||
typedef FermionOperator<FImpl> FMat##suffix;\
|
||||
typedef typename FImpl::FermionField FermionField##suffix;\
|
||||
typedef typename FImpl::GaugeField GaugeField##suffix;\
|
||||
typedef typename FImpl::DoubledGaugeField DoubledGaugeField##suffix;\
|
||||
typedef Lattice<iSpinMatrix<typename FImpl::Simd>> SpinMatrixField##suffix;
|
||||
|
||||
#define GAUGE_TYPE_ALIASES(GImpl, suffix)\
|
||||
typedef typename GImpl::GaugeField GaugeField##suffix;
|
||||
|
@ -64,7 +64,7 @@ BEGIN_HADRONS_NAMESPACE
|
||||
*/
|
||||
|
||||
/******************************************************************************
|
||||
* TWeakMesonDecayKl2 *
|
||||
* TWeakMesonDecayKl2 *
|
||||
******************************************************************************/
|
||||
BEGIN_MODULE_NAMESPACE(MContraction)
|
||||
|
||||
@ -75,7 +75,7 @@ public:
|
||||
std::string, q1,
|
||||
std::string, q2,
|
||||
std::string, lepton,
|
||||
std::string, output);
|
||||
std::string, output);
|
||||
};
|
||||
|
||||
template <typename FImpl>
|
||||
@ -83,14 +83,13 @@ class TWeakMesonDecayKl2: public Module<WeakMesonDecayKl2Par>
|
||||
{
|
||||
public:
|
||||
FERM_TYPE_ALIASES(FImpl,);
|
||||
class Metadata: Serializable
|
||||
typedef typename SpinMatrixField::vector_object::scalar_object SpinMatrix;
|
||||
class Result: Serializable
|
||||
{
|
||||
public:
|
||||
GRID_SERIALIZABLE_CLASS_MEMBERS(Metadata,
|
||||
int, spinidx1,
|
||||
int, spinidx2);
|
||||
GRID_SERIALIZABLE_CLASS_MEMBERS(Result,
|
||||
std::vector<SpinMatrix>, corr);
|
||||
};
|
||||
typedef Correlator<Metadata> Result;
|
||||
public:
|
||||
// constructor
|
||||
TWeakMesonDecayKl2(const std::string name);
|
||||
@ -138,10 +137,10 @@ std::vector<std::string> TWeakMesonDecayKl2<FImpl>::getOutput(void)
|
||||
template <typename FImpl>
|
||||
void TWeakMesonDecayKl2<FImpl>::setup(void)
|
||||
{
|
||||
envTmpLat(LatticeComplex, "c");
|
||||
envTmpLat(ComplexField, "c");
|
||||
envTmpLat(PropagatorField, "prop_buf");
|
||||
envCreateLat(PropagatorField, getName());
|
||||
envTmpLat(LatticeComplex, "buf");
|
||||
envTmpLat(SpinMatrixField, "buf");
|
||||
}
|
||||
|
||||
// execution ///////////////////////////////////////////////////////////////////
|
||||
@ -150,57 +149,33 @@ void TWeakMesonDecayKl2<FImpl>::execute(void)
|
||||
{
|
||||
LOG(Message) << "Computing QED Kl2 contractions '" << getName() << "' using"
|
||||
<< " quarks '" << par().q1 << "' and '" << par().q2 << "' and"
|
||||
<< "lepton '" << par().lepton << "'" << std::endl;
|
||||
<< "lepton '" << par().lepton << "'" << std::endl;
|
||||
|
||||
Gamma g5(Gamma::Algebra::Gamma5);
|
||||
int nt = env().getDim(Tp);
|
||||
std::vector<SpinMatrix> res_summed;
|
||||
Result r;
|
||||
|
||||
auto &res = envGet(PropagatorField, getName()); res = zero;
|
||||
Gamma g5(Gamma::Algebra::Gamma5);
|
||||
int nt = env().getDim(Tp);
|
||||
|
||||
auto &q1 = envGet(PropagatorField, par().q1);
|
||||
auto &q2 = envGet(PropagatorField, par().q2);
|
||||
auto &res = envGet(PropagatorField, getName()); res = zero;
|
||||
auto &q1 = envGet(PropagatorField, par().q1);
|
||||
auto &q2 = envGet(PropagatorField, par().q2);
|
||||
auto &lepton = envGet(PropagatorField, par().lepton);
|
||||
envGetTmp(LatticeComplex, buf);
|
||||
std::vector<TComplex> res_summed;
|
||||
envGetTmp(LatticeComplex, c);
|
||||
envGetTmp(SpinMatrixField, buf);
|
||||
envGetTmp(ComplexField, c);
|
||||
envGetTmp(PropagatorField, prop_buf);
|
||||
|
||||
std::vector<Result> result;
|
||||
Result r;
|
||||
|
||||
for (unsigned int mu = 0; mu < 4; ++mu)
|
||||
{
|
||||
c = zero;
|
||||
//hadronic part: trace(q1*adj(q2)*g5*gL[mu])
|
||||
c = trace(q1*adj(q2)*g5*GammaL(Gamma::gmu[mu]));
|
||||
prop_buf = 1.;
|
||||
//multiply lepton part
|
||||
res += c * prop_buf * GammaL(Gamma::gmu[mu]) * lepton;
|
||||
c = zero;
|
||||
//hadronic part: trace(q1*adj(q2)*g5*gL[mu])
|
||||
c = trace(q1*adj(q2)*g5*GammaL(Gamma::gmu[mu]));
|
||||
prop_buf = 1.;
|
||||
//multiply lepton part
|
||||
res += c * prop_buf * GammaL(Gamma::gmu[mu]) * lepton;
|
||||
}
|
||||
|
||||
//loop over spinor index of lepton part
|
||||
unsigned int i = 0;
|
||||
for (unsigned int s1 = 0; s1 < Ns ; ++s1)
|
||||
for (unsigned int s2 = 0; s2 < Ns ; ++s2)
|
||||
{
|
||||
buf = peekColour(peekSpin(res,s1,s2),0,0);
|
||||
|
||||
sliceSum(buf, res_summed, Tp);
|
||||
|
||||
r.corr.clear();
|
||||
for (unsigned int t = 0; t < nt; ++t)
|
||||
{
|
||||
r.corr.push_back(TensorRemove(res_summed[t]));
|
||||
}
|
||||
|
||||
r.info.spinidx1 = s1;
|
||||
r.info.spinidx2 = s2;
|
||||
result.push_back(r);
|
||||
|
||||
i+=1;
|
||||
}
|
||||
|
||||
saveResult(par().output, "weakdecay", result);
|
||||
buf = peekColour(res, 0, 0);
|
||||
sliceSum(buf, r.corr, Tp);
|
||||
saveResult(par().output, "weakdecay", r);
|
||||
}
|
||||
|
||||
END_MODULE_NAMESPACE
|
||||
|
@ -54,8 +54,9 @@ BEGIN_HADRONS_NAMESPACE
|
||||
* - action: fermion action used for propagator (string)
|
||||
* - emField: photon field A_mu (string)
|
||||
* - mass: input mass for the lepton propagator
|
||||
* - boundary: boundary conditions for the lepton propagator, e.g. "1 1 1 -1"
|
||||
* - twist: twisted boundary for lepton propagator, e.g. "0.0 0.0 0.0 0.5"
|
||||
* - deltat: source-sink separation
|
||||
* - deltat: list of source-sink separations
|
||||
*
|
||||
*******************************************************************************/
|
||||
|
||||
@ -74,7 +75,7 @@ public:
|
||||
double, mass,
|
||||
std::string , boundary,
|
||||
std::string, twist,
|
||||
unsigned int, deltat);
|
||||
std::vector<unsigned int>, deltat);
|
||||
};
|
||||
|
||||
template <typename FImpl>
|
||||
@ -124,7 +125,12 @@ std::vector<std::string> TEMLepton<FImpl>::getInput(void)
|
||||
template <typename FImpl>
|
||||
std::vector<std::string> TEMLepton<FImpl>::getOutput(void)
|
||||
{
|
||||
std::vector<std::string> out = {getName(), getName() + "_free"};
|
||||
std::vector<std::string> out = {};
|
||||
for(int i=0; i<par().deltat.size(); i++)
|
||||
{
|
||||
out.push_back(std::to_string(par().deltat[i]) + "_" + getName() + "_free");
|
||||
out.push_back(std::to_string(par().deltat[i]) + "_" + getName());
|
||||
}
|
||||
|
||||
return out;
|
||||
}
|
||||
@ -134,8 +140,11 @@ template <typename FImpl>
|
||||
void TEMLepton<FImpl>::setup(void)
|
||||
{
|
||||
Ls_ = env().getObjectLs(par().action);
|
||||
envCreateLat(PropagatorField, getName());
|
||||
envCreateLat(PropagatorField, getName() + "_free");
|
||||
for(int i=0; i<par().deltat.size(); i++)
|
||||
{
|
||||
envCreateLat(PropagatorField, std::to_string(par().deltat[i]) + "_" + getName() + "_free");
|
||||
envCreateLat(PropagatorField, std::to_string(par().deltat[i]) + "_" + getName());
|
||||
}
|
||||
envTmpLat(FermionField, "source", Ls_);
|
||||
envTmpLat(FermionField, "sol", Ls_);
|
||||
envTmpLat(FermionField, "tmp");
|
||||
@ -156,9 +165,6 @@ void TEMLepton<FImpl>::execute(void)
|
||||
auto &mat = envGet(FMat, par().action);
|
||||
RealD mass = par().mass;
|
||||
Complex ci(0.0,1.0);
|
||||
|
||||
PropagatorField &Aslashlep = envGet(PropagatorField, getName());
|
||||
PropagatorField &lep = envGet(PropagatorField, getName() + "_free");
|
||||
|
||||
envGetTmp(FermionField, source);
|
||||
envGetTmp(FermionField, sol);
|
||||
@ -227,6 +233,22 @@ void TEMLepton<FImpl>::execute(void)
|
||||
}
|
||||
}
|
||||
|
||||
for(unsigned int dt=0;dt<par().deltat.size();dt++){
|
||||
PropagatorField &lep = envGet(PropagatorField, std::to_string(par().deltat[dt]) + "_" + getName() + "_free");
|
||||
for(tl=0;tl<nt;tl++){
|
||||
|
||||
//shift free propagator to different source positions
|
||||
//account for possible anti-periodic boundary in time
|
||||
proptmp = Cshift(freetmp,Tp, -tl);
|
||||
proptmp = where( tlat < tl, boundary[Tp]*proptmp, proptmp);
|
||||
|
||||
// free propagator for fixed source-sink separation
|
||||
lep = where(tlat == (tl-par().deltat[dt]+nt)%nt, proptmp, lep);
|
||||
}
|
||||
//account for possible anti-periodic boundary in time
|
||||
lep = where( tlat >= nt-par().deltat[dt], boundary[Tp]*lep, lep);
|
||||
}
|
||||
|
||||
for(tl=0;tl<nt;tl++){
|
||||
|
||||
//shift free propagator to different source positions
|
||||
@ -234,9 +256,6 @@ void TEMLepton<FImpl>::execute(void)
|
||||
proptmp = Cshift(freetmp,Tp, -tl);
|
||||
proptmp = where( tlat < tl, boundary[Tp]*proptmp, proptmp);
|
||||
|
||||
// free propagator for fixed source-sink separation
|
||||
lep = where(tlat == (tl-par().deltat+nt)%nt, proptmp, lep);
|
||||
|
||||
// i*A_mu*gamma_mu
|
||||
sourcetmp = zero;
|
||||
for(unsigned int mu=0;mu<=3;mu++)
|
||||
@ -276,13 +295,17 @@ void TEMLepton<FImpl>::execute(void)
|
||||
}
|
||||
}
|
||||
// keep the result for the desired delta t
|
||||
Aslashlep = where(tlat == (tl-par().deltat+nt)%nt, proptmp, Aslashlep);
|
||||
for(unsigned int dt=0;dt<par().deltat.size();dt++){
|
||||
PropagatorField &Aslashlep = envGet(PropagatorField, std::to_string(par().deltat[dt]) + "_" + getName());
|
||||
Aslashlep = where(tlat == (tl-par().deltat[dt]+nt)%nt, proptmp, Aslashlep);
|
||||
}
|
||||
}
|
||||
|
||||
//account for possible anti-periodic boundary in time
|
||||
Aslashlep = where( tlat >= nt-par().deltat, boundary[Tp]*Aslashlep, Aslashlep);
|
||||
lep = where( tlat >= nt-par().deltat, boundary[Tp]*lep, lep);
|
||||
|
||||
for(unsigned int dt=0;dt<par().deltat.size();dt++){
|
||||
PropagatorField &Aslashlep = envGet(PropagatorField, std::to_string(par().deltat[dt]) + "_" + getName());
|
||||
Aslashlep = where( tlat >= nt-par().deltat[dt], boundary[Tp]*Aslashlep, Aslashlep);
|
||||
}
|
||||
}
|
||||
|
||||
END_MODULE_NAMESPACE
|
||||
|
@ -188,9 +188,9 @@ int main (int argc, char ** argv)
|
||||
rbuf[mu].resize(lat*lat*lat*Ls);
|
||||
// std::cout << " buffers " << std::hex << (uint64_t)&xbuf[mu][0] <<" " << (uint64_t)&rbuf[mu][0] <<std::endl;
|
||||
}
|
||||
uint64_t bytes=lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD);
|
||||
|
||||
int ncomm;
|
||||
int bytes=lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD);
|
||||
|
||||
for(int i=0;i<Nloop;i++){
|
||||
double start=usecond();
|
||||
@ -277,15 +277,15 @@ int main (int argc, char ** argv)
|
||||
std::vector<HalfSpinColourVectorD *> xbuf(8);
|
||||
std::vector<HalfSpinColourVectorD *> rbuf(8);
|
||||
Grid.ShmBufferFreeAll();
|
||||
uint64_t bytes = lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD);
|
||||
for(int d=0;d<8;d++){
|
||||
xbuf[d] = (HalfSpinColourVectorD *)Grid.ShmBufferMalloc(lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD));
|
||||
rbuf[d] = (HalfSpinColourVectorD *)Grid.ShmBufferMalloc(lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD));
|
||||
bzero((void *)xbuf[d],lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD));
|
||||
bzero((void *)rbuf[d],lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD));
|
||||
xbuf[d] = (HalfSpinColourVectorD *)Grid.ShmBufferMalloc(bytes);
|
||||
rbuf[d] = (HalfSpinColourVectorD *)Grid.ShmBufferMalloc(bytes);
|
||||
bzero((void *)xbuf[d],bytes);
|
||||
bzero((void *)rbuf[d],bytes);
|
||||
}
|
||||
|
||||
int ncomm;
|
||||
int bytes=lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD);
|
||||
|
||||
double dbytes;
|
||||
for(int i=0;i<Nloop;i++){
|
||||
@ -374,15 +374,15 @@ int main (int argc, char ** argv)
|
||||
std::vector<HalfSpinColourVectorD *> xbuf(8);
|
||||
std::vector<HalfSpinColourVectorD *> rbuf(8);
|
||||
Grid.ShmBufferFreeAll();
|
||||
uint64_t bytes=lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD);
|
||||
for(int d=0;d<8;d++){
|
||||
xbuf[d] = (HalfSpinColourVectorD *)Grid.ShmBufferMalloc(lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD));
|
||||
rbuf[d] = (HalfSpinColourVectorD *)Grid.ShmBufferMalloc(lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD));
|
||||
bzero((void *)xbuf[d],lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD));
|
||||
bzero((void *)rbuf[d],lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD));
|
||||
xbuf[d] = (HalfSpinColourVectorD *)Grid.ShmBufferMalloc(bytes);
|
||||
rbuf[d] = (HalfSpinColourVectorD *)Grid.ShmBufferMalloc(bytes);
|
||||
bzero((void *)xbuf[d],bytes);
|
||||
bzero((void *)rbuf[d],bytes);
|
||||
}
|
||||
|
||||
int ncomm;
|
||||
int bytes=lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD);
|
||||
double dbytes;
|
||||
for(int i=0;i<Nloop;i++){
|
||||
double start=usecond();
|
||||
@ -472,15 +472,16 @@ int main (int argc, char ** argv)
|
||||
std::vector<HalfSpinColourVectorD *> xbuf(8);
|
||||
std::vector<HalfSpinColourVectorD *> rbuf(8);
|
||||
Grid.ShmBufferFreeAll();
|
||||
// uint64_t bytes = lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD);
|
||||
uint64_t bytes = 2*1024*1024;
|
||||
for(int d=0;d<8;d++){
|
||||
xbuf[d] = (HalfSpinColourVectorD *)Grid.ShmBufferMalloc(lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD));
|
||||
rbuf[d] = (HalfSpinColourVectorD *)Grid.ShmBufferMalloc(lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD));
|
||||
bzero((void *)xbuf[d],lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD));
|
||||
bzero((void *)rbuf[d],lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD));
|
||||
xbuf[d] = (HalfSpinColourVectorD *)Grid.ShmBufferMalloc(bytes);
|
||||
rbuf[d] = (HalfSpinColourVectorD *)Grid.ShmBufferMalloc(bytes);
|
||||
bzero((void *)xbuf[d],bytes);
|
||||
bzero((void *)rbuf[d],bytes);
|
||||
}
|
||||
|
||||
int ncomm;
|
||||
int bytes=lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD);
|
||||
double dbytes;
|
||||
for(int i=0;i<Nloop;i++){
|
||||
double start=usecond();
|
||||
|
@ -262,7 +262,6 @@ Set HEADER_SEARCH_PATHS to:
|
||||
|
||||
$Grid/build$(CONFIGURATION)/Grid
|
||||
$Grid
|
||||
$Grid/Grid
|
||||
|
||||
followed by (***the order is important***) the locations reported by `grid-config --cxxflags`, ignoring duplicates, e.g.:
|
||||
|
||||
@ -272,7 +271,7 @@ followed by (***the order is important***) the locations reported by `grid-confi
|
||||
|
||||
**Note: the easiest way to set this value is to put it all on one line, space separated, and edit the text to the right of `HEADER_SEARCH_PATHS`**, i.e.:
|
||||
|
||||
$Grid/build$(CONFIGURATION)/Grid $Grid $Grid/Grid $GridPre/openmpi/include $GridPkg/include $GridPre/lime/include
|
||||
$Grid/build$(CONFIGURATION)/Grid $Grid $GridPre/openmpi/include $GridPkg/include $GridPre/lime/include
|
||||
|
||||
#### LIBRARY_SEARCH_PATHS
|
||||
|
||||
@ -298,7 +297,7 @@ The easiest way to link to all required libraries is to obtain a list of all lib
|
||||
|
||||
and pasting the output ***with `-lGrid -lHadrons ` prepended*** (including the `-l` switches) directly into `OTHER_LDFLAGS`, e.g.:
|
||||
|
||||
-lGrid -lHadrons -lmpi -lhdf5_cpp -lz -lcrypto -llime -lfftw3f -lfftw3 -lmpfr -lgmp -lstdc++ -lm -lz -lhdf5
|
||||
-lGrid -lHadrons -lmpi -lhdf5_cpp -lhdf5 -lz -lcrypto -llime -lfftw3f -lfftw3 -lmpfr -lgmp -lm
|
||||
|
||||
## Make additional configurations
|
||||
|
||||
|
@ -84,7 +84,7 @@ int main (int argc, char** argv)
|
||||
DomainWallEOFAFermionR Rop(U, *FGrid, *FrbGrid, *UGrid, *UrbGrid, mb, mf, mb, -1.0, 1, M5);
|
||||
OneFlavourRationalParams Params(0.95, 100.0, 5000, 1.0e-12, 12);
|
||||
ConjugateGradient<LatticeFermion> CG(1.0e-12, 5000);
|
||||
ExactOneFlavourRatioPseudoFermionAction<WilsonImplR> Meofa(Lop, Rop, CG, Params, true);
|
||||
ExactOneFlavourRatioPseudoFermionAction<WilsonImplR> Meofa(Lop, Rop, CG, CG, CG, CG, CG, Params, true);
|
||||
|
||||
Meofa.refresh(U, RNG5);
|
||||
RealD S = Meofa.S(U); // pdag M p
|
||||
|
@ -89,7 +89,7 @@ int main (int argc, char** argv)
|
||||
FermionAction Rop(U, *FGrid, *FrbGrid, *UGrid, *UrbGrid, mb, mf, mb, -1.0, 1, M5, params);
|
||||
OneFlavourRationalParams Params(0.95, 100.0, 5000, 1.0e-12, 12);
|
||||
ConjugateGradient<FermionField> CG(1.0e-12, 5000);
|
||||
ExactOneFlavourRatioPseudoFermionAction<FermionImplPolicy> Meofa(Lop, Rop, CG, Params, true);
|
||||
ExactOneFlavourRatioPseudoFermionAction<FermionImplPolicy> Meofa(Lop, Rop, CG, CG, CG, CG, CG, Params, true);
|
||||
|
||||
Meofa.refresh(U, RNG5);
|
||||
RealD S = Meofa.S(U); // pdag M p
|
||||
|
@ -86,7 +86,7 @@ int main (int argc, char** argv)
|
||||
MobiusEOFAFermionR Rop(U, *FGrid, *FrbGrid, *UGrid, *UrbGrid, mb, mf, mb, -1.0, 1, M5, b, c);
|
||||
OneFlavourRationalParams Params(0.95, 100.0, 5000, 1.0e-12, 12);
|
||||
ConjugateGradient<LatticeFermion> CG(1.0e-12, 5000);
|
||||
ExactOneFlavourRatioPseudoFermionAction<WilsonImplR> Meofa(Lop, Rop, CG, Params, false);
|
||||
ExactOneFlavourRatioPseudoFermionAction<WilsonImplR> Meofa(Lop, Rop, CG, CG, CG, CG, CG, Params, false);
|
||||
|
||||
Meofa.refresh(U, RNG5);
|
||||
RealD S = Meofa.S(U); // pdag M p
|
||||
|
@ -91,7 +91,7 @@ int main (int argc, char** argv)
|
||||
FermionAction Rop(U, *FGrid, *FrbGrid, *UGrid, *UrbGrid, mb, mf, mb, -1.0, 1, M5, b, c, params);
|
||||
OneFlavourRationalParams Params(0.95, 100.0, 5000, 1.0e-12, 12);
|
||||
ConjugateGradient<FermionField> CG(1.0e-12, 5000);
|
||||
ExactOneFlavourRatioPseudoFermionAction<FermionImplPolicy> Meofa(Lop, Rop, CG, Params, false);
|
||||
ExactOneFlavourRatioPseudoFermionAction<FermionImplPolicy> Meofa(Lop, Rop, CG, CG, CG, CG, CG, Params, false);
|
||||
|
||||
Meofa.refresh(U, RNG5);
|
||||
RealD S = Meofa.S(U); // pdag M p
|
||||
|
@ -73,7 +73,7 @@ int main (int argc, char ** argv)
|
||||
////////////////////////////////////
|
||||
// Modify the gauge field a little
|
||||
////////////////////////////////////
|
||||
RealD dt = 0.0001;
|
||||
RealD dt = 0.002;
|
||||
|
||||
LatticeColourMatrix mommu(&Grid);
|
||||
LatticeColourMatrix forcemu(&Grid);
|
||||
@ -88,13 +88,7 @@ int main (int argc, char ** argv)
|
||||
|
||||
// fourth order exponential approx
|
||||
parallel_for(auto i=mom.begin();i<mom.end();i++){ // exp(pmu dt) * Umu
|
||||
Uprime[i](mu) = U[i](mu) + mom[i](mu)*U[i](mu)*dt
|
||||
+ mom[i](mu) *mom[i](mu) *U[i](mu)*(dt*dt/2.0)
|
||||
+ mom[i](mu) *mom[i](mu) *mom[i](mu) *U[i](mu)*(dt*dt*dt/6.0)
|
||||
+ mom[i](mu) *mom[i](mu) *mom[i](mu) *mom[i](mu) *U[i](mu)*(dt*dt*dt*dt/24.0)
|
||||
+ mom[i](mu) *mom[i](mu) *mom[i](mu) *mom[i](mu) *mom[i](mu) *U[i](mu)*(dt*dt*dt*dt*dt/120.0)
|
||||
+ mom[i](mu) *mom[i](mu) *mom[i](mu) *mom[i](mu) *mom[i](mu) *mom[i](mu) *U[i](mu)*(dt*dt*dt*dt*dt*dt/720.0);
|
||||
|
||||
Uprime[i](mu) = U[i](mu) + mom[i](mu)*U[i](mu)*dt ;
|
||||
}
|
||||
}
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user