mirror of
https://github.com/paboyle/Grid.git
synced 2025-04-09 21:50:45 +01:00
Merge branch 'develop' into feature/distil
* develop:
  Hadron WeakEye and A2ALoop bug fixes, and WWVVContraction bug fix
  DiskVector: fix of memory bug triggering segfault when the cache is accessed following a certain pattern
  MFermion::GaugeProp fix for 4d fields
This commit is contained in:
commit
ec7d96ce3b
@ -1385,17 +1385,18 @@ void A2Autils<FImpl>::ContractWWVV(std::vector<PropagatorField> &WWVV,
|
|||||||
for(int t=0;t<N_t;t++){
|
for(int t=0;t<N_t;t++){
|
||||||
for(int s=0;s<N_s;s++){
|
for(int s=0;s<N_s;s++){
|
||||||
auto tmp1 = vs[s]._odata[ss];
|
auto tmp1 = vs[s]._odata[ss];
|
||||||
vobj tmp2 = zero;
|
vobj tmp2 = zero;
|
||||||
|
vobj tmp3 = zero;
|
||||||
|
|
||||||
for(int d=d_o;d<MIN(d_o+d_unroll,N_d);d++){
|
for(int d=d_o;d<MIN(d_o+d_unroll,N_d);d++){
|
||||||
Scalar_v coeff = WW_sd(t,s,d);
|
Scalar_v coeff = WW_sd(t,s,d);
|
||||||
mac(&tmp2 ,& coeff, & vd[d]._odata[ss]);
|
tmp3 = conjugate(vd[d]._odata[ss]);
|
||||||
}
|
mac(&tmp2, &coeff, &tmp3);
|
||||||
|
}
|
||||||
|
|
||||||
//////////////////////////
|
//////////////////////////
|
||||||
// Fast outer product of tmp1 with a sum of terms suppressed by d_unroll
|
// Fast outer product of tmp1 with a sum of terms suppressed by d_unroll
|
||||||
//////////////////////////
|
//////////////////////////
|
||||||
tmp2 = conjugate(tmp2);
|
|
||||||
for(int s1=0;s1<Ns;s1++){
|
for(int s1=0;s1<Ns;s1++){
|
||||||
for(int s2=0;s2<Ns;s2++){
|
for(int s2=0;s2<Ns;s2++){
|
||||||
WWVV[t]._odata[ss]()(s1,s2)(0,0) += tmp1()(s1)(0)*tmp2()(s2)(0);
|
WWVV[t]._odata[ss]()(s1,s2)(0,0) += tmp1()(s1)(0)*tmp2()(s2)(0);
|
||||||
|
@ -395,12 +395,26 @@ void DiskVectorBase<T>::cacheInsert(const unsigned int i, const T &obj) const
|
|||||||
auto &freeInd = *freePtr_;
|
auto &freeInd = *freePtr_;
|
||||||
auto &loads = *loadsPtr_;
|
auto &loads = *loadsPtr_;
|
||||||
|
|
||||||
evict();
|
// cache miss, evict and store
|
||||||
index[i] = freeInd.top();
|
if (index.find(i) == index.end())
|
||||||
freeInd.pop();
|
{
|
||||||
cache[index.at(i)] = obj;
|
evict();
|
||||||
loads.push_back(i);
|
index[i] = freeInd.top();
|
||||||
modified[index.at(i)] = false;
|
freeInd.pop();
|
||||||
|
cache[index.at(i)] = obj;
|
||||||
|
loads.push_back(i);
|
||||||
|
modified[index.at(i)] = false;
|
||||||
|
}
|
||||||
|
// cache hit, modify current value
|
||||||
|
else
|
||||||
|
{
|
||||||
|
auto pos = std::find(loads.begin(), loads.end(), i);
|
||||||
|
|
||||||
|
cache[index.at(i)] = obj;
|
||||||
|
modified[index.at(i)] = true;
|
||||||
|
loads.erase(pos);
|
||||||
|
loads.push_back(i);
|
||||||
|
}
|
||||||
|
|
||||||
#ifdef DV_DEBUG
|
#ifdef DV_DEBUG
|
||||||
std::string msg;
|
std::string msg;
|
||||||
|
@ -89,7 +89,7 @@ std::vector<std::string> TA2ALoop<FImpl>::getInput(void)
|
|||||||
template <typename FImpl>
|
template <typename FImpl>
|
||||||
std::vector<std::string> TA2ALoop<FImpl>::getOutput(void)
|
std::vector<std::string> TA2ALoop<FImpl>::getOutput(void)
|
||||||
{
|
{
|
||||||
std::vector<std::string> out = {};
|
std::vector<std::string> out = {getName()};
|
||||||
|
|
||||||
return out;
|
return out;
|
||||||
}
|
}
|
||||||
|
@ -52,7 +52,7 @@ BEGIN_HADRONS_NAMESPACE
|
|||||||
* |
|
* |
|
||||||
* one trace | two traces
|
* one trace | two traces
|
||||||
*
|
*
|
||||||
* one trace : tr(qbr*gOut*qs*adj(gIn)*g5*adj(qbl)*g5*G*loop*G*qbr*gOut)
|
* one trace : tr(qbr*gOut*qs*adj(gIn)*g5*adj(qbl)*g5*G*loop*G)
|
||||||
* two traces: tr(qbr*gOut*qs*adj(gIn)*g5*adj(qbl)*g5*G)*tr(loop*G)
|
* two traces: tr(qbr*gOut*qs*adj(gIn)*g5*adj(qbl)*g5*G)*tr(loop*G)
|
||||||
*
|
*
|
||||||
*/
|
*/
|
||||||
@ -118,7 +118,7 @@ template <typename FImpl>
|
|||||||
std::vector<std::string> TWeakEye3pt<FImpl>::getInput(void)
|
std::vector<std::string> TWeakEye3pt<FImpl>::getInput(void)
|
||||||
{
|
{
|
||||||
std::vector<std::string> in = {par().qBarLeft, par().qBarRight,
|
std::vector<std::string> in = {par().qBarLeft, par().qBarRight,
|
||||||
par().qSpectator};
|
par().qSpectator, par().loop};
|
||||||
|
|
||||||
return in;
|
return in;
|
||||||
}
|
}
|
||||||
@ -170,7 +170,7 @@ void TWeakEye3pt<FImpl>::execute(void)
|
|||||||
|
|
||||||
r.info.op = G.g;
|
r.info.op = G.g;
|
||||||
// one trace
|
// one trace
|
||||||
corr = trace(qbr*gOut*qst*adj(gIn)*g5*adj(qbl)*g5*G*loop*G*qbr*gOut);
|
corr = trace(qbr*gOut*qst*adj(gIn)*g5*adj(qbl)*g5*G*loop*G);
|
||||||
sliceSum(corr, buf, Tp);
|
sliceSum(corr, buf, Tp);
|
||||||
r.corr.clear();
|
r.corr.clear();
|
||||||
for (unsigned int t = 0; t < buf.size(); ++t)
|
for (unsigned int t = 0; t < buf.size(); ++t)
|
||||||
|
@ -111,13 +111,18 @@ void TGaugeProp<FImpl>::setup(void)
|
|||||||
{
|
{
|
||||||
Ls_ = env().getObjectLs(par().solver);
|
Ls_ = env().getObjectLs(par().solver);
|
||||||
envCreateLat(PropagatorField, getName());
|
envCreateLat(PropagatorField, getName());
|
||||||
envTmpLat(FermionField, "source", Ls_);
|
|
||||||
envTmpLat(FermionField, "sol", Ls_);
|
|
||||||
envTmpLat(FermionField, "tmp");
|
envTmpLat(FermionField, "tmp");
|
||||||
if (Ls_ > 1)
|
if (Ls_ > 1)
|
||||||
{
|
{
|
||||||
|
envTmpLat(FermionField, "source", Ls_);
|
||||||
|
envTmpLat(FermionField, "sol", Ls_);
|
||||||
envCreateLat(PropagatorField, getName() + "_5d", Ls_);
|
envCreateLat(PropagatorField, getName() + "_5d", Ls_);
|
||||||
}
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
envTmpLat(FermionField, "source");
|
||||||
|
envTmpLat(FermionField, "sol");
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// execution ///////////////////////////////////////////////////////////////////
|
// execution ///////////////////////////////////////////////////////////////////
|
||||||
|
Loading…
x
Reference in New Issue
Block a user