1
0
mirror of https://github.com/paboyle/Grid.git synced 2024-11-10 07:55:35 +00:00

First attempt at writing the C terms in the clover derivative. Some shifts still need to be fixed.

This commit is contained in:
David Preti 2017-10-05 10:13:53 +02:00
parent 56478d63a5
commit d810e8c8fb
3 changed files with 77 additions and 31 deletions

View File

@ -90,9 +90,10 @@ inline uint64_t cyclecount(void){
}
#elif defined __x86_64__
// Returns a cycle count as a uint64_t for this preprocessor branch.
// NOTE(review): this listing is a rendered diff whose +/- markers were
// stripped, so the pre-change `return __rdtsc();` is shown directly above its
// commented-out replacement. Read literally, the first `return` wins and the
// trailing `return 0;` is unreachable; in the post-change file the function
// presumably returns 0 unconditionally, which disables cycle counting (the
// author's own marker says that line should be removed again) -- confirm
// against the repository before relying on either reading.
inline uint64_t cyclecount(void){
return __rdtsc();
//return __rdtsc();
// Alternative kept by the author but disabled: read the counter via __rdtscp.
// unsigned int dummy;
// return __rdtscp(&dummy);
return 0; // <- remove this;
}
#else

View File

@ -57,35 +57,37 @@ void PmuStat::start(void)
pmu_start();
++count;
xmemctrs(&mrstart, &mwstart);
tstart = __rdtsc();
//tstart = __rdtsc();
tstart=0;
#endif
}
// Stores the starting counter values for column t of the `counters` table.
// Only compiled to do anything when __x86_64__ is defined; otherwise the body
// is empty.
// NOTE(review): this listing is a rendered diff with the +/- markers
// stripped. The first group of six assignments is presumably the removed
// code (live counter reads) and the second group the added code (zeros with
// the reads commented out). Read literally, the second group overwrites the
// first, so every slot ends up 0.
void PmuStat::enter(int t)
{
#ifdef __x86_64__
// Slots 0-1 read __rdpmc(0..1); slots 2-4 read __rdpmc with bit 30 set and
// index 0..2 (presumably the fixed-function counters -- TODO confirm against
// the RDPMC documentation); slot 5 reads the time-stamp counter.
counters[0][t] = __rdpmc(0);
counters[1][t] = __rdpmc(1);
counters[2][t] = __rdpmc((1<<30)|0);
counters[3][t] = __rdpmc((1<<30)|1);
counters[4][t] = __rdpmc((1<<30)|2);
counters[5][t] = __rdtsc();
// Replacement lines: every slot is forced to zero, with the original read
// preserved in the trailing comment.
counters[0][t] = 0;//__rdpmc(0);
counters[1][t] = 0;//__rdpmc(1);
counters[2][t] = 0;//__rdpmc((1<<30)|0);
counters[3][t] = 0;//__rdpmc((1<<30)|1);
counters[4][t] = 0;//__rdpmc((1<<30)|2);
counters[5][t] = 0;//__rdtsc();
#endif
}
// Converts the values stored in column t of `counters` into elapsed deltas
// (current reading minus the stored value).
// Only compiled to do anything when __x86_64__ is defined; otherwise the body
// is empty.
// NOTE(review): this listing is a rendered diff with the +/- markers
// stripped. The first group of six assignments is presumably the removed
// code (real deltas) and the second group the added code (zeros with the
// delta computation commented out). Read literally, the second group
// overwrites the first, so every slot ends up 0.
void PmuStat::exit(int t)
{
#ifdef __x86_64__
// Delta form: re-read each counter and subtract the value already stored in
// the same slot.
counters[0][t] = __rdpmc(0) - counters[0][t];
counters[1][t] = __rdpmc(1) - counters[1][t];
counters[2][t] = __rdpmc((1<<30)|0) - counters[2][t];
counters[3][t] = __rdpmc((1<<30)|1) - counters[3][t];
counters[4][t] = __rdpmc((1<<30)|2) - counters[4][t];
counters[5][t] = __rdtsc() - counters[5][t];
// Replacement lines: every slot is forced to zero, with the original delta
// expression preserved in the trailing comment.
counters[0][t] = 0;//__rdpmc(0) - counters[0][t];
counters[1][t] = 0;// __rdpmc(1) - counters[1][t];
counters[2][t] = 0;// __rdpmc((1<<30)|0) - counters[2][t];
counters[3][t] = 0;// __rdpmc((1<<30)|1) - counters[3][t];
counters[4][t] = 0;// __rdpmc((1<<30)|2) - counters[4][t];
counters[5][t] = 0;// __rdtsc() - counters[5][t];
#endif
}
void PmuStat::accum(int nthreads)
{
#ifdef __x86_64__
tend = __rdtsc();
// tend = __rdtsc();
tend =0 ;
xmemctrs(&mrend, &mwend);
pmu_stop();
for (int t = 0; t < nthreads; ++t) {

View File

@ -253,25 +253,14 @@ void WilsonCloverFermion<Impl>::MooeeInternal(const FermionField &in, FermionFie
}
}
/*
} else {
out = *Clover * in;
}
*/
} // MooeeInternal
// Derivative parts
template <class Impl>
void WilsonCloverFermion<Impl>::MDeriv(GaugeField &mat, const FermionField &U, const FermionField &V, int dag)
{
GaugeField tmp(mat._grid);
conformable(U._grid, V._grid);
@ -287,10 +276,64 @@ void WilsonCloverFermion<Impl>::MDeriv(GaugeField &mat, const FermionField &U, c
// Derivative parts
// MooDeriv: work-in-progress derivative term of the clover operator.
//
// NOTE(review): this listing is a rendered diff with the +/- markers
// stripped. Two signature lines are shown: the first (parameters U, V) is
// presumably the removed one and the second (parameters X, Y) the added one.
// Likewise `assert(0); // not implemented yet` is presumably a removed line;
// read literally it would abort before any of the new code runs.
//
// What the visible body does:
//  - checks that mat, X and Y live on conformable grids;
//  - sets Lambda to zero (the intended Y*dag(X)+X*dag(Y) is still commented
//    out), so X and Y are otherwise unused, and `dag` is never read;
//  - extracts the per-direction links U[mu] from mat;
//  - accumulates eight sums per direction mu (C1p..C4p for the "upper
//    staple" insertions, C1m..C4m for the "lower staple" insertions).
// None of the accumulated C* fields is written back to mat or otherwise used
// after the loops, so the function currently has no visible output.
template <class Impl>
void WilsonCloverFermion<Impl>::MooDeriv(GaugeField &mat, const FermionField &U, const FermionField &V, int dag)
void WilsonCloverFermion<Impl>::MooDeriv(GaugeField &mat, const FermionField &X, const FermionField &Y, int dag)
{
// Compute the 8 terms of the derivative
assert(0); // not implemented yet
GridBase *grid = mat._grid;
GaugeLinkField Lambda(grid), tmp(grid);
// Placeholder: Lambda is zero until the spin/colour structure is decided.
Lambda = zero; //Y*dag(X)+X*dag(Y); // I have to peek spin and decide the color structure
conformable(mat._grid, X._grid);
conformable(Y._grid, X._grid);
// One accumulator per direction for each of the eight terms.
std::vector<GaugeLinkField> C1p(Nd,grid), C2p(Nd,grid), C3p(Nd,grid), C4p(Nd,grid);
std::vector<GaugeLinkField> C1m(Nd,grid), C2m(Nd,grid), C3m(Nd,grid), C4m(Nd,grid);
std::vector<GaugeLinkField> U(Nd, mat._grid);
// Pull out the link field for each direction and clear all accumulators.
for (int mu = 0; mu < Nd; mu++) {
U[mu] = PeekIndex<LorentzIndex>(mat, mu);
C1p[mu]=zero; C2p[mu]=zero; C3p[mu]=zero; C4p[mu]=zero;
C1m[mu]=zero; C2m[mu]=zero; C3m[mu]=zero; C4m[mu]=zero;
}
// NOTE(review): the loop bounds are the literal 4 while the containers above
// are sized with Nd (presumably Nd == 4 -- confirm). The nu loop also
// includes nu == mu; verify whether that term is meant to be skipped.
for (int mu=0;mu<4;mu++){
for (int nu=0;nu<4;nu++){
// insertion in upper staple
// Each term places Lambda at a different position along the path; the
// commit message warns that some of these shifts still need fixing.
tmp = Impl::CovShiftIdentityBackward(Lambda, nu) * U[nu];
C1p[mu]+= Cshift(Impl::CovShiftForward(tmp, nu, Impl::CovShiftBackward(U[mu], mu, Cshift(U[nu], nu, -1))), mu, 1);
tmp = Impl::CovShiftIdentityForward(Lambda, mu) * U[mu];
C2p[mu]+= Cshift(Impl::CovShiftForward(U[nu], nu, Impl::CovShiftBackward(tmp, mu, Cshift(U[nu], nu, -1))), mu, 1);
tmp = Impl::CovShiftIdentityForward(Lambda, nu) * U[nu];
C3p[mu]+= Cshift(Impl::CovShiftForward(U[nu], nu, Impl::CovShiftBackward(U[mu], mu, Cshift(tmp, nu, -1))), mu, 1);
tmp = Lambda;
C4p[mu]+= Cshift(Impl::CovShiftForward(U[nu], nu, Impl::CovShiftBackward(U[mu], mu, Cshift(U[nu], nu, -1))),mu,1) * tmp;
// insertion in lower staple
tmp = Impl::CovShiftIdentityForward(Lambda, nu) * U[nu];
C1m[mu]+= Cshift(Impl::CovShiftBackward(tmp, nu, Impl::CovShiftBackward(U[mu], mu, U[nu])), mu, 1);
tmp = Cshift(Cshift(Lambda, nu, 2),mu, 1) * U[mu];
C2m[mu]+= Cshift(Impl::CovShiftBackward(U[nu], nu, Impl::CovShiftBackward(tmp, mu, U[nu])), mu ,1);
tmp = Cshift(Lambda, nu, 2) * U[nu];
C3m[mu]+= Cshift(Impl::CovShiftBackward(U[nu], nu, Impl::CovShiftBackward(U[mu], mu, tmp)), mu, 1);
tmp = Lambda;
C4m[mu]+= Cshift(Impl::CovShiftBackward(U[nu], nu, Impl::CovShiftBackward(U[mu], mu, U[nu])), mu, 1)* tmp;
}
}
// Still being implemented: this has to be tested, and how to project EO
// still has to be understood.
}
// Derivative parts