1
0
mirror of https://github.com/paboyle/Grid.git synced 2025-04-09 21:50:45 +01:00

First attempt at writing the C terms in the clover derivative. Some shifts still need to be fixed.

This commit is contained in:
David Preti 2017-10-05 10:13:53 +02:00
parent 56478d63a5
commit d810e8c8fb
3 changed files with 77 additions and 31 deletions

View File

@ -90,9 +90,10 @@ inline uint64_t cyclecount(void){
} }
#elif defined __x86_64__ #elif defined __x86_64__
// cyclecount, x86_64 branch, shown here as a rendered diff: where a line
// repeats itself, the first copy appears to be the pre-commit text and the
// second copy the post-commit text.
// Pre-commit: returns the value of __rdtsc().
// NOTE(review): post-commit, the __rdtsc() call is commented out and the
// function returns a constant 0 (the added line is itself marked
// "<- remove this"), so callers get 0 until the original return is restored.
inline uint64_t cyclecount(void){ inline uint64_t cyclecount(void){
return __rdtsc(); //return __rdtsc();
// unsigned int dummy; // unsigned int dummy;
// return __rdtscp(&dummy); // return __rdtscp(&dummy);
return 0; // <- remove this;
} }
#else #else

View File

@ -57,35 +57,37 @@ void PmuStat::start(void)
pmu_start(); pmu_start();
++count; ++count;
xmemctrs(&mrstart, &mwstart); xmemctrs(&mrstart, &mwstart);
tstart = __rdtsc(); //tstart = __rdtsc();
tstart=0;
#endif #endif
} }
// PmuStat::enter, shown as a rendered diff: the first statement on each
// changed line appears to be the pre-commit text, the second the post-commit
// text. Everything inside the function is compiled only under __x86_64__.
// Pre-commit: stores the current readings in column t of `counters` --
// rows 0..4 from __rdpmc with selectors 0, 1, (1<<30)|0, (1<<30)|1,
// (1<<30)|2, and row 5 from __rdtsc().
// Post-commit: every row is assigned 0 and the counter reads are left
// behind as trailing comments.
// t is presumably a per-thread slot (accum loops t over nthreads) -- TODO confirm.
void PmuStat::enter(int t) void PmuStat::enter(int t)
{ {
#ifdef __x86_64__ #ifdef __x86_64__
counters[0][t] = __rdpmc(0); counters[0][t] = 0;//__rdpmc(0);
counters[1][t] = __rdpmc(1); counters[1][t] = 0;//__rdpmc(1);
counters[2][t] = __rdpmc((1<<30)|0); counters[2][t] = 0;//__rdpmc((1<<30)|0);
counters[3][t] = __rdpmc((1<<30)|1); counters[3][t] = 0;//__rdpmc((1<<30)|1);
counters[4][t] = __rdpmc((1<<30)|2); counters[4][t] = 0;//__rdpmc((1<<30)|2);
counters[5][t] = __rdtsc(); counters[5][t] = 0;//__rdtsc();
#endif #endif
} }
// PmuStat::exit, shown as a rendered diff: the first statement on each
// changed line appears to be the pre-commit text, the second the post-commit
// text. Everything inside the function is compiled only under __x86_64__.
// Pre-commit: replaces each stored value in column t of `counters` with
// (current reading - stored value), using the same __rdpmc selectors /
// __rdtsc() as the pre-commit enter().
// Post-commit: every row is assigned 0 and the subtraction is left behind
// as a trailing comment, so no elapsed counts are recorded.
void PmuStat::exit(int t) void PmuStat::exit(int t)
{ {
#ifdef __x86_64__ #ifdef __x86_64__
counters[0][t] = __rdpmc(0) - counters[0][t]; counters[0][t] = 0;//__rdpmc(0) - counters[0][t];
counters[1][t] = __rdpmc(1) - counters[1][t]; counters[1][t] = 0;// __rdpmc(1) - counters[1][t];
counters[2][t] = __rdpmc((1<<30)|0) - counters[2][t]; counters[2][t] = 0;// __rdpmc((1<<30)|0) - counters[2][t];
counters[3][t] = __rdpmc((1<<30)|1) - counters[3][t]; counters[3][t] = 0;// __rdpmc((1<<30)|1) - counters[3][t];
counters[4][t] = __rdpmc((1<<30)|2) - counters[4][t]; counters[4][t] = 0;// __rdpmc((1<<30)|2) - counters[4][t];
counters[5][t] = __rdtsc() - counters[5][t]; counters[5][t] = 0;// __rdtsc() - counters[5][t];
#endif #endif
} }
void PmuStat::accum(int nthreads) void PmuStat::accum(int nthreads)
{ {
#ifdef __x86_64__ #ifdef __x86_64__
tend = __rdtsc(); // tend = __rdtsc();
tend =0 ;
xmemctrs(&mrend, &mwend); xmemctrs(&mrend, &mwend);
pmu_stop(); pmu_stop();
for (int t = 0; t < nthreads; ++t) { for (int t = 0; t < nthreads; ++t) {

View File

@ -253,25 +253,14 @@ void WilsonCloverFermion<Impl>::MooeeInternal(const FermionField &in, FermionFie
} }
} }
/*
} else {
out = *Clover * in;
}
*/
} // MooeeInternal } // MooeeInternal
// Derivative parts // Derivative parts
template <class Impl> template <class Impl>
void WilsonCloverFermion<Impl>::MDeriv(GaugeField &mat, const FermionField &U, const FermionField &V, int dag) void WilsonCloverFermion<Impl>::MDeriv(GaugeField &mat, const FermionField &U, const FermionField &V, int dag)
{ {
GaugeField tmp(mat._grid); GaugeField tmp(mat._grid);
conformable(U._grid, V._grid); conformable(U._grid, V._grid);
@ -287,10 +276,64 @@ void WilsonCloverFermion<Impl>::MDeriv(GaugeField &mat, const FermionField &U, c
// Derivative parts // Derivative parts
// WilsonCloverFermion<Impl>::MooDeriv, shown as a rendered diff: on the
// first few lines the leading copy appears to be the pre-commit text and the
// trailing copy the post-commit text; the remaining lines exist only
// post-commit.
// Pre-commit: the body was `assert(0); // not implemented yet`, and the
// fermion arguments were named U and V.
// Post-commit: the fermion arguments are named X and Y, and the body
// accumulates eight link-field sums per direction mu (C1p..C4p and
// C1m..C4m) from covariant shifts of the links and of `Lambda`.
// NOTE(review): within this function `mat` is only read (its grid and its
// Lorentz components) and none of the C* sums are stored anywhere, so as
// written the function has no visible output; `dag` is not used.
template <class Impl> template <class Impl>
void WilsonCloverFermion<Impl>::MooDeriv(GaugeField &mat, const FermionField &U, const FermionField &V, int dag) void WilsonCloverFermion<Impl>::MooDeriv(GaugeField &mat, const FermionField &X, const FermionField &Y, int dag)
{ {
// Compute the 8 terms of the derivative
assert(0); // not implemented yet GridBase *grid = mat._grid;
GaugeLinkField Lambda(grid), tmp(grid);
// Lambda is a zero placeholder here; the intended X/Y outer-product form is
// only sketched in the trailing comment on the next line.
Lambda = zero; //Y*dag(X)+X*dag(Y); // I have to peek spin and decide the color structure
conformable(mat._grid, X._grid);
conformable(Y._grid, X._grid);
std::vector<GaugeLinkField> C1p(Nd,grid), C2p(Nd,grid), C3p(Nd,grid), C4p(Nd,grid);
std::vector<GaugeLinkField> C1m(Nd,grid), C2m(Nd,grid), C3m(Nd,grid), C4m(Nd,grid);
std::vector<GaugeLinkField> U(Nd, mat._grid);
// Split `mat` into one link field per direction and clear all accumulators.
// NOTE(review): the links are peeked from `mat`, which is the non-const
// argument of this function -- confirm this is the intended source of U.
for (int mu = 0; mu < Nd; mu++) {
U[mu] = PeekIndex<LorentzIndex>(mat, mu);
C1p[mu]=zero; C2p[mu]=zero; C3p[mu]=zero; C4p[mu]=zero;
C1m[mu]=zero; C2m[mu]=zero; C3m[mu]=zero; C4m[mu]=zero;
}
// NOTE(review): both loops run to the literal 4 while the vectors above are
// sized with Nd, and the nu loop does not skip nu == mu -- confirm both.
for (int mu=0;mu<4;mu++){
for (int nu=0;nu<4;nu++){
// insertion in upper staple
tmp = Impl::CovShiftIdentityBackward(Lambda, nu) * U[nu];
C1p[mu]+= Cshift(Impl::CovShiftForward(tmp, nu, Impl::CovShiftBackward(U[mu], mu, Cshift(U[nu], nu, -1))), mu, 1);
tmp = Impl::CovShiftIdentityForward(Lambda, mu) * U[mu];
C2p[mu]+= Cshift(Impl::CovShiftForward(U[nu], nu, Impl::CovShiftBackward(tmp, mu, Cshift(U[nu], nu, -1))), mu, 1);
tmp = Impl::CovShiftIdentityForward(Lambda, nu) * U[nu];
C3p[mu]+= Cshift(Impl::CovShiftForward(U[nu], nu, Impl::CovShiftBackward(U[mu], mu, Cshift(tmp, nu, -1))), mu, 1);
tmp = Lambda;
C4p[mu]+= Cshift(Impl::CovShiftForward(U[nu], nu, Impl::CovShiftBackward(U[mu], mu, Cshift(U[nu], nu, -1))),mu,1) * tmp;
// insertion in lower staple
tmp = Impl::CovShiftIdentityForward(Lambda, nu) * U[nu];
C1m[mu]+= Cshift(Impl::CovShiftBackward(tmp, nu, Impl::CovShiftBackward(U[mu], mu, U[nu])), mu, 1);
tmp = Cshift(Cshift(Lambda, nu, 2),mu, 1) * U[mu];
C2m[mu]+= Cshift(Impl::CovShiftBackward(U[nu], nu, Impl::CovShiftBackward(tmp, mu, U[nu])), mu ,1);
tmp = Cshift(Lambda, nu, 2) * U[nu];
C3m[mu]+= Cshift(Impl::CovShiftBackward(U[nu], nu, Impl::CovShiftBackward(U[mu], mu, tmp)), mu, 1);
tmp = Lambda;
C4m[mu]+= Cshift(Impl::CovShiftBackward(U[nu], nu, Impl::CovShiftBackward(U[mu], mu, U[nu])), mu, 1)* tmp;
}
}
//Still implementing. Have to be tested, and understood how to project EO
} }
// Derivative parts // Derivative parts