Patch summary (free-form text ahead of the first "diff --git" header).

lib/qcd/action/fermion/WilsonCompressor.h
  * Compress, Exchange and CompressExchange now compute into local
    SiteHalfSpinor temporaries and then write each result to its
    destination element with vstream(), instead of handing the destination
    elements directly to projector::Proj() / exchange().
  * The pointer parameters of Compress, Exchange, Decompress and
    CompressExchange gain __restrict__; Exchange's vp0/vp1 additionally
    become pointers to const.

tests/Test_compressed_lanczos_hot_start.cc
  * The lattice dimensions change from {16,16,16,16} to {32,32,16,16}.

NOTE(review): the leading whitespace of the hunk lines below was rebuilt
from a copy of this patch whose line breaks had been flattened; confirm it
against the target tree, or apply with whitespace tolerance.

diff --git a/lib/qcd/action/fermion/WilsonCompressor.h b/lib/qcd/action/fermion/WilsonCompressor.h
index b47700ac..6ec2ede9 100644
--- a/lib/qcd/action/fermion/WilsonCompressor.h
+++ b/lib/qcd/action/fermion/WilsonCompressor.h
@@ -69,39 +69,47 @@ class WilsonCompressorTemplate< _HCspinor, _Hspinor, _Spinor, projector,
   /*****************************************************/
   /* Compress includes precision change if mpi data is not same */
   /*****************************************************/
-  inline void Compress(SiteHalfSpinor *buf,Integer o,const SiteSpinor &in) {
-    projector::Proj(buf[o],in,mu,dag);
+  inline void Compress(SiteHalfSpinor * __restrict__ buf,Integer o,const SiteSpinor &in) {
+    SiteHalfSpinor tmp;
+    projector::Proj(tmp,in,mu,dag);
+    vstream(buf[o],tmp);
   }
 
   /*****************************************************/
   /* Exchange includes precision change if mpi data is not same */
   /*****************************************************/
-  inline void Exchange(SiteHalfSpinor *mp,
-                       SiteHalfSpinor *vp0,
-                       SiteHalfSpinor *vp1,
+  inline void Exchange(SiteHalfSpinor * __restrict__ mp,
+                       const SiteHalfSpinor * __restrict__ vp0,
+                       const SiteHalfSpinor * __restrict__ vp1,
                        Integer type,Integer o){
-    exchange(mp[2*o],mp[2*o+1],vp0[o],vp1[o],type);
+    SiteHalfSpinor tmp1;
+    SiteHalfSpinor tmp2;
+    exchange(tmp1,tmp2,vp0[o],vp1[o],type);
+    vstream(mp[2*o ],tmp1);
+    vstream(mp[2*o+1],tmp2);
   }
 
   /*****************************************************/
   /* Have a decompression step if mpi data is not same */
   /*****************************************************/
-  inline void Decompress(SiteHalfSpinor *out,
-                         SiteHalfSpinor *in, Integer o) {
+  inline void Decompress(SiteHalfSpinor * __restrict__ out,
+                         SiteHalfSpinor * __restrict__ in, Integer o) {
     assert(0);
   }
 
   /*****************************************************/
   /* Compress Exchange */
   /*****************************************************/
-  inline void CompressExchange(SiteHalfSpinor *out0,
-                               SiteHalfSpinor *out1,
-                               const SiteSpinor *in,
+  inline void CompressExchange(SiteHalfSpinor * __restrict__ out0,
+                               SiteHalfSpinor * __restrict__ out1,
+                               const SiteSpinor * __restrict__ in,
                                Integer j,Integer k, Integer m,Integer type){
     SiteHalfSpinor temp1, temp2,temp3,temp4;
     projector::Proj(temp1,in[k],mu,dag);
     projector::Proj(temp2,in[m],mu,dag);
-    exchange(out0[j],out1[j],temp1,temp2,type);
+    exchange(temp3,temp4,temp1,temp2,type);
+    vstream(out0[j],temp3);
+    vstream(out1[j],temp4);
   }
 
   /*****************************************************/
diff --git a/tests/Test_compressed_lanczos_hot_start.cc b/tests/Test_compressed_lanczos_hot_start.cc
index 3276d0f8..293506c2 100644
--- a/tests/Test_compressed_lanczos_hot_start.cc
+++ b/tests/Test_compressed_lanczos_hot_start.cc
@@ -167,7 +167,7 @@ int main (int argc, char ** argv) {
   RealD mass = Params.mass;
   RealD M5 = Params.M5;
   std::vector blockSize = Params.blockSize;
-  std::vector latt({16,16,16,16});
+  std::vector latt({32,32,16,16});
   uint64_t vol = Ls*latt[0]*latt[1]*latt[2]*latt[3];
   double mat_flop= 2.0*1320.0*vol;
   // Grids