1
0
mirror of https://github.com/paboyle/Grid.git synced 2025-04-04 19:25:56 +01:00

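Use accelerator_loop in DhopInternalSerialComms; copy WilsonKernelsStatic::Opt into a local Opt and pass it to the DhopSite/DhopSiteDag kernels so it is available on the GPU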

This commit is contained in:
paboyle 2018-02-02 11:34:37 +00:00
parent 14ba20898a
commit a308dff410

View File

@ -430,17 +430,18 @@ void WilsonFermion5D<Impl>::DhopInternalOverlappedComms(StencilImpl & st, Lebesg
} }
// do the compute // do the compute
int Opt = WilsonKernelsStatic::Opt;
if (dag == DaggerYes) { if (dag == DaggerYes) {
for (int ss = myblock; ss < myblock+myn; ++ss) { for (int ss = myblock; ss < myblock+myn; ++ss) {
int sU = ss; int sU = ss;
int sF = LLs * sU; int sF = LLs * sU;
Kernels::DhopSiteDag(st,lo,U,st.CommBuf(),sF,sU,LLs,1,in,out,1,0); Kernels::DhopSiteDag(Opt,st,lo,U,st.CommBuf(),sF,sU,LLs,1,in,out,1,0);
} }
} else { } else {
for (int ss = myblock; ss < myblock+myn; ++ss) { for (int ss = myblock; ss < myblock+myn; ++ss) {
int sU = ss; int sU = ss;
int sF = LLs * sU; int sF = LLs * sU;
Kernels::DhopSite(st,lo,U,st.CommBuf(),sF,sU,LLs,1,in,out,1,0); Kernels::DhopSite(Opt,st,lo,U,st.CommBuf(),sF,sU,LLs,1,in,out,1,0);
} }
} }
ptime = usecond() - start; ptime = usecond() - start;
@ -462,19 +463,21 @@ void WilsonFermion5D<Impl>::DhopInternalOverlappedComms(StencilImpl & st, Lebesg
DhopFaceTime+=usecond(); DhopFaceTime+=usecond();
DhopComputeTime2-=usecond(); DhopComputeTime2-=usecond();
int Opt = WilsonKernelsStatic::Opt;
if (dag == DaggerYes) { if (dag == DaggerYes) {
int sz=st.surface_list.size(); int sz=st.surface_list.size();
int Opt = WilsonKernelsStatic::Opt;
thread_loop( (int ss = 0; ss < sz; ss++) ,{ thread_loop( (int ss = 0; ss < sz; ss++) ,{
int sU = st.surface_list[ss]; int sU = st.surface_list[ss];
int sF = LLs * sU; int sF = LLs * sU;
Kernels::DhopSiteDag(st,lo,U,st.CommBuf(),sF,sU,LLs,1,in,out,0,1); Kernels::DhopSiteDag(Opt,st,lo,U,st.CommBuf(),sF,sU,LLs,1,in,out,0,1);
}); });
} else { } else {
int sz=st.surface_list.size(); int sz=st.surface_list.size();
thread_loop( (int ss = 0; ss < sz; ss++) ,{ thread_loop( (int ss = 0; ss < sz; ss++) ,{
int sU = st.surface_list[ss]; int sU = st.surface_list[ss];
int sF = LLs * sU; int sF = LLs * sU;
Kernels::DhopSite(st,lo,U,st.CommBuf(),sF,sU,LLs,1,in,out,0,1); Kernels::DhopSite(Opt,st,lo,U,st.CommBuf(),sF,sU,LLs,1,in,out,0,1);
}); });
} }
DhopComputeTime2+=usecond(); DhopComputeTime2+=usecond();
@ -501,17 +504,18 @@ void WilsonFermion5D<Impl>::DhopInternalSerialComms(StencilImpl & st, LebesgueOr
DhopComputeTime-=usecond(); DhopComputeTime-=usecond();
// Dhop takes the 4d grid from U, and makes a 5d index for fermion // Dhop takes the 4d grid from U, and makes a 5d index for fermion
int Opt = WilsonKernelsStatic::Opt;
if (dag == DaggerYes) { if (dag == DaggerYes) {
thread_loop( (int ss = 0; ss < U.Grid()->oSites(); ss++) ,{ accelerator_loop( ss, U, {
int sU = ss; int sU = ss;
int sF = LLs * sU; int sF = LLs * sU;
Kernels::DhopSiteDag(st,lo,U,st.CommBuf(),sF,sU,LLs,1,in,out); Kernels::DhopSiteDag(Opt,st,lo,U,st.CommBuf(),sF,sU,LLs,1,in,out);
}); });
} else { } else {
thread_loop( (int ss = 0; ss < U.Grid()->oSites(); ss++) ,{ accelerator_loop( ss, U , {
int sU = ss; int sU = ss;
int sF = LLs * sU; int sF = LLs * sU;
Kernels::DhopSite(st,lo,U,st.CommBuf(),sF,sU,LLs,1,in,out); Kernels::DhopSite(Opt,st,lo,U,st.CommBuf(),sF,sU,LLs,1,in,out);
}); });
} }
DhopComputeTime+=usecond(); DhopComputeTime+=usecond();