mirror of
https://github.com/paboyle/Grid.git
synced 2025-06-18 07:47:06 +01:00
Improvements to the assembler interface that let us move chunks of the
site and s loop into the kernels. This will save on function call overhead and guarantee that the L2 prefetching strategy is right, since OMP can't distribute the sub-chunks of work.
This commit is contained in:
@ -296,12 +296,12 @@ PARALLEL_FOR_LOOP
|
||||
if ( dag == DaggerYes ) {
|
||||
PARALLEL_FOR_LOOP
|
||||
for(int sss=0;sss<in._grid->oSites();sss++){
|
||||
Kernels::DiracOptDhopSiteDag(st,U,st.comm_buf,sss,sss,in,out);
|
||||
Kernels::DiracOptDhopSiteDag(st,U,st.comm_buf,sss,sss,1,1,in,out);
|
||||
}
|
||||
} else {
|
||||
PARALLEL_FOR_LOOP
|
||||
for(int sss=0;sss<in._grid->oSites();sss++){
|
||||
Kernels::DiracOptDhopSite(st,U,st.comm_buf,sss,sss,in,out);
|
||||
Kernels::DiracOptDhopSite(st,U,st.comm_buf,sss,sss,1,1,in,out);
|
||||
}
|
||||
}
|
||||
};
|
||||
|
Reference in New Issue
Block a user