1
0
mirror of https://github.com/paboyle/Grid.git synced 2024-11-09 23:45:36 +00:00

included pragma map in Lattice_reduction.h

This commit is contained in:
Mohammad Atif 2023-08-27 11:00:56 -04:00
parent 1bda8c47fa
commit ec2ddda12c

View File

@@ -267,18 +267,18 @@ inline ComplexD rankInnerProduct(const Lattice<vobj> &left,const Lattice<vobj> &
 autoView( right_v,right, AcceleratorRead);
 // GPU - SIMT lane compliance...
-accelerator_for( ss, sites, nsimd,{
-auto x_l = left_v(ss);
-auto y_l = right_v(ss);
+//accelerator_for( ss, sites, nsimd,{
+// auto x_l = left_v(ss);
+// auto y_l = right_v(ss);
+// coalescedWrite(inner_tmp_v[ss],innerProduct(x_l,y_l));
+//});
+#pragma omp target map ( to:left_v, right_v ) map ( tofrom:inner_tmp_v )
+#pragma omp teams distribute parallel for thread_limit(THREAD_LIMIT) //nowait
+for ( uint64_t ss=0;ss<sites;ss++) {
+auto x_l = left_v[ss];
+auto y_l = right_v[ss];
 coalescedWrite(inner_tmp_v[ss],innerProduct(x_l,y_l));
-});
-//#pragma omp target map ( to:left_v, right_v ) map ( tofrom:inner_tmp_v )
-//#pragma omp teams distribute parallel for thread_limit(THREAD_LIMIT) //nowait
-//for ( uint64_t ss=0;ss<sites;ss++) {
-// auto x_l = left_v[ss];
-// auto y_l = right_v[ss];
-// inner_tmp_v[ss]=innerProductD(x_l,y_l);
-//}
+}
 }
 #endif
 // This is in single precision and fails some tests