mirror of
https://github.com/paboyle/Grid.git
synced 2024-11-09 23:45:36 +00:00
Replaced accelerator_for with an OpenMP target loop (#pragma omp target map) in rankInnerProduct in Lattice_reduction.h
This commit is contained in:
parent
1bda8c47fa
commit
ec2ddda12c
@ -267,18 +267,18 @@ inline ComplexD rankInnerProduct(const Lattice<vobj> &left,const Lattice<vobj> &
|
|||||||
autoView( right_v,right, AcceleratorRead);
|
autoView( right_v,right, AcceleratorRead);
|
||||||
|
|
||||||
// GPU - SIMT lane compliance...
|
// GPU - SIMT lane compliance...
|
||||||
accelerator_for( ss, sites, nsimd,{
|
//accelerator_for( ss, sites, nsimd,{
|
||||||
auto x_l = left_v(ss);
|
// auto x_l = left_v(ss);
|
||||||
auto y_l = right_v(ss);
|
// auto y_l = right_v(ss);
|
||||||
|
// coalescedWrite(inner_tmp_v[ss],innerProduct(x_l,y_l));
|
||||||
|
//});
|
||||||
|
#pragma omp target map ( to:left_v, right_v ) map ( tofrom:inner_tmp_v )
|
||||||
|
#pragma omp teams distribute parallel for thread_limit(THREAD_LIMIT) //nowait
|
||||||
|
for ( uint64_t ss=0;ss<sites;ss++) {
|
||||||
|
auto x_l = left_v[ss];
|
||||||
|
auto y_l = right_v[ss];
|
||||||
coalescedWrite(inner_tmp_v[ss],innerProduct(x_l,y_l));
|
coalescedWrite(inner_tmp_v[ss],innerProduct(x_l,y_l));
|
||||||
});
|
}
|
||||||
//#pragma omp target map ( to:left_v, right_v ) map ( tofrom:inner_tmp_v )
|
|
||||||
//#pragma omp teams distribute parallel for thread_limit(THREAD_LIMIT) //nowait
|
|
||||||
//for ( uint64_t ss=0;ss<sites;ss++) {
|
|
||||||
// auto x_l = left_v[ss];
|
|
||||||
// auto y_l = right_v[ss];
|
|
||||||
// inner_tmp_v[ss]=innerProductD(x_l,y_l);
|
|
||||||
//}
|
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
// This is in single precision and fails some tests
|
// This is in single precision and fails some tests
|
||||||
|
Loading…
Reference in New Issue
Block a user