mirror of
https://github.com/paboyle/Grid.git
synced 2024-11-09 23:45:36 +00:00
included pragma map in Lattice_reduction.h
This commit is contained in:
parent
1bda8c47fa
commit
ec2ddda12c
@ -267,18 +267,18 @@ inline ComplexD rankInnerProduct(const Lattice<vobj> &left,const Lattice<vobj> &
|
||||
autoView( right_v,right, AcceleratorRead);
|
||||
|
||||
// GPU - SIMT lane compliance...
|
||||
accelerator_for( ss, sites, nsimd,{
|
||||
auto x_l = left_v(ss);
|
||||
auto y_l = right_v(ss);
|
||||
//accelerator_for( ss, sites, nsimd,{
|
||||
// auto x_l = left_v(ss);
|
||||
// auto y_l = right_v(ss);
|
||||
// coalescedWrite(inner_tmp_v[ss],innerProduct(x_l,y_l));
|
||||
//});
|
||||
#pragma omp target map ( to:left_v, right_v ) map ( tofrom:inner_tmp_v )
|
||||
#pragma omp teams distribute parallel for thread_limit(THREAD_LIMIT) //nowait
|
||||
for ( uint64_t ss=0;ss<sites;ss++) {
|
||||
auto x_l = left_v[ss];
|
||||
auto y_l = right_v[ss];
|
||||
coalescedWrite(inner_tmp_v[ss],innerProduct(x_l,y_l));
|
||||
});
|
||||
//#pragma omp target map ( to:left_v, right_v ) map ( tofrom:inner_tmp_v )
|
||||
//#pragma omp teams distribute parallel for thread_limit(THREAD_LIMIT) //nowait
|
||||
//for ( uint64_t ss=0;ss<sites;ss++) {
|
||||
// auto x_l = left_v[ss];
|
||||
// auto y_l = right_v[ss];
|
||||
// inner_tmp_v[ss]=innerProductD(x_l,y_l);
|
||||
//}
|
||||
}
|
||||
}
|
||||
#endif
|
||||
// This is in single precision and fails some tests
|
||||
|
Loading…
Reference in New Issue
Block a user