diff --git a/Grid/lattice/Lattice_reduction.h b/Grid/lattice/Lattice_reduction.h index 68d37cb0..b5819919 100644 --- a/Grid/lattice/Lattice_reduction.h +++ b/Grid/lattice/Lattice_reduction.h @@ -267,18 +267,18 @@ inline ComplexD rankInnerProduct(const Lattice &left,const Lattice & autoView( right_v,right, AcceleratorRead); // GPU - SIMT lane compliance... - accelerator_for( ss, sites, nsimd,{ - auto x_l = left_v(ss); - auto y_l = right_v(ss); + //accelerator_for( ss, sites, nsimd,{ + // auto x_l = left_v(ss); + // auto y_l = right_v(ss); + // coalescedWrite(inner_tmp_v[ss],innerProduct(x_l,y_l)); + //}); + #pragma omp target map ( to:left_v, right_v ) map ( tofrom:inner_tmp_v ) + #pragma omp teams distribute parallel for thread_limit(THREAD_LIMIT) //nowait + for ( uint64_t ss=0;ss