#include using namespace std; using namespace Grid; using namespace Grid::QCD; int main (int argc, char ** argv) { Grid_init(&argc,&argv); const int Nvec=4; typedef Lattice< iVector< vReal,Nvec> > LatticeVec; typedef iVector Vec; std::vector simd_layout = GridDefaultSimd(Nd,vReal::Nsimd()); std::vector mpi_layout = GridDefaultMpi(); int threads = GridThread::GetThreads(); std::cout< latt_size ({2*mpi_layout[0],2*mpi_layout[1],4*mpi_layout[2],lat*mpi_layout[3]}); GridCartesian Grid(latt_size,simd_layout,mpi_layout); int vol = latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3]*threads; Vec tsum; tsum = zero; std::vector stop(threads); Vector sum(threads); std::vector x(threads,&Grid); double start=usecond(); PARALLEL_FOR_LOOP for(int t=0;tmax_stop ) max_stop=stop[t]; } double max_time = (max_stop-start)/Nloop*1000; double min_time = (min_stop-start)/Nloop*1000; double bytes=vol*Nvec*sizeof(Real); std::cout<