mirror of
https://github.com/paboyle/Grid.git
synced 2025-06-13 04:37:05 +01:00
I have made the Cshift work successfully with open mp threading in
every routine. Collapse(2) is now working under clang-omp++.
This commit is contained in:
10
lib/Grid.h
10
lib/Grid.h
@ -24,21 +24,21 @@
|
||||
#include <stdio.h>
|
||||
#include <signal.h>
|
||||
|
||||
#ifndef MAX
|
||||
#define MAX(x,y) ((x)>(y)?(x):(y))
|
||||
#define MIN(x,y) ((x)>(y)?(y):(x))
|
||||
#endif
|
||||
|
||||
#include <Grid_config.h>
|
||||
|
||||
////////////////////////////////////////////////////////////
|
||||
// Tunable header includes
|
||||
////////////////////////////////////////////////////////////
|
||||
|
||||
#ifdef HAVE_OPENMP
|
||||
#define OMP
|
||||
#include <omp.h>
|
||||
#endif
|
||||
|
||||
#ifdef HAVE_MALLOC_MALLOC_H
|
||||
#include <malloc/malloc.h>
|
||||
#endif
|
||||
|
||||
#ifdef HAVE_MALLOC_H
|
||||
#include <malloc.h>
|
||||
#endif
|
||||
|
@ -250,7 +250,11 @@ namespace Grid {
|
||||
int buffer_size = _grid->_slice_nblock[dimension]*_grid->_slice_block[dimension];
|
||||
int words = sizeof(cobj)/sizeof(vector_type);
|
||||
|
||||
/* FIXME ALTERNATE BUFFER DETERMINATION ; possibly slow to allocate*/
|
||||
/*
|
||||
* possibly slow to allocate
|
||||
* Doesn't matter in this test, but may want to preallocate in the
|
||||
* dirac operators
|
||||
*/
|
||||
std::vector<std::vector<scalar_object> > send_buf_extract(Nsimd,std::vector<scalar_object>(buffer_size) );
|
||||
std::vector<std::vector<scalar_object> > recv_buf_extract(Nsimd,std::vector<scalar_object>(buffer_size) );
|
||||
int bytes = buffer_size*sizeof(scalar_object);
|
||||
|
@ -1,13 +1,17 @@
|
||||
#ifndef GRID_THREADS_H
|
||||
#define GRID_THREADS_H
|
||||
|
||||
#ifdef HAVE_OPENMP
|
||||
#ifdef _OPENMP
|
||||
#define GRID_OMP
|
||||
#endif
|
||||
|
||||
#ifdef GRID_OMP
|
||||
#include <omp.h>
|
||||
#define PARALLEL_FOR_LOOP _Pragma("omp parallel for")
|
||||
#define PARALLEL_NESTED_LOOP(n) _Pragma("omp parallel for collapse(" #n ")")
|
||||
#define PARALLEL_NESTED_LOOP2 _Pragma("omp parallel for collapse(2)")
|
||||
#else
|
||||
#define PARALLEL_FOR_LOOP
|
||||
#define PARALLEL_NESTED_LOOP(n)
|
||||
#define PARALLEL_NESTED_LOOP2
|
||||
#endif
|
||||
|
||||
namespace Grid {
|
||||
@ -20,7 +24,7 @@ class GridThread {
|
||||
static int _threads;
|
||||
|
||||
static void SetThreads(int thr) {
|
||||
#ifdef HAVE_OPENMP
|
||||
#ifdef GRID_OMP
|
||||
_threads = MIN(thr,omp_get_max_threads()) ;
|
||||
omp_set_num_threads(_threads);
|
||||
#else
|
||||
@ -28,7 +32,7 @@ class GridThread {
|
||||
#endif
|
||||
};
|
||||
static void SetMaxThreads(void) {
|
||||
#ifdef HAVE_OPENMP
|
||||
#ifdef GRID_OMP
|
||||
_threads = omp_get_max_threads();
|
||||
omp_set_num_threads(_threads);
|
||||
#else
|
||||
@ -58,7 +62,7 @@ class GridThread {
|
||||
};
|
||||
|
||||
static int ThreadBarrier(void) {
|
||||
#ifdef HAVE_OPENMP
|
||||
#ifdef GRID_OMP
|
||||
#pragma omp barrier
|
||||
return omp_get_thread_num();
|
||||
#else
|
||||
|
@ -26,16 +26,15 @@ Gather_plane_simple (const Lattice<vobj> &rhs,std::vector<cobj,alignedAllocator<
|
||||
}
|
||||
|
||||
int so = plane*rhs._grid->_ostride[dimension]; // base offset for start of plane
|
||||
int bo = 0; // offset in buffer
|
||||
|
||||
PARALLEL_NESTED_LOOP(2)
|
||||
PARALLEL_NESTED_LOOP2
|
||||
for(int n=0;n<rhs._grid->_slice_nblock[dimension];n++){
|
||||
for(int b=0;b<rhs._grid->_slice_block[dimension];b++){
|
||||
int o = n*rhs._grid->_slice_stride[dimension];
|
||||
int o = n*rhs._grid->_slice_stride[dimension];
|
||||
int bo = n*rhs._grid->_slice_block[dimension];
|
||||
int ocb=1<<rhs._grid->CheckerBoardFromOindex(o+b);// Could easily be a table lookup
|
||||
if ( ocb &cbmask ) {
|
||||
buffer[bo]=compress(rhs._odata[so+o+b]);
|
||||
bo++;
|
||||
buffer[bo+b]=compress(rhs._odata[so+o+b]);
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -55,9 +54,8 @@ Gather_plane_extract(const Lattice<vobj> &rhs,std::vector<typename cobj::scalar_
|
||||
}
|
||||
|
||||
int so = plane*rhs._grid->_ostride[dimension]; // base offset for start of plane
|
||||
int bo = 0; // offset in buffer
|
||||
|
||||
PARALLEL_NESTED_LOOP(2)
|
||||
PARALLEL_NESTED_LOOP2
|
||||
for(int n=0;n<rhs._grid->_slice_nblock[dimension];n++){
|
||||
for(int b=0;b<rhs._grid->_slice_block[dimension];b++){
|
||||
|
||||
@ -104,15 +102,15 @@ template<class vobj> void Scatter_plane_simple (Lattice<vobj> &rhs,std::vector<v
|
||||
}
|
||||
|
||||
int so = plane*rhs._grid->_ostride[dimension]; // base offset for start of plane
|
||||
int bo = 0; // offset in buffer
|
||||
|
||||
PARALLEL_NESTED_LOOP(2)
|
||||
PARALLEL_NESTED_LOOP2
|
||||
for(int n=0;n<rhs._grid->_slice_nblock[dimension];n++){
|
||||
for(int b=0;b<rhs._grid->_slice_block[dimension];b++){
|
||||
int o=n*rhs._grid->_slice_stride[dimension];
|
||||
int o =n*rhs._grid->_slice_stride[dimension];
|
||||
int bo =n*rhs._grid->_slice_block[dimension];
|
||||
int ocb=1<<rhs._grid->CheckerBoardFromOindex(o+b);// Could easily be a table lookup
|
||||
if ( ocb & cbmask ) {
|
||||
rhs._odata[so+o+b]=buffer[bo++];
|
||||
rhs._odata[so+o+b]=buffer[bo+b];
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -131,7 +129,7 @@ PARALLEL_NESTED_LOOP(2)
|
||||
|
||||
int so = plane*rhs._grid->_ostride[dimension]; // base offset for start of plane
|
||||
|
||||
PARALLEL_NESTED_LOOP(2)
|
||||
PARALLEL_NESTED_LOOP2
|
||||
for(int n=0;n<rhs._grid->_slice_nblock[dimension];n++){
|
||||
for(int b=0;b<rhs._grid->_slice_block[dimension];b++){
|
||||
|
||||
@ -160,7 +158,7 @@ template<class vobj> void Copy_plane(Lattice<vobj>& lhs,Lattice<vobj> &rhs, int
|
||||
int ro = rplane*rhs._grid->_ostride[dimension]; // base offset for start of plane
|
||||
int lo = lplane*lhs._grid->_ostride[dimension]; // base offset for start of plane
|
||||
|
||||
PARALLEL_NESTED_LOOP(2)
|
||||
PARALLEL_NESTED_LOOP2
|
||||
for(int n=0;n<rhs._grid->_slice_nblock[dimension];n++){
|
||||
for(int b=0;b<rhs._grid->_slice_block[dimension];b++){
|
||||
|
||||
@ -185,7 +183,7 @@ template<class vobj> void Copy_plane_permute(Lattice<vobj>& lhs,Lattice<vobj> &r
|
||||
int ro = rplane*rhs._grid->_ostride[dimension]; // base offset for start of plane
|
||||
int lo = lplane*lhs._grid->_ostride[dimension]; // base offset for start of plane
|
||||
|
||||
PARALLEL_NESTED_LOOP(2)
|
||||
PARALLEL_NESTED_LOOP2
|
||||
for(int n=0;n<rhs._grid->_slice_nblock[dimension];n++){
|
||||
for(int b=0;b<rhs._grid->_slice_block[dimension];b++){
|
||||
int o =n*rhs._grid->_slice_stride[dimension];
|
||||
|
@ -1,10 +1,6 @@
|
||||
#ifndef _GRID_CSHIFT_MPI_H_
|
||||
#define _GRID_CSHIFT_MPI_H_
|
||||
|
||||
#ifndef MAX
|
||||
#define MAX(x,y) ((x)>(y)?(x):(y))
|
||||
#define MIN(x,y) ((x)>(y)?(y):(x))
|
||||
#endif
|
||||
|
||||
namespace Grid {
|
||||
|
||||
|
@ -100,22 +100,21 @@ void WilsonMatrix::Dhop(const LatticeFermion &in, LatticeFermion &out)
|
||||
vHalfSpinColourVector chi;
|
||||
vSpinColourVector result;
|
||||
vHalfSpinColourVector Uchi;
|
||||
vHalfSpinColourVector *chi_p;
|
||||
int offset,local,perm, ptype;
|
||||
|
||||
PARALLEL_FOR_LOOP
|
||||
for(int sss=0;sss<grid->oSites();sss++){
|
||||
|
||||
int ss = sss;
|
||||
int ssu= sss;
|
||||
//int ss = Stencil._LebesgueReorder[sss];
|
||||
int ssu= ss;
|
||||
// int ss = Stencil._LebesgueReorder[sss];
|
||||
|
||||
// Xp
|
||||
offset = Stencil._offsets [Xp][ss];
|
||||
local = Stencil._is_local[Xp][ss];
|
||||
perm = Stencil._permute[Xp][ss];
|
||||
ptype = Stencil._permute_type[Xp];
|
||||
chi_p = &comm_buf[offset];
|
||||
|
||||
if ( local && perm )
|
||||
{
|
||||
spProjXp(tmp,in._odata[offset]);
|
||||
|
Reference in New Issue
Block a user