mirror of
https://github.com/paboyle/Grid.git
synced 2025-04-10 06:00:45 +01:00
Aim to reduce the amount of cuda and other code variations floating around all over the place. Will move GpuInit iinto Accelerator.cc from Init.cc Need to worry about SharedMemoryMPI.cc and the Peer2Peer windows
128 lines
3.5 KiB
C++
128 lines
3.5 KiB
C++
/*************************************************************************************
|
|
|
|
Grid physics library, www.github.com/paboyle/Grid
|
|
|
|
Source file: ./lib/ThreadReduction.h
|
|
|
|
Copyright (C) 2015
|
|
|
|
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
|
Author: paboyle <paboyle@ph.ed.ac.uk>
|
|
|
|
This program is free software; you can redistribute it and/or modify
|
|
it under the terms of the GNU General Public License as published by
|
|
the Free Software Foundation; either version 2 of the License, or
|
|
(at your option) any later version.
|
|
|
|
This program is distributed in the hope that it will be useful,
|
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
GNU General Public License for more details.
|
|
|
|
You should have received a copy of the GNU General Public License along
|
|
with this program; if not, write to the Free Software Foundation, Inc.,
|
|
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
|
|
|
See the full license in the file "LICENSE" in the top level distribution directory
|
|
*************************************************************************************/
|
|
/* END LEGAL */
|
|
#pragma once
|
|
|
|
// Introduce a class to gain deterministic bit reproducible reduction.
|
|
// make static; perhaps just a namespace is required.
|
|
NAMESPACE_BEGIN(Grid);
|
|
|
|
class GridThread {
|
|
public:
|
|
static int _threads;
|
|
static int _hyperthreads;
|
|
static int _cores;
|
|
|
|
static void SetCores(int cr) {
|
|
#ifdef GRID_OMP
|
|
_cores = cr;
|
|
#else
|
|
_cores = 1;
|
|
#endif
|
|
}
|
|
static void SetThreads(int thr) {
|
|
#ifdef GRID_OMP
|
|
_threads = MIN(thr,omp_get_max_threads()) ;
|
|
omp_set_num_threads(_threads);
|
|
#else
|
|
_threads = 1;
|
|
#endif
|
|
};
|
|
static void SetMaxThreads(void) {
|
|
#ifdef GRID_OMP
|
|
_threads = omp_get_max_threads();
|
|
omp_set_num_threads(_threads);
|
|
#else
|
|
_threads = 1;
|
|
#endif
|
|
};
|
|
static int GetHyperThreads(void) { assert(_threads%_cores ==0); return _threads/_cores; };
|
|
static int GetCores(void) { return _cores; };
|
|
static int GetThreads(void) { return _threads; };
|
|
static int SumArraySize(void) {return _threads;};
|
|
|
|
static void GetWork(int nwork, int me, int & mywork, int & myoff){
|
|
GetWork(nwork,me,mywork,myoff,_threads);
|
|
}
|
|
static void GetWork(int nwork, int me, int & mywork, int & myoff,int units){
|
|
int basework = nwork/units;
|
|
int backfill = units-(nwork%units);
|
|
if ( me >= units ) {
|
|
mywork = myoff = 0;
|
|
} else {
|
|
mywork = (nwork+me)/units;
|
|
myoff = basework * me;
|
|
if ( me > backfill )
|
|
myoff+= (me-backfill);
|
|
}
|
|
return;
|
|
};
|
|
|
|
static void GetWorkBarrier(int nwork, int &me, int & mywork, int & myoff){
|
|
me = ThreadBarrier();
|
|
GetWork(nwork,me,mywork,myoff);
|
|
};
|
|
|
|
static int ThreadBarrier(void) {
|
|
#ifdef GRID_OMP
|
|
#pragma omp barrier
|
|
return omp_get_thread_num();
|
|
#else
|
|
return 0;
|
|
#endif
|
|
};
|
|
|
|
template<class obj> static void ThreadSum( std::vector<obj> &sum_array,obj &val,int me){
|
|
sum_array[me] = val;
|
|
val=Zero();
|
|
ThreadBarrier();
|
|
for(int i=0;i<_threads;i++) val+= sum_array[i];
|
|
ThreadBarrier();
|
|
}
|
|
|
|
static void bcopy(const void *src, void *dst, size_t len) {
|
|
#ifdef GRID_OMP
|
|
#pragma omp parallel
|
|
{
|
|
const char *c_src =(char *) src;
|
|
char *c_dest=(char *) dst;
|
|
int me,mywork,myoff;
|
|
GridThread::GetWorkBarrier(len,me, mywork,myoff);
|
|
bcopy(&c_src[myoff],&c_dest[myoff],mywork);
|
|
}
|
|
#else
|
|
bcopy(src,dst,len);
|
|
#endif
|
|
}
|
|
|
|
|
|
};
|
|
|
|
NAMESPACE_END(Grid);
|
|
|