Mirror of https://github.com/paboyle/Grid.git (synced 2025-04-09 21:50:45 +01:00)
Hadrons: meson field threaded cache copy
parent 89d2fac92e
commit f4878d3a13
@@ -296,7 +296,8 @@ void TA2AMesonField<FImpl>::execute(void)
                + vol * ( 2.0 * sizeof(Complex) *nmom ) * N_iii*N_jjj* ngamma;
 
         startTimer("cache copy");
-        for(int iii=0;iii< N_iii;iii++)
+        parallel_for_nest(5)(int iii=0;iii< N_iii;iii++)
         for(int jjj=0;jjj< N_jjj;jjj++)
         for(int m =0;m< nmom;m++)
         for(int g =0;g< ngamma;g++)
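For orientation, here is a minimal standalone sketch (not part of the commit) of the collapsed parallel copy that parallel_for_nest(5) is meant to express, written in plain OpenMP. The fifth loop over t/nt, the flat array layout, and the function name are illustrative assumptions only.

// Illustrative sketch only: plain-OpenMP counterpart of the intended
// parallel_for_nest(5) cache copy. The loop over t and the flat layout are assumed.
#include <complex>
#include <cstddef>
#include <vector>

void cacheCopySketch(std::vector<std::complex<double>>       &block,
                     const std::vector<std::complex<double>> &cache,
                     int N_iii, int N_jjj, int nmom, int ngamma, int nt)
{
    // Five perfectly nested, independent loops collapsed into one parallel
    // iteration space; every element is written exactly once, so no races.
    #pragma omp parallel for collapse(5)
    for (int iii = 0; iii < N_iii; iii++)
    for (int jjj = 0; jjj < N_jjj; jjj++)
    for (int m = 0; m < nmom; m++)
    for (int g = 0; g < ngamma; g++)
    for (int t = 0; t < nt; t++)
    {
        std::size_t idx = ((((std::size_t)m*ngamma + g)*nt + t)*N_iii + iii)*N_jjj + jjj;
        block[idx] = cache[idx];   // element-wise copy, order-independent
    }
}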
@@ -310,30 +311,30 @@ void TA2AMesonField<FImpl>::execute(void)
     // IO
     if (!par().output.empty())
     {
         double blockSize, ioTime;
 
         LOG(Message) << "Writing block to disk" << std::endl;
         ioTime = -getDTimer("IO: write block");
         startTimer("IO: total");
         for(int m = 0; m < nmom; m++)
         for(int g = 0; g < ngamma; g++)
-        {
-        if ((i == 0) and (j == 0))
         {
+            if ((i == 0) and (j == 0))
+            {
                 startTimer("IO: file creation");
                 initFile(m, g);
                 stopTimer("IO: file creation");
             }
             startTimer("IO: write block");
             saveBlock(mfBlock, m, g, i, j);
             stopTimer("IO: write block");
         }
         stopTimer("IO: total");
         blockSize = static_cast<double>(nmom*ngamma*nt*N_ii*N_jj*sizeof(Complex));
         ioTime += getDTimer("IO: write block");
         LOG(Message) << "HDF5 IO done " << blockSize/ioTime*1.0e6/1024/1024
                      << " MB/s" << std::endl;
     }
 }
 
 double nodes = env().getGrid()->NodeCount();
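The logging at the end of this hunk converts the block size in bytes and the accumulated write time into a MB/s figure. A small self-contained sketch of that conversion follows; the assumption, suggested by the 1.0e6 factor, is that the timer value is in microseconds, and ioRateMBs is an illustrative name.

// Illustrative sketch of the HDF5 throughput message; assumes the timer
// returns elapsed time in microseconds, as the 1.0e6 factor suggests.
#include <cstdio>

double ioRateMBs(double blockSizeBytes, double ioTimeUs)
{
    return blockSizeBytes / ioTimeUs * 1.0e6 / 1024 / 1024;  // bytes/us -> MB/s
}

int main()
{
    // e.g. a 512 MiB block written in 2 seconds reports ~256 MB/s
    std::printf("HDF5 IO done %.1f MB/s\n", ioRateMBs(512.0*1024*1024, 2.0e6));
    return 0;
}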
@@ -174,7 +174,7 @@ void makeMesonFieldBlock(MesonField &mat,
     if (caller) caller->startTimer("contraction: spin trace");
     int pd = grid->_processors[orthogdim];
     int pc = grid->_processor_coor[orthogdim];
-    parallel_for_nest2(int lt=0;lt<ld;lt++)
+    parallel_for_nest(2)(int lt=0;lt<ld;lt++)
     {
         for(int pt=0;pt<pd;pt++)
         {
@@ -94,7 +94,7 @@ Gather_plane_extract(const Lattice<vobj> &rhs,std::vector<typename vobj::scalar_
     int n1=rhs._grid->_slice_stride[dimension];
 
     if ( cbmask ==0x3){
-        parallel_for_nest2(int n=0;n<e1;n++){
+        parallel_for_nest(2)(int n=0;n<e1;n++){
             for(int b=0;b<e2;b++){
 
                 int o = n*n1;
@@ -110,7 +110,7 @@ Gather_plane_extract(const Lattice<vobj> &rhs,std::vector<typename vobj::scalar_
     // Case of SIMD split AND checker dim cannot currently be hit, except in
     // Test_cshift_red_black code.
     std::cout << " Dense packed buffer WARNING " <<std::endl;
-    parallel_for_nest2(int n=0;n<e1;n++){
+    parallel_for_nest(2)(int n=0;n<e1;n++){
         for(int b=0;b<e2;b++){
 
             int o=n*n1;
@@ -191,7 +191,7 @@ template<class vobj> void Scatter_plane_merge(Lattice<vobj> &rhs,std::vector<typ
     int e2=rhs._grid->_slice_block[dimension];
 
     if(cbmask ==0x3 ) {
-        parallel_for_nest2(int n=0;n<e1;n++){
+        parallel_for_nest(2)(int n=0;n<e1;n++){
             for(int b=0;b<e2;b++){
                 int o = n*rhs._grid->_slice_stride[dimension];
                 int offset = b+n*rhs._grid->_slice_block[dimension];
@@ -522,7 +522,7 @@ static void sliceMaddVector(Lattice<vobj> &R,std::vector<RealD> &a,const Lattice
 
     tensor_reduced at; at=av;
 
-    parallel_for_nest2(int n=0;n<e1;n++){
+    parallel_for_nest(2)(int n=0;n<e1;n++){
         for(int b=0;b<e2;b++){
             int ss= so+n*stride+b;
             R._odata[ss] = at*X._odata[ss]+Y._odata[ss];
@@ -41,12 +41,14 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
 #define PARALLEL_FOR_LOOP _Pragma("omp parallel for schedule(static)")
 #define PARALLEL_FOR_LOOP_INTERN _Pragma("omp for schedule(static)")
 #define PARALLEL_NESTED_LOOP2 _Pragma("omp parallel for collapse(2)")
+#define PARALLEL_NESTED_LOOP(n) _Pragma("omp parallel for collapse(n)")
 #define PARALLEL_REGION _Pragma("omp parallel")
 #define PARALLEL_CRITICAL _Pragma("omp critical")
 #else
 #define PARALLEL_FOR_LOOP
 #define PARALLEL_FOR_LOOP_INTERN
 #define PARALLEL_NESTED_LOOP2
+#define PARALLEL_NESTED_LOOP(n)
 #define PARALLEL_REGION
 #define PARALLEL_CRITICAL
 #endif
@@ -54,7 +56,7 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
 #define parallel_region PARALLEL_REGION
 #define parallel_for PARALLEL_FOR_LOOP for
 #define parallel_for_internal PARALLEL_FOR_LOOP_INTERN for
-#define parallel_for_nest2 PARALLEL_NESTED_LOOP2 for
+#define parallel_for_nest(n) PARALLEL_NESTED_LOOP(n) for
 
 namespace Grid {
 
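A general note on parameterised _Pragma macros such as the new PARALLEL_NESTED_LOOP(n): the C/C++ preprocessor does not substitute macro parameters inside string literals, so the collapse depth is normally routed through a stringising helper. A minimal sketch of that conventional pattern follows; the SKETCH_* names and the sketchFill example are illustrative and not part of Grid.

// Illustrative only; the SKETCH_* names are not from the Grid sources.
// Stringising the whole pragma lets the collapse depth be substituted,
// which a parameter inside a string literal ("collapse(n)") would not be.
#define SKETCH_PRAGMA(x)             _Pragma(#x)
#define SKETCH_NESTED_LOOP(n)        SKETCH_PRAGMA(omp parallel for collapse(n))
#define sketch_parallel_for_nest(n)  SKETCH_NESTED_LOOP(n) for

// Usage mirrors the call sites touched by this commit, e.g.
//   sketch_parallel_for_nest(2)(int lt = 0; lt < ld; lt++) { ... }
void sketchFill(double *a, int ni, int nj)
{
    sketch_parallel_for_nest(2)(int i = 0; i < ni; i++)
    for (int j = 0; j < nj; j++)
    {
        a[(long)i*nj + j] = 0.0;   // independent writes, safe to collapse
    }
}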