mirror of
https://github.com/paboyle/Grid.git
synced 2025-04-09 21:50:45 +01:00
Thread loop constructs changing a little
This commit is contained in:
parent
462900b48d
commit
cb336aa8f8
@ -170,7 +170,7 @@ public:
|
|||||||
if ((MatLeft::Options == Eigen::RowMajor) and
|
if ((MatLeft::Options == Eigen::RowMajor) and
|
||||||
(MatRight::Options == Eigen::ColMajor))
|
(MatRight::Options == Eigen::ColMajor))
|
||||||
{
|
{
|
||||||
thread_loop( (unsigned int r = 0; r < a.rows(); ++r),
|
thread_for(r,a.rows(),
|
||||||
{
|
{
|
||||||
C tmp;
|
C tmp;
|
||||||
#ifdef USE_MKL
|
#ifdef USE_MKL
|
||||||
@ -186,7 +186,7 @@ public:
|
|||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
thread_loop( (unsigned int c = 0; c < a.cols(); ++c),
|
thread_for(c,a.cols(),
|
||||||
{
|
{
|
||||||
C tmp;
|
C tmp;
|
||||||
#ifdef USE_MKL
|
#ifdef USE_MKL
|
||||||
@ -646,13 +646,14 @@ void A2AMatrixBlockComputation<T, Field, MetadataType, TIo>
|
|||||||
bytes += kernel.bytes(N_iii, N_jjj);
|
bytes += kernel.bytes(N_iii, N_jjj);
|
||||||
|
|
||||||
START_TIMER("cache copy");
|
START_TIMER("cache copy");
|
||||||
thread_loop_collapse( 5, (int e =0;e<next_;e++),
|
thread_for_collapse( 5,e,next_,{
|
||||||
for(int s =0;s< nstr_;s++)
|
for(int s =0;s< nstr_;s++)
|
||||||
for(int t =0;t< nt_;t++)
|
for(int t =0;t< nt_;t++)
|
||||||
for(int iii=0;iii< N_iii;iii++)
|
for(int iii=0;iii< N_iii;iii++)
|
||||||
for(int jjj=0;jjj< N_jjj;jjj++)
|
for(int jjj=0;jjj< N_jjj;jjj++)
|
||||||
{
|
{
|
||||||
mBlock(e,s,t,ii+iii,jj+jjj) = mCacheBlock(e,s,t,iii,jjj);
|
mBlock(e,s,t,ii+iii,jj+jjj) = mCacheBlock(e,s,t,iii,jjj);
|
||||||
|
}
|
||||||
});
|
});
|
||||||
STOP_TIMER("cache copy");
|
STOP_TIMER("cache copy");
|
||||||
}
|
}
|
||||||
|
@ -29,7 +29,6 @@ See the full license in the file "LICENSE" in the top level distribution directo
|
|||||||
#include <Hadrons/Global.hpp>
|
#include <Hadrons/Global.hpp>
|
||||||
|
|
||||||
using namespace Grid;
|
using namespace Grid;
|
||||||
using namespace QCD;
|
|
||||||
using namespace Hadrons;
|
using namespace Hadrons;
|
||||||
|
|
||||||
HadronsLogger Hadrons::HadronsLogError(1,"Error");
|
HadronsLogger Hadrons::HadronsLogError(1,"Error");
|
||||||
|
@ -50,7 +50,6 @@ See the full license in the file "LICENSE" in the top level distribution directo
|
|||||||
|
|
||||||
#define BEGIN_HADRONS_NAMESPACE \
|
#define BEGIN_HADRONS_NAMESPACE \
|
||||||
namespace Grid {\
|
namespace Grid {\
|
||||||
using namespace QCD;\
|
|
||||||
namespace Hadrons {\
|
namespace Hadrons {\
|
||||||
using Grid::operator<<;\
|
using Grid::operator<<;\
|
||||||
using Grid::operator>>;
|
using Grid::operator>>;
|
||||||
|
@ -140,7 +140,7 @@ void TFourQuark<FImpl1, FImpl2>::tensorprod(LatticeSpinColourSpinColourMatrix &l
|
|||||||
auto lret_v = lret.View();
|
auto lret_v = lret.View();
|
||||||
auto a_v = a.View();
|
auto a_v = a.View();
|
||||||
auto b_v = b.View();
|
auto b_v = b.View();
|
||||||
thread_loop( (auto site=lret_v.begin();site<lret_v.end();site++) ,{
|
thread_foreach( site,lret_v,{
|
||||||
vTComplex left;
|
vTComplex left;
|
||||||
for(int si=0; si < Ns; ++si){
|
for(int si=0; si < Ns; ++si){
|
||||||
for(int sj=0; sj < Ns; ++sj){
|
for(int sj=0; sj < Ns; ++sj){
|
||||||
|
@ -28,7 +28,6 @@ See the full license in the file "LICENSE" in the top level distribution directo
|
|||||||
#include <Hadrons/TimerArray.hpp>
|
#include <Hadrons/TimerArray.hpp>
|
||||||
|
|
||||||
using namespace Grid;
|
using namespace Grid;
|
||||||
using namespace QCD;
|
|
||||||
using namespace Hadrons;
|
using namespace Hadrons;
|
||||||
|
|
||||||
void TimerArray::startTimer(const std::string &name)
|
void TimerArray::startTimer(const std::string &name)
|
||||||
|
@ -30,7 +30,6 @@ See the full license in the file "LICENSE" in the top level distribution directo
|
|||||||
#include <Hadrons/TimerArray.hpp>
|
#include <Hadrons/TimerArray.hpp>
|
||||||
|
|
||||||
using namespace Grid;
|
using namespace Grid;
|
||||||
using namespace QCD;
|
|
||||||
using namespace Hadrons;
|
using namespace Hadrons;
|
||||||
|
|
||||||
#define TIME_MOD(t) (((t) + par.global.nt) % par.global.nt)
|
#define TIME_MOD(t) (((t) + par.global.nt) % par.global.nt)
|
||||||
@ -353,11 +352,12 @@ int main(int argc, char* argv[])
|
|||||||
|
|
||||||
tAr.startTimer("Transpose caching");
|
tAr.startTimer("Transpose caching");
|
||||||
lastTerm[t].resize(ref.rows(), ref.cols());
|
lastTerm[t].resize(ref.rows(), ref.cols());
|
||||||
thread_loop( (unsigned int j = 0; j < ref.cols(); ++j),
|
thread_for( j,ref.cols(),{
|
||||||
for (unsigned int i = 0; i < ref.rows(); ++i)
|
for (unsigned int i = 0; i < ref.rows(); ++i)
|
||||||
{
|
{
|
||||||
lastTerm[t](i, j) = ref(i, j);
|
lastTerm[t](i, j) = ref(i, j);
|
||||||
});
|
}
|
||||||
|
});
|
||||||
tAr.stopTimer("Transpose caching");
|
tAr.stopTimer("Transpose caching");
|
||||||
}
|
}
|
||||||
bytes = par.global.nt*lastTerm[0].rows()*lastTerm[0].cols()*sizeof(ComplexD);
|
bytes = par.global.nt*lastTerm[0].rows()*lastTerm[0].cols()*sizeof(ComplexD);
|
||||||
|
@ -205,7 +205,7 @@ void fullTrBenchmark(const unsigned int ni, const unsigned int nj, const unsigne
|
|||||||
auto nr = a.rows(), nc = a.cols();
|
auto nr = a.rows(), nc = a.cols();
|
||||||
|
|
||||||
res = 0.;
|
res = 0.;
|
||||||
thread_loop( (unsigned int i = 0; i < nr; ++i),
|
thread_for(i,nr,
|
||||||
{
|
{
|
||||||
ComplexD tmp = 0.;
|
ComplexD tmp = 0.;
|
||||||
|
|
||||||
@ -225,7 +225,7 @@ void fullTrBenchmark(const unsigned int ni, const unsigned int nj, const unsigne
|
|||||||
auto nr = a.rows(), nc = a.cols();
|
auto nr = a.rows(), nc = a.cols();
|
||||||
|
|
||||||
res = 0.;
|
res = 0.;
|
||||||
thread_loop( (unsigned int j = 0; j < nc; ++j),
|
thread_for(j,nc,
|
||||||
{
|
{
|
||||||
ComplexD tmp = 0.;
|
ComplexD tmp = 0.;
|
||||||
|
|
||||||
@ -248,7 +248,7 @@ void fullTrBenchmark(const unsigned int ni, const unsigned int nj, const unsigne
|
|||||||
[](ComplexD &res, const MatLeft &a, const MatRight &b)
|
[](ComplexD &res, const MatLeft &a, const MatRight &b)
|
||||||
{
|
{
|
||||||
res = 0.;
|
res = 0.;
|
||||||
thread_loop( (unsigned int r = 0; r < a.rows(); ++r),
|
thread_for(r,a.rows(),
|
||||||
{
|
{
|
||||||
ComplexD tmp;
|
ComplexD tmp;
|
||||||
|
|
||||||
@ -263,7 +263,7 @@ void fullTrBenchmark(const unsigned int ni, const unsigned int nj, const unsigne
|
|||||||
[](ComplexD &res, const MatLeft &a, const MatRight &b)
|
[](ComplexD &res, const MatLeft &a, const MatRight &b)
|
||||||
{
|
{
|
||||||
res = 0.;
|
res = 0.;
|
||||||
thread_loop( (unsigned int c = 0; c < a.cols(); ++c),
|
thread_for(c,a.cols(),
|
||||||
{
|
{
|
||||||
ComplexD tmp;
|
ComplexD tmp;
|
||||||
|
|
||||||
@ -284,7 +284,7 @@ void fullTrBenchmark(const unsigned int ni, const unsigned int nj, const unsigne
|
|||||||
[](ComplexD &res, const MatLeft &a, const MatRight &b)
|
[](ComplexD &res, const MatLeft &a, const MatRight &b)
|
||||||
{
|
{
|
||||||
res = 0.;
|
res = 0.;
|
||||||
thread_loop( (unsigned int r = 0; r < a.rows(); ++r),
|
thread_for(r,a.rows(),
|
||||||
{
|
{
|
||||||
ComplexD tmp;
|
ComplexD tmp;
|
||||||
|
|
||||||
@ -299,7 +299,7 @@ void fullTrBenchmark(const unsigned int ni, const unsigned int nj, const unsigne
|
|||||||
[](ComplexD &res, const MatLeft &a, const MatRight &b)
|
[](ComplexD &res, const MatLeft &a, const MatRight &b)
|
||||||
{
|
{
|
||||||
res = 0.;
|
res = 0.;
|
||||||
thread_loop( (unsigned int c = 0; c < a.cols(); ++c),
|
thread_for(c,a.cols(),
|
||||||
{
|
{
|
||||||
ComplexD tmp;
|
ComplexD tmp;
|
||||||
|
|
||||||
|
@ -29,7 +29,6 @@ See the full license in the file "LICENSE" in the top level distribution directo
|
|||||||
#include <Hadrons/Environment.hpp>
|
#include <Hadrons/Environment.hpp>
|
||||||
|
|
||||||
using namespace Grid;
|
using namespace Grid;
|
||||||
using namespace QCD;
|
|
||||||
using namespace Hadrons;
|
using namespace Hadrons;
|
||||||
|
|
||||||
template <typename FOut, typename FIn>
|
template <typename FOut, typename FIn>
|
||||||
|
Loading…
x
Reference in New Issue
Block a user