1
0
mirror of https://github.com/paboyle/Grid.git synced 2024-09-19 16:55:37 +01:00

Compare commits

...

4 Commits

Author SHA1 Message Date
Peter Boyle
d299c86633 Std::asin,acos 2024-06-11 16:41:23 -04:00
Peter Boyle
6ce52092e8 Merge branch 'develop' of https://github.com/paboyle/Grid into develop 2024-06-11 15:16:58 -04:00
Peter Boyle
b5926c1d21 Broadcast time info 2024-06-11 15:16:25 -04:00
Peter Boyle
9563238e9b Force initial to identity 2024-06-11 17:51:58 +00:00
3 changed files with 14 additions and 6 deletions

View File

@ -13,6 +13,7 @@ inline typename vobj::scalar_objectD sumD_gpu_tensor(const vobj *lat, Integer os
mysum.resize(1); mysum.resize(1);
sobj *mysum_p = & mysum[0]; sobj *mysum_p = & mysum[0];
sobj identity; zeroit(identity); sobj identity; zeroit(identity);
mysum[0] = identity;
sobj ret ; sobj ret ;
Integer nsimd= vobj::Nsimd(); Integer nsimd= vobj::Nsimd();
@ -80,13 +81,14 @@ template<class Word> Word svm_xor(Word *vec,uint64_t L)
d_sum.resize(1); d_sum.resize(1);
Word *d_sum_p=&d_sum[0]; Word *d_sum_p=&d_sum[0];
Word identity; identity=0; Word identity; identity=0;
d_sum[0] = identity;
const cl::sycl::property_list PropList ({ cl::sycl::property::reduction::initialize_to_identity() }); const cl::sycl::property_list PropList ({ cl::sycl::property::reduction::initialize_to_identity() });
theGridAccelerator->submit([&](cl::sycl::handler &cgh) { theGridAccelerator->submit([&](cl::sycl::handler &cgh) {
auto Reduction = cl::sycl::reduction(d_sum_p,identity,std::bit_xor<>(),PropList); auto Reduction = cl::sycl::reduction(d_sum_p,identity,std::bit_xor<>(),PropList);
cgh.parallel_for(cl::sycl::range<1>{L}, cgh.parallel_for(cl::sycl::range<1>{L},
Reduction, Reduction,
[=] (cl::sycl::id<1> index, auto &sum) { [=] (cl::sycl::id<1> index, auto &sum) {
sum ^=vec[index]; sum^=vec[index];
}); });
}); });
theGridAccelerator->wait(); theGridAccelerator->wait();

View File

@ -99,6 +99,8 @@ using std::log;
using std::exp; using std::exp;
using std::sin; using std::sin;
using std::cos; using std::cos;
using std::asin;
using std::acos;
accelerator_inline RealF conjugate(const RealF & r){ return r; } accelerator_inline RealF conjugate(const RealF & r){ return r; }

View File

@ -142,7 +142,9 @@ int main (int argc, char ** argv)
std:: cout << " CG site flops = "<< CGsiteflops <<std::endl; std:: cout << " CG site flops = "<< CGsiteflops <<std::endl;
int iters; int iters;
time_t now;
time_t start = time(NULL); time_t start = time(NULL);
UGrid->Broadcast(0,(void *)&start,sizeof(start));
FlightRecorder::ContinueOnFail = 0; FlightRecorder::ContinueOnFail = 0;
FlightRecorder::PrintEntireLog = 0; FlightRecorder::PrintEntireLog = 0;
@ -162,9 +164,9 @@ int main (int argc, char ** argv)
} }
std::cerr << "******************* SINGLE PRECISION SOLVE "<<iter<<std::endl; std::cerr << "******************* SINGLE PRECISION SOLVE "<<iter<<std::endl;
result_o = Zero(); result_o = Zero();
t1=usecond(); t1=usecond();
mCG(src_o,result_o); mCG(src_o,result_o);
t2=usecond(); t2=usecond();
iters = mCG.TotalInnerIterations; //Number of inner CG iterations iters = mCG.TotalInnerIterations; //Number of inner CG iterations
flops = MdagMsiteflops*4*FrbGrid->gSites()*iters; flops = MdagMsiteflops*4*FrbGrid->gSites()*iters;
flops+= CGsiteflops*FrbGrid->gSites()*iters; flops+= CGsiteflops*FrbGrid->gSites()*iters;
@ -176,7 +178,8 @@ int main (int argc, char ** argv)
std::cout << " FlightRecorder is OK! "<<std::endl; std::cout << " FlightRecorder is OK! "<<std::endl;
iter ++; iter ++;
} while (time(NULL) < (start + nsecs/10) ); now = time(NULL); UGrid->Broadcast(0,(void *)&now,sizeof(now));
} while (now < (start + nsecs/10) );
std::cout << GridLogMessage << "::::::::::::: Starting double precision CG" << std::endl; std::cout << GridLogMessage << "::::::::::::: Starting double precision CG" << std::endl;
ConjugateGradient<LatticeFermionD> CG(1.0e-8,10000); ConjugateGradient<LatticeFermionD> CG(1.0e-8,10000);
@ -189,7 +192,7 @@ int main (int argc, char ** argv)
} }
std::cerr << "******************* DOUBLE PRECISION SOLVE "<<i<<std::endl; std::cerr << "******************* DOUBLE PRECISION SOLVE "<<i<<std::endl;
result_o_2 = Zero(); result_o_2 = Zero();
t1=usecond(); t1=usecond();
CG(HermOpEO,src_o,result_o_2); CG(HermOpEO,src_o,result_o_2);
t2=usecond(); t2=usecond();
iters = CG.IterationsToComplete; iters = CG.IterationsToComplete;
@ -201,8 +204,9 @@ int main (int argc, char ** argv)
std::cout << " DoublePrecision error count "<< FlightRecorder::ErrorCount()<<std::endl; std::cout << " DoublePrecision error count "<< FlightRecorder::ErrorCount()<<std::endl;
assert(FlightRecorder::ErrorCount()==0); assert(FlightRecorder::ErrorCount()==0);
std::cout << " FlightRecorder is OK! "<<std::endl; std::cout << " FlightRecorder is OK! "<<std::endl;
now = time(NULL); UGrid->Broadcast(0,(void *)&now,sizeof(now));
i++; i++;
} while (time(NULL) < (start + nsecs) ); } while (now < (start + nsecs) );
LatticeFermionD diff_o(FrbGrid); LatticeFermionD diff_o(FrbGrid);
RealD diff = axpy_norm(diff_o, -1.0, result_o, result_o_2); RealD diff = axpy_norm(diff_o, -1.0, result_o, result_o_2);