mirror of
https://github.com/paboyle/Grid.git
synced 2025-04-05 11:45:56 +01:00
debug new AcceleratorView
This commit is contained in:
parent
2a1387e992
commit
04863f8f38
@ -9,6 +9,7 @@ Copyright (C) 2015
|
||||
Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk>
|
||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||
Author: neo <cossu@post.kek.jp>
|
||||
Author: Christoph Lehner <christoph@lhnr.de>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
@ -94,7 +95,7 @@ const lobj & eval(const uint64_t ss, const LatticeView<lobj> &arg)
|
||||
// Expression-leaf evaluation for a Lattice argument: obtains a view of `arg`
// in ViewRead mode via AcceleratorView and returns a const reference to the
// view's element at index `ss`.
// NOTE(review): the returned reference comes from view[ss] on a local view
// object; presumably it refers to storage owned by `arg` and so outlives the
// local — confirm against LatticeView::operator[].
template <class lobj> accelerator_inline
const lobj & eval(const uint64_t ss, const Lattice<lobj> &arg)
{
  auto view = arg.AcceleratorView(ViewRead);
  return view[ss];
}
|
||||
|
||||
|
@ -7,6 +7,7 @@
|
||||
Copyright (C) 2015
|
||||
|
||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||
Author: Christoph Lehner <christoph@lhnr.de>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
@ -36,9 +37,9 @@ NAMESPACE_BEGIN(Grid);
|
||||
template<class obj1,class obj2,class obj3> inline
|
||||
void mult(Lattice<obj1> &ret,const Lattice<obj2> &lhs,const Lattice<obj3> &rhs){
|
||||
ret.Checkerboard() = lhs.Checkerboard();
|
||||
auto ret_v = ret.View();
|
||||
auto lhs_v = lhs.View();
|
||||
auto rhs_v = rhs.View();
|
||||
auto ret_v = ret.AcceleratorView(ViewWrite);
|
||||
auto lhs_v = lhs.AcceleratorView(ViewRead);
|
||||
auto rhs_v = rhs.AcceleratorView(ViewRead);
|
||||
conformable(ret,rhs);
|
||||
conformable(lhs,rhs);
|
||||
accelerator_for(ss,lhs_v.size(),obj1::Nsimd(),{
|
||||
@ -55,9 +56,9 @@ void mac(Lattice<obj1> &ret,const Lattice<obj2> &lhs,const Lattice<obj3> &rhs){
|
||||
ret.Checkerboard() = lhs.Checkerboard();
|
||||
conformable(ret,rhs);
|
||||
conformable(lhs,rhs);
|
||||
auto ret_v = ret.View();
|
||||
auto lhs_v = lhs.View();
|
||||
auto rhs_v = rhs.View();
|
||||
auto ret_v = ret.AcceleratorView(ViewWrite);
|
||||
auto lhs_v = lhs.AcceleratorView(ViewRead);
|
||||
auto rhs_v = rhs.AcceleratorView(ViewRead);
|
||||
accelerator_for(ss,lhs_v.size(),obj1::Nsimd(),{
|
||||
decltype(coalescedRead(obj1())) tmp;
|
||||
auto lhs_t=lhs_v(ss);
|
||||
@ -72,9 +73,9 @@ void sub(Lattice<obj1> &ret,const Lattice<obj2> &lhs,const Lattice<obj3> &rhs){
|
||||
ret.Checkerboard() = lhs.Checkerboard();
|
||||
conformable(ret,rhs);
|
||||
conformable(lhs,rhs);
|
||||
auto ret_v = ret.View();
|
||||
auto lhs_v = lhs.View();
|
||||
auto rhs_v = rhs.View();
|
||||
auto ret_v = ret.AcceleratorView(ViewWrite);
|
||||
auto lhs_v = lhs.AcceleratorView(ViewRead);
|
||||
auto rhs_v = rhs.AcceleratorView(ViewRead);
|
||||
accelerator_for(ss,lhs_v.size(),obj1::Nsimd(),{
|
||||
decltype(coalescedRead(obj1())) tmp;
|
||||
auto lhs_t=lhs_v(ss);
|
||||
@ -88,9 +89,9 @@ void add(Lattice<obj1> &ret,const Lattice<obj2> &lhs,const Lattice<obj3> &rhs){
|
||||
ret.Checkerboard() = lhs.Checkerboard();
|
||||
conformable(ret,rhs);
|
||||
conformable(lhs,rhs);
|
||||
auto ret_v = ret.View();
|
||||
auto lhs_v = lhs.View();
|
||||
auto rhs_v = rhs.View();
|
||||
auto ret_v = ret.AcceleratorView(ViewWrite);
|
||||
auto lhs_v = lhs.AcceleratorView(ViewRead);
|
||||
auto rhs_v = rhs.AcceleratorView(ViewRead);
|
||||
accelerator_for(ss,lhs_v.size(),obj1::Nsimd(),{
|
||||
decltype(coalescedRead(obj1())) tmp;
|
||||
auto lhs_t=lhs_v(ss);
|
||||
@ -107,8 +108,8 @@ template<class obj1,class obj2,class obj3> inline
|
||||
void mult(Lattice<obj1> &ret,const Lattice<obj2> &lhs,const obj3 &rhs){
|
||||
ret.Checkerboard() = lhs.Checkerboard();
|
||||
conformable(lhs,ret);
|
||||
auto ret_v = ret.View();
|
||||
auto lhs_v = lhs.View();
|
||||
auto ret_v = ret.AcceleratorView(ViewWrite);
|
||||
auto lhs_v = lhs.AcceleratorView(ViewRead);
|
||||
accelerator_for(ss,lhs_v.size(),obj1::Nsimd(),{
|
||||
decltype(coalescedRead(obj1())) tmp;
|
||||
mult(&tmp,&lhs_v(ss),&rhs);
|
||||
@ -120,8 +121,8 @@ template<class obj1,class obj2,class obj3> inline
|
||||
void mac(Lattice<obj1> &ret,const Lattice<obj2> &lhs,const obj3 &rhs){
|
||||
ret.Checkerboard() = lhs.Checkerboard();
|
||||
conformable(ret,lhs);
|
||||
auto ret_v = ret.View();
|
||||
auto lhs_v = lhs.View();
|
||||
auto ret_v = ret.AcceleratorView(ViewWrite);
|
||||
auto lhs_v = lhs.AcceleratorView(ViewRead);
|
||||
accelerator_for(ss,lhs_v.size(),obj1::Nsimd(),{
|
||||
decltype(coalescedRead(obj1())) tmp;
|
||||
auto lhs_t=lhs_v(ss);
|
||||
@ -134,8 +135,8 @@ template<class obj1,class obj2,class obj3> inline
|
||||
void sub(Lattice<obj1> &ret,const Lattice<obj2> &lhs,const obj3 &rhs){
|
||||
ret.Checkerboard() = lhs.Checkerboard();
|
||||
conformable(ret,lhs);
|
||||
auto ret_v = ret.View();
|
||||
auto lhs_v = lhs.View();
|
||||
auto ret_v = ret.AcceleratorView(ViewWrite);
|
||||
auto lhs_v = lhs.AcceleratorView(ViewRead);
|
||||
accelerator_for(ss,lhs_v.size(),obj1::Nsimd(),{
|
||||
decltype(coalescedRead(obj1())) tmp;
|
||||
auto lhs_t=lhs_v(ss);
|
||||
@ -147,8 +148,8 @@ template<class obj1,class obj2,class obj3> inline
|
||||
void add(Lattice<obj1> &ret,const Lattice<obj2> &lhs,const obj3 &rhs){
|
||||
ret.Checkerboard() = lhs.Checkerboard();
|
||||
conformable(lhs,ret);
|
||||
auto ret_v = ret.View();
|
||||
auto lhs_v = lhs.View();
|
||||
auto ret_v = ret.AcceleratorView(ViewWrite);
|
||||
auto lhs_v = lhs.AcceleratorView(ViewRead);
|
||||
accelerator_for(ss,lhs_v.size(),obj1::Nsimd(),{
|
||||
decltype(coalescedRead(obj1())) tmp;
|
||||
auto lhs_t=lhs_v(ss);
|
||||
@ -164,8 +165,8 @@ template<class obj1,class obj2,class obj3> inline
|
||||
void mult(Lattice<obj1> &ret,const obj2 &lhs,const Lattice<obj3> &rhs){
|
||||
ret.Checkerboard() = rhs.Checkerboard();
|
||||
conformable(ret,rhs);
|
||||
auto ret_v = ret.View();
|
||||
auto rhs_v = lhs.View();
|
||||
auto ret_v = ret.AcceleratorView(ViewWrite);
|
||||
auto rhs_v = lhs.AcceleratorView(ViewRead);
|
||||
accelerator_for(ss,rhs_v.size(),obj1::Nsimd(),{
|
||||
decltype(coalescedRead(obj1())) tmp;
|
||||
auto rhs_t=rhs_v(ss);
|
||||
@ -178,8 +179,8 @@ template<class obj1,class obj2,class obj3> inline
|
||||
void mac(Lattice<obj1> &ret,const obj2 &lhs,const Lattice<obj3> &rhs){
|
||||
ret.Checkerboard() = rhs.Checkerboard();
|
||||
conformable(ret,rhs);
|
||||
auto ret_v = ret.View();
|
||||
auto rhs_v = lhs.View();
|
||||
auto ret_v = ret.AcceleratorView(ViewWrite);
|
||||
auto rhs_v = lhs.AcceleratorView(ViewRead);
|
||||
accelerator_for(ss,rhs_v.size(),obj1::Nsimd(),{
|
||||
decltype(coalescedRead(obj1())) tmp;
|
||||
auto rhs_t=rhs_v(ss);
|
||||
@ -192,8 +193,8 @@ template<class obj1,class obj2,class obj3> inline
|
||||
void sub(Lattice<obj1> &ret,const obj2 &lhs,const Lattice<obj3> &rhs){
|
||||
ret.Checkerboard() = rhs.Checkerboard();
|
||||
conformable(ret,rhs);
|
||||
auto ret_v = ret.View();
|
||||
auto rhs_v = lhs.View();
|
||||
auto ret_v = ret.AcceleratorView(ViewWrite);
|
||||
auto rhs_v = lhs.AcceleratorView(ViewRead);
|
||||
accelerator_for(ss,rhs_v.size(),obj1::Nsimd(),{
|
||||
decltype(coalescedRead(obj1())) tmp;
|
||||
auto rhs_t=rhs_v(ss);
|
||||
@ -205,8 +206,8 @@ template<class obj1,class obj2,class obj3> inline
|
||||
void add(Lattice<obj1> &ret,const obj2 &lhs,const Lattice<obj3> &rhs){
|
||||
ret.Checkerboard() = rhs.Checkerboard();
|
||||
conformable(ret,rhs);
|
||||
auto ret_v = ret.View();
|
||||
auto rhs_v = lhs.View();
|
||||
auto ret_v = ret.AcceleratorView(ViewWrite);
|
||||
auto rhs_v = lhs.AcceleratorView(ViewRead);
|
||||
accelerator_for(ss,rhs_v.size(),obj1::Nsimd(),{
|
||||
decltype(coalescedRead(obj1())) tmp;
|
||||
auto rhs_t=rhs_v(ss);
|
||||
@ -220,9 +221,9 @@ void axpy(Lattice<vobj> &ret,sobj a,const Lattice<vobj> &x,const Lattice<vobj> &
|
||||
ret.Checkerboard() = x.Checkerboard();
|
||||
conformable(ret,x);
|
||||
conformable(x,y);
|
||||
auto ret_v = ret.View();
|
||||
auto x_v = x.View();
|
||||
auto y_v = y.View();
|
||||
auto ret_v = ret.AcceleratorView(ViewWrite);
|
||||
auto x_v = x.AcceleratorView(ViewRead);
|
||||
auto y_v = y.AcceleratorView(ViewRead);
|
||||
accelerator_for(ss,x_v.size(),vobj::Nsimd(),{
|
||||
auto tmp = a*x_v(ss)+y_v(ss);
|
||||
coalescedWrite(ret_v[ss],tmp);
|
||||
@ -233,9 +234,9 @@ void axpby(Lattice<vobj> &ret,sobj a,sobj b,const Lattice<vobj> &x,const Lattice
|
||||
ret.Checkerboard() = x.Checkerboard();
|
||||
conformable(ret,x);
|
||||
conformable(x,y);
|
||||
auto ret_v = ret.View();
|
||||
auto x_v = x.View();
|
||||
auto y_v = y.View();
|
||||
auto ret_v = ret.AcceleratorView(ViewWrite);
|
||||
auto x_v = x.AcceleratorView(ViewRead);
|
||||
auto y_v = y.AcceleratorView(ViewRead);
|
||||
accelerator_for(ss,x_v.size(),vobj::Nsimd(),{
|
||||
auto tmp = a*x_v(ss)+b*y_v(ss);
|
||||
coalescedWrite(ret_v[ss],tmp);
|
||||
|
@ -54,8 +54,20 @@ void accelerator_inline conformable(GridBase *lhs,GridBase *rhs)
|
||||
// Advise the LatticeAccelerator class
|
||||
////////////////////////////////////////////////////////////////////////////
|
||||
// Advice flags for lattice data. Each enumerator is a distinct bit, so a flag
// can be tested with a bitwise AND against the advice value.
enum LatticeAcceleratorAdvise {
  AdviseInfrequentUse = 0x1,  // Advise that the data is used infrequently. This can
                              // significantly influence performance of bulk storage.
  AdviseReadMostly    = 0x2,  // Data will mostly be read. On some architectures
                              // enables read-only copies of memory to be kept on
                              // host and device.
};
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////
|
||||
// View Access Mode
|
||||
////////////////////////////////////////////////////////////////////////////
|
||||
// Access mode requested when opening a view; read and write are separate bits.
enum ViewMode {
  ViewRead      = 0x1,                   // read access
  ViewWrite     = 0x2,                   // write access
  ViewReadWrite = ViewRead | ViewWrite   // both bits set (0x3)
};
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////
|
||||
@ -91,6 +103,29 @@ public:
|
||||
if (advise & AdviseInfrequentUse) {
|
||||
cudaMemAdvise(_odata,_odata_size*sizeof(vobj),cudaMemAdviseSetPreferredLocation,cudaCpuDeviceId);
|
||||
}
|
||||
if (advise & AdviseReadMostly) {
|
||||
cudaMemAdvise(_odata,_odata_size*sizeof(vobj),cudaMemAdviseSetReadMostly,-1);
|
||||
}
|
||||
#endif
|
||||
#endif
|
||||
};
|
||||
|
||||
// Prefetch this object's _odata buffer to the current CUDA device.
// Only active when GRID_NVCC is defined and the code is compiled for the host
// (__CUDA_ARCH__ undefined); otherwise the body is empty.
// accessMode is accepted but not used yet (reserved for future use).
accelerator_inline void AcceleratorPrefetch(int accessMode = ViewReadWrite) {
#if defined(GRID_NVCC) && !defined(__CUDA_ARCH__)
  // Ask the runtime which device is current, then prefetch to it.
  int device;
  cudaGetDevice(&device);
  const auto bytes = _odata_size*sizeof(vobj);
  cudaMemPrefetchAsync(_odata, bytes, device);
  std::cout<< GridLogMessage << "To Device " << device << std::endl;
#endif
}
|
||||
|
||||
// Prefetch this object's _odata buffer to host memory (cudaCpuDeviceId).
// Only active when GRID_NVCC is defined and the code is compiled for the host
// (__CUDA_ARCH__ undefined); otherwise the body is empty.
// accessMode is accepted but not used yet (reserved for future use).
accelerator_inline void HostPrefetch(int accessMode = ViewReadWrite) {
#if defined(GRID_NVCC) && !defined(__CUDA_ARCH__)
  const auto bytes = _odata_size*sizeof(vobj);
  cudaMemPrefetchAsync(_odata, bytes, cudaCpuDeviceId);
  std::cout<< GridLogMessage << "To Host" << std::endl;
#endif
}
|
||||
@ -225,9 +260,23 @@ public:
|
||||
// The view is trivially copy constructible and may be copied to an accelerator device
|
||||
// in device lambdas
|
||||
/////////////////////////////////////////////////////////////////////////////////
|
||||
// Deprecated: should pick AcceleratorView for accelerator_for and HostView
// for thread_for.
// Constructs a LatticeView from this object (cast to LatticeAccelerator<vobj>,
// dropping const via the C-style cast) and returns it by value. No prefetch
// is issued here.
LatticeView<vobj> View (void) const
{
  LatticeView<vobj> accessor(*( (LatticeAccelerator<vobj> *) this));
  return accessor;
}
|
||||
|
||||
// Returns a LatticeView of this object after calling AcceleratorPrefetch(mode)
// on that view. `mode` defaults to ViewReadWrite.
LatticeView<vobj> AcceleratorView(int mode = ViewReadWrite) const
{
  // C-style cast kept as in the original: it also drops the const of `this`.
  LatticeAccelerator<vobj> *self = (LatticeAccelerator<vobj> *) this;
  LatticeView<vobj> view(*self);
  view.AcceleratorPrefetch(mode);
  return view;
}
|
||||
|
||||
// Returns a LatticeView of this object after calling HostPrefetch(mode) on
// that view. `mode` defaults to ViewReadWrite.
LatticeView<vobj> HostView(int mode = ViewReadWrite) const
{
  // C-style cast kept as in the original: it also drops the const of `this`.
  LatticeAccelerator<vobj> *self = (LatticeAccelerator<vobj> *) this;
  LatticeView<vobj> view(*self);
  view.HostPrefetch(mode);
  return view;
}
|
||||
|
||||
@ -251,7 +300,7 @@ public:
|
||||
assert( (cb==Odd) || (cb==Even));
|
||||
this->checkerboard=cb;
|
||||
|
||||
auto me = View();
|
||||
auto me = AcceleratorView(ViewWrite);
|
||||
accelerator_for(ss,me.size(),1,{
|
||||
auto tmp = eval(ss,expr);
|
||||
vstream(me[ss],tmp);
|
||||
@ -270,7 +319,7 @@ public:
|
||||
assert( (cb==Odd) || (cb==Even));
|
||||
this->checkerboard=cb;
|
||||
|
||||
auto me = View();
|
||||
auto me = AcceleratorView(ViewWrite);
|
||||
accelerator_for(ss,me.size(),1,{
|
||||
auto tmp = eval(ss,expr);
|
||||
vstream(me[ss],tmp);
|
||||
@ -288,7 +337,7 @@ public:
|
||||
CBFromExpression(cb,expr);
|
||||
assert( (cb==Odd) || (cb==Even));
|
||||
this->checkerboard=cb;
|
||||
auto me = View();
|
||||
auto me = AcceleratorView(ViewWrite);
|
||||
accelerator_for(ss,me.size(),1,{
|
||||
auto tmp = eval(ss,expr);
|
||||
vstream(me[ss],tmp);
|
||||
@ -399,8 +448,9 @@ public:
|
||||
typename std::enable_if<!std::is_same<robj,vobj>::value,int>::type i=0;
|
||||
conformable(*this,r);
|
||||
this->checkerboard = r.Checkerboard();
|
||||
auto me = View();
|
||||
auto him= r.View();
|
||||
std::cout << GridLogMessage << "Copy other" << std::endl;
|
||||
auto me = AcceleratorView(ViewWrite);
|
||||
auto him= r.AcceleratorView(ViewRead);
|
||||
accelerator_for(ss,me.size(),vobj::Nsimd(),{
|
||||
coalescedWrite(me[ss],him(ss));
|
||||
});
|
||||
@ -413,8 +463,9 @@ public:
|
||||
inline Lattice<vobj> & operator = (const Lattice<vobj> & r){
|
||||
this->checkerboard = r.Checkerboard();
|
||||
conformable(*this,r);
|
||||
auto me = View();
|
||||
auto him= r.View();
|
||||
std::cout << GridLogMessage << "Copy same" << std::endl;
|
||||
auto me = AcceleratorView(ViewWrite);
|
||||
auto him= r.AcceleratorView(ViewRead);
|
||||
accelerator_for(ss,me.size(),vobj::Nsimd(),{
|
||||
coalescedWrite(me[ss],him(ss));
|
||||
});
|
||||
|
@ -103,8 +103,8 @@ inline ComplexD rankInnerProduct(const Lattice<vobj> &left,const Lattice<vobj> &
|
||||
GridBase *grid = left.Grid();
|
||||
|
||||
// Might make all code paths go this way.
|
||||
auto left_v = left.View();
|
||||
auto right_v=right.View();
|
||||
auto left_v = left.AcceleratorView(ViewRead);
|
||||
auto right_v=right.AcceleratorView(ViewRead);
|
||||
|
||||
const uint64_t nsimd = grid->Nsimd();
|
||||
const uint64_t sites = grid->oSites();
|
||||
@ -175,9 +175,9 @@ axpby_norm_fast(Lattice<vobj> &z,sobj a,sobj b,const Lattice<vobj> &x,const Latt
|
||||
|
||||
GridBase *grid = x.Grid();
|
||||
|
||||
auto x_v=x.View();
|
||||
auto y_v=y.View();
|
||||
auto z_v=z.View();
|
||||
auto x_v=x.AcceleratorView(ViewRead);
|
||||
auto y_v=y.AcceleratorView(ViewRead);
|
||||
auto z_v=z.AcceleratorView(ViewWrite);
|
||||
|
||||
const uint64_t nsimd = grid->Nsimd();
|
||||
const uint64_t sites = grid->oSites();
|
||||
@ -224,8 +224,8 @@ innerProductNorm(ComplexD& ip, RealD &nrm, const Lattice<vobj> &left,const Latti
|
||||
|
||||
GridBase *grid = left.Grid();
|
||||
|
||||
auto left_v=left.View();
|
||||
auto right_v=right.View();
|
||||
auto left_v=left.AcceleratorView(ViewRead);
|
||||
auto right_v=right.AcceleratorView(ViewRead);
|
||||
|
||||
const uint64_t nsimd = grid->Nsimd();
|
||||
const uint64_t sites = grid->oSites();
|
||||
|
Loading…
x
Reference in New Issue
Block a user