mirror of
https://github.com/paboyle/Grid.git
synced 2025-04-09 21:50:45 +01:00
debug new AcceleratorView
This commit is contained in:
parent
2a1387e992
commit
04863f8f38
@ -9,6 +9,7 @@ Copyright (C) 2015
|
|||||||
Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk>
|
Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk>
|
||||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||||
Author: neo <cossu@post.kek.jp>
|
Author: neo <cossu@post.kek.jp>
|
||||||
|
Author: Christoph Lehner <christoph@lhnr.de>
|
||||||
|
|
||||||
This program is free software; you can redistribute it and/or modify
|
This program is free software; you can redistribute it and/or modify
|
||||||
it under the terms of the GNU General Public License as published by
|
it under the terms of the GNU General Public License as published by
|
||||||
@ -94,7 +95,7 @@ const lobj & eval(const uint64_t ss, const LatticeView<lobj> &arg)
|
|||||||
template <class lobj> accelerator_inline
|
template <class lobj> accelerator_inline
|
||||||
const lobj & eval(const uint64_t ss, const Lattice<lobj> &arg)
|
const lobj & eval(const uint64_t ss, const Lattice<lobj> &arg)
|
||||||
{
|
{
|
||||||
auto view = arg.View();
|
auto view = arg.AcceleratorView(ViewRead);
|
||||||
return view[ss];
|
return view[ss];
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -7,6 +7,7 @@
|
|||||||
Copyright (C) 2015
|
Copyright (C) 2015
|
||||||
|
|
||||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||||
|
Author: Christoph Lehner <christoph@lhnr.de>
|
||||||
|
|
||||||
This program is free software; you can redistribute it and/or modify
|
This program is free software; you can redistribute it and/or modify
|
||||||
it under the terms of the GNU General Public License as published by
|
it under the terms of the GNU General Public License as published by
|
||||||
@ -36,9 +37,9 @@ NAMESPACE_BEGIN(Grid);
|
|||||||
template<class obj1,class obj2,class obj3> inline
|
template<class obj1,class obj2,class obj3> inline
|
||||||
void mult(Lattice<obj1> &ret,const Lattice<obj2> &lhs,const Lattice<obj3> &rhs){
|
void mult(Lattice<obj1> &ret,const Lattice<obj2> &lhs,const Lattice<obj3> &rhs){
|
||||||
ret.Checkerboard() = lhs.Checkerboard();
|
ret.Checkerboard() = lhs.Checkerboard();
|
||||||
auto ret_v = ret.View();
|
auto ret_v = ret.AcceleratorView(ViewWrite);
|
||||||
auto lhs_v = lhs.View();
|
auto lhs_v = lhs.AcceleratorView(ViewRead);
|
||||||
auto rhs_v = rhs.View();
|
auto rhs_v = rhs.AcceleratorView(ViewRead);
|
||||||
conformable(ret,rhs);
|
conformable(ret,rhs);
|
||||||
conformable(lhs,rhs);
|
conformable(lhs,rhs);
|
||||||
accelerator_for(ss,lhs_v.size(),obj1::Nsimd(),{
|
accelerator_for(ss,lhs_v.size(),obj1::Nsimd(),{
|
||||||
@ -55,9 +56,9 @@ void mac(Lattice<obj1> &ret,const Lattice<obj2> &lhs,const Lattice<obj3> &rhs){
|
|||||||
ret.Checkerboard() = lhs.Checkerboard();
|
ret.Checkerboard() = lhs.Checkerboard();
|
||||||
conformable(ret,rhs);
|
conformable(ret,rhs);
|
||||||
conformable(lhs,rhs);
|
conformable(lhs,rhs);
|
||||||
auto ret_v = ret.View();
|
auto ret_v = ret.AcceleratorView(ViewWrite);
|
||||||
auto lhs_v = lhs.View();
|
auto lhs_v = lhs.AcceleratorView(ViewRead);
|
||||||
auto rhs_v = rhs.View();
|
auto rhs_v = rhs.AcceleratorView(ViewRead);
|
||||||
accelerator_for(ss,lhs_v.size(),obj1::Nsimd(),{
|
accelerator_for(ss,lhs_v.size(),obj1::Nsimd(),{
|
||||||
decltype(coalescedRead(obj1())) tmp;
|
decltype(coalescedRead(obj1())) tmp;
|
||||||
auto lhs_t=lhs_v(ss);
|
auto lhs_t=lhs_v(ss);
|
||||||
@ -72,9 +73,9 @@ void sub(Lattice<obj1> &ret,const Lattice<obj2> &lhs,const Lattice<obj3> &rhs){
|
|||||||
ret.Checkerboard() = lhs.Checkerboard();
|
ret.Checkerboard() = lhs.Checkerboard();
|
||||||
conformable(ret,rhs);
|
conformable(ret,rhs);
|
||||||
conformable(lhs,rhs);
|
conformable(lhs,rhs);
|
||||||
auto ret_v = ret.View();
|
auto ret_v = ret.AcceleratorView(ViewWrite);
|
||||||
auto lhs_v = lhs.View();
|
auto lhs_v = lhs.AcceleratorView(ViewRead);
|
||||||
auto rhs_v = rhs.View();
|
auto rhs_v = rhs.AcceleratorView(ViewRead);
|
||||||
accelerator_for(ss,lhs_v.size(),obj1::Nsimd(),{
|
accelerator_for(ss,lhs_v.size(),obj1::Nsimd(),{
|
||||||
decltype(coalescedRead(obj1())) tmp;
|
decltype(coalescedRead(obj1())) tmp;
|
||||||
auto lhs_t=lhs_v(ss);
|
auto lhs_t=lhs_v(ss);
|
||||||
@ -88,9 +89,9 @@ void add(Lattice<obj1> &ret,const Lattice<obj2> &lhs,const Lattice<obj3> &rhs){
|
|||||||
ret.Checkerboard() = lhs.Checkerboard();
|
ret.Checkerboard() = lhs.Checkerboard();
|
||||||
conformable(ret,rhs);
|
conformable(ret,rhs);
|
||||||
conformable(lhs,rhs);
|
conformable(lhs,rhs);
|
||||||
auto ret_v = ret.View();
|
auto ret_v = ret.AcceleratorView(ViewWrite);
|
||||||
auto lhs_v = lhs.View();
|
auto lhs_v = lhs.AcceleratorView(ViewRead);
|
||||||
auto rhs_v = rhs.View();
|
auto rhs_v = rhs.AcceleratorView(ViewRead);
|
||||||
accelerator_for(ss,lhs_v.size(),obj1::Nsimd(),{
|
accelerator_for(ss,lhs_v.size(),obj1::Nsimd(),{
|
||||||
decltype(coalescedRead(obj1())) tmp;
|
decltype(coalescedRead(obj1())) tmp;
|
||||||
auto lhs_t=lhs_v(ss);
|
auto lhs_t=lhs_v(ss);
|
||||||
@ -107,8 +108,8 @@ template<class obj1,class obj2,class obj3> inline
|
|||||||
void mult(Lattice<obj1> &ret,const Lattice<obj2> &lhs,const obj3 &rhs){
|
void mult(Lattice<obj1> &ret,const Lattice<obj2> &lhs,const obj3 &rhs){
|
||||||
ret.Checkerboard() = lhs.Checkerboard();
|
ret.Checkerboard() = lhs.Checkerboard();
|
||||||
conformable(lhs,ret);
|
conformable(lhs,ret);
|
||||||
auto ret_v = ret.View();
|
auto ret_v = ret.AcceleratorView(ViewWrite);
|
||||||
auto lhs_v = lhs.View();
|
auto lhs_v = lhs.AcceleratorView(ViewRead);
|
||||||
accelerator_for(ss,lhs_v.size(),obj1::Nsimd(),{
|
accelerator_for(ss,lhs_v.size(),obj1::Nsimd(),{
|
||||||
decltype(coalescedRead(obj1())) tmp;
|
decltype(coalescedRead(obj1())) tmp;
|
||||||
mult(&tmp,&lhs_v(ss),&rhs);
|
mult(&tmp,&lhs_v(ss),&rhs);
|
||||||
@ -120,8 +121,8 @@ template<class obj1,class obj2,class obj3> inline
|
|||||||
void mac(Lattice<obj1> &ret,const Lattice<obj2> &lhs,const obj3 &rhs){
|
void mac(Lattice<obj1> &ret,const Lattice<obj2> &lhs,const obj3 &rhs){
|
||||||
ret.Checkerboard() = lhs.Checkerboard();
|
ret.Checkerboard() = lhs.Checkerboard();
|
||||||
conformable(ret,lhs);
|
conformable(ret,lhs);
|
||||||
auto ret_v = ret.View();
|
auto ret_v = ret.AcceleratorView(ViewWrite);
|
||||||
auto lhs_v = lhs.View();
|
auto lhs_v = lhs.AcceleratorView(ViewRead);
|
||||||
accelerator_for(ss,lhs_v.size(),obj1::Nsimd(),{
|
accelerator_for(ss,lhs_v.size(),obj1::Nsimd(),{
|
||||||
decltype(coalescedRead(obj1())) tmp;
|
decltype(coalescedRead(obj1())) tmp;
|
||||||
auto lhs_t=lhs_v(ss);
|
auto lhs_t=lhs_v(ss);
|
||||||
@ -134,8 +135,8 @@ template<class obj1,class obj2,class obj3> inline
|
|||||||
void sub(Lattice<obj1> &ret,const Lattice<obj2> &lhs,const obj3 &rhs){
|
void sub(Lattice<obj1> &ret,const Lattice<obj2> &lhs,const obj3 &rhs){
|
||||||
ret.Checkerboard() = lhs.Checkerboard();
|
ret.Checkerboard() = lhs.Checkerboard();
|
||||||
conformable(ret,lhs);
|
conformable(ret,lhs);
|
||||||
auto ret_v = ret.View();
|
auto ret_v = ret.AcceleratorView(ViewWrite);
|
||||||
auto lhs_v = lhs.View();
|
auto lhs_v = lhs.AcceleratorView(ViewRead);
|
||||||
accelerator_for(ss,lhs_v.size(),obj1::Nsimd(),{
|
accelerator_for(ss,lhs_v.size(),obj1::Nsimd(),{
|
||||||
decltype(coalescedRead(obj1())) tmp;
|
decltype(coalescedRead(obj1())) tmp;
|
||||||
auto lhs_t=lhs_v(ss);
|
auto lhs_t=lhs_v(ss);
|
||||||
@ -147,8 +148,8 @@ template<class obj1,class obj2,class obj3> inline
|
|||||||
void add(Lattice<obj1> &ret,const Lattice<obj2> &lhs,const obj3 &rhs){
|
void add(Lattice<obj1> &ret,const Lattice<obj2> &lhs,const obj3 &rhs){
|
||||||
ret.Checkerboard() = lhs.Checkerboard();
|
ret.Checkerboard() = lhs.Checkerboard();
|
||||||
conformable(lhs,ret);
|
conformable(lhs,ret);
|
||||||
auto ret_v = ret.View();
|
auto ret_v = ret.AcceleratorView(ViewWrite);
|
||||||
auto lhs_v = lhs.View();
|
auto lhs_v = lhs.AcceleratorView(ViewRead);
|
||||||
accelerator_for(ss,lhs_v.size(),obj1::Nsimd(),{
|
accelerator_for(ss,lhs_v.size(),obj1::Nsimd(),{
|
||||||
decltype(coalescedRead(obj1())) tmp;
|
decltype(coalescedRead(obj1())) tmp;
|
||||||
auto lhs_t=lhs_v(ss);
|
auto lhs_t=lhs_v(ss);
|
||||||
@ -164,8 +165,8 @@ template<class obj1,class obj2,class obj3> inline
|
|||||||
void mult(Lattice<obj1> &ret,const obj2 &lhs,const Lattice<obj3> &rhs){
|
void mult(Lattice<obj1> &ret,const obj2 &lhs,const Lattice<obj3> &rhs){
|
||||||
ret.Checkerboard() = rhs.Checkerboard();
|
ret.Checkerboard() = rhs.Checkerboard();
|
||||||
conformable(ret,rhs);
|
conformable(ret,rhs);
|
||||||
auto ret_v = ret.View();
|
auto ret_v = ret.AcceleratorView(ViewWrite);
|
||||||
auto rhs_v = lhs.View();
|
auto rhs_v = lhs.AcceleratorView(ViewRead);
|
||||||
accelerator_for(ss,rhs_v.size(),obj1::Nsimd(),{
|
accelerator_for(ss,rhs_v.size(),obj1::Nsimd(),{
|
||||||
decltype(coalescedRead(obj1())) tmp;
|
decltype(coalescedRead(obj1())) tmp;
|
||||||
auto rhs_t=rhs_v(ss);
|
auto rhs_t=rhs_v(ss);
|
||||||
@ -178,8 +179,8 @@ template<class obj1,class obj2,class obj3> inline
|
|||||||
void mac(Lattice<obj1> &ret,const obj2 &lhs,const Lattice<obj3> &rhs){
|
void mac(Lattice<obj1> &ret,const obj2 &lhs,const Lattice<obj3> &rhs){
|
||||||
ret.Checkerboard() = rhs.Checkerboard();
|
ret.Checkerboard() = rhs.Checkerboard();
|
||||||
conformable(ret,rhs);
|
conformable(ret,rhs);
|
||||||
auto ret_v = ret.View();
|
auto ret_v = ret.AcceleratorView(ViewWrite);
|
||||||
auto rhs_v = lhs.View();
|
auto rhs_v = lhs.AcceleratorView(ViewRead);
|
||||||
accelerator_for(ss,rhs_v.size(),obj1::Nsimd(),{
|
accelerator_for(ss,rhs_v.size(),obj1::Nsimd(),{
|
||||||
decltype(coalescedRead(obj1())) tmp;
|
decltype(coalescedRead(obj1())) tmp;
|
||||||
auto rhs_t=rhs_v(ss);
|
auto rhs_t=rhs_v(ss);
|
||||||
@ -192,8 +193,8 @@ template<class obj1,class obj2,class obj3> inline
|
|||||||
void sub(Lattice<obj1> &ret,const obj2 &lhs,const Lattice<obj3> &rhs){
|
void sub(Lattice<obj1> &ret,const obj2 &lhs,const Lattice<obj3> &rhs){
|
||||||
ret.Checkerboard() = rhs.Checkerboard();
|
ret.Checkerboard() = rhs.Checkerboard();
|
||||||
conformable(ret,rhs);
|
conformable(ret,rhs);
|
||||||
auto ret_v = ret.View();
|
auto ret_v = ret.AcceleratorView(ViewWrite);
|
||||||
auto rhs_v = lhs.View();
|
auto rhs_v = lhs.AcceleratorView(ViewRead);
|
||||||
accelerator_for(ss,rhs_v.size(),obj1::Nsimd(),{
|
accelerator_for(ss,rhs_v.size(),obj1::Nsimd(),{
|
||||||
decltype(coalescedRead(obj1())) tmp;
|
decltype(coalescedRead(obj1())) tmp;
|
||||||
auto rhs_t=rhs_v(ss);
|
auto rhs_t=rhs_v(ss);
|
||||||
@ -205,8 +206,8 @@ template<class obj1,class obj2,class obj3> inline
|
|||||||
void add(Lattice<obj1> &ret,const obj2 &lhs,const Lattice<obj3> &rhs){
|
void add(Lattice<obj1> &ret,const obj2 &lhs,const Lattice<obj3> &rhs){
|
||||||
ret.Checkerboard() = rhs.Checkerboard();
|
ret.Checkerboard() = rhs.Checkerboard();
|
||||||
conformable(ret,rhs);
|
conformable(ret,rhs);
|
||||||
auto ret_v = ret.View();
|
auto ret_v = ret.AcceleratorView(ViewWrite);
|
||||||
auto rhs_v = lhs.View();
|
auto rhs_v = lhs.AcceleratorView(ViewRead);
|
||||||
accelerator_for(ss,rhs_v.size(),obj1::Nsimd(),{
|
accelerator_for(ss,rhs_v.size(),obj1::Nsimd(),{
|
||||||
decltype(coalescedRead(obj1())) tmp;
|
decltype(coalescedRead(obj1())) tmp;
|
||||||
auto rhs_t=rhs_v(ss);
|
auto rhs_t=rhs_v(ss);
|
||||||
@ -220,9 +221,9 @@ void axpy(Lattice<vobj> &ret,sobj a,const Lattice<vobj> &x,const Lattice<vobj> &
|
|||||||
ret.Checkerboard() = x.Checkerboard();
|
ret.Checkerboard() = x.Checkerboard();
|
||||||
conformable(ret,x);
|
conformable(ret,x);
|
||||||
conformable(x,y);
|
conformable(x,y);
|
||||||
auto ret_v = ret.View();
|
auto ret_v = ret.AcceleratorView(ViewWrite);
|
||||||
auto x_v = x.View();
|
auto x_v = x.AcceleratorView(ViewRead);
|
||||||
auto y_v = y.View();
|
auto y_v = y.AcceleratorView(ViewRead);
|
||||||
accelerator_for(ss,x_v.size(),vobj::Nsimd(),{
|
accelerator_for(ss,x_v.size(),vobj::Nsimd(),{
|
||||||
auto tmp = a*x_v(ss)+y_v(ss);
|
auto tmp = a*x_v(ss)+y_v(ss);
|
||||||
coalescedWrite(ret_v[ss],tmp);
|
coalescedWrite(ret_v[ss],tmp);
|
||||||
@ -233,9 +234,9 @@ void axpby(Lattice<vobj> &ret,sobj a,sobj b,const Lattice<vobj> &x,const Lattice
|
|||||||
ret.Checkerboard() = x.Checkerboard();
|
ret.Checkerboard() = x.Checkerboard();
|
||||||
conformable(ret,x);
|
conformable(ret,x);
|
||||||
conformable(x,y);
|
conformable(x,y);
|
||||||
auto ret_v = ret.View();
|
auto ret_v = ret.AcceleratorView(ViewWrite);
|
||||||
auto x_v = x.View();
|
auto x_v = x.AcceleratorView(ViewRead);
|
||||||
auto y_v = y.View();
|
auto y_v = y.AcceleratorView(ViewRead);
|
||||||
accelerator_for(ss,x_v.size(),vobj::Nsimd(),{
|
accelerator_for(ss,x_v.size(),vobj::Nsimd(),{
|
||||||
auto tmp = a*x_v(ss)+b*y_v(ss);
|
auto tmp = a*x_v(ss)+b*y_v(ss);
|
||||||
coalescedWrite(ret_v[ss],tmp);
|
coalescedWrite(ret_v[ss],tmp);
|
||||||
|
@ -54,8 +54,20 @@ void accelerator_inline conformable(GridBase *lhs,GridBase *rhs)
|
|||||||
// Advise the LatticeAccelerator class
|
// Advise the LatticeAccelerator class
|
||||||
////////////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////////////
|
||||||
enum LatticeAcceleratorAdvise {
|
enum LatticeAcceleratorAdvise {
|
||||||
AdviseInfrequentUse = 0x1 // Advise that the data is used infrequently. This can
|
AdviseInfrequentUse = 0x1, // Advise that the data is used infrequently. This can
|
||||||
// significantly influence performance of bulk storage.
|
// significantly influence performance of bulk storage.
|
||||||
|
AdviseReadMostly = 0x2, // Data will mostly be read. On some architectures
|
||||||
|
// enables read-only copies of memory to be kept on
|
||||||
|
// host and device.
|
||||||
|
};
|
||||||
|
|
||||||
|
////////////////////////////////////////////////////////////////////////////
|
||||||
|
// View Access Mode
|
||||||
|
////////////////////////////////////////////////////////////////////////////
|
||||||
|
enum ViewMode {
|
||||||
|
ViewRead = 0x1,
|
||||||
|
ViewWrite = 0x2,
|
||||||
|
ViewReadWrite = 0x3
|
||||||
};
|
};
|
||||||
|
|
||||||
////////////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////////////
|
||||||
@ -91,6 +103,29 @@ public:
|
|||||||
if (advise & AdviseInfrequentUse) {
|
if (advise & AdviseInfrequentUse) {
|
||||||
cudaMemAdvise(_odata,_odata_size*sizeof(vobj),cudaMemAdviseSetPreferredLocation,cudaCpuDeviceId);
|
cudaMemAdvise(_odata,_odata_size*sizeof(vobj),cudaMemAdviseSetPreferredLocation,cudaCpuDeviceId);
|
||||||
}
|
}
|
||||||
|
if (advise & AdviseReadMostly) {
|
||||||
|
cudaMemAdvise(_odata,_odata_size*sizeof(vobj),cudaMemAdviseSetReadMostly,-1);
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
#endif
|
||||||
|
};
|
||||||
|
|
||||||
|
accelerator_inline void AcceleratorPrefetch(int accessMode = ViewReadWrite) { // will use accessMode in future
|
||||||
|
#ifdef GRID_NVCC
|
||||||
|
#ifndef __CUDA_ARCH__ // only on host
|
||||||
|
int target;
|
||||||
|
cudaGetDevice(&target);
|
||||||
|
cudaMemPrefetchAsync(_odata,_odata_size*sizeof(vobj),target);
|
||||||
|
std::cout<< GridLogMessage << "To Device " << target << std::endl;
|
||||||
|
#endif
|
||||||
|
#endif
|
||||||
|
};
|
||||||
|
|
||||||
|
accelerator_inline void HostPrefetch(int accessMode = ViewReadWrite) { // will use accessMode in future
|
||||||
|
#ifdef GRID_NVCC
|
||||||
|
#ifndef __CUDA_ARCH__ // only on host
|
||||||
|
cudaMemPrefetchAsync(_odata,_odata_size*sizeof(vobj),cudaCpuDeviceId);
|
||||||
|
std::cout<< GridLogMessage << "To Host" << std::endl;
|
||||||
#endif
|
#endif
|
||||||
#endif
|
#endif
|
||||||
};
|
};
|
||||||
@ -225,9 +260,23 @@ public:
|
|||||||
// The view is trivially copy constructible and may be copied to an accelerator device
|
// The view is trivially copy constructible and may be copied to an accelerator device
|
||||||
// in device lambdas
|
// in device lambdas
|
||||||
/////////////////////////////////////////////////////////////////////////////////
|
/////////////////////////////////////////////////////////////////////////////////
|
||||||
LatticeView<vobj> View (void) const
|
LatticeView<vobj> View (void) const // deprecated, should pick AcceleratorView for accelerator_for
|
||||||
|
{ // and HostView for thread_for
|
||||||
|
LatticeView<vobj> accessor(*( (LatticeAccelerator<vobj> *) this));
|
||||||
|
return accessor;
|
||||||
|
}
|
||||||
|
|
||||||
|
LatticeView<vobj> AcceleratorView(int mode = ViewReadWrite) const
|
||||||
{
|
{
|
||||||
LatticeView<vobj> accessor(*( (LatticeAccelerator<vobj> *) this));
|
LatticeView<vobj> accessor(*( (LatticeAccelerator<vobj> *) this));
|
||||||
|
accessor.AcceleratorPrefetch(mode);
|
||||||
|
return accessor;
|
||||||
|
}
|
||||||
|
|
||||||
|
LatticeView<vobj> HostView(int mode = ViewReadWrite) const
|
||||||
|
{
|
||||||
|
LatticeView<vobj> accessor(*( (LatticeAccelerator<vobj> *) this));
|
||||||
|
accessor.HostPrefetch(mode);
|
||||||
return accessor;
|
return accessor;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -251,7 +300,7 @@ public:
|
|||||||
assert( (cb==Odd) || (cb==Even));
|
assert( (cb==Odd) || (cb==Even));
|
||||||
this->checkerboard=cb;
|
this->checkerboard=cb;
|
||||||
|
|
||||||
auto me = View();
|
auto me = AcceleratorView(ViewWrite);
|
||||||
accelerator_for(ss,me.size(),1,{
|
accelerator_for(ss,me.size(),1,{
|
||||||
auto tmp = eval(ss,expr);
|
auto tmp = eval(ss,expr);
|
||||||
vstream(me[ss],tmp);
|
vstream(me[ss],tmp);
|
||||||
@ -270,7 +319,7 @@ public:
|
|||||||
assert( (cb==Odd) || (cb==Even));
|
assert( (cb==Odd) || (cb==Even));
|
||||||
this->checkerboard=cb;
|
this->checkerboard=cb;
|
||||||
|
|
||||||
auto me = View();
|
auto me = AcceleratorView(ViewWrite);
|
||||||
accelerator_for(ss,me.size(),1,{
|
accelerator_for(ss,me.size(),1,{
|
||||||
auto tmp = eval(ss,expr);
|
auto tmp = eval(ss,expr);
|
||||||
vstream(me[ss],tmp);
|
vstream(me[ss],tmp);
|
||||||
@ -288,7 +337,7 @@ public:
|
|||||||
CBFromExpression(cb,expr);
|
CBFromExpression(cb,expr);
|
||||||
assert( (cb==Odd) || (cb==Even));
|
assert( (cb==Odd) || (cb==Even));
|
||||||
this->checkerboard=cb;
|
this->checkerboard=cb;
|
||||||
auto me = View();
|
auto me = AcceleratorView(ViewWrite);
|
||||||
accelerator_for(ss,me.size(),1,{
|
accelerator_for(ss,me.size(),1,{
|
||||||
auto tmp = eval(ss,expr);
|
auto tmp = eval(ss,expr);
|
||||||
vstream(me[ss],tmp);
|
vstream(me[ss],tmp);
|
||||||
@ -399,8 +448,9 @@ public:
|
|||||||
typename std::enable_if<!std::is_same<robj,vobj>::value,int>::type i=0;
|
typename std::enable_if<!std::is_same<robj,vobj>::value,int>::type i=0;
|
||||||
conformable(*this,r);
|
conformable(*this,r);
|
||||||
this->checkerboard = r.Checkerboard();
|
this->checkerboard = r.Checkerboard();
|
||||||
auto me = View();
|
std::cout << GridLogMessage << "Copy other" << std::endl;
|
||||||
auto him= r.View();
|
auto me = AcceleratorView(ViewWrite);
|
||||||
|
auto him= r.AcceleratorView(ViewRead);
|
||||||
accelerator_for(ss,me.size(),vobj::Nsimd(),{
|
accelerator_for(ss,me.size(),vobj::Nsimd(),{
|
||||||
coalescedWrite(me[ss],him(ss));
|
coalescedWrite(me[ss],him(ss));
|
||||||
});
|
});
|
||||||
@ -413,8 +463,9 @@ public:
|
|||||||
inline Lattice<vobj> & operator = (const Lattice<vobj> & r){
|
inline Lattice<vobj> & operator = (const Lattice<vobj> & r){
|
||||||
this->checkerboard = r.Checkerboard();
|
this->checkerboard = r.Checkerboard();
|
||||||
conformable(*this,r);
|
conformable(*this,r);
|
||||||
auto me = View();
|
std::cout << GridLogMessage << "Copy same" << std::endl;
|
||||||
auto him= r.View();
|
auto me = AcceleratorView(ViewWrite);
|
||||||
|
auto him= r.AcceleratorView(ViewRead);
|
||||||
accelerator_for(ss,me.size(),vobj::Nsimd(),{
|
accelerator_for(ss,me.size(),vobj::Nsimd(),{
|
||||||
coalescedWrite(me[ss],him(ss));
|
coalescedWrite(me[ss],him(ss));
|
||||||
});
|
});
|
||||||
|
@ -103,8 +103,8 @@ inline ComplexD rankInnerProduct(const Lattice<vobj> &left,const Lattice<vobj> &
|
|||||||
GridBase *grid = left.Grid();
|
GridBase *grid = left.Grid();
|
||||||
|
|
||||||
// Might make all code paths go this way.
|
// Might make all code paths go this way.
|
||||||
auto left_v = left.View();
|
auto left_v = left.AcceleratorView(ViewRead);
|
||||||
auto right_v=right.View();
|
auto right_v=right.AcceleratorView(ViewRead);
|
||||||
|
|
||||||
const uint64_t nsimd = grid->Nsimd();
|
const uint64_t nsimd = grid->Nsimd();
|
||||||
const uint64_t sites = grid->oSites();
|
const uint64_t sites = grid->oSites();
|
||||||
@ -175,9 +175,9 @@ axpby_norm_fast(Lattice<vobj> &z,sobj a,sobj b,const Lattice<vobj> &x,const Latt
|
|||||||
|
|
||||||
GridBase *grid = x.Grid();
|
GridBase *grid = x.Grid();
|
||||||
|
|
||||||
auto x_v=x.View();
|
auto x_v=x.AcceleratorView(ViewRead);
|
||||||
auto y_v=y.View();
|
auto y_v=y.AcceleratorView(ViewRead);
|
||||||
auto z_v=z.View();
|
auto z_v=z.AcceleratorView(ViewWrite);
|
||||||
|
|
||||||
const uint64_t nsimd = grid->Nsimd();
|
const uint64_t nsimd = grid->Nsimd();
|
||||||
const uint64_t sites = grid->oSites();
|
const uint64_t sites = grid->oSites();
|
||||||
@ -224,8 +224,8 @@ innerProductNorm(ComplexD& ip, RealD &nrm, const Lattice<vobj> &left,const Latti
|
|||||||
|
|
||||||
GridBase *grid = left.Grid();
|
GridBase *grid = left.Grid();
|
||||||
|
|
||||||
auto left_v=left.View();
|
auto left_v=left.AcceleratorView(ViewRead);
|
||||||
auto right_v=right.View();
|
auto right_v=right.AcceleratorView(ViewRead);
|
||||||
|
|
||||||
const uint64_t nsimd = grid->Nsimd();
|
const uint64_t nsimd = grid->Nsimd();
|
||||||
const uint64_t sites = grid->oSites();
|
const uint64_t sites = grid->oSites();
|
||||||
|
Loading…
x
Reference in New Issue
Block a user