mirror of
https://github.com/paboyle/Grid.git
synced 2025-06-24 18:52:02 +01:00
Compare commits
5 Commits
ffa7fe0cc2
...
hotfix/nvc
Author | SHA1 | Date | |
---|---|---|---|
9e64387933 | |||
983b681d46 | |||
4072408b6f | |||
bd76b47fbf | |||
18ce23aa75 |
@ -519,7 +519,6 @@ void MemoryManager::Audit(std::string s)
|
|||||||
uint64_t LruBytes1=0;
|
uint64_t LruBytes1=0;
|
||||||
uint64_t LruBytes2=0;
|
uint64_t LruBytes2=0;
|
||||||
uint64_t LruCnt=0;
|
uint64_t LruCnt=0;
|
||||||
uint64_t LockedBytes=0;
|
|
||||||
|
|
||||||
std::cout << " Memory Manager::Audit() from "<<s<<std::endl;
|
std::cout << " Memory Manager::Audit() from "<<s<<std::endl;
|
||||||
for(auto it=LRU.begin();it!=LRU.end();it++){
|
for(auto it=LRU.begin();it!=LRU.end();it++){
|
||||||
|
@ -170,10 +170,7 @@ void GlobalSharedMemory::OptimalCommunicator(const Coordinate &processors,Grid_M
|
|||||||
if(nscan==3 && HPEhypercube ) OptimalCommunicatorHypercube(processors,optimal_comm,SHM);
|
if(nscan==3 && HPEhypercube ) OptimalCommunicatorHypercube(processors,optimal_comm,SHM);
|
||||||
else OptimalCommunicatorSharedMemory(processors,optimal_comm,SHM);
|
else OptimalCommunicatorSharedMemory(processors,optimal_comm,SHM);
|
||||||
}
|
}
|
||||||
static inline int divides(int a,int b)
|
|
||||||
{
|
|
||||||
return ( b == ( (b/a)*a ) );
|
|
||||||
}
|
|
||||||
void GlobalSharedMemory::OptimalCommunicatorHypercube(const Coordinate &processors,Grid_MPI_Comm & optimal_comm,Coordinate &SHM)
|
void GlobalSharedMemory::OptimalCommunicatorHypercube(const Coordinate &processors,Grid_MPI_Comm & optimal_comm,Coordinate &SHM)
|
||||||
{
|
{
|
||||||
////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////
|
||||||
|
@ -320,7 +320,7 @@ struct Conj{
|
|||||||
|
|
||||||
struct TimesMinusI{
|
struct TimesMinusI{
|
||||||
//Complex single
|
//Complex single
|
||||||
inline float32x4_t operator()(float32x4_t in, float32x4_t ret){
|
inline float32x4_t operator()(float32x4_t in){
|
||||||
// ar ai br bi -> ai -ar ai -br
|
// ar ai br bi -> ai -ar ai -br
|
||||||
float32x4_t r0, r1;
|
float32x4_t r0, r1;
|
||||||
r0 = vnegq_f32(in); // -ar -ai -br -bi
|
r0 = vnegq_f32(in); // -ar -ai -br -bi
|
||||||
@ -328,7 +328,7 @@ struct TimesMinusI{
|
|||||||
return vtrn1q_f32(r1, r0); // ar -ai br -bi
|
return vtrn1q_f32(r1, r0); // ar -ai br -bi
|
||||||
}
|
}
|
||||||
//Complex double
|
//Complex double
|
||||||
inline float64x2_t operator()(float64x2_t in, float64x2_t ret){
|
inline float64x2_t operator()(float64x2_t in){
|
||||||
// a ib -> b -ia
|
// a ib -> b -ia
|
||||||
float64x2_t tmp;
|
float64x2_t tmp;
|
||||||
tmp = vnegq_f64(in);
|
tmp = vnegq_f64(in);
|
||||||
@ -338,7 +338,7 @@ struct TimesMinusI{
|
|||||||
|
|
||||||
struct TimesI{
|
struct TimesI{
|
||||||
//Complex single
|
//Complex single
|
||||||
inline float32x4_t operator()(float32x4_t in, float32x4_t ret){
|
inline float32x4_t operator()(float32x4_t in){
|
||||||
// ar ai br bi -> -ai ar -bi br
|
// ar ai br bi -> -ai ar -bi br
|
||||||
float32x4_t r0, r1;
|
float32x4_t r0, r1;
|
||||||
r0 = vnegq_f32(in); // -ar -ai -br -bi
|
r0 = vnegq_f32(in); // -ar -ai -br -bi
|
||||||
@ -346,7 +346,7 @@ struct TimesI{
|
|||||||
return vtrn1q_f32(r1, in); // -ai ar -bi br
|
return vtrn1q_f32(r1, in); // -ai ar -bi br
|
||||||
}
|
}
|
||||||
//Complex double
|
//Complex double
|
||||||
inline float64x2_t operator()(float64x2_t in, float64x2_t ret){
|
inline float64x2_t operator()(float64x2_t in){
|
||||||
// a ib -> -b ia
|
// a ib -> -b ia
|
||||||
float64x2_t tmp;
|
float64x2_t tmp;
|
||||||
tmp = vnegq_f64(in);
|
tmp = vnegq_f64(in);
|
||||||
|
@ -1369,10 +1369,11 @@ public:
|
|||||||
int recv_from_rank;
|
int recv_from_rank;
|
||||||
int xmit_to_rank;
|
int xmit_to_rank;
|
||||||
int shm_send=0;
|
int shm_send=0;
|
||||||
int shm_recv=0;
|
|
||||||
_grid->ShiftedRanks(dimension,nbr_proc,xmit_to_rank,recv_from_rank);
|
_grid->ShiftedRanks(dimension,nbr_proc,xmit_to_rank,recv_from_rank);
|
||||||
#ifdef SHM_FAST_PATH
|
#ifdef SHM_FAST_PATH
|
||||||
#warning STENCIL SHM FAST PATH SELECTED
|
#warning STENCIL SHM FAST PATH SELECTED
|
||||||
|
int shm_recv=0;
|
||||||
// shm == receive pointer if offnode
|
// shm == receive pointer if offnode
|
||||||
// shm == Translate[send pointer] if on node -- my view of his send pointer
|
// shm == Translate[send pointer] if on node -- my view of his send pointer
|
||||||
cobj *shm = (cobj *) _grid->ShmBufferTranslate(recv_from_rank,sp);
|
cobj *shm = (cobj *) _grid->ShmBufferTranslate(recv_from_rank,sp);
|
||||||
@ -1405,7 +1406,6 @@ public:
|
|||||||
acceleratorMemSet(rp,0,bytes); // Zero prefill comms buffer to zero
|
acceleratorMemSet(rp,0,bytes); // Zero prefill comms buffer to zero
|
||||||
}
|
}
|
||||||
int do_send = (comms_send|comms_partial_send) && (!shm_send );
|
int do_send = (comms_send|comms_partial_send) && (!shm_send );
|
||||||
int do_recv = (comms_send|comms_partial_send) && (!shm_recv );
|
|
||||||
AddPacket((void *)sp,(void *)rp,
|
AddPacket((void *)sp,(void *)rp,
|
||||||
xmit_to_rank,do_send,
|
xmit_to_rank,do_send,
|
||||||
recv_from_rank,do_send,
|
recv_from_rank,do_send,
|
||||||
|
@ -133,7 +133,6 @@ typename vobj::scalar_object extractLane(int lane, const vobj & __restrict__ vec
|
|||||||
typedef scalar_type * pointer;
|
typedef scalar_type * pointer;
|
||||||
|
|
||||||
constexpr int words=sizeof(vobj)/sizeof(vector_type);
|
constexpr int words=sizeof(vobj)/sizeof(vector_type);
|
||||||
constexpr int Nsimd=vector_type::Nsimd();
|
|
||||||
|
|
||||||
scalar_object extracted;
|
scalar_object extracted;
|
||||||
pointer __restrict__ sp = (pointer)&extracted; // Type pun
|
pointer __restrict__ sp = (pointer)&extracted; // Type pun
|
||||||
@ -153,7 +152,6 @@ void insertLane(int lane, vobj & __restrict__ vec,const typename vobj::scalar_ob
|
|||||||
typedef scalar_type * pointer;
|
typedef scalar_type * pointer;
|
||||||
|
|
||||||
constexpr int words=sizeof(vobj)/sizeof(vector_type);
|
constexpr int words=sizeof(vobj)/sizeof(vector_type);
|
||||||
constexpr int Nsimd=vector_type::Nsimd();
|
|
||||||
|
|
||||||
pointer __restrict__ sp = (pointer)&extracted;
|
pointer __restrict__ sp = (pointer)&extracted;
|
||||||
vector_type *vp = (vector_type *)&vec;
|
vector_type *vp = (vector_type *)&vec;
|
||||||
@ -178,8 +176,6 @@ void extract(const vobj &vec,const ExtractPointerArray<sobj> &extracted, int off
|
|||||||
const int s = Nsimd/Nextr;
|
const int s = Nsimd/Nextr;
|
||||||
|
|
||||||
vector_type * vp = (vector_type *)&vec;
|
vector_type * vp = (vector_type *)&vec;
|
||||||
scalar_type vtmp;
|
|
||||||
sobj_scalar_type stmp;
|
|
||||||
for(int w=0;w<words;w++){
|
for(int w=0;w<words;w++){
|
||||||
for(int i=0;i<Nextr;i++){
|
for(int i=0;i<Nextr;i++){
|
||||||
sobj_scalar_type * pointer = (sobj_scalar_type *)& extracted[i][offset];
|
sobj_scalar_type * pointer = (sobj_scalar_type *)& extracted[i][offset];
|
||||||
@ -205,7 +201,6 @@ void merge(vobj &vec,const ExtractPointerArray<sobj> &extracted, int offset)
|
|||||||
|
|
||||||
vector_type * vp = (vector_type *)&vec;
|
vector_type * vp = (vector_type *)&vec;
|
||||||
scalar_type vtmp;
|
scalar_type vtmp;
|
||||||
sobj_scalar_type stmp;
|
|
||||||
for(int w=0;w<words;w++){
|
for(int w=0;w<words;w++){
|
||||||
for(int i=0;i<Nextr;i++){
|
for(int i=0;i<Nextr;i++){
|
||||||
sobj_scalar_type * pointer = (sobj_scalar_type *)& extracted[i][offset];
|
sobj_scalar_type * pointer = (sobj_scalar_type *)& extracted[i][offset];
|
||||||
@ -242,9 +237,6 @@ void copyLane(vobjOut & __restrict__ vecOut, int lane_out, const vobjIn & __rest
|
|||||||
typedef oextract_type * opointer;
|
typedef oextract_type * opointer;
|
||||||
typedef iextract_type * ipointer;
|
typedef iextract_type * ipointer;
|
||||||
|
|
||||||
constexpr int oNsimd=ovector_type::Nsimd();
|
|
||||||
constexpr int iNsimd=ivector_type::Nsimd();
|
|
||||||
|
|
||||||
iscalar_type itmp;
|
iscalar_type itmp;
|
||||||
oscalar_type otmp;
|
oscalar_type otmp;
|
||||||
|
|
||||||
|
@ -329,7 +329,6 @@ int main(int argc, char **argv) {
|
|||||||
|
|
||||||
|
|
||||||
auto grid4= GridPtr;
|
auto grid4= GridPtr;
|
||||||
auto rbgrid4= GridRBPtr;
|
|
||||||
auto rbgrid = StrangeOp.FermionRedBlackGrid();
|
auto rbgrid = StrangeOp.FermionRedBlackGrid();
|
||||||
auto grid = StrangeOp.FermionGrid();
|
auto grid = StrangeOp.FermionGrid();
|
||||||
if(1){
|
if(1){
|
||||||
|
@ -1,7 +1,8 @@
|
|||||||
# Grid [),branch:name:develop)/statusIcon.svg)](http://ci.cliath.ph.ed.ac.uk/project.html?projectId=GridBasedSoftware_Grid&tab=projectOverview)
|
# Grid
|
||||||
|
|
||||||
**Data parallel C++ mathematical object library.**
|
**Data parallel C++ mathematical object library.**
|
||||||
|
|
||||||
|
[),branch:default:true)/statusIcon.svg)](https://ci.dev.dirac.ed.ac.uk/project/GridBasedSoftware_Grid?mode=builds)
|
||||||
|
|
||||||
License: GPL v2.
|
License: GPL v2.
|
||||||
|
|
||||||
Last update June 2017.
|
Last update June 2017.
|
||||||
|
Reference in New Issue
Block a user