mirror of
https://github.com/paboyle/Grid.git
synced 2025-04-04 19:25:56 +01:00
GPU clean up
This commit is contained in:
parent
d5deef782d
commit
c5f93abcd7
@ -47,12 +47,13 @@ Gather_plane_simple (const Lattice<vobj> &rhs,commVector<vobj> &buffer,int dimen
|
|||||||
int e2=rhs.Grid()->_slice_block[dimension];
|
int e2=rhs.Grid()->_slice_block[dimension];
|
||||||
|
|
||||||
int stride=rhs.Grid()->_slice_stride[dimension];
|
int stride=rhs.Grid()->_slice_stride[dimension];
|
||||||
|
auto rhs_v = rhs.View();
|
||||||
if ( cbmask == 0x3 ) {
|
if ( cbmask == 0x3 ) {
|
||||||
thread_loop_collapse2( (int n=0;n<e1;n++) ,
|
thread_loop_collapse2( (int n=0;n<e1;n++) ,
|
||||||
for(int b=0;b<e2;b++){
|
for(int b=0;b<e2;b++){
|
||||||
int o = n*stride;
|
int o = n*stride;
|
||||||
int bo = n*e2;
|
int bo = n*e2;
|
||||||
buffer[off+bo+b]=rhs[so+o+b];
|
buffer[off+bo+b]=rhs_v[so+o+b];
|
||||||
}
|
}
|
||||||
);
|
);
|
||||||
} else {
|
} else {
|
||||||
@ -68,7 +69,7 @@ Gather_plane_simple (const Lattice<vobj> &rhs,commVector<vobj> &buffer,int dimen
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
thread_loop( (int i=0;i<table.size();i++),{
|
thread_loop( (int i=0;i<table.size();i++),{
|
||||||
buffer[off+table[i].first]=rhs[so+table[i].second];
|
buffer[off+table[i].first]=rhs_v[so+table[i].second];
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -91,6 +92,7 @@ Gather_plane_extract(const Lattice<vobj> &rhs,ExtractPointerArray<typename vobj:
|
|||||||
int e2=rhs.Grid()->_slice_block[dimension];
|
int e2=rhs.Grid()->_slice_block[dimension];
|
||||||
int n1=rhs.Grid()->_slice_stride[dimension];
|
int n1=rhs.Grid()->_slice_stride[dimension];
|
||||||
|
|
||||||
|
auto rhs_v = rhs.View();
|
||||||
if ( cbmask ==0x3){
|
if ( cbmask ==0x3){
|
||||||
thread_loop_collapse2( (int n=0;n<e1;n++), {
|
thread_loop_collapse2( (int n=0;n<e1;n++), {
|
||||||
for(int b=0;b<e2;b++){
|
for(int b=0;b<e2;b++){
|
||||||
@ -98,7 +100,7 @@ Gather_plane_extract(const Lattice<vobj> &rhs,ExtractPointerArray<typename vobj:
|
|||||||
int o = n*n1;
|
int o = n*n1;
|
||||||
int offset = b+n*e2;
|
int offset = b+n*e2;
|
||||||
|
|
||||||
vobj temp =rhs[so+o+b];
|
vobj temp =rhs_v[so+o+b];
|
||||||
extract<vobj>(temp,pointers,offset);
|
extract<vobj>(temp,pointers,offset);
|
||||||
|
|
||||||
}
|
}
|
||||||
@ -116,7 +118,7 @@ Gather_plane_extract(const Lattice<vobj> &rhs,ExtractPointerArray<typename vobj:
|
|||||||
int offset = b+n*e2;
|
int offset = b+n*e2;
|
||||||
|
|
||||||
if ( ocb & cbmask ) {
|
if ( ocb & cbmask ) {
|
||||||
vobj temp =rhs[so+o+b];
|
vobj temp =rhs_v[so+o+b];
|
||||||
extract<vobj>(temp,pointers,offset);
|
extract<vobj>(temp,pointers,offset);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -140,13 +142,13 @@ template<class vobj> void Scatter_plane_simple (Lattice<vobj> &rhs,commVector<vo
|
|||||||
int e1=rhs.Grid()->_slice_nblock[dimension];
|
int e1=rhs.Grid()->_slice_nblock[dimension];
|
||||||
int e2=rhs.Grid()->_slice_block[dimension];
|
int e2=rhs.Grid()->_slice_block[dimension];
|
||||||
int stride=rhs.Grid()->_slice_stride[dimension];
|
int stride=rhs.Grid()->_slice_stride[dimension];
|
||||||
|
auto rhs_v = rhs.View();
|
||||||
if ( cbmask ==0x3 ) {
|
if ( cbmask ==0x3 ) {
|
||||||
thread_loop_collapse2( (int n=0;n<e1;n++),{
|
thread_loop_collapse2( (int n=0;n<e1;n++),{
|
||||||
for(int b=0;b<e2;b++){
|
for(int b=0;b<e2;b++){
|
||||||
int o =n*rhs.Grid()->_slice_stride[dimension];
|
int o =n*rhs.Grid()->_slice_stride[dimension];
|
||||||
int bo =n*rhs.Grid()->_slice_block[dimension];
|
int bo =n*rhs.Grid()->_slice_block[dimension];
|
||||||
rhs[so+o+b]=buffer[bo+b];
|
rhs_v[so+o+b]=buffer[bo+b];
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
} else {
|
} else {
|
||||||
@ -162,7 +164,7 @@ template<class vobj> void Scatter_plane_simple (Lattice<vobj> &rhs,commVector<vo
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
thread_loop( (int i=0;i<table.size();i++),{
|
thread_loop( (int i=0;i<table.size();i++),{
|
||||||
rhs[table[i].first]=buffer[table[i].second];
|
rhs_v[table[i].first]=buffer[table[i].second];
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -183,19 +185,21 @@ template<class vobj> void Scatter_plane_merge(Lattice<vobj> &rhs,ExtractPointerA
|
|||||||
int e1=rhs.Grid()->_slice_nblock[dimension];
|
int e1=rhs.Grid()->_slice_nblock[dimension];
|
||||||
int e2=rhs.Grid()->_slice_block[dimension];
|
int e2=rhs.Grid()->_slice_block[dimension];
|
||||||
|
|
||||||
|
auto rhs_v = rhs.View();
|
||||||
if(cbmask ==0x3 ) {
|
if(cbmask ==0x3 ) {
|
||||||
thread_loop_collapse2( (int n=0;n<e1;n++),{
|
thread_loop_collapse2( (int n=0;n<e1;n++),{
|
||||||
for(int b=0;b<e2;b++){
|
for(int b=0;b<e2;b++){
|
||||||
int o = n*rhs.Grid()->_slice_stride[dimension];
|
int o = n*rhs.Grid()->_slice_stride[dimension];
|
||||||
int offset = b+n*rhs.Grid()->_slice_block[dimension];
|
int offset = b+n*rhs.Grid()->_slice_block[dimension];
|
||||||
merge(rhs[so+o+b],pointers,offset);
|
merge(rhs_v[so+o+b],pointers,offset);
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
} else {
|
} else {
|
||||||
|
|
||||||
// Case of SIMD split AND checker dim cannot currently be hit, except in
|
// Case of SIMD split AND checker dim cannot currently be hit, except in
|
||||||
// Test_cshift_red_black code.
|
// Test_cshift_red_black code.
|
||||||
// std::cout << "Scatter_plane merge assert(0); think this is buggy FIXME "<< std::endl;// think this is buggy FIXME
|
// std::cout << "Scatter_plane merge assert(0); think this is buggy FIXME "<< std::endl;
|
||||||
|
// think this is buggy FIXME
|
||||||
std::cout<<" Unthreaded warning -- buffer is not densely packed ??"<<std::endl;
|
std::cout<<" Unthreaded warning -- buffer is not densely packed ??"<<std::endl;
|
||||||
for(int n=0;n<e1;n++){
|
for(int n=0;n<e1;n++){
|
||||||
for(int b=0;b<e2;b++){
|
for(int b=0;b<e2;b++){
|
||||||
@ -203,7 +207,7 @@ template<class vobj> void Scatter_plane_merge(Lattice<vobj> &rhs,ExtractPointerA
|
|||||||
int offset = b+n*rhs.Grid()->_slice_block[dimension];
|
int offset = b+n*rhs.Grid()->_slice_block[dimension];
|
||||||
int ocb=1<<rhs.Grid()->CheckerBoardFromOindex(o+b);
|
int ocb=1<<rhs.Grid()->CheckerBoardFromOindex(o+b);
|
||||||
if ( ocb&cbmask ) {
|
if ( ocb&cbmask ) {
|
||||||
merge(rhs[so+o+b],pointers,offset);
|
merge(rhs_v[so+o+b],pointers,offset);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -142,7 +142,7 @@ template<class vobj> void Cshift_comms(Lattice<vobj> &ret,const Lattice<vobj> &r
|
|||||||
|
|
||||||
Gather_plane_simple (rhs,send_buf,dimension,sx,cbmask);
|
Gather_plane_simple (rhs,send_buf,dimension,sx,cbmask);
|
||||||
|
|
||||||
int rank = grid->_processor;
|
// int rank = grid->_processor;
|
||||||
int recv_from_rank;
|
int recv_from_rank;
|
||||||
int xmit_to_rank;
|
int xmit_to_rank;
|
||||||
grid->ShiftedRanks(dimension,comm_proc,xmit_to_rank,recv_from_rank);
|
grid->ShiftedRanks(dimension,comm_proc,xmit_to_rank,recv_from_rank);
|
||||||
@ -186,7 +186,7 @@ template<class vobj> void Cshift_comms_simd(Lattice<vobj> &ret,const Lattice<vo
|
|||||||
// Simd direction uses an extract/merge pair
|
// Simd direction uses an extract/merge pair
|
||||||
///////////////////////////////////////////////
|
///////////////////////////////////////////////
|
||||||
int buffer_size = grid->_slice_nblock[dimension]*grid->_slice_block[dimension];
|
int buffer_size = grid->_slice_nblock[dimension]*grid->_slice_block[dimension];
|
||||||
int words = sizeof(vobj)/sizeof(vector_type);
|
// int words = sizeof(vobj)/sizeof(vector_type);
|
||||||
|
|
||||||
std::vector<commVector<scalar_object> > send_buf_extract(Nsimd,commVector<scalar_object>(buffer_size) );
|
std::vector<commVector<scalar_object> > send_buf_extract(Nsimd,commVector<scalar_object>(buffer_size) );
|
||||||
std::vector<commVector<scalar_object> > recv_buf_extract(Nsimd,commVector<scalar_object>(buffer_size) );
|
std::vector<commVector<scalar_object> > recv_buf_extract(Nsimd,commVector<scalar_object>(buffer_size) );
|
||||||
|
Loading…
x
Reference in New Issue
Block a user