mirror of
				https://github.com/paboyle/Grid.git
				synced 2025-11-03 21:44:33 +00:00 
			
		
		
		
	threaded FFT
This commit is contained in:
		
							
								
								
									
										52
									
								
								lib/FFT.h
									
									
									
									
									
								
							
							
						
						
									
										52
									
								
								lib/FFT.h
									
									
									
									
									
								
							@@ -200,12 +200,18 @@ namespace Grid {
 | 
			
		||||
      std::vector<int> lcoor(Nd), gcoor(Nd);
 | 
			
		||||
      result = source;
 | 
			
		||||
      for(int p=0;p<processors[dim];p++) {
 | 
			
		||||
        for(int idx=0;idx<sgrid->lSites();idx++) {
 | 
			
		||||
          sgrid->LocalIndexToLocalCoor(idx,lcoor);
 | 
			
		||||
        PARALLEL_REGION
 | 
			
		||||
        {
 | 
			
		||||
          std::vector<int> cbuf(Nd);
 | 
			
		||||
          sobj s;
 | 
			
		||||
          peekLocalSite(s,result,lcoor);
 | 
			
		||||
          lcoor[dim]+=p*L;
 | 
			
		||||
          pokeLocalSite(s,pgbuf,lcoor);
 | 
			
		||||
          
 | 
			
		||||
          PARALLEL_FOR_LOOP_INTERN
 | 
			
		||||
          for(int idx=0;idx<sgrid->lSites();idx++) {
 | 
			
		||||
            sgrid->LocalIndexToLocalCoor(idx,cbuf);
 | 
			
		||||
            peekLocalSite(s,result,cbuf);
 | 
			
		||||
            cbuf[dim]+=p*L;
 | 
			
		||||
            pokeLocalSite(s,pgbuf,cbuf);
 | 
			
		||||
          }
 | 
			
		||||
        }
 | 
			
		||||
        result = Cshift(result,dim,L);
 | 
			
		||||
      }
 | 
			
		||||
@@ -214,14 +220,18 @@ namespace Grid {
 | 
			
		||||
      int NN=pencil_g.lSites();
 | 
			
		||||
      GridStopWatch timer;
 | 
			
		||||
      timer.Start();
 | 
			
		||||
      //PARALLEL_FOR_LOOP
 | 
			
		||||
      for(int idx=0;idx<NN;idx++) {
 | 
			
		||||
        pencil_g.LocalIndexToLocalCoor(idx,lcoor);
 | 
			
		||||
      PARALLEL_REGION
 | 
			
		||||
      {
 | 
			
		||||
        std::vector<int> cbuf(Nd);
 | 
			
		||||
        
 | 
			
		||||
        if ( lcoor[dim] == 0 ) {  // restricts loop to plane at lcoor[dim]==0
 | 
			
		||||
          FFTW_scalar *in = (FFTW_scalar *)&pgbuf._odata[idx];
 | 
			
		||||
          FFTW_scalar *out= (FFTW_scalar *)&pgbuf._odata[idx];
 | 
			
		||||
          FFTW<scalar>::fftw_execute_dft(p,in,out);
 | 
			
		||||
        PARALLEL_FOR_LOOP_INTERN
 | 
			
		||||
        for(int idx=0;idx<NN;idx++) {
 | 
			
		||||
          pencil_g.LocalIndexToLocalCoor(idx, cbuf);
 | 
			
		||||
          if ( cbuf[dim] == 0 ) {  // restricts loop to plane at lcoor[dim]==0
 | 
			
		||||
            FFTW_scalar *in = (FFTW_scalar *)&pgbuf._odata[idx];
 | 
			
		||||
            FFTW_scalar *out= (FFTW_scalar *)&pgbuf._odata[idx];
 | 
			
		||||
            FFTW<scalar>::fftw_execute_dft(p,in,out);
 | 
			
		||||
          }
 | 
			
		||||
        }
 | 
			
		||||
      }
 | 
			
		||||
      timer.Stop();
 | 
			
		||||
@@ -235,13 +245,19 @@ namespace Grid {
 | 
			
		||||
      
 | 
			
		||||
      // writing out result
 | 
			
		||||
      int pc = processor_coor[dim];
 | 
			
		||||
      for(int idx=0;idx<sgrid->lSites();idx++) {
 | 
			
		||||
        sgrid->LocalIndexToLocalCoor(idx,lcoor);
 | 
			
		||||
        gcoor = lcoor;
 | 
			
		||||
      PARALLEL_REGION
 | 
			
		||||
      {
 | 
			
		||||
        std::vector<int> clbuf(Nd), cgbuf(Nd);
 | 
			
		||||
        sobj s;
 | 
			
		||||
        gcoor[dim] = lcoor[dim]+L*pc;
 | 
			
		||||
        peekLocalSite(s,pgbuf,gcoor);
 | 
			
		||||
        pokeLocalSite(s,result,lcoor);
 | 
			
		||||
        
 | 
			
		||||
        PARALLEL_FOR_LOOP_INTERN
 | 
			
		||||
        for(int idx=0;idx<sgrid->lSites();idx++) {
 | 
			
		||||
          sgrid->LocalIndexToLocalCoor(idx,clbuf);
 | 
			
		||||
          cgbuf = clbuf;
 | 
			
		||||
          cgbuf[dim] = clbuf[dim]+L*pc;
 | 
			
		||||
          peekLocalSite(s,pgbuf,cgbuf);
 | 
			
		||||
          pokeLocalSite(s,result,clbuf);
 | 
			
		||||
        }
 | 
			
		||||
      }
 | 
			
		||||
      
 | 
			
		||||
      // destroying plan
 | 
			
		||||
 
 | 
			
		||||
@@ -38,14 +38,19 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
 | 
			
		||||
#ifdef GRID_OMP
 | 
			
		||||
#include <omp.h>
 | 
			
		||||
#ifdef GRID_NUMA
 | 
			
		||||
#define PARALLEL_FOR_LOOP _Pragma("omp parallel for schedule(static)")
 | 
			
		||||
#define PARALLEL_FOR_LOOP        _Pragma("omp parallel for schedule(static)")
 | 
			
		||||
#define PARALLEL_FOR_LOOP_INTERN _Pragma("omp for schedule(static)")
 | 
			
		||||
#else
 | 
			
		||||
#define PARALLEL_FOR_LOOP _Pragma("omp parallel for schedule(runtime)")
 | 
			
		||||
#define PARALLEL_FOR_LOOP        _Pragma("omp parallel for schedule(runtime)")
 | 
			
		||||
#define PARALLEL_FOR_LOOP_INTERN _Pragma("omp for schedule(runtime)")
 | 
			
		||||
#endif
 | 
			
		||||
#define PARALLEL_NESTED_LOOP2 _Pragma("omp parallel for collapse(2)")
 | 
			
		||||
#define PARALLEL_REGION       _Pragma("omp parallel")
 | 
			
		||||
#else
 | 
			
		||||
#define PARALLEL_FOR_LOOP 
 | 
			
		||||
#define PARALLEL_FOR_LOOP
 | 
			
		||||
#define PARALLEL_FOR_LOOP_INTERN
 | 
			
		||||
#define PARALLEL_NESTED_LOOP2
 | 
			
		||||
#define PARALLEL_REGION
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
namespace Grid {
 | 
			
		||||
 
 | 
			
		||||
		Reference in New Issue
	
	Block a user