git-svn-id: svn://svn.icms.temple.edu/lammps-ro/trunk@8922 f3b2605a-c512-4ea7-a41b-209d697bcdaa
This commit is contained in:
103
lib/cuda/fft3d_cuda.cu
Normal file
103
lib/cuda/fft3d_cuda.cu
Normal file
@ -0,0 +1,103 @@
|
||||
/* ----------------------------------------------------------------------
   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator

   Original Version:
   http://lammps.sandia.gov, Sandia National Laboratories
   Steve Plimpton, sjplimp@sandia.gov

   See the README file in the top-level LAMMPS directory.

   -----------------------------------------------------------------------

   USER-CUDA Package and associated modifications:
   https://sourceforge.net/projects/lammpscuda/

   Christian Trott, christian.trott@tu-ilmenau.de
   Lars Winterfeld, lars.winterfeld@tu-ilmenau.de
   Theoretical Physics II, University of Technology Ilmenau, Germany

   See the README file in the USER-CUDA directory.

   This software is distributed under the GNU General Public License.
------------------------------------------------------------------------- */
|
||||
|
||||
//#define CUDA_PRECISION 1
|
||||
#include "cuda_precision.h"
|
||||
#include "cuda_common.h"
|
||||
// One FFT element stored as two consecutive FFT_FLOAT values.
// The wrappers in this file reinterpret FFT_DATA* as FFT_FLOAT*, so the
// member order (re first, then im) is part of the memory layout and must
// not be changed.
struct FFT_DATA {
  FFT_FLOAT re, im;  // presumably real and imaginary part, in that order
};
|
||||
|
||||
#include "fft3d_cuda_cu.h"
|
||||
#include "fft3d_cuda_kernel.cu"
|
||||
#include <stdio.h>
|
||||
|
||||
// Host wrapper that launches initfftdata_kernel on (in, out).
//
// Launch layout: grid = (nslow, nmid, 1) blocks with nfast threads per block,
// no dynamic shared memory, default stream. nfast therefore must not exceed
// the device's maximum number of threads per block, otherwise the launch is
// rejected (and now reported, see below).
//
//   in                 - double-precision input pointer passed to the kernel
//                        (presumably device memory - confirm against caller)
//   out                - FFT_FLOAT output pointer passed to the kernel
//   nfast, nmid, nslow - extents; used here only to size the launch
//
// The device is synchronized before and after the launch, so this call blocks
// until the kernel has finished. Failures are written to stderr in every
// build; the MYDBG trace is kept unchanged for debug builds.
void initfftdata(double* in, FFT_FLOAT* out, int nfast, int nmid, int nslow)
{
  const dim3 grid(nslow, nmid, 1);
  const dim3 threads(nfast, 1, 1);

  // cudaDeviceSynchronize() is the supported replacement for the deprecated
  // cudaThreadSynchronize().
  cudaDeviceSynchronize();
  initfftdata_kernel <<< grid, threads, 0>>>(in, out);

  // Launch-configuration errors show up in cudaGetLastError(); errors raised
  // while the kernel runs are returned by the following synchronization.
  const cudaError_t launch_err = cudaGetLastError();
  const cudaError_t sync_err = cudaDeviceSynchronize();
  const cudaError_t err = (launch_err != cudaSuccess) ? launch_err : sync_err;

  MYDBG(printf("ERROR-CUDA initfftdata_kernel: %s\n", cudaGetErrorString(err)));

  if(err != cudaSuccess)
    fprintf(stderr, "ERROR-CUDA initfftdata_kernel: %s\n", cudaGetErrorString(err));
}
|
||||
|
||||
|
||||
// Host wrapper that launches permute_kernel on (in, out), both reinterpreted
// as flat FFT_FLOAT arrays.
//
// Launch layout: grid = (nslow, nmid, 1) blocks with nfast * 2 threads per
// block (an FFT_DATA element consists of two FFT_FLOAT values), no dynamic
// shared memory, default stream. nfast * 2 must not exceed the device's
// maximum number of threads per block, otherwise the launch is rejected.
//
//   in, out            - FFT_DATA buffers handed to the kernel
//                        (presumably device memory - confirm against caller)
//   nfast, nmid, nslow - extents; used here only to size the launch
//
// Blocks until the kernel has finished. Failures are written to stderr in
// every build; the MYDBG trace is kept unchanged for debug builds.
void permute(FFT_DATA* in, FFT_DATA* out, int nfast, int nmid, int nslow)
{
  const dim3 grid(nslow, nmid, 1);
  const dim3 threads(nfast * 2, 1, 1);

  permute_kernel <<< grid, threads, 0>>>((FFT_FLOAT*)in, (FFT_FLOAT*)out);

  // Launch-configuration errors show up in cudaGetLastError(); errors raised
  // while the kernel runs are returned by the synchronization.
  // cudaDeviceSynchronize() replaces the deprecated cudaThreadSynchronize().
  const cudaError_t launch_err = cudaGetLastError();
  const cudaError_t sync_err = cudaDeviceSynchronize();
  const cudaError_t err = (launch_err != cudaSuccess) ? launch_err : sync_err;

  MYDBG(printf("ERROR-CUDA permute_kernel: %s\n", cudaGetErrorString(err)));

  if(err != cudaSuccess)
    fprintf(stderr, "ERROR-CUDA permute_kernel: %s\n", cudaGetErrorString(err));
}
|
||||
|
||||
// Host wrapper with the same launch as permute(): it runs permute_kernel on
// (in, out), both reinterpreted as flat FFT_FLOAT arrays.
//
// NOTE(review): despite its name this function launches permute_kernel, not a
// separate scaling kernel. If a scaling variant was intended, confirm against
// fft3d_cuda_kernel.cu before changing the launch.
//
// Launch layout: grid = (nslow, nmid, 1) blocks with nfast * 2 threads per
// block, no dynamic shared memory, default stream. nfast * 2 must not exceed
// the device's maximum number of threads per block.
//
//   in, out            - FFT_DATA buffers handed to the kernel
//                        (presumably device memory - confirm against caller)
//   nfast, nmid, nslow - extents; used here only to size the launch
//
// Blocks until the kernel has finished and reports failures on stderr.
void permute_scale(FFT_DATA* in, FFT_DATA* out, int nfast, int nmid, int nslow)
{
  const dim3 grid(nslow, nmid, 1);
  const dim3 threads(nfast * 2, 1, 1);

  permute_kernel <<< grid, threads, 0>>>((FFT_FLOAT*)in, (FFT_FLOAT*)out);

  // Launch-configuration errors show up in cudaGetLastError(); errors raised
  // while the kernel runs are returned by the synchronization.
  // cudaDeviceSynchronize() replaces the deprecated cudaThreadSynchronize().
  const cudaError_t launch_err = cudaGetLastError();
  const cudaError_t sync_err = cudaDeviceSynchronize();
  const cudaError_t err = (launch_err != cudaSuccess) ? launch_err : sync_err;

  if(err != cudaSuccess)
    fprintf(stderr, "ERROR-CUDA permute_scale (permute_kernel): %s\n", cudaGetErrorString(err));
}
|
||||
// Host wrapper that launches permute_part_kernel for the index sub-range
// [ilo..ihi] x [jlo..jhi] x [klo..khi] (bounds inclusive, as implied by the
// "+ 1" in the launch sizes).
//
// Launch layout: grid = (ihi - ilo + 1, jhi - jlo + 1, 1) blocks with
// (khi - klo + 1) * 2 threads per block (an FFT_DATA element consists of two
// FFT_FLOAT values), no dynamic shared memory, default stream. The thread
// count must not exceed the device's maximum number of threads per block.
//
//   in, out            - FFT_DATA buffers, passed to the kernel as FFT_FLOAT*
//                        (presumably device memory - confirm against caller)
//   nfast, nmid, nslow - full extents, forwarded to the kernel unchanged
//   ihi, ilo, jhi, jlo, khi, klo
//                      - sub-range bounds, forwarded to the kernel unchanged;
//                        note the hi-before-lo parameter order
//
// Blocks until the kernel has finished and reports failures on stderr.
void permute_part(FFT_DATA* in, FFT_DATA* out, int nfast, int nmid, int nslow, int ihi, int ilo, int jhi, int jlo, int khi, int klo)
{
  const dim3 grid(ihi - ilo + 1, jhi - jlo + 1, 1);
  const dim3 threads((khi - klo + 1) * 2, 1, 1);

  permute_part_kernel <<< grid, threads, 0>>>((FFT_FLOAT*)in, (FFT_FLOAT*)out, nfast, nmid, nslow, ihi, ilo, jhi, jlo, khi, klo);

  // Launch-configuration errors show up in cudaGetLastError(); errors raised
  // while the kernel runs are returned by the synchronization.
  // cudaDeviceSynchronize() replaces the deprecated cudaThreadSynchronize().
  const cudaError_t launch_err = cudaGetLastError();
  const cudaError_t sync_err = cudaDeviceSynchronize();
  const cudaError_t err = (launch_err != cudaSuccess) ? launch_err : sync_err;

  if(err != cudaSuccess)
    fprintf(stderr, "ERROR-CUDA permute_part_kernel: %s\n", cudaGetErrorString(err));
}
|
||||
|
||||
// Blocks the calling host thread until all previously issued device work has
// completed. Despite the name this is a host-side device synchronization, not
// the in-kernel __syncthreads() barrier.
void FFTsyncthreads()
{
  // cudaDeviceSynchronize() is the supported replacement for the deprecated
  // cudaThreadSynchronize(); the blocking behavior is the same.
  cudaDeviceSynchronize();
}
|
||||
|
||||
Reference in New Issue
Block a user