git-svn-id: svn://svn.icms.temple.edu/lammps-ro/trunk@8922 f3b2605a-c512-4ea7-a41b-209d697bcdaa

2012-10-08 15:29:55 +00:00
parent 856a237400
commit 14bcb1e6d0
184 changed files with 23096 additions and 0 deletions
--- a/lib/cuda/fft3d_cuda.cu
+++ b/lib/cuda/fft3d_cuda.cu
@ -0,0 +1,103 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+
+   Original Version:
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   See the README file in the top-level LAMMPS directory.
+
+   -----------------------------------------------------------------------
+
+   USER-CUDA Package and associated modifications:
+   https://sourceforge.net/projects/lammpscuda/
+
+   Christian Trott, christian.trott@tu-ilmenau.de
+   Lars Winterfeld, lars.winterfeld@tu-ilmenau.de
+   Theoretical Physics II, University of Technology Ilmenau, Germany
+
+   See the README file in the USER-CUDA directory.
+
+   This software is distributed under the GNU General Public License.
+------------------------------------------------------------------------- */
+
+//#define CUDA_PRECISION 1
+#include "cuda_precision.h"
+#include "cuda_common.h"
+/* One complex FFT element stored as two adjacent FFT_FLOAT scalars.
+   The host wrappers in this file cast FFT_DATA* to FFT_FLOAT*, so the
+   declaration order (re first, im second) must be preserved. */
+struct FFT_DATA {
+  FFT_FLOAT re, im;
+};
+
+#include "fft3d_cuda_cu.h"
+#include "fft3d_cuda_kernel.cu"
+#include <stdio.h>
+
+/* Host-side launcher for initfftdata_kernel.
+ *
+ *   in, out  pointers handed to the kernel unchanged
+ *   nfast    number of threads per block (blockDim.x)
+ *   nmid     grid extent in y (gridDim.y)
+ *   nslow    grid extent in x (gridDim.x)
+ *
+ * The device is synchronized both before and after the launch; the last
+ * CUDA error string is printed through MYDBG.
+ * NOTE(review): nfast is used unchecked as the block size; presumably callers
+ * keep it within the device's threads-per-block limit -- confirm.
+ */
+void initfftdata(double* in, FFT_FLOAT* out, int nfast, int nmid, int nslow)
+{
+  const dim3 blocks(nslow, nmid, 1);
+  const dim3 threadsPerBlock(nfast, 1, 1);
+
+  // Synchronize ahead of the launch as well as after it.
+  cudaThreadSynchronize();
+  initfftdata_kernel <<< blocks, threadsPerBlock, 0>>>(in, out);
+  cudaThreadSynchronize();
+  MYDBG(printf("ERROR-CUDA initfftdata_kernel: %s\n", cudaGetErrorString(cudaGetLastError())));
+}
+
+
+/* Host-side launcher for permute_kernel.
+ *
+ *   in, out  complex buffers, passed to the kernel reinterpreted as FFT_FLOAT*
+ *   nfast    block size is 2 * nfast (FFT_DATA holds two FFT_FLOAT values)
+ *   nmid     grid extent in y (gridDim.y)
+ *   nslow    grid extent in x (gridDim.x)
+ *
+ * Synchronizes the device after the launch and prints the last CUDA error
+ * string through MYDBG.
+ * NOTE(review): 2 * nfast is used unchecked as the block size; presumably
+ * callers keep it within the device's threads-per-block limit -- confirm.
+ */
+void permute(FFT_DATA* in, FFT_DATA* out, int nfast, int nmid, int nslow)
+{
+  const dim3 blocks(nslow, nmid, 1);
+  const dim3 threadsPerBlock(nfast * 2, 1, 1);
+
+  FFT_FLOAT* const src = (FFT_FLOAT*)in;
+  FFT_FLOAT* const dst = (FFT_FLOAT*)out;
+
+  permute_kernel <<< blocks, threadsPerBlock, 0>>>(src, dst);
+  cudaThreadSynchronize();
+  MYDBG(printf("ERROR-CUDA permute_kernel: %s\n", cudaGetErrorString(cudaGetLastError())));
+}
+
+/* Host-side launcher used for the "permute and scale" step.
+ *
+ *   in, out  complex buffers, passed to the kernel reinterpreted as FFT_FLOAT*
+ *   nfast    block size is 2 * nfast (FFT_DATA holds two FFT_FLOAT values)
+ *   nmid     grid extent in y (gridDim.y)
+ *   nslow    grid extent in x (gridDim.x)
+ *
+ * Synchronizes the device after the launch and, like the other launchers in
+ * this file, reports the last CUDA error string through MYDBG so that a
+ * failed launch is visible in debug builds.
+ *
+ * NOTE(review): this launches the plain permute_kernel with exactly the same
+ * configuration and arguments as permute(); no separate scaling kernel is
+ * invoked here. Confirm against the kernel source whether scaling is meant
+ * to happen elsewhere or whether a dedicated kernel should be launched.
+ */
+void permute_scale(FFT_DATA* in, FFT_DATA* out, int nfast, int nmid, int nslow)
+{
+
+  dim3 grid;
+  grid.x = nslow;
+  grid.y = nmid;
+  grid.z = 1;
+  dim3 threads;
+  threads.x = nfast * 2;   // one thread per FFT_FLOAT scalar of a fast-axis row
+  threads.y = 1;
+  threads.z = 1;
+  permute_kernel <<< grid, threads, 0>>>((FFT_FLOAT*)in, (FFT_FLOAT*)out);
+  cudaThreadSynchronize();
+  MYDBG(printf("ERROR-CUDA permute_kernel (permute_scale): %s\n", cudaGetErrorString(cudaGetLastError())));
+}
+/* Host-side launcher for permute_part_kernel, which operates on the
+ * sub-range [ilo..ihi] x [jlo..jhi] x [klo..khi] (bounds inclusive).
+ *
+ *   in, out            complex buffers, passed reinterpreted as FFT_FLOAT*
+ *   nfast, nmid, nslow forwarded unchanged to the kernel
+ *   ihi, ilo           inclusive bounds; gridDim.x = ihi - ilo + 1
+ *   jhi, jlo           inclusive bounds; gridDim.y = jhi - jlo + 1
+ *   khi, klo           inclusive bounds; blockDim.x = 2 * (khi - klo + 1)
+ *
+ * If any of the three extents is smaller than 1 the range is empty and the
+ * kernel is not launched: a zero or wrapped-around dim3 extent would only
+ * produce an invalid-configuration launch error. The device is synchronized
+ * in either case, and the last CUDA error string is reported through MYDBG,
+ * matching the other launchers in this file.
+ * NOTE(review): the block size is not checked against the device's
+ * threads-per-block limit; presumably callers keep it in range -- confirm.
+ */
+void permute_part(FFT_DATA* in, FFT_DATA* out, int nfast, int nmid, int nslow, int ihi, int ilo, int jhi, int jlo, int khi, int klo)
+{
+  const int ni = ihi - ilo + 1;
+  const int nj = jhi - jlo + 1;
+  const int nk = khi - klo + 1;
+
+  // Only launch when there is at least one element in every direction.
+  if (ni > 0 && nj > 0 && nk > 0) {
+    dim3 grid;
+    grid.x = ni;
+    grid.y = nj;
+    grid.z = 1;
+    dim3 threads;
+    threads.x = nk * 2;   // two FFT_FLOAT scalars per FFT_DATA element
+    threads.y = 1;
+    threads.z = 1;
+    permute_part_kernel <<< grid, threads, 0>>>((FFT_FLOAT*)in, (FFT_FLOAT*)out, nfast, nmid, nslow, ihi, ilo, jhi, jlo, khi, klo);
+  }
+
+  cudaThreadSynchronize();
+  MYDBG(printf("ERROR-CUDA permute_part_kernel: %s\n", cudaGetErrorString(cudaGetLastError())));
+}
+
+/* Host-callable wrapper: calls cudaThreadSynchronize() and discards the
+   status it returns. */
+void FFTsyncthreads()
+{
+  (void) cudaThreadSynchronize();
+}
+