Created another static instance for dpd/tstat in the GPU library
This commit is contained in:
@ -69,7 +69,7 @@ OBJS = $(OBJ_DIR)/lal_atom.o $(OBJ_DIR)/lal_ans.o \
|
|||||||
$(OBJ_DIR)/lal_soft.o $(OBJ_DIR)/lal_soft_ext.o \
|
$(OBJ_DIR)/lal_soft.o $(OBJ_DIR)/lal_soft_ext.o \
|
||||||
$(OBJ_DIR)/lal_lj_coul_msm.o $(OBJ_DIR)/lal_lj_coul_msm_ext.o \
|
$(OBJ_DIR)/lal_lj_coul_msm.o $(OBJ_DIR)/lal_lj_coul_msm_ext.o \
|
||||||
$(OBJ_DIR)/lal_lj_gromacs.o $(OBJ_DIR)/lal_lj_gromacs_ext.o \
|
$(OBJ_DIR)/lal_lj_gromacs.o $(OBJ_DIR)/lal_lj_gromacs_ext.o \
|
||||||
$(OBJ_DIR)/lal_dpd.o $(OBJ_DIR)/lal_dpd_ext.o \
|
$(OBJ_DIR)/lal_dpd.o $(OBJ_DIR)/lal_dpd_ext.o $(OBJ_DIR)/lal_dpd_tstat_ext.o \
|
||||||
$(OBJ_DIR)/lal_tersoff.o $(OBJ_DIR)/lal_tersoff_ext.o \
|
$(OBJ_DIR)/lal_tersoff.o $(OBJ_DIR)/lal_tersoff_ext.o \
|
||||||
$(OBJ_DIR)/lal_tersoff_zbl.o $(OBJ_DIR)/lal_tersoff_zbl_ext.o \
|
$(OBJ_DIR)/lal_tersoff_zbl.o $(OBJ_DIR)/lal_tersoff_zbl_ext.o \
|
||||||
$(OBJ_DIR)/lal_tersoff_mod.o $(OBJ_DIR)/lal_tersoff_mod_ext.o \
|
$(OBJ_DIR)/lal_tersoff_mod.o $(OBJ_DIR)/lal_tersoff_mod_ext.o \
|
||||||
@ -731,6 +731,15 @@ $(OBJ_DIR)/dpd.cubin: lal_dpd.cu lal_precision.h lal_preprocessor.h
|
|||||||
$(OBJ_DIR)/dpd_cubin.h: $(OBJ_DIR)/dpd.cubin $(OBJ_DIR)/dpd.cubin
|
$(OBJ_DIR)/dpd_cubin.h: $(OBJ_DIR)/dpd.cubin $(OBJ_DIR)/dpd.cubin
|
||||||
$(BIN2C) -c -n dpd $(OBJ_DIR)/dpd.cubin > $(OBJ_DIR)/dpd_cubin.h
|
$(BIN2C) -c -n dpd $(OBJ_DIR)/dpd.cubin > $(OBJ_DIR)/dpd_cubin.h
|
||||||
|
|
||||||
|
$(OBJ_DIR)/lal_dpd.o: $(ALL_H) lal_dpd.h lal_dpd.cpp $(OBJ_DIR)/dpd_cubin.h $(OBJ_DIR)/lal_base_dpd.o
|
||||||
|
$(CUDR) -o $@ -c lal_dpd.cpp -I$(OBJ_DIR)
|
||||||
|
|
||||||
|
$(OBJ_DIR)/lal_dpd_ext.o: $(ALL_H) lal_dpd.h lal_dpd_ext.cpp lal_base_dpd.h
|
||||||
|
$(CUDR) -o $@ -c lal_dpd_ext.cpp -I$(OBJ_DIR)
|
||||||
|
|
||||||
|
$(OBJ_DIR)/lal_dpd_tstat_ext.o: $(ALL_H) lal_dpd.h lal_dpd_tstat_ext.cpp lal_base_dpd.h
|
||||||
|
$(CUDR) -o $@ -c lal_dpd_tstat_ext.cpp -I$(OBJ_DIR)
|
||||||
|
|
||||||
$(OBJ_DIR)/ufm.cubin: lal_ufm.cu lal_precision.h lal_preprocessor.h
|
$(OBJ_DIR)/ufm.cubin: lal_ufm.cu lal_precision.h lal_preprocessor.h
|
||||||
$(CUDA) --cubin -DNV_KERNEL -o $@ lal_ufm.cu
|
$(CUDA) --cubin -DNV_KERNEL -o $@ lal_ufm.cu
|
||||||
|
|
||||||
@ -743,12 +752,6 @@ $(OBJ_DIR)/lal_ufm.o: $(ALL_H) lal_ufm.h lal_ufm.cpp $(OBJ_DIR)/ufm_cubin.h $(OB
|
|||||||
$(OBJ_DIR)/lal_ufm_ext.o: $(ALL_H) lal_ufm.h lal_ufm_ext.cpp lal_base_atomic.h
|
$(OBJ_DIR)/lal_ufm_ext.o: $(ALL_H) lal_ufm.h lal_ufm_ext.cpp lal_base_atomic.h
|
||||||
$(CUDR) -o $@ -c lal_ufm_ext.cpp -I$(OBJ_DIR)
|
$(CUDR) -o $@ -c lal_ufm_ext.cpp -I$(OBJ_DIR)
|
||||||
|
|
||||||
$(OBJ_DIR)/lal_dpd.o: $(ALL_H) lal_dpd.h lal_dpd.cpp $(OBJ_DIR)/dpd_cubin.h $(OBJ_DIR)/lal_base_dpd.o
|
|
||||||
$(CUDR) -o $@ -c lal_dpd.cpp -I$(OBJ_DIR)
|
|
||||||
|
|
||||||
$(OBJ_DIR)/lal_dpd_ext.o: $(ALL_H) lal_dpd.h lal_dpd_ext.cpp lal_base_dpd.h
|
|
||||||
$(CUDR) -o $@ -c lal_dpd_ext.cpp -I$(OBJ_DIR)
|
|
||||||
|
|
||||||
$(OBJ_DIR)/tersoff.cubin: lal_tersoff.cu lal_precision.h lal_tersoff_extra.h lal_preprocessor.h
|
$(OBJ_DIR)/tersoff.cubin: lal_tersoff.cu lal_precision.h lal_tersoff_extra.h lal_preprocessor.h
|
||||||
$(CUDA) --cubin -DNV_KERNEL -o $@ lal_tersoff.cu
|
$(CUDA) --cubin -DNV_KERNEL -o $@ lal_tersoff.cu
|
||||||
|
|
||||||
|
|||||||
@ -69,7 +69,7 @@ OBJS = $(OBJ_DIR)/lal_atom.o $(OBJ_DIR)/lal_ans.o \
|
|||||||
$(OBJ_DIR)/lal_soft.o $(OBJ_DIR)/lal_soft_ext.o \
|
$(OBJ_DIR)/lal_soft.o $(OBJ_DIR)/lal_soft_ext.o \
|
||||||
$(OBJ_DIR)/lal_lj_coul_msm.o $(OBJ_DIR)/lal_lj_coul_msm_ext.o \
|
$(OBJ_DIR)/lal_lj_coul_msm.o $(OBJ_DIR)/lal_lj_coul_msm_ext.o \
|
||||||
$(OBJ_DIR)/lal_lj_gromacs.o $(OBJ_DIR)/lal_lj_gromacs_ext.o \
|
$(OBJ_DIR)/lal_lj_gromacs.o $(OBJ_DIR)/lal_lj_gromacs_ext.o \
|
||||||
$(OBJ_DIR)/lal_dpd.o $(OBJ_DIR)/lal_dpd_ext.o \
|
$(OBJ_DIR)/lal_dpd.o $(OBJ_DIR)/lal_dpd_ext.o $(OBJ_DIR)/lal_dpd_tstat_ext.o \
|
||||||
$(OBJ_DIR)/lal_tersoff.o $(OBJ_DIR)/lal_tersoff_ext.o \
|
$(OBJ_DIR)/lal_tersoff.o $(OBJ_DIR)/lal_tersoff_ext.o \
|
||||||
$(OBJ_DIR)/lal_tersoff_zbl.o $(OBJ_DIR)/lal_tersoff_zbl_ext.o \
|
$(OBJ_DIR)/lal_tersoff_zbl.o $(OBJ_DIR)/lal_tersoff_zbl_ext.o \
|
||||||
$(OBJ_DIR)/lal_tersoff_mod.o $(OBJ_DIR)/lal_tersoff_mod_ext.o \
|
$(OBJ_DIR)/lal_tersoff_mod.o $(OBJ_DIR)/lal_tersoff_mod_ext.o \
|
||||||
@ -82,7 +82,8 @@ OBJS = $(OBJ_DIR)/lal_atom.o $(OBJ_DIR)/lal_ans.o \
|
|||||||
$(OBJ_DIR)/lal_lj_expand_coul_long.o $(OBJ_DIR)/lal_lj_expand_coul_long_ext.o \
|
$(OBJ_DIR)/lal_lj_expand_coul_long.o $(OBJ_DIR)/lal_lj_expand_coul_long_ext.o \
|
||||||
$(OBJ_DIR)/lal_coul_long_cs.o $(OBJ_DIR)/lal_coul_long_cs_ext.o \
|
$(OBJ_DIR)/lal_coul_long_cs.o $(OBJ_DIR)/lal_coul_long_cs_ext.o \
|
||||||
$(OBJ_DIR)/lal_born_coul_long_cs.o $(OBJ_DIR)/lal_born_coul_long_cs_ext.o \
|
$(OBJ_DIR)/lal_born_coul_long_cs.o $(OBJ_DIR)/lal_born_coul_long_cs_ext.o \
|
||||||
$(OBJ_DIR)/lal_born_coul_wolf_cs.o $(OBJ_DIR)/lal_born_coul_wolf_cs_ext.o
|
$(OBJ_DIR)/lal_born_coul_wolf_cs.o $(OBJ_DIR)/lal_born_coul_wolf_cs_ext.o \
|
||||||
|
$(OBJ_DIR)/lal_lj_tip4p_long.o $(OBJ_DIR)/lal_lj_tip4p_long_ext.o
|
||||||
|
|
||||||
CBNS = $(OBJ_DIR)/device.cubin $(OBJ_DIR)/device_cubin.h \
|
CBNS = $(OBJ_DIR)/device.cubin $(OBJ_DIR)/device_cubin.h \
|
||||||
$(OBJ_DIR)/atom.cubin $(OBJ_DIR)/atom_cubin.h \
|
$(OBJ_DIR)/atom.cubin $(OBJ_DIR)/atom_cubin.h \
|
||||||
@ -143,7 +144,8 @@ CBNS = $(OBJ_DIR)/device.cubin $(OBJ_DIR)/device_cubin.h \
|
|||||||
$(OBJ_DIR)/lj_expand_coul_long.cubin $(OBJ_DIR)/lj_expand_coul_long_cubin.h \
|
$(OBJ_DIR)/lj_expand_coul_long.cubin $(OBJ_DIR)/lj_expand_coul_long_cubin.h \
|
||||||
$(OBJ_DIR)/coul_long_cs.cubin $(OBJ_DIR)/coul_long_cs_cubin.h \
|
$(OBJ_DIR)/coul_long_cs.cubin $(OBJ_DIR)/coul_long_cs_cubin.h \
|
||||||
$(OBJ_DIR)/born_coul_long_cs.cubin $(OBJ_DIR)/born_coul_long_cs_cubin.h \
|
$(OBJ_DIR)/born_coul_long_cs.cubin $(OBJ_DIR)/born_coul_long_cs_cubin.h \
|
||||||
$(OBJ_DIR)/born_coul_wolf_cs.cubin $(OBJ_DIR)/born_coul_wolf_cs_cubin.h
|
$(OBJ_DIR)/born_coul_wolf_cs.cubin $(OBJ_DIR)/born_coul_wolf_cs_cubin.h \
|
||||||
|
$(OBJ_DIR)/lj_tip4p_long.cubin $(OBJ_DIR)/lj_tip4p_long_cubin.h
|
||||||
|
|
||||||
all: $(OBJ_DIR) $(GPU_LIB) $(EXECS)
|
all: $(OBJ_DIR) $(GPU_LIB) $(EXECS)
|
||||||
|
|
||||||
@ -297,6 +299,18 @@ $(OBJ_DIR)/lal_lj.o: $(ALL_H) lal_lj.h lal_lj.cpp $(OBJ_DIR)/lj_cubin.h $(OBJ_DI
|
|||||||
$(OBJ_DIR)/lal_lj_ext.o: $(ALL_H) lal_lj.h lal_lj_ext.cpp lal_base_atomic.h
|
$(OBJ_DIR)/lal_lj_ext.o: $(ALL_H) lal_lj.h lal_lj_ext.cpp lal_base_atomic.h
|
||||||
$(CUDR) -o $@ -c lal_lj_ext.cpp -I$(OBJ_DIR)
|
$(CUDR) -o $@ -c lal_lj_ext.cpp -I$(OBJ_DIR)
|
||||||
|
|
||||||
|
$(OBJ_DIR)/lj_tip4p_long.cubin: lal_lj_tip4p_long.cu lal_precision.h lal_preprocessor.h
|
||||||
|
$(CUDA) --fatbin -DNV_KERNEL -o $@ lal_lj_tip4p_long.cu
|
||||||
|
|
||||||
|
$(OBJ_DIR)/lj_tip4p_long_cubin.h: $(OBJ_DIR)/lj_tip4p_long.cubin $(OBJ_DIR)/lj_tip4p_long.cubin
|
||||||
|
$(BIN2C) -c -n lj_tip4p_long $(OBJ_DIR)/lj_tip4p_long.cubin > $(OBJ_DIR)/lj_tip4p_long_cubin.h
|
||||||
|
|
||||||
|
$(OBJ_DIR)/lal_lj_tip4p_long.o: $(ALL_H) lal_lj_tip4p_long.h lal_lj_tip4p_long.cpp $(OBJ_DIR)/lj_tip4p_long_cubin.h $(OBJ_DIR)/lal_base_atomic.o
|
||||||
|
$(CUDR) -o $@ -c lal_lj_tip4p_long.cpp -I$(OBJ_DIR)
|
||||||
|
|
||||||
|
$(OBJ_DIR)/lal_lj_tip4p_long_ext.o: $(ALL_H) lal_lj_tip4p_long.h lal_lj_tip4p_long_ext.cpp lal_base_atomic.h
|
||||||
|
$(CUDR) -o $@ -c lal_lj_tip4p_long_ext.cpp -I$(OBJ_DIR)
|
||||||
|
|
||||||
$(OBJ_DIR)/lj_coul.cubin: lal_lj_coul.cu lal_precision.h lal_preprocessor.h
|
$(OBJ_DIR)/lj_coul.cubin: lal_lj_coul.cu lal_precision.h lal_preprocessor.h
|
||||||
$(CUDA) --fatbin -DNV_KERNEL -o $@ lal_lj_coul.cu
|
$(CUDA) --fatbin -DNV_KERNEL -o $@ lal_lj_coul.cu
|
||||||
|
|
||||||
@ -717,6 +731,15 @@ $(OBJ_DIR)/dpd.cubin: lal_dpd.cu lal_precision.h lal_preprocessor.h
|
|||||||
$(OBJ_DIR)/dpd_cubin.h: $(OBJ_DIR)/dpd.cubin $(OBJ_DIR)/dpd.cubin
|
$(OBJ_DIR)/dpd_cubin.h: $(OBJ_DIR)/dpd.cubin $(OBJ_DIR)/dpd.cubin
|
||||||
$(BIN2C) -c -n dpd $(OBJ_DIR)/dpd.cubin > $(OBJ_DIR)/dpd_cubin.h
|
$(BIN2C) -c -n dpd $(OBJ_DIR)/dpd.cubin > $(OBJ_DIR)/dpd_cubin.h
|
||||||
|
|
||||||
|
$(OBJ_DIR)/lal_dpd.o: $(ALL_H) lal_dpd.h lal_dpd.cpp $(OBJ_DIR)/dpd_cubin.h $(OBJ_DIR)/lal_base_dpd.o
|
||||||
|
$(CUDR) -o $@ -c lal_dpd.cpp -I$(OBJ_DIR)
|
||||||
|
|
||||||
|
$(OBJ_DIR)/lal_dpd_ext.o: $(ALL_H) lal_dpd.h lal_dpd_ext.cpp lal_base_dpd.h
|
||||||
|
$(CUDR) -o $@ -c lal_dpd_ext.cpp -I$(OBJ_DIR)
|
||||||
|
|
||||||
|
$(OBJ_DIR)/lal_dpd_tstat_ext.o: $(ALL_H) lal_dpd.h lal_dpd_tstat_ext.cpp lal_base_dpd.h
|
||||||
|
$(CUDR) -o $@ -c lal_dpd_tstat_ext.cpp -I$(OBJ_DIR)
|
||||||
|
|
||||||
$(OBJ_DIR)/ufm.cubin: lal_ufm.cu lal_precision.h lal_preprocessor.h
|
$(OBJ_DIR)/ufm.cubin: lal_ufm.cu lal_precision.h lal_preprocessor.h
|
||||||
$(CUDA) --fatbin -DNV_KERNEL -o $@ lal_ufm.cu
|
$(CUDA) --fatbin -DNV_KERNEL -o $@ lal_ufm.cu
|
||||||
|
|
||||||
@ -729,12 +752,6 @@ $(OBJ_DIR)/lal_ufm.o: $(ALL_H) lal_ufm.h lal_ufm.cpp $(OBJ_DIR)/ufm_cubin.h $(OB
|
|||||||
$(OBJ_DIR)/lal_ufm_ext.o: $(ALL_H) lal_ufm.h lal_ufm_ext.cpp lal_base_atomic.h
|
$(OBJ_DIR)/lal_ufm_ext.o: $(ALL_H) lal_ufm.h lal_ufm_ext.cpp lal_base_atomic.h
|
||||||
$(CUDR) -o $@ -c lal_ufm_ext.cpp -I$(OBJ_DIR)
|
$(CUDR) -o $@ -c lal_ufm_ext.cpp -I$(OBJ_DIR)
|
||||||
|
|
||||||
$(OBJ_DIR)/lal_dpd.o: $(ALL_H) lal_dpd.h lal_dpd.cpp $(OBJ_DIR)/dpd_cubin.h $(OBJ_DIR)/lal_base_dpd.o
|
|
||||||
$(CUDR) -o $@ -c lal_dpd.cpp -I$(OBJ_DIR)
|
|
||||||
|
|
||||||
$(OBJ_DIR)/lal_dpd_ext.o: $(ALL_H) lal_dpd.h lal_dpd_ext.cpp lal_base_dpd.h
|
|
||||||
$(CUDR) -o $@ -c lal_dpd_ext.cpp -I$(OBJ_DIR)
|
|
||||||
|
|
||||||
$(OBJ_DIR)/tersoff.cubin: lal_tersoff.cu lal_precision.h lal_tersoff_extra.h lal_preprocessor.h
|
$(OBJ_DIR)/tersoff.cubin: lal_tersoff.cu lal_precision.h lal_tersoff_extra.h lal_preprocessor.h
|
||||||
$(CUDA) --fatbin -DNV_KERNEL -o $@ lal_tersoff.cu
|
$(CUDA) --fatbin -DNV_KERNEL -o $@ lal_tersoff.cu
|
||||||
|
|
||||||
|
|||||||
@ -58,7 +58,7 @@ OBJS = $(OBJ_DIR)/lal_atom.o $(OBJ_DIR)/lal_answer.o \
|
|||||||
$(OBJ_DIR)/lal_soft.o $(OBJ_DIR)/lal_soft_ext.o \
|
$(OBJ_DIR)/lal_soft.o $(OBJ_DIR)/lal_soft_ext.o \
|
||||||
$(OBJ_DIR)/lal_lj_coul_msm.o $(OBJ_DIR)/lal_lj_coul_msm_ext.o \
|
$(OBJ_DIR)/lal_lj_coul_msm.o $(OBJ_DIR)/lal_lj_coul_msm_ext.o \
|
||||||
$(OBJ_DIR)/lal_lj_gromacs.o $(OBJ_DIR)/lal_lj_gromacs_ext.o \
|
$(OBJ_DIR)/lal_lj_gromacs.o $(OBJ_DIR)/lal_lj_gromacs_ext.o \
|
||||||
$(OBJ_DIR)/lal_dpd.o $(OBJ_DIR)/lal_dpd_ext.o \
|
$(OBJ_DIR)/lal_dpd.o $(OBJ_DIR)/lal_dpd_ext.o $(OBJ_DIR)/lal_dpd_tstat_ext.o \
|
||||||
$(OBJ_DIR)/lal_tersoff.o $(OBJ_DIR)/lal_tersoff_ext.o \
|
$(OBJ_DIR)/lal_tersoff.o $(OBJ_DIR)/lal_tersoff_ext.o \
|
||||||
$(OBJ_DIR)/lal_tersoff_zbl.o $(OBJ_DIR)/lal_tersoff_zbl_ext.o \
|
$(OBJ_DIR)/lal_tersoff_zbl.o $(OBJ_DIR)/lal_tersoff_zbl_ext.o \
|
||||||
$(OBJ_DIR)/lal_tersoff_mod.o $(OBJ_DIR)/lal_tersoff_mod_ext.o \
|
$(OBJ_DIR)/lal_tersoff_mod.o $(OBJ_DIR)/lal_tersoff_mod_ext.o \
|
||||||
@ -534,6 +534,9 @@ $(OBJ_DIR)/lal_dpd.o: $(ALL_H) lal_dpd.h lal_dpd.cpp $(OBJ_DIR)/dpd_cl.h $(OBJ_
|
|||||||
$(OBJ_DIR)/lal_dpd_ext.o: $(ALL_H) lal_dpd.h lal_dpd_ext.cpp lal_base_dpd.h
|
$(OBJ_DIR)/lal_dpd_ext.o: $(ALL_H) lal_dpd.h lal_dpd_ext.cpp lal_base_dpd.h
|
||||||
$(OCL) -o $@ -c lal_dpd_ext.cpp -I$(OBJ_DIR)
|
$(OCL) -o $@ -c lal_dpd_ext.cpp -I$(OBJ_DIR)
|
||||||
|
|
||||||
|
$(OBJ_DIR)/lal_dpd_tstat_ext.o: $(ALL_H) lal_dpd.h lal_dpd_tstat_ext.cpp lal_base_dpd.h
|
||||||
|
$(OCL) -o $@ -c lal_dpd_tstat_ext.cpp -I$(OBJ_DIR)
|
||||||
|
|
||||||
$(OBJ_DIR)/tersoff_cl.h: lal_tersoff.cu lal_tersoff_extra.h $(PRE1_H)
|
$(OBJ_DIR)/tersoff_cl.h: lal_tersoff.cu lal_tersoff_extra.h $(PRE1_H)
|
||||||
$(BSH) ./geryon/file_to_cstr.sh tersoff $(PRE1_H) lal_tersoff_extra.h lal_tersoff.cu $(OBJ_DIR)/tersoff_cl.h;
|
$(BSH) ./geryon/file_to_cstr.sh tersoff $(PRE1_H) lal_tersoff_extra.h lal_tersoff.cu $(OBJ_DIR)/tersoff_cl.h;
|
||||||
|
|
||||||
|
|||||||
@ -29,7 +29,7 @@ static DPD<PRECISION,ACC_PRECISION> DPDMF;
|
|||||||
// ---------------------------------------------------------------------------
|
// ---------------------------------------------------------------------------
|
||||||
int dpd_gpu_init(const int ntypes, double **cutsq, double **host_a0,
|
int dpd_gpu_init(const int ntypes, double **cutsq, double **host_a0,
|
||||||
double **host_gamma, double **host_sigma, double **host_cut,
|
double **host_gamma, double **host_sigma, double **host_cut,
|
||||||
double *special_lj, bool tstat_only, const int inum,
|
double *special_lj, const int inum,
|
||||||
const int nall, const int max_nbors, const int maxspecial,
|
const int nall, const int max_nbors, const int maxspecial,
|
||||||
const double cell_size, int &gpu_mode, FILE *screen) {
|
const double cell_size, int &gpu_mode, FILE *screen) {
|
||||||
DPDMF.clear();
|
DPDMF.clear();
|
||||||
@ -55,7 +55,7 @@ int dpd_gpu_init(const int ntypes, double **cutsq, double **host_a0,
|
|||||||
int init_ok=0;
|
int init_ok=0;
|
||||||
if (world_me==0)
|
if (world_me==0)
|
||||||
init_ok=DPDMF.init(ntypes, cutsq, host_a0, host_gamma, host_sigma,
|
init_ok=DPDMF.init(ntypes, cutsq, host_a0, host_gamma, host_sigma,
|
||||||
host_cut, special_lj, tstat_only, inum, nall, 300,
|
host_cut, special_lj, false, inum, nall, 300,
|
||||||
maxspecial, cell_size, gpu_split, screen);
|
maxspecial, cell_size, gpu_split, screen);
|
||||||
|
|
||||||
DPDMF.device->world_barrier();
|
DPDMF.device->world_barrier();
|
||||||
@ -73,7 +73,7 @@ int dpd_gpu_init(const int ntypes, double **cutsq, double **host_a0,
|
|||||||
}
|
}
|
||||||
if (gpu_rank==i && world_me!=0)
|
if (gpu_rank==i && world_me!=0)
|
||||||
init_ok=DPDMF.init(ntypes, cutsq, host_a0, host_gamma, host_sigma,
|
init_ok=DPDMF.init(ntypes, cutsq, host_a0, host_gamma, host_sigma,
|
||||||
host_cut, special_lj, tstat_only, inum, nall, 300,
|
host_cut, special_lj, false, inum, nall, 300,
|
||||||
maxspecial, cell_size, gpu_split, screen);
|
maxspecial, cell_size, gpu_split, screen);
|
||||||
|
|
||||||
DPDMF.device->gpu_barrier();
|
DPDMF.device->gpu_barrier();
|
||||||
|
|||||||
133
lib/gpu/lal_dpd_tstat_ext.cpp
Normal file
133
lib/gpu/lal_dpd_tstat_ext.cpp
Normal file
@ -0,0 +1,133 @@
|
|||||||
|
/***************************************************************************
|
||||||
|
dpd_tstat_ext.cpp
|
||||||
|
-------------------
|
||||||
|
Trung Dac Nguyen (ORNL)
|
||||||
|
|
||||||
|
Functions for LAMMPS access to dpd/tstat acceleration routines.
|
||||||
|
|
||||||
|
__________________________________________________________________________
|
||||||
|
This file is part of the LAMMPS Accelerator Library (LAMMPS_AL)
|
||||||
|
__________________________________________________________________________
|
||||||
|
|
||||||
|
begin : Jan 15, 2014
|
||||||
|
email : nguyentd@ornl.gov
|
||||||
|
***************************************************************************/
|
||||||
|
|
||||||
|
#include <iostream>
|
||||||
|
#include <cassert>
|
||||||
|
#include <cmath>
|
||||||
|
|
||||||
|
#include "lal_dpd.h"
|
||||||
|
|
||||||
|
using namespace std;
|
||||||
|
using namespace LAMMPS_AL;
|
||||||
|
|
||||||
|
// File-scope DPD instance used by every dpd_tstat_gpu_* function in this
// file; it is initialized with the thermostat-only flag set to true.
static DPD<PRECISION,ACC_PRECISION> DPDTMF;
|
||||||
|
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
// Allocate memory on host and device and copy constants to device
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
int dpd_tstat_gpu_init(const int ntypes, double **cutsq, double **host_a0,
|
||||||
|
double **host_gamma, double **host_sigma, double **host_cut,
|
||||||
|
double *special_lj, const int inum,
|
||||||
|
const int nall, const int max_nbors, const int maxspecial,
|
||||||
|
const double cell_size, int &gpu_mode, FILE *screen) {
|
||||||
|
DPDTMF.clear();
|
||||||
|
gpu_mode=DPDTMF.device->gpu_mode();
|
||||||
|
double gpu_split=DPDTMF.device->particle_split();
|
||||||
|
int first_gpu=DPDTMF.device->first_device();
|
||||||
|
int last_gpu=DPDTMF.device->last_device();
|
||||||
|
int world_me=DPDTMF.device->world_me();
|
||||||
|
int gpu_rank=DPDTMF.device->gpu_rank();
|
||||||
|
int procs_per_gpu=DPDTMF.device->procs_per_gpu();
|
||||||
|
|
||||||
|
DPDTMF.device->init_message(screen,"dpd",first_gpu,last_gpu);
|
||||||
|
|
||||||
|
bool message=false;
|
||||||
|
if (DPDTMF.device->replica_me()==0 && screen)
|
||||||
|
message=true;
|
||||||
|
|
||||||
|
if (message) {
|
||||||
|
fprintf(screen,"Initializing Device and compiling on process 0...");
|
||||||
|
fflush(screen);
|
||||||
|
}
|
||||||
|
|
||||||
|
int init_ok=0;
|
||||||
|
if (world_me==0)
|
||||||
|
init_ok=DPDTMF.init(ntypes, cutsq, host_a0, host_gamma, host_sigma,
|
||||||
|
host_cut, special_lj, true, inum, nall, 300,
|
||||||
|
maxspecial, cell_size, gpu_split, screen);
|
||||||
|
|
||||||
|
DPDTMF.device->world_barrier();
|
||||||
|
if (message)
|
||||||
|
fprintf(screen,"Done.\n");
|
||||||
|
|
||||||
|
for (int i=0; i<procs_per_gpu; i++) {
|
||||||
|
if (message) {
|
||||||
|
if (last_gpu-first_gpu==0)
|
||||||
|
fprintf(screen,"Initializing Device %d on core %d...",first_gpu,i);
|
||||||
|
else
|
||||||
|
fprintf(screen,"Initializing Devices %d-%d on core %d...",first_gpu,
|
||||||
|
last_gpu,i);
|
||||||
|
fflush(screen);
|
||||||
|
}
|
||||||
|
if (gpu_rank==i && world_me!=0)
|
||||||
|
init_ok=DPDTMF.init(ntypes, cutsq, host_a0, host_gamma, host_sigma,
|
||||||
|
host_cut, special_lj, true, inum, nall, 300,
|
||||||
|
maxspecial, cell_size, gpu_split, screen);
|
||||||
|
|
||||||
|
DPDTMF.device->gpu_barrier();
|
||||||
|
if (message)
|
||||||
|
fprintf(screen,"Done.\n");
|
||||||
|
}
|
||||||
|
if (message)
|
||||||
|
fprintf(screen,"\n");
|
||||||
|
|
||||||
|
if (init_ok==0)
|
||||||
|
DPDTMF.estimate_gpu_overhead();
|
||||||
|
return init_ok;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Clear the file-scope DPDTMF instance by calling its clear() method.
void dpd_tstat_gpu_clear()
{
  DPDTMF.clear();
}
|
||||||
|
|
||||||
|
int ** dpd_tstat_gpu_compute_n(const int ago, const int inum_full, const int nall,
|
||||||
|
double **host_x, int *host_type, double *sublo,
|
||||||
|
double *subhi, tagint *tag, int **nspecial,
|
||||||
|
tagint **special, const bool eflag, const bool vflag,
|
||||||
|
const bool eatom, const bool vatom, int &host_start,
|
||||||
|
int **ilist, int **jnum, const double cpu_time, bool &success,
|
||||||
|
double **host_v, const double dtinvsqrt,
|
||||||
|
const int seed, const int timestep,
|
||||||
|
double *boxlo, double *prd) {
|
||||||
|
return DPDTMF.compute(ago, inum_full, nall, host_x, host_type, sublo,
|
||||||
|
subhi, tag, nspecial, special, eflag, vflag, eatom,
|
||||||
|
vatom, host_start, ilist, jnum, cpu_time, success,
|
||||||
|
host_v, dtinvsqrt, seed, timestep, boxlo, prd);
|
||||||
|
}
|
||||||
|
|
||||||
|
void dpd_tstat_gpu_compute(const int ago, const int inum_full, const int nall,
|
||||||
|
double **host_x, int *host_type, int *ilist, int *numj,
|
||||||
|
int **firstneigh, const bool eflag, const bool vflag,
|
||||||
|
const bool eatom, const bool vatom, int &host_start,
|
||||||
|
const double cpu_time, bool &success, tagint *tag,
|
||||||
|
double **host_v, const double dtinvsqrt,
|
||||||
|
const int seed, const int timestep,
|
||||||
|
const int nlocal, double *boxlo, double *prd) {
|
||||||
|
DPDTMF.compute(ago, inum_full, nall, host_x, host_type, ilist, numj,
|
||||||
|
firstneigh, eflag, vflag, eatom, vatom, host_start, cpu_time, success,
|
||||||
|
tag, host_v, dtinvsqrt, seed, timestep, nlocal, boxlo, prd);
|
||||||
|
}
|
||||||
|
|
||||||
|
void dpd_tstat_gpu_update_coeff(int ntypes, double **host_a0, double **host_gamma,
|
||||||
|
double **host_sigma, double **host_cut)
|
||||||
|
{
|
||||||
|
DPDTMF.update_coeff(ntypes,host_a0,host_gamma,host_sigma,host_cut);
|
||||||
|
}
|
||||||
|
|
||||||
|
double dpd_tstat_gpu_bytes() {
|
||||||
|
return DPDTMF.host_memory_usage();
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
@ -43,7 +43,7 @@ using namespace LAMMPS_NS;
|
|||||||
|
|
||||||
int dpd_gpu_init(const int ntypes, double **cutsq, double **host_a0,
|
int dpd_gpu_init(const int ntypes, double **cutsq, double **host_a0,
|
||||||
double **host_gamma, double **host_sigma, double **host_cut,
|
double **host_gamma, double **host_sigma, double **host_cut,
|
||||||
double *special_lj, bool tstat_only, const int inum,
|
double *special_lj, const int inum,
|
||||||
const int nall, const int max_nbors, const int maxspecial,
|
const int nall, const int max_nbors, const int maxspecial,
|
||||||
const double cell_size, int &gpu_mode, FILE *screen);
|
const double cell_size, int &gpu_mode, FILE *screen);
|
||||||
void dpd_gpu_clear();
|
void dpd_gpu_clear();
|
||||||
@ -309,7 +309,7 @@ void PairDPDGPU::init_style()
|
|||||||
if (atom->molecular)
|
if (atom->molecular)
|
||||||
maxspecial=atom->maxspecial;
|
maxspecial=atom->maxspecial;
|
||||||
int success = dpd_gpu_init(atom->ntypes+1, cutsq, a0, gamma, sigma,
|
int success = dpd_gpu_init(atom->ntypes+1, cutsq, a0, gamma, sigma,
|
||||||
cut, force->special_lj, false, atom->nlocal,
|
cut, force->special_lj, atom->nlocal,
|
||||||
atom->nlocal+atom->nghost, 300, maxspecial,
|
atom->nlocal+atom->nghost, 300, maxspecial,
|
||||||
cell_size, gpu_mode, screen);
|
cell_size, gpu_mode, screen);
|
||||||
GPU_EXTRA::check_flag(success,error,world);
|
GPU_EXTRA::check_flag(success,error,world);
|
||||||
|
|||||||
@ -41,13 +41,13 @@ using namespace LAMMPS_NS;
|
|||||||
|
|
||||||
// External functions from cuda library for atom decomposition
|
// External functions from cuda library for atom decomposition
|
||||||
|
|
||||||
int dpd_gpu_init(const int ntypes, double **cutsq, double **host_a0,
|
int dpd_tstat_gpu_init(const int ntypes, double **cutsq, double **host_a0,
|
||||||
double **host_gamma, double **host_sigma, double **host_cut,
|
double **host_gamma, double **host_sigma, double **host_cut,
|
||||||
double *special_lj, bool tstat_only, const int inum,
|
double *special_lj, const int inum,
|
||||||
const int nall, const int max_nbors, const int maxspecial,
|
const int nall, const int max_nbors, const int maxspecial,
|
||||||
const double cell_size, int &gpu_mode, FILE *screen);
|
const double cell_size, int &gpu_mode, FILE *screen);
|
||||||
void dpd_gpu_clear();
|
void dpd_tstat_gpu_clear();
|
||||||
int ** dpd_gpu_compute_n(const int ago, const int inum_full, const int nall,
|
int ** dpd_tstat_gpu_compute_n(const int ago, const int inum_full, const int nall,
|
||||||
double **host_x, int *host_type, double *sublo,
|
double **host_x, int *host_type, double *sublo,
|
||||||
double *subhi, tagint *tag, int **nspecial,
|
double *subhi, tagint *tag, int **nspecial,
|
||||||
tagint **special, const bool eflag, const bool vflag,
|
tagint **special, const bool eflag, const bool vflag,
|
||||||
@ -56,7 +56,7 @@ int ** dpd_gpu_compute_n(const int ago, const int inum_full, const int nall,
|
|||||||
double **host_v, const double dtinvsqrt,
|
double **host_v, const double dtinvsqrt,
|
||||||
const int seed, const int timestep,
|
const int seed, const int timestep,
|
||||||
double *boxlo, double *prd);
|
double *boxlo, double *prd);
|
||||||
void dpd_gpu_compute(const int ago, const int inum_full, const int nall,
|
void dpd_tstat_gpu_compute(const int ago, const int inum_full, const int nall,
|
||||||
double **host_x, int *host_type, int *ilist, int *numj,
|
double **host_x, int *host_type, int *ilist, int *numj,
|
||||||
int **firstneigh, const bool eflag, const bool vflag,
|
int **firstneigh, const bool eflag, const bool vflag,
|
||||||
const bool eatom, const bool vatom, int &host_start,
|
const bool eatom, const bool vatom, int &host_start,
|
||||||
@ -64,9 +64,9 @@ void dpd_gpu_compute(const int ago, const int inum_full, const int nall,
|
|||||||
double **host_v, const double dtinvsqrt,
|
double **host_v, const double dtinvsqrt,
|
||||||
const int seed, const int timestep,
|
const int seed, const int timestep,
|
||||||
const int nlocal, double *boxlo, double *prd);
|
const int nlocal, double *boxlo, double *prd);
|
||||||
void dpd_gpu_update_coeff(int ntypes, double **host_a0, double **host_gamma,
|
void dpd_tstat_gpu_update_coeff(int ntypes, double **host_a0, double **host_gamma,
|
||||||
double **host_sigma, double **host_cut);
|
double **host_sigma, double **host_cut);
|
||||||
double dpd_gpu_bytes();
|
double dpd_tstat_gpu_bytes();
|
||||||
|
|
||||||
#define EPSILON 1.0e-10
|
#define EPSILON 1.0e-10
|
||||||
|
|
||||||
@ -223,7 +223,7 @@ PairDPDTstatGPU::PairDPDTstatGPU(LAMMPS *lmp) : PairDPDTstat(lmp),
|
|||||||
|
|
||||||
PairDPDTstatGPU::~PairDPDTstatGPU()
|
PairDPDTstatGPU::~PairDPDTstatGPU()
|
||||||
{
|
{
|
||||||
dpd_gpu_clear();
|
dpd_tstat_gpu_clear();
|
||||||
}
|
}
|
||||||
|
|
||||||
/* ---------------------------------------------------------------------- */
|
/* ---------------------------------------------------------------------- */
|
||||||
@ -243,7 +243,7 @@ void PairDPDTstatGPU::compute(int eflag, int vflag)
|
|||||||
for (int j = i; j <= atom->ntypes; j++)
|
for (int j = i; j <= atom->ntypes; j++)
|
||||||
sigma[i][j] = sigma[j][i] = sqrt(2.0*boltz*temperature*gamma[i][j]);
|
sigma[i][j] = sigma[j][i] = sqrt(2.0*boltz*temperature*gamma[i][j]);
|
||||||
|
|
||||||
dpd_gpu_update_coeff(atom->ntypes+1, a0, gamma, sigma, cut);
|
dpd_tstat_gpu_update_coeff(atom->ntypes+1, a0, gamma, sigma, cut);
|
||||||
}
|
}
|
||||||
|
|
||||||
int nall = atom->nlocal + atom->nghost;
|
int nall = atom->nlocal + atom->nghost;
|
||||||
@ -266,7 +266,7 @@ void PairDPDTstatGPU::compute(int eflag, int vflag)
|
|||||||
domain->bbox(domain->sublo_lamda,domain->subhi_lamda,sublo,subhi);
|
domain->bbox(domain->sublo_lamda,domain->subhi_lamda,sublo,subhi);
|
||||||
}
|
}
|
||||||
inum = atom->nlocal;
|
inum = atom->nlocal;
|
||||||
firstneigh = dpd_gpu_compute_n(neighbor->ago, inum, nall, atom->x,
|
firstneigh = dpd_tstat_gpu_compute_n(neighbor->ago, inum, nall, atom->x,
|
||||||
atom->type, sublo, subhi,
|
atom->type, sublo, subhi,
|
||||||
atom->tag, atom->nspecial, atom->special,
|
atom->tag, atom->nspecial, atom->special,
|
||||||
eflag, vflag, eflag_atom, vflag_atom,
|
eflag, vflag, eflag_atom, vflag_atom,
|
||||||
@ -279,7 +279,7 @@ void PairDPDTstatGPU::compute(int eflag, int vflag)
|
|||||||
ilist = list->ilist;
|
ilist = list->ilist;
|
||||||
numneigh = list->numneigh;
|
numneigh = list->numneigh;
|
||||||
firstneigh = list->firstneigh;
|
firstneigh = list->firstneigh;
|
||||||
dpd_gpu_compute(neighbor->ago, inum, nall, atom->x, atom->type,
|
dpd_tstat_gpu_compute(neighbor->ago, inum, nall, atom->x, atom->type,
|
||||||
ilist, numneigh, firstneigh, eflag, vflag, eflag_atom,
|
ilist, numneigh, firstneigh, eflag, vflag, eflag_atom,
|
||||||
vflag_atom, host_start, cpu_time, success,
|
vflag_atom, host_start, cpu_time, success,
|
||||||
atom->tag, atom->v, dtinvsqrt, seed,
|
atom->tag, atom->v, dtinvsqrt, seed,
|
||||||
@ -325,8 +325,8 @@ void PairDPDTstatGPU::init_style()
|
|||||||
int maxspecial=0;
|
int maxspecial=0;
|
||||||
if (atom->molecular)
|
if (atom->molecular)
|
||||||
maxspecial=atom->maxspecial;
|
maxspecial=atom->maxspecial;
|
||||||
int success = dpd_gpu_init(atom->ntypes+1, cutsq, a0, gamma, sigma,
|
int success = dpd_tstat_gpu_init(atom->ntypes+1, cutsq, a0, gamma, sigma,
|
||||||
cut, force->special_lj, true, atom->nlocal,
|
cut, force->special_lj, atom->nlocal,
|
||||||
atom->nlocal+atom->nghost, 300, maxspecial,
|
atom->nlocal+atom->nghost, 300, maxspecial,
|
||||||
cell_size, gpu_mode, screen);
|
cell_size, gpu_mode, screen);
|
||||||
GPU_EXTRA::check_flag(success,error,world);
|
GPU_EXTRA::check_flag(success,error,world);
|
||||||
@ -343,7 +343,7 @@ void PairDPDTstatGPU::init_style()
|
|||||||
double PairDPDTstatGPU::memory_usage()
|
double PairDPDTstatGPU::memory_usage()
|
||||||
{
|
{
|
||||||
double bytes = Pair::memory_usage();
|
double bytes = Pair::memory_usage();
|
||||||
return bytes + dpd_gpu_bytes();
|
return bytes + dpd_tstat_gpu_bytes();
|
||||||
}
|
}
|
||||||
|
|
||||||
/* ---------------------------------------------------------------------- */
|
/* ---------------------------------------------------------------------- */
|
||||||
|
|||||||
Reference in New Issue
Block a user