From 3f3c69210f8dbb1d8e906994a4fbfeb54b609010 Mon Sep 17 00:00:00 2001 From: sjplimp Date: Wed, 29 Aug 2012 15:00:14 +0000 Subject: [PATCH] git-svn-id: svn://svn.icms.temple.edu/lammps-ro/trunk@8733 f3b2605a-c512-4ea7-a41b-209d697bcdaa --- src/USER-CUDA/fix_shake_cuda.cpp | 2331 +++++++++++++++++------------- 1 file changed, 1297 insertions(+), 1034 deletions(-) diff --git a/src/USER-CUDA/fix_shake_cuda.cpp b/src/USER-CUDA/fix_shake_cuda.cpp index 5b155921d7..32905c153f 100644 --- a/src/USER-CUDA/fix_shake_cuda.cpp +++ b/src/USER-CUDA/fix_shake_cuda.cpp @@ -46,25 +46,29 @@ using namespace MathConst; /* ---------------------------------------------------------------------- */ -FixShakeCuda::FixShakeCuda(LAMMPS *lmp, int narg, char **arg) : +FixShakeCuda::FixShakeCuda(LAMMPS* lmp, int narg, char** arg) : Fix(lmp, narg, arg) { cuda = lmp->cuda; - if(cuda == NULL) - error->all(FLERR,"You cannot use a /cuda class, without activating 'cuda' acceleration. Provide '-c on' as command-line argument to LAMMPS.."); - cuda->accelerator(0,NULL); - MPI_Comm_rank(world,&me); - MPI_Comm_size(world,&nprocs); - neighbor_step=true; + if(cuda == NULL) + error->all(FLERR, "You cannot use a /cuda class, without activating 'cuda' acceleration. Provide '-c on' as command-line argument to LAMMPS.."); + + if(atom->map_style != 1) + error->all(FLERR, "Fix shake/cuda needs atom map style array. In particular it does not currently work with hash-tables."); + + cuda->accelerator(0, NULL); + MPI_Comm_rank(world, &me); + MPI_Comm_size(world, &nprocs); + neighbor_step = true; virial_flag = 1; create_attribute = 1; // error check - if (atom->molecular == 0) - error->all(FLERR,"Cannot use fix shake with non-molecular system"); + if(atom->molecular == 0) + error->all(FLERR, "Cannot use fix shake with non-molecular system"); // perform initial allocation of atom-based arrays // register with Atom class @@ -79,7 +83,7 @@ FixShakeCuda::FixShakeCuda(LAMMPS *lmp, int narg, char **arg) : cu_list = NULL; cu_bond_distance = NULL; cu_angle_distance = NULL; - cu_virial = new cCudaData(virial,6); + cu_virial = new cCudaData(virial, 6); grow_arrays(atom->nmax); atom->add_callback(0); @@ -89,7 +93,7 @@ FixShakeCuda::FixShakeCuda(LAMMPS *lmp, int narg, char **arg) : // parse SHAKE args - if (narg < 8) error->all(FLERR,"Illegal fix shake command"); + if(narg < 8) error->all(FLERR, "Illegal fix shake command"); tolerance = atof(arg[3]); max_iter = atoi(arg[4]); @@ -101,65 +105,82 @@ FixShakeCuda::FixShakeCuda(LAMMPS *lmp, int narg, char **arg) : // store args for "m" in list of length nmass for looping over // for "m" verify that atom masses have been set - bond_flag = new int[atom->nbondtypes+1]; - for (int i = 1; i <= atom->nbondtypes; i++) bond_flag[i] = 0; - angle_flag = new int[atom->nangletypes+1]; - for (int i = 1; i <= atom->nangletypes; i++) angle_flag[i] = 0; - type_flag = new int[atom->ntypes+1]; - for (int i = 1; i <= atom->ntypes; i++) type_flag[i] = 0; + bond_flag = new int[atom->nbondtypes + 1]; + + for(int i = 1; i <= atom->nbondtypes; i++) bond_flag[i] = 0; + + angle_flag = new int[atom->nangletypes + 1]; + + for(int i = 1; i <= atom->nangletypes; i++) angle_flag[i] = 0; + + type_flag = new int[atom->ntypes + 1]; + + for(int i = 1; i <= atom->ntypes; i++) type_flag[i] = 0; + mass_list = new double[atom->ntypes]; nmass = 0; char mode = '\0'; int next = 6; - while (next < narg) { - if (strcmp(arg[next],"b") == 0) mode = 'b'; - else if (strcmp(arg[next],"a") == 0) mode = 'a'; - else if (strcmp(arg[next],"t") == 0) mode = 't'; - else if (strcmp(arg[next],"m") == 0) { + while(next < narg) { + + if(strcmp(arg[next], "b") == 0) mode = 'b'; + else if(strcmp(arg[next], "a") == 0) mode = 'a'; + else if(strcmp(arg[next], "t") == 0) mode = 't'; + else if(strcmp(arg[next], "m") == 0) { mode = 'm'; atom->check_mass(); - } else if (mode == 'b') { + } else if(mode == 'b') { int i = atoi(arg[next]); - if (i < 1 || i > atom->nbondtypes) - error->all(FLERR,"Invalid bond type index for fix shake"); + + if(i < 1 || i > atom->nbondtypes) + error->all(FLERR, "Invalid bond type index for fix shake"); + bond_flag[i] = 1; - } else if (mode == 'a') { + } else if(mode == 'a') { int i = atoi(arg[next]); - if (i < 1 || i > atom->nangletypes) - error->all(FLERR,"Invalid angle type index for fix shake"); + + if(i < 1 || i > atom->nangletypes) + error->all(FLERR, "Invalid angle type index for fix shake"); + angle_flag[i] = 1; - } else if (mode == 't') { + } else if(mode == 't') { int i = atoi(arg[next]); - if (i < 1 || i > atom->ntypes) - error->all(FLERR,"Invalid atom type index for fix shake"); + + if(i < 1 || i > atom->ntypes) + error->all(FLERR, "Invalid atom type index for fix shake"); + type_flag[i] = 1; - } else if (mode == 'm') { + } else if(mode == 'm') { double massone = atof(arg[next]); - if (massone == 0.0) error->all(FLERR,"Invalid atom mass for fix shake"); - if (nmass == atom->ntypes) error->all(FLERR,"Too many masses for fix shake"); + + if(massone == 0.0) error->all(FLERR, "Invalid atom mass for fix shake"); + + if(nmass == atom->ntypes) error->all(FLERR, "Too many masses for fix shake"); + mass_list[nmass++] = massone; - } else error->all(FLERR,"Illegal fix shake command"); + } else error->all(FLERR, "Illegal fix shake command"); + next++; } // allocate bond and angle distance arrays, indexed from 1 to n - bond_distance = new double[atom->nbondtypes+1]; - angle_distance = new double[atom->nangletypes+1]; + bond_distance = new double[atom->nbondtypes + 1]; + angle_distance = new double[atom->nangletypes + 1]; - cu_bond_distance = new cCudaData (bond_distance, atom->nbondtypes+1); - cu_angle_distance = new cCudaData (angle_distance, atom->nangletypes+1); + cu_bond_distance = new cCudaData (bond_distance, atom->nbondtypes + 1); + cu_angle_distance = new cCudaData (angle_distance, atom->nangletypes + 1); // allocate statistics arrays - if (output_every) { + if(output_every) { int nb = atom->nbondtypes + 1; b_count = new int[nb]; b_count_all = new int[nb]; @@ -181,7 +202,7 @@ FixShakeCuda::FixShakeCuda(LAMMPS *lmp, int narg, char **arg) : a_min_all = new double[na]; } - cudable_comm=true; + cudable_comm = true; // identify all SHAKE clusters find_clusters(); @@ -190,10 +211,10 @@ FixShakeCuda::FixShakeCuda(LAMMPS *lmp, int narg, char **arg) : maxlist = 0; list = NULL; - Cuda_FixShakeCuda_Init(&cuda->shared_data,dtv, dtfsq, - cu_shake_flag->dev_data(),cu_shake_atom->dev_data(),cu_shake_type->dev_data(), cu_xshake->dev_data(), - cu_bond_distance->dev_data(),cu_angle_distance->dev_data(),cu_virial->dev_data(), - max_iter,tolerance); + Cuda_FixShakeCuda_Init(&cuda->shared_data, dtv, dtfsq, + cu_shake_flag->dev_data(), cu_shake_atom->dev_data(), cu_shake_type->dev_data(), cu_xshake->dev_data(), + cu_bond_distance->dev_data(), cu_angle_distance->dev_data(), cu_virial->dev_data(), + max_iter, tolerance); } @@ -204,40 +225,55 @@ FixShakeCuda::~FixShakeCuda() { // unregister callbacks to this fix from Atom class - atom->delete_callback(id,0); + atom->delete_callback(id, 0); // set bond_type and angle_type back to positive for SHAKE clusters // must set for all SHAKE bonds and angles stored by each atom - int **bond_type = atom->bond_type; - int **angle_type = atom->angle_type; + int** bond_type = atom->bond_type; + int** angle_type = atom->angle_type; int nlocal = atom->nlocal; int n; - for (int i = 0; i < nlocal; i++) { - if (shake_flag[i] == 0) continue; - else if (shake_flag[i] == 1) { - n = bondfind(i,shake_atom[i][0],shake_atom[i][1]); - if (n >= 0) bond_type[i][n] = -bond_type[i][n]; - n = bondfind(i,shake_atom[i][0],shake_atom[i][2]); - if (n >= 0) bond_type[i][n] = -bond_type[i][n]; - n = anglefind(i,shake_atom[i][1],shake_atom[i][2]); - if (n >= 0) angle_type[i][n] = -angle_type[i][n]; - } else if (shake_flag[i] == 2) { - n = bondfind(i,shake_atom[i][0],shake_atom[i][1]); - if (n >= 0) bond_type[i][n] = -bond_type[i][n]; - } else if (shake_flag[i] == 3) { - n = bondfind(i,shake_atom[i][0],shake_atom[i][1]); - if (n >= 0) bond_type[i][n] = -bond_type[i][n]; - n = bondfind(i,shake_atom[i][0],shake_atom[i][2]); - if (n >= 0) bond_type[i][n] = -bond_type[i][n]; - } else if (shake_flag[i] == 4) { - n = bondfind(i,shake_atom[i][0],shake_atom[i][1]); - if (n >= 0) bond_type[i][n] = -bond_type[i][n]; - n = bondfind(i,shake_atom[i][0],shake_atom[i][2]); - if (n >= 0) bond_type[i][n] = -bond_type[i][n]; - n = bondfind(i,shake_atom[i][0],shake_atom[i][3]); - if (n >= 0) bond_type[i][n] = -bond_type[i][n]; + + for(int i = 0; i < nlocal; i++) { + if(shake_flag[i] == 0) continue; + else if(shake_flag[i] == 1) { + n = bondfind(i, shake_atom[i][0], shake_atom[i][1]); + + if(n >= 0) bond_type[i][n] = -bond_type[i][n]; + + n = bondfind(i, shake_atom[i][0], shake_atom[i][2]); + + if(n >= 0) bond_type[i][n] = -bond_type[i][n]; + + n = anglefind(i, shake_atom[i][1], shake_atom[i][2]); + + if(n >= 0) angle_type[i][n] = -angle_type[i][n]; + } else if(shake_flag[i] == 2) { + n = bondfind(i, shake_atom[i][0], shake_atom[i][1]); + + if(n >= 0) bond_type[i][n] = -bond_type[i][n]; + } else if(shake_flag[i] == 3) { + n = bondfind(i, shake_atom[i][0], shake_atom[i][1]); + + if(n >= 0) bond_type[i][n] = -bond_type[i][n]; + + n = bondfind(i, shake_atom[i][0], shake_atom[i][2]); + + if(n >= 0) bond_type[i][n] = -bond_type[i][n]; + } else if(shake_flag[i] == 4) { + n = bondfind(i, shake_atom[i][0], shake_atom[i][1]); + + if(n >= 0) bond_type[i][n] = -bond_type[i][n]; + + n = bondfind(i, shake_atom[i][0], shake_atom[i][2]); + + if(n >= 0) bond_type[i][n] = -bond_type[i][n]; + + n = bondfind(i, shake_atom[i][0], shake_atom[i][3]); + + if(n >= 0) bond_type[i][n] = -bond_type[i][n]; } } @@ -256,7 +292,7 @@ FixShakeCuda::~FixShakeCuda() delete [] bond_distance; delete [] angle_distance; - if (output_every) { + if(output_every) { delete [] b_count; delete [] b_count_all; delete [] b_ave; @@ -305,61 +341,68 @@ int FixShakeCuda::setmask() void FixShakeCuda::init() { - int i,m,flag,flag_all,type1,type2,bond1_type,bond2_type; - double rsq,angle; + int i, m, flag, flag_all, type1, type2, bond1_type, bond2_type; + double rsq, angle; // error if more than one shake fix int count = 0; - for (i = 0; i < modify->nfix; i++) - if (strcmp(modify->fix[i]->style,"shake") == 0) count++; - if (count > 1) error->all(FLERR,"More than one fix shake"); + + for(i = 0; i < modify->nfix; i++) + if(strcmp(modify->fix[i]->style, "shake") == 0) count++; + + if(count > 1) error->all(FLERR, "More than one fix shake"); // cannot use with minimization since SHAKE turns off bonds // that should contribute to potential energy - if (update->whichflag == 2) - error->all(FLERR,"Fix shake cannot be used with minimization"); + if(update->whichflag == 2) + error->all(FLERR, "Fix shake cannot be used with minimization"); // error if npt,nph fix comes before shake fix - for (i = 0; i < modify->nfix; i++) { - if (strcmp(modify->fix[i]->style,"npt") == 0) break; - if (strcmp(modify->fix[i]->style,"nph") == 0) break; + for(i = 0; i < modify->nfix; i++) { + if(strcmp(modify->fix[i]->style, "npt") == 0) break; + + if(strcmp(modify->fix[i]->style, "nph") == 0) break; } - if (i < modify->nfix) { - for (int j = i; j < modify->nfix; j++) - if (strcmp(modify->fix[j]->style,"shake") == 0) - error->all(FLERR,"Shake fix must come before NPT/NPH fix"); + + if(i < modify->nfix) { + for(int j = i; j < modify->nfix; j++) + if(strcmp(modify->fix[j]->style, "shake") == 0) + error->all(FLERR, "Shake fix must come before NPT/NPH fix"); } // if rRESPA, find associated fix that must exist // could have changed locations in fix list since created // set ptrs to rRESPA variables - if (strstr(update->integrate_style,"respa")) { - for (i = 0; i < modify->nfix; i++) - if (strcmp(modify->fix[i]->style,"RESPA") == 0) ifix_respa = i; - nlevels_respa = ((Respa *) update->integrate)->nlevels; - loop_respa = ((Respa *) update->integrate)->loop; - step_respa = ((Respa *) update->integrate)->step; + if(strstr(update->integrate_style, "respa")) { + for(i = 0; i < modify->nfix; i++) + if(strcmp(modify->fix[i]->style, "RESPA") == 0) ifix_respa = i; + + nlevels_respa = ((Respa*) update->integrate)->nlevels; + loop_respa = ((Respa*) update->integrate)->loop; + step_respa = ((Respa*) update->integrate)->step; } // set equilibrium bond distances - if (force->bond == NULL) - error->all(FLERR,"Bond potential must be defined for SHAKE"); - for (i = 1; i <= atom->nbondtypes; i++) + if(force->bond == NULL) + error->all(FLERR, "Bond potential must be defined for SHAKE"); + + for(i = 1; i <= atom->nbondtypes; i++) bond_distance[i] = force->bond->equilibrium_distance(i); // set equilibrium angle distances int nlocal = atom->nlocal; - for (i = 1; i <= atom->nangletypes; i++) { - if (angle_flag[i] == 0) continue; - if (force->angle == NULL) - error->all(FLERR,"Angle potential must be defined for SHAKE"); + for(i = 1; i <= atom->nangletypes; i++) { + if(angle_flag[i] == 0) continue; + + if(force->angle == NULL) + error->all(FLERR, "Angle potential must be defined for SHAKE"); // scan all atoms for a SHAKE angle cluster // extract bond types for the 2 bonds in the cluster @@ -368,37 +411,43 @@ void FixShakeCuda::init() flag = 0; bond1_type = bond2_type = 0; - for (m = 0; m < nlocal; m++) { - if (shake_flag[m] != 1) continue; - if (shake_type[m][2] != i) continue; - type1 = MIN(shake_type[m][0],shake_type[m][1]); - type2 = MAX(shake_type[m][0],shake_type[m][1]); - if (bond1_type > 0) { - if (type1 != bond1_type || type2 != bond2_type) { + + for(m = 0; m < nlocal; m++) { + if(shake_flag[m] != 1) continue; + + if(shake_type[m][2] != i) continue; + + type1 = MIN(shake_type[m][0], shake_type[m][1]); + type2 = MAX(shake_type[m][0], shake_type[m][1]); + + if(bond1_type > 0) { + if(type1 != bond1_type || type2 != bond2_type) { flag = 1; break; } } + bond1_type = type1; bond2_type = type2; } // error check for any bond types that are not the same - MPI_Allreduce(&flag,&flag_all,1,MPI_INT,MPI_MAX,world); - if (flag_all) error->all(FLERR,"Shake angles have different bond types"); + MPI_Allreduce(&flag, &flag_all, 1, MPI_INT, MPI_MAX, world); + + if(flag_all) error->all(FLERR, "Shake angles have different bond types"); // insure all procs have bond types - MPI_Allreduce(&bond1_type,&flag_all,1,MPI_INT,MPI_MAX,world); + MPI_Allreduce(&bond1_type, &flag_all, 1, MPI_INT, MPI_MAX, world); bond1_type = flag_all; - MPI_Allreduce(&bond2_type,&flag_all,1,MPI_INT,MPI_MAX,world); + MPI_Allreduce(&bond2_type, &flag_all, 1, MPI_INT, MPI_MAX, world); bond2_type = flag_all; // if bond types are 0, no SHAKE angles of this type exist // just skip this angle - if (bond1_type == 0) { + if(bond1_type == 0) { angle_distance[i] = 0.0; continue; } @@ -406,8 +455,8 @@ void FixShakeCuda::init() // compute the angle distance as a function of 2 bond distances angle = force->angle->equilibrium_angle(i); - rsq = 2.0*bond_distance[bond1_type]*bond_distance[bond2_type] * - (1.0-cos(angle)); + rsq = 2.0 * bond_distance[bond1_type] * bond_distance[bond2_type] * + (1.0 - cos(angle)); angle_distance[i] = sqrt(rsq); } } @@ -420,19 +469,21 @@ void FixShakeCuda::setup(int vflag) { pre_neighbor(); - if (output_every) stats(); + if(output_every) stats(); // setup SHAKE output int ntimestep = update->ntimestep; next_output = ntimestep + output_every; - if (output_every == 0) next_output = update->laststep + 1; - if (output_every && ntimestep % output_every != 0) - next_output = (ntimestep/output_every)*output_every + output_every; + + if(output_every == 0) next_output = update->laststep + 1; + + if(output_every && ntimestep % output_every != 0) + next_output = (ntimestep / output_every) * output_every + output_every; // half timestep constraint on pre-step, full timestep thereafter - if (strstr(update->integrate_style,"verlet")) { + if(strstr(update->integrate_style, "verlet")) { dtv = update->dt; dtfsq = 0.5 * update->dt * update->dt * force->ftm2v; post_force(vflag); @@ -441,15 +492,16 @@ void FixShakeCuda::setup(int vflag) dtv = step_respa[0]; dtf_innerhalf = 0.5 * step_respa[0] * force->ftm2v; dtf_inner = dtf_innerhalf; - ((Respa *) update->integrate)->copy_flevel_f(nlevels_respa-1); - post_force_respa(vflag,nlevels_respa-1,0); - ((Respa *) update->integrate)->copy_f_flevel(nlevels_respa-1); + ((Respa*) update->integrate)->copy_flevel_f(nlevels_respa - 1); + post_force_respa(vflag, nlevels_respa - 1, 0); + ((Respa*) update->integrate)->copy_f_flevel(nlevels_respa - 1); dtf_inner = step_respa[0] * force->ftm2v; } - Cuda_FixShakeCuda_Init(&cuda->shared_data,dtv, dtfsq, - cu_shake_flag->dev_data(),cu_shake_atom->dev_data(),cu_shake_type->dev_data(), cu_xshake->dev_data(), - cu_bond_distance->dev_data(),cu_angle_distance->dev_data(),cu_virial->dev_data(), - max_iter,tolerance); + + Cuda_FixShakeCuda_Init(&cuda->shared_data, dtv, dtfsq, + cu_shake_flag->dev_data(), cu_shake_atom->dev_data(), cu_shake_type->dev_data(), cu_xshake->dev_data(), + cu_bond_distance->dev_data(), cu_angle_distance->dev_data(), cu_virial->dev_data(), + max_iter, tolerance); } /* ---------------------------------------------------------------------- @@ -460,7 +512,7 @@ void FixShakeCuda::setup(int vflag) void FixShakeCuda::pre_neighbor() { - int atom1,atom2,atom3,atom4; + int atom1, atom2, atom3, atom4; // local copies of atom quantities // used by SHAKE until next re-neighboring @@ -475,112 +527,141 @@ void FixShakeCuda::pre_neighbor() // extend size of SHAKE list if necessary - if (nlocal > maxlist) { + if(nlocal > maxlist) { maxlist = nlocal; memory->destroy(list); - memory->create(list,maxlist,"shake:list"); - delete cu_list; cu_list = new cCudaData(list,maxlist); + memory->create(list, maxlist, "shake:list"); + delete cu_list; + cu_list = new cCudaData(list, maxlist); } // build list of SHAKE clusters I compute nlist = 0; - int count2=0,count3=0,count4=0,count3a=0; - for (int i = 0; i < nlocal; i++) - if (shake_flag[i]) { - if(shake_flag[i] == 2) count2++; - if(shake_flag[i] == 3) count3++; - if(shake_flag[i] == 4) count4++; - if(shake_flag[i] == 1) count3a++; + int count2 = 0, count3 = 0, count4 = 0, count3a = 0; - if (shake_flag[i] == 2) { + for(int i = 0; i < nlocal; i++) + if(shake_flag[i]) { + if(shake_flag[i] == 2) count2++; + + if(shake_flag[i] == 3) count3++; + + if(shake_flag[i] == 4) count4++; + + if(shake_flag[i] == 1) count3a++; + + if(shake_flag[i] == 2) { atom1 = atom->map(shake_atom[i][0]); atom2 = atom->map(shake_atom[i][1]); - if (atom1 == -1 || atom2 == -1) { + + if(atom1 == -1 || atom2 == -1) { char str[128]; sprintf(str, "Shake atoms %d %d missing on proc %d at step " BIGINT_FORMAT, - shake_atom[i][0],shake_atom[i][1],me,update->ntimestep); - error->one(FLERR,str); + shake_atom[i][0], shake_atom[i][1], me, update->ntimestep); + error->one(FLERR, str); } - if (i <= atom1 && i <= atom2) list[nlist++] = i; - } else if (shake_flag[i] % 2 == 1) { + + if(i <= atom1 && i <= atom2) list[nlist++] = i; + } else if(shake_flag[i] % 2 == 1) { atom1 = atom->map(shake_atom[i][0]); atom2 = atom->map(shake_atom[i][1]); atom3 = atom->map(shake_atom[i][2]); - if (atom1 == -1 || atom2 == -1 || atom3 == -1) { + + if(atom1 == -1 || atom2 == -1 || atom3 == -1) { char str[128]; sprintf(str, "Shake atoms %d %d %d missing on proc %d at step " BIGINT_FORMAT, - shake_atom[i][0],shake_atom[i][1],shake_atom[i][2], - me,update->ntimestep); - error->one(FLERR,str); + shake_atom[i][0], shake_atom[i][1], shake_atom[i][2], + me, update->ntimestep); + error->one(FLERR, str); } - if (i <= atom1 && i <= atom2 && i <= atom3) list[nlist++] = i; + + if(i <= atom1 && i <= atom2 && i <= atom3) list[nlist++] = i; } else { atom1 = atom->map(shake_atom[i][0]); atom2 = atom->map(shake_atom[i][1]); atom3 = atom->map(shake_atom[i][2]); atom4 = atom->map(shake_atom[i][3]); - if (atom1 == -1 || atom2 == -1 || atom3 == -1 || atom4 == -1) { + + if(atom1 == -1 || atom2 == -1 || atom3 == -1 || atom4 == -1) { char str[128]; sprintf(str, "Shake atoms %d %d %d %d missing on proc %d at step " BIGINT_FORMAT, - shake_atom[i][0],shake_atom[i][1], - shake_atom[i][2],shake_atom[i][3], - me,update->ntimestep); - error->one(FLERR,str); + shake_atom[i][0], shake_atom[i][1], + shake_atom[i][2], shake_atom[i][3], + me, update->ntimestep); + error->one(FLERR, str); } - if (i <= atom1 && i <= atom2 && i <= atom3 && i <= atom4) + + if(i <= atom1 && i <= atom2 && i <= atom3 && i <= atom4) list[nlist++] = i; } } - count2/=2; - count3/=3; - count4/=4; - count3a/=3; - count3+=count2; - count4+=count3; - count3a+=count4; - for(int k = 0,l = count2; k < count2; k++) - { - if(shake_flag[list[k]]!=2) - { - while(shake_flag[list[l]]!=2 && lupload(); - cu_bond_distance->upload(); - cu_angle_distance->upload(); - cu_shake_flag->upload(); - cu_shake_atom->upload(); - cu_shake_type->upload(); + for(int k = 0, l = count2; k < count2; k++) { + if(shake_flag[list[k]] != 2) { + while(shake_flag[list[l]] != 2 && l < nlist - 1) l++; - neighbor_step=true; + if(shake_flag[list[l]] != 2) { + printf("FixShakeCuda: Error in List SortA %i %i\n", k, l); + return; + } + + int tmp = list[k]; + list[k] = list[l]; + list[l] = tmp; + } + } + + for(int k = count2, l = count3; k < count3; k++) { + if(shake_flag[list[k]] != 3) { + while(shake_flag[list[l]] != 3 && l < nlist - 1) l++; + + if(shake_flag[list[l]] != 3) { + printf("FixShakeCuda: Error in List SortB %i %i\n", k, l); + return; + } + + int tmp = list[k]; + list[k] = list[l]; + list[l] = tmp; + } + } + + for(int k = count3, l = count4; k < count4; k++) { + if(shake_flag[list[k]] != 4) { + while(shake_flag[list[l]] != 4 && l < nlist - 1) l++; + + if(shake_flag[list[l]] != 4) { + printf("FixShakeCuda: Error in List SortC %i %i\n", k, l); + return; + } + + int tmp = list[k]; + list[k] = list[l]; + list[l] = tmp; + } + } + + cu_list->upload(); + cu_bond_distance->upload(); + cu_angle_distance->upload(); + cu_shake_flag->upload(); + cu_shake_atom->upload(); + cu_shake_type->upload(); + + neighbor_step = true; } /* ---------------------------------------------------------------------- @@ -589,26 +670,26 @@ void FixShakeCuda::pre_neighbor() void FixShakeCuda::post_force(int vflag) { - timespec starttime; - timespec endtime; + timespec starttime; + timespec endtime; - if(cuda->finished_setup && neighbor_step) - { - Cuda_FixShakeCuda_Init(&cuda->shared_data,dtv, dtfsq, - cu_shake_flag->dev_data(),cu_shake_atom->dev_data(),cu_shake_type->dev_data(), cu_xshake->dev_data(), - cu_bond_distance->dev_data(),cu_angle_distance->dev_data(),cu_virial->dev_data(), - max_iter,tolerance); + if(cuda->finished_setup && neighbor_step) { + Cuda_FixShakeCuda_Init(&cuda->shared_data, dtv, dtfsq, + cu_shake_flag->dev_data(), cu_shake_atom->dev_data(), cu_shake_type->dev_data(), cu_xshake->dev_data(), + cu_bond_distance->dev_data(), cu_angle_distance->dev_data(), cu_virial->dev_data(), + max_iter, tolerance); - } + } - if(not cuda->finished_setup) - cuda->downloadAll(); - if (update->ntimestep == next_output) - { - if(cuda->finished_setup) - cuda->cu_x->download(); - stats(); + if(not cuda->finished_setup) + cuda->downloadAll(); + + if(update->ntimestep == next_output) { + if(cuda->finished_setup) + cuda->cu_x->download(); + + stats(); } // xshake = unconstrained move with current v,f @@ -619,47 +700,53 @@ void FixShakeCuda::post_force(int vflag) //if(cuda->finished_setup) cu_xshake->download(); - if (nprocs > 1) - { - //if(cuda->finished_setup) - //cu_xshake->download(); - comm->forward_comm_fix(this); - //if(cuda->finished_setup) - //cu_xshake->upload(); + if(nprocs > 1) { + //if(cuda->finished_setup) + //cu_xshake->download(); + comm->forward_comm_fix(this); + //if(cuda->finished_setup) + //cu_xshake->upload(); } + // virial setup - if (vflag) v_setup(vflag); + if(vflag) v_setup(vflag); else evflag = 0; // loop over clusters - clock_gettime(CLOCK_REALTIME,&starttime); - if(cuda->finished_setup) - { - cu_virial->upload(); - if(vflag_atom) cuda->cu_vatom->upload(); + clock_gettime(CLOCK_REALTIME, &starttime); + + if(cuda->finished_setup) { + cu_virial->upload(); + + if(vflag_atom) cuda->cu_vatom->upload(); + + Cuda_FixShakeCuda_Shake(&cuda->shared_data, vflag, vflag_atom, (int*)cu_list->dev_data(), nlist); + cu_virial->download(); - Cuda_FixShakeCuda_Shake(&cuda->shared_data,vflag,vflag_atom,(int*)cu_list->dev_data(),nlist); - cu_virial->download(); if(vflag_atom) cuda->cu_vatom->download(); - } - else - for (int i = 0; i < nlist; i++) { - int m = list[i]; - if (shake_flag[m] == 2) shake2(m); - else if (shake_flag[m] == 3) shake3(m); - else if (shake_flag[m] == 4) shake4(m); - else shake3angle(m); - } + } else + for(int i = 0; i < nlist; i++) { + int m = list[i]; + + if(shake_flag[m] == 2) shake2(m); + else if(shake_flag[m] == 3) shake3(m); + else if(shake_flag[m] == 4) shake4(m); + else shake3angle(m); + } + if((not cuda->finished_setup)) cuda->cu_f->upload(); - clock_gettime(CLOCK_REALTIME,&endtime); - if(cuda->finished_setup) - time_postforce+=(endtime.tv_sec-starttime.tv_sec+1.0*(endtime.tv_nsec-starttime.tv_nsec)/1000000000); - else - time_postforce=0.0; - //printf("Postforce time: %lf\n",time_postforce); + + clock_gettime(CLOCK_REALTIME, &endtime); + + if(cuda->finished_setup) + time_postforce += (endtime.tv_sec - starttime.tv_sec + 1.0 * (endtime.tv_nsec - starttime.tv_nsec) / 1000000000); + else + time_postforce = 0.0; + + //printf("Postforce time: %lf\n",time_postforce); } /* ---------------------------------------------------------------------- @@ -670,26 +757,30 @@ int FixShakeCuda::dof(int igroup) { int groupbit = group->bitmask[igroup]; - int *mask = atom->mask; - int *tag = atom->tag; + int* mask = atom->mask; + int* tag = atom->tag; int nlocal = atom->nlocal; // count dof in a cluster if and only if // the central atom is in group and atom i is the central atom int n = 0; - for (int i = 0; i < nlocal; i++) { - if (!(mask[i] & groupbit)) continue; - if (shake_flag[i] == 0) continue; - if (shake_atom[i][0] != tag[i]) continue; - if (shake_flag[i] == 1) n += 3; - else if (shake_flag[i] == 2) n += 1; - else if (shake_flag[i] == 3) n += 2; - else if (shake_flag[i] == 4) n += 3; + + for(int i = 0; i < nlocal; i++) { + if(!(mask[i] & groupbit)) continue; + + if(shake_flag[i] == 0) continue; + + if(shake_atom[i][0] != tag[i]) continue; + + if(shake_flag[i] == 1) n += 3; + else if(shake_flag[i] == 2) n += 1; + else if(shake_flag[i] == 3) n += 2; + else if(shake_flag[i] == 4) n += 3; } int nall; - MPI_Allreduce(&n,&nall,1,MPI_INT,MPI_SUM,world); + MPI_Allreduce(&n, &nall, 1, MPI_INT, MPI_SUM, world); return nall; } @@ -703,34 +794,36 @@ int FixShakeCuda::dof(int igroup) void FixShakeCuda::find_clusters() { - int i,j,m,n; - int flag,flag_all,messtag,loop,nbuf,nbufmax,size; + int i, j, m, n; + int flag, flag_all, messtag, loop, nbuf, nbufmax, size; double massone; - int *buf,*bufcopy; + int* buf, *bufcopy; MPI_Request request; MPI_Status status; - if (me == 0 && screen) fprintf(screen,"Finding SHAKE clusters ...\n"); + if(me == 0 && screen) fprintf(screen, "Finding SHAKE clusters ...\n"); // local copies of atom ptrs - int *tag = atom->tag; - int *type = atom->type; - int *mask = atom->mask; - double *mass = atom->mass; - double *rmass = atom->rmass; - int **bond_type = atom->bond_type; - int **angle_type = atom->angle_type; - int **nspecial = atom->nspecial; - int **special = atom->special; + int* tag = atom->tag; + int* type = atom->type; + int* mask = atom->mask; + double* mass = atom->mass; + double* rmass = atom->rmass; + int** bond_type = atom->bond_type; + int** angle_type = atom->angle_type; + int** nspecial = atom->nspecial; + int** special = atom->special; int nlocal = atom->nlocal; // setup ring of procs int next = me + 1; - int prev = me -1; - if (next == nprocs) next = 0; - if (prev < 0) prev = nprocs - 1; + int prev = me - 1; + + if(next == nprocs) next = 0; + + if(prev < 0) prev = nprocs - 1; // ----------------------------------------------------- // allocate arrays for self (1d) and bond partners (2d) @@ -747,29 +840,31 @@ void FixShakeCuda::find_clusters() // ----------------------------------------------------- int max = 0; - for (i = 0; i < nlocal; i++) max = MAX(max,nspecial[i][0]); - int *npartner,*nshake; - memory->create(npartner,nlocal,"shake:npartner"); - memory->create(nshake,nlocal,"shake:nshake"); + for(i = 0; i < nlocal; i++) max = MAX(max, nspecial[i][0]); - int **partner_tag,**partner_mask,**partner_type,**partner_massflag; - int ** partner_bondtype,**partner_shake,**partner_nshake; - memory->create(partner_tag,nlocal,max,"shake:partner_tag"); - memory->create(partner_mask,nlocal,max,"shake:partner_mask"); - memory->create(partner_type,nlocal,max,"shake:partner_type"); - memory->create(partner_massflag,nlocal,max,"shake:partner_massflag"); - memory->create(partner_bondtype,nlocal,max,"shake:partner_bondtype"); - memory->create(partner_shake,nlocal,max,"shake:partner_shake"); - memory->create(partner_nshake,nlocal,max,"shake:partner_nshake"); + int* npartner, *nshake; + memory->create(npartner, nlocal, "shake:npartner"); + memory->create(nshake, nlocal, "shake:nshake"); + + int** partner_tag, **partner_mask, **partner_type, **partner_massflag; + int** partner_bondtype, **partner_shake, **partner_nshake; + memory->create(partner_tag, nlocal, max, "shake:partner_tag"); + memory->create(partner_mask, nlocal, max, "shake:partner_mask"); + memory->create(partner_type, nlocal, max, "shake:partner_type"); + memory->create(partner_massflag, nlocal, max, "shake:partner_massflag"); + memory->create(partner_bondtype, nlocal, max, "shake:partner_bondtype"); + memory->create(partner_shake, nlocal, max, "shake:partner_shake"); + memory->create(partner_nshake, nlocal, max, "shake:partner_nshake"); // ----------------------------------------------------- // set npartner and partner_tag from special arrays // ----------------------------------------------------- - for (i = 0; i < nlocal; i++) { + for(i = 0; i < nlocal; i++) { npartner[i] = nspecial[i][0]; - for (j = 0; j < npartner[i]; j++) partner_tag[i][j] = special[i][j]; + + for(j = 0; j < npartner[i]; j++) partner_tag[i][j] = special[i][j]; } // ----------------------------------------------------- @@ -786,33 +881,40 @@ void FixShakeCuda::find_clusters() int nper = 6; nbuf = 0; - for (i = 0; i < nlocal; i++) { - for (j = 0; j < npartner[i]; j++) { + + for(i = 0; i < nlocal; i++) { + for(j = 0; j < npartner[i]; j++) { partner_mask[i][j] = 0; partner_type[i][j] = 0; partner_massflag[i][j] = 0; partner_bondtype[i][j] = 0; m = atom->map(partner_tag[i][j]); - if (m >= 0 && m < nlocal) { + + if(m >= 0 && m < nlocal) { partner_mask[i][j] = mask[m]; partner_type[i][j] = type[m]; - if (nmass) { - if (rmass) massone = rmass[m]; + + if(nmass) { + if(rmass) massone = rmass[m]; else massone = mass[type[m]]; + partner_massflag[i][j] = masscheck(massone); } - n = bondfind(i,tag[i],partner_tag[i][j]); - if (n >= 0) partner_bondtype[i][j] = bond_type[i][n]; + + n = bondfind(i, tag[i], partner_tag[i][j]); + + if(n >= 0) partner_bondtype[i][j] = bond_type[i][n]; else { - n = bondfind(m,tag[i],partner_tag[i][j]); - if (n >= 0) partner_bondtype[i][j] = bond_type[m][n]; + n = bondfind(m, tag[i], partner_tag[i][j]); + + if(n >= 0) partner_bondtype[i][j] = bond_type[m][n]; } } else nbuf += nper; } } - MPI_Allreduce(&nbuf,&nbufmax,1,MPI_INT,MPI_MAX,world); + MPI_Allreduce(&nbuf, &nbufmax, 1, MPI_INT, MPI_MAX, world); buf = new int[nbufmax]; bufcopy = new int[nbufmax]; @@ -820,18 +922,22 @@ void FixShakeCuda::find_clusters() // fill buffer with info size = 0; - for (i = 0; i < nlocal; i++) { - for (j = 0; j < npartner[i]; j++) { + + for(i = 0; i < nlocal; i++) { + for(j = 0; j < npartner[i]; j++) { m = atom->map(partner_tag[i][j]); - if (m < 0 || m >= nlocal) { + + if(m < 0 || m >= nlocal) { buf[size] = tag[i]; - buf[size+1] = partner_tag[i][j]; - buf[size+2] = 0; - buf[size+3] = 0; - buf[size+4] = 0; - n = bondfind(i,tag[i],partner_tag[i][j]); - if (n >= 0) buf[size+5] = bond_type[i][n]; - else buf[size+5] = 0; + buf[size + 1] = partner_tag[i][j]; + buf[size + 2] = 0; + buf[size + 3] = 0; + buf[size + 4] = 0; + n = bondfind(i, tag[i], partner_tag[i][j]); + + if(n >= 0) buf[size + 5] = bond_type[i][n]; + else buf[size + 5] = 0; + size += nper; } } @@ -844,45 +950,58 @@ void FixShakeCuda::find_clusters() // search for bond with 1st atom and fill in bondtype messtag = 1; - for (loop = 0; loop < nprocs; loop++) { + + for(loop = 0; loop < nprocs; loop++) { i = 0; - while (i < size) { - m = atom->map(buf[i+1]); - if (m >= 0 && m < nlocal) { - buf[i+2] = mask[m]; - buf[i+3] = type[m]; - if (nmass) { - if (rmass) massone = rmass[m]; + + while(i < size) { + m = atom->map(buf[i + 1]); + + if(m >= 0 && m < nlocal) { + buf[i + 2] = mask[m]; + buf[i + 3] = type[m]; + + if(nmass) { + if(rmass) massone = rmass[m]; else massone = mass[type[m]]; - buf[i+4] = masscheck(massone); + + buf[i + 4] = masscheck(massone); } - if (buf[i+5] == 0) { - n = bondfind(m,buf[i],buf[i+1]); - if (n >= 0) buf[i+5] = bond_type[m][n]; + + if(buf[i + 5] == 0) { + n = bondfind(m, buf[i], buf[i + 1]); + + if(n >= 0) buf[i + 5] = bond_type[m][n]; } } + i += nper; } - if (me != next) { - MPI_Irecv(bufcopy,nbufmax,MPI_INT,prev,messtag,world,&request); - MPI_Send(buf,size,MPI_INT,next,messtag,world); - MPI_Wait(&request,&status); - MPI_Get_count(&status,MPI_INT,&size); - for (j = 0; j < size; j++) buf[j] = bufcopy[j]; + + if(me != next) { + MPI_Irecv(bufcopy, nbufmax, MPI_INT, prev, messtag, world, &request); + MPI_Send(buf, size, MPI_INT, next, messtag, world); + MPI_Wait(&request, &status); + MPI_Get_count(&status, MPI_INT, &size); + + for(j = 0; j < size; j++) buf[j] = bufcopy[j]; } } // store partner info returned to me m = 0; - while (m < size) { + + while(m < size) { i = atom->map(buf[m]); - for (j = 0; j < npartner[i]; j++) - if (buf[m+1] == partner_tag[i][j]) break; - partner_mask[i][j] = buf[m+2]; - partner_type[i][j] = buf[m+3]; - partner_massflag[i][j] = buf[m+4]; - partner_bondtype[i][j] = buf[m+5]; + + for(j = 0; j < npartner[i]; j++) + if(buf[m + 1] == partner_tag[i][j]) break; + + partner_mask[i][j] = buf[m + 2]; + partner_type[i][j] = buf[m + 3]; + partner_massflag[i][j] = buf[m + 4]; + partner_bondtype[i][j] = buf[m + 5]; m += nper; } @@ -898,16 +1017,21 @@ void FixShakeCuda::find_clusters() // else it's an error flag = 0; - for (i = 0; i < nlocal; i++) - for (j = 0; j < npartner[i]; j++) { - if (partner_type[i][j] == 0) flag = 1; - if (!(mask[i] & groupbit)) continue; - if (!(partner_mask[i][j] & groupbit)) continue; - if (partner_bondtype[i][j] == 0) flag = 1; + + for(i = 0; i < nlocal; i++) + for(j = 0; j < npartner[i]; j++) { + if(partner_type[i][j] == 0) flag = 1; + + if(!(mask[i] & groupbit)) continue; + + if(!(partner_mask[i][j] & groupbit)) continue; + + if(partner_bondtype[i][j] == 0) flag = 1; } - MPI_Allreduce(&flag,&flag_all,1,MPI_INT,MPI_SUM,world); - if (flag_all) error->all(FLERR,"Did not find fix shake partner info"); + MPI_Allreduce(&flag, &flag_all, 1, MPI_INT, MPI_SUM, world); + + if(flag_all) error->all(FLERR, "Did not find fix shake partner info"); // ----------------------------------------------------- // identify SHAKEable bonds @@ -921,35 +1045,41 @@ void FixShakeCuda::find_clusters() int np; - for (i = 0; i < nlocal; i++) { + for(i = 0; i < nlocal; i++) { nshake[i] = 0; np = npartner[i]; - for (j = 0; j < np; j++) { + + for(j = 0; j < np; j++) { partner_shake[i][j] = 0; - if (!(mask[i] & groupbit)) continue; - if (!(partner_mask[i][j] & groupbit)) continue; - if (partner_bondtype[i][j] <= 0) continue; + if(!(mask[i] & groupbit)) continue; - if (bond_flag[partner_bondtype[i][j]]) { + if(!(partner_mask[i][j] & groupbit)) continue; + + if(partner_bondtype[i][j] <= 0) continue; + + if(bond_flag[partner_bondtype[i][j]]) { partner_shake[i][j] = 1; nshake[i]++; continue; } - if (type_flag[type[i]] || type_flag[partner_type[i][j]]) { + + if(type_flag[type[i]] || type_flag[partner_type[i][j]]) { partner_shake[i][j] = 1; nshake[i]++; continue; } - if (nmass) { - if (partner_massflag[i][j]) { + + if(nmass) { + if(partner_massflag[i][j]) { partner_shake[i][j] = 1; nshake[i]++; continue; } else { - if (rmass) massone = rmass[i]; + if(rmass) massone = rmass[i]; else massone = mass[type[i]]; - if (masscheck(massone)) { + + if(masscheck(massone)) { partner_shake[i][j] = 1; nshake[i]++; continue; @@ -970,15 +1100,17 @@ void FixShakeCuda::find_clusters() // nbufmax = largest buffer needed to hold info from any proc nbuf = 0; - for (i = 0; i < nlocal; i++) { - for (j = 0; j < npartner[i]; j++) { + + for(i = 0; i < nlocal; i++) { + for(j = 0; j < npartner[i]; j++) { m = atom->map(partner_tag[i][j]); - if (m >= 0 && m < nlocal) partner_nshake[i][j] = nshake[m]; + + if(m >= 0 && m < nlocal) partner_nshake[i][j] = nshake[m]; else nbuf += 3; } } - MPI_Allreduce(&nbuf,&nbufmax,1,MPI_INT,MPI_MAX,world); + MPI_Allreduce(&nbuf, &nbufmax, 1, MPI_INT, MPI_MAX, world); buf = new int[nbufmax]; bufcopy = new int[nbufmax]; @@ -986,12 +1118,14 @@ void FixShakeCuda::find_clusters() // fill buffer with info size = 0; - for (i = 0; i < nlocal; i++) { - for (j = 0; j < npartner[i]; j++) { + + for(i = 0; i < nlocal; i++) { + for(j = 0; j < npartner[i]; j++) { m = atom->map(partner_tag[i][j]); - if (m < 0 || m >= nlocal) { + + if(m < 0 || m >= nlocal) { buf[size] = tag[i]; - buf[size+1] = partner_tag[i][j]; + buf[size + 1] = partner_tag[i][j]; size += 3; } } @@ -1002,30 +1136,39 @@ void FixShakeCuda::find_clusters() // if I own partner, fill in nshake value messtag = 2; - for (loop = 0; loop < nprocs; loop++) { + + for(loop = 0; loop < nprocs; loop++) { i = 0; - while (i < size) { - m = atom->map(buf[i+1]); - if (m >= 0 && m < nlocal) buf[i+2] = nshake[m]; + + while(i < size) { + m = atom->map(buf[i + 1]); + + if(m >= 0 && m < nlocal) buf[i + 2] = nshake[m]; + i += 3; } - if (me != next) { - MPI_Irecv(bufcopy,nbufmax,MPI_INT,prev,messtag,world,&request); - MPI_Send(buf,size,MPI_INT,next,messtag,world); - MPI_Wait(&request,&status); - MPI_Get_count(&status,MPI_INT,&size); - for (j = 0; j < size; j++) buf[j] = bufcopy[j]; + + if(me != next) { + MPI_Irecv(bufcopy, nbufmax, MPI_INT, prev, messtag, world, &request); + MPI_Send(buf, size, MPI_INT, next, messtag, world); + MPI_Wait(&request, &status); + MPI_Get_count(&status, MPI_INT, &size); + + for(j = 0; j < size; j++) buf[j] = bufcopy[j]; } } // store partner info returned to me m = 0; - while (m < size) { + + while(m < size) { i = atom->map(buf[m]); - for (j = 0; j < npartner[i]; j++) - if (buf[m+1] == partner_tag[i][j]) break; - partner_nshake[i][j] = buf[m+2]; + + for(j = 0; j < npartner[i]; j++) + if(buf[m + 1] == partner_tag[i][j]) break; + + partner_nshake[i][j] = buf[m + 2]; m += 3; } @@ -1039,18 +1182,25 @@ void FixShakeCuda::find_clusters() // ----------------------------------------------------- flag = 0; - for (i = 0; i < nlocal; i++) if (nshake[i] > 3) flag = 1; - MPI_Allreduce(&flag,&flag_all,1,MPI_INT,MPI_SUM,world); - if (flag_all) error->all(FLERR,"Shake cluster of more than 4 atoms"); + + for(i = 0; i < nlocal; i++) if(nshake[i] > 3) flag = 1; + + MPI_Allreduce(&flag, &flag_all, 1, MPI_INT, MPI_SUM, world); + + if(flag_all) error->all(FLERR, "Shake cluster of more than 4 atoms"); flag = 0; - for (i = 0; i < nlocal; i++) { - if (nshake[i] <= 1) continue; - for (j = 0; j < npartner[i]; j++) - if (partner_shake[i][j] && partner_nshake[i][j] > 1) flag = 1; + + for(i = 0; i < nlocal; i++) { + if(nshake[i] <= 1) continue; + + for(j = 0; j < npartner[i]; j++) + if(partner_shake[i][j] && partner_nshake[i][j] > 1) flag = 1; } - MPI_Allreduce(&flag,&flag_all,1,MPI_INT,MPI_SUM,world); - if (flag_all) error->all(FLERR,"Shake clusters are connected"); + + MPI_Allreduce(&flag, &flag_all, 1, MPI_INT, MPI_SUM, world); + + if(flag_all) error->all(FLERR, "Shake clusters are connected"); // ----------------------------------------------------- // set SHAKE arrays that are stored with atoms & add angle constraints @@ -1070,7 +1220,7 @@ void FixShakeCuda::find_clusters() // for 3-atom angle cluster, 3rd value is angletype // ----------------------------------------------------- - for (i = 0; i < nlocal; i++) { + for(i = 0; i < nlocal; i++) { shake_flag[i] = 0; shake_atom[i][0] = 0; shake_atom[i][1] = 0; @@ -1080,10 +1230,11 @@ void FixShakeCuda::find_clusters() shake_type[i][1] = 0; shake_type[i][2] = 0; - if (nshake[i] == 1) { - for (j = 0; j < npartner[i]; j++) - if (partner_shake[i][j]) break; - if (partner_nshake[i][j] == 1 && tag[i] < partner_tag[i][j]) { + if(nshake[i] == 1) { + for(j = 0; j < npartner[i]; j++) + if(partner_shake[i][j]) break; + + if(partner_nshake[i][j] == 1 && tag[i] < partner_tag[i][j]) { shake_flag[i] = 2; shake_atom[i][0] = tag[i]; shake_atom[i][1] = partner_tag[i][j]; @@ -1091,23 +1242,27 @@ void FixShakeCuda::find_clusters() } } - if (nshake[i] > 1) { + if(nshake[i] > 1) { shake_flag[i] = 1; shake_atom[i][0] = tag[i]; - for (j = 0; j < npartner[i]; j++) - if (partner_shake[i][j]) { + + for(j = 0; j < npartner[i]; j++) + if(partner_shake[i][j]) { m = shake_flag[i]; shake_atom[i][m] = partner_tag[i][j]; - shake_type[i][m-1] = partner_bondtype[i][j]; + shake_type[i][m - 1] = partner_bondtype[i][j]; shake_flag[i]++; } } - if (nshake[i] == 2) { - n = anglefind(i,shake_atom[i][1],shake_atom[i][2]); - if (n < 0) continue; - if (angle_type[i][n] < 0) continue; - if (angle_flag[angle_type[i][n]]) { + if(nshake[i] == 2) { + n = anglefind(i, shake_atom[i][1], shake_atom[i][2]); + + if(n < 0) continue; + + if(angle_type[i][n] < 0) continue; + + if(angle_flag[angle_type[i][n]]) { shake_flag[i] = 1; shake_type[i][2] = angle_type[i][n]; } @@ -1125,12 +1280,16 @@ void FixShakeCuda::find_clusters() // nbufmax = largest buffer needed to hold info from any proc nbuf = 0; - for (i = 0; i < nlocal; i++) { - if (shake_flag[i] == 0) continue; - for (j = 0; j < npartner[i]; j++) { - if (partner_shake[i][j] == 0) continue; + + for(i = 0; i < nlocal; i++) { + if(shake_flag[i] == 0) continue; + + for(j = 0; j < npartner[i]; j++) { + if(partner_shake[i][j] == 0) continue; + m = atom->map(partner_tag[i][j]); - if (m >= 0 && m < nlocal) { + + if(m >= 0 && m < nlocal) { shake_flag[m] = shake_flag[i]; shake_atom[m][0] = shake_atom[i][0]; shake_atom[m][1] = shake_atom[i][1]; @@ -1143,7 +1302,7 @@ void FixShakeCuda::find_clusters() } } - MPI_Allreduce(&nbuf,&nbufmax,1,MPI_INT,MPI_MAX,world); + MPI_Allreduce(&nbuf, &nbufmax, 1, MPI_INT, MPI_MAX, world); buf = new int[nbufmax]; bufcopy = new int[nbufmax]; @@ -1151,21 +1310,25 @@ void FixShakeCuda::find_clusters() // fill buffer with info size = 0; - for (i = 0; i < nlocal; i++) { - if (shake_flag[i] == 0) continue; - for (j = 0; j < npartner[i]; j++) { - if (partner_shake[i][j] == 0) continue; + + for(i = 0; i < nlocal; i++) { + if(shake_flag[i] == 0) continue; + + for(j = 0; j < npartner[i]; j++) { + if(partner_shake[i][j] == 0) continue; + m = atom->map(partner_tag[i][j]); - if (m < 0 || m >= nlocal) { + + if(m < 0 || m >= nlocal) { buf[size] = partner_tag[i][j]; - buf[size+1] = shake_flag[i]; - buf[size+2] = shake_atom[i][0]; - buf[size+3] = shake_atom[i][1]; - buf[size+4] = shake_atom[i][2]; - buf[size+5] = shake_atom[i][3]; - buf[size+6] = shake_type[i][0]; - buf[size+7] = shake_type[i][1]; - buf[size+8] = shake_type[i][2]; + buf[size + 1] = shake_flag[i]; + buf[size + 2] = shake_atom[i][0]; + buf[size + 3] = shake_atom[i][1]; + buf[size + 4] = shake_atom[i][2]; + buf[size + 5] = shake_atom[i][3]; + buf[size + 6] = shake_type[i][0]; + buf[size + 7] = shake_type[i][1]; + buf[size + 8] = shake_type[i][2]; size += 9; } } @@ -1176,28 +1339,34 @@ void FixShakeCuda::find_clusters() // if I own ID, fill in shake array values messtag = 3; - for (loop = 0; loop < nprocs; loop++) { + + for(loop = 0; loop < nprocs; loop++) { i = 0; - while (i < size) { + + while(i < size) { m = atom->map(buf[i]); - if (m >= 0 && m < nlocal) { - shake_flag[m] = buf[i+1]; - shake_atom[m][0] = buf[i+2]; - shake_atom[m][1] = buf[i+3]; - shake_atom[m][2] = buf[i+4]; - shake_atom[m][3] = buf[i+5]; - shake_type[m][0] = buf[i+6]; - shake_type[m][1] = buf[i+7]; - shake_type[m][2] = buf[i+8]; + + if(m >= 0 && m < nlocal) { + shake_flag[m] = buf[i + 1]; + shake_atom[m][0] = buf[i + 2]; + shake_atom[m][1] = buf[i + 3]; + shake_atom[m][2] = buf[i + 4]; + shake_atom[m][3] = buf[i + 5]; + shake_type[m][0] = buf[i + 6]; + shake_type[m][1] = buf[i + 7]; + shake_type[m][2] = buf[i + 8]; } + i += 9; } - if (me != next) { - MPI_Irecv(bufcopy,nbufmax,MPI_INT,prev,messtag,world,&request); - MPI_Send(buf,size,MPI_INT,next,messtag,world); - MPI_Wait(&request,&status); - MPI_Get_count(&status,MPI_INT,&size); - for (j = 0; j < size; j++) buf[j] = bufcopy[j]; + + if(me != next) { + MPI_Irecv(bufcopy, nbufmax, MPI_INT, prev, messtag, world, &request); + MPI_Send(buf, size, MPI_INT, next, messtag, world); + MPI_Wait(&request, &status); + MPI_Get_count(&status, MPI_INT, &size); + + for(j = 0; j < size; j++) buf[j] = bufcopy[j]; } } @@ -1223,30 +1392,44 @@ void FixShakeCuda::find_clusters() // must set for all SHAKE bonds and angles stored by each atom // ----------------------------------------------------- - for (i = 0; i < nlocal; i++) { - if (shake_flag[i] == 0) continue; - else if (shake_flag[i] == 1) { - n = bondfind(i,shake_atom[i][0],shake_atom[i][1]); - if (n >= 0) bond_type[i][n] = -bond_type[i][n]; - n = bondfind(i,shake_atom[i][0],shake_atom[i][2]); - if (n >= 0) bond_type[i][n] = -bond_type[i][n]; - n = anglefind(i,shake_atom[i][1],shake_atom[i][2]); - if (n >= 0) angle_type[i][n] = -angle_type[i][n]; - } else if (shake_flag[i] == 2) { - n = bondfind(i,shake_atom[i][0],shake_atom[i][1]); - if (n >= 0) bond_type[i][n] = -bond_type[i][n]; - } else if (shake_flag[i] == 3) { - n = bondfind(i,shake_atom[i][0],shake_atom[i][1]); - if (n >= 0) bond_type[i][n] = -bond_type[i][n]; - n = bondfind(i,shake_atom[i][0],shake_atom[i][2]); - if (n >= 0) bond_type[i][n] = -bond_type[i][n]; - } else if (shake_flag[i] == 4) { - n = bondfind(i,shake_atom[i][0],shake_atom[i][1]); - if (n >= 0) bond_type[i][n] = -bond_type[i][n]; - n = bondfind(i,shake_atom[i][0],shake_atom[i][2]); - if (n >= 0) bond_type[i][n] = -bond_type[i][n]; - n = bondfind(i,shake_atom[i][0],shake_atom[i][3]); - if (n >= 0) bond_type[i][n] = -bond_type[i][n]; + for(i = 0; i < nlocal; i++) { + if(shake_flag[i] == 0) continue; + else if(shake_flag[i] == 1) { + n = bondfind(i, shake_atom[i][0], shake_atom[i][1]); + + if(n >= 0) bond_type[i][n] = -bond_type[i][n]; + + n = bondfind(i, shake_atom[i][0], shake_atom[i][2]); + + if(n >= 0) bond_type[i][n] = -bond_type[i][n]; + + n = anglefind(i, shake_atom[i][1], shake_atom[i][2]); + + if(n >= 0) angle_type[i][n] = -angle_type[i][n]; + } else if(shake_flag[i] == 2) { + n = bondfind(i, shake_atom[i][0], shake_atom[i][1]); + + if(n >= 0) bond_type[i][n] = -bond_type[i][n]; + } else if(shake_flag[i] == 3) { + n = bondfind(i, shake_atom[i][0], shake_atom[i][1]); + + if(n >= 0) bond_type[i][n] = -bond_type[i][n]; + + n = bondfind(i, shake_atom[i][0], shake_atom[i][2]); + + if(n >= 0) bond_type[i][n] = -bond_type[i][n]; + } else if(shake_flag[i] == 4) { + n = bondfind(i, shake_atom[i][0], shake_atom[i][1]); + + if(n >= 0) bond_type[i][n] = -bond_type[i][n]; + + n = bondfind(i, shake_atom[i][0], shake_atom[i][2]); + + if(n >= 0) bond_type[i][n] = -bond_type[i][n]; + + n = bondfind(i, shake_atom[i][0], shake_atom[i][3]); + + if(n >= 0) bond_type[i][n] = -bond_type[i][n]; } } @@ -1254,65 +1437,83 @@ void FixShakeCuda::find_clusters() // print info on SHAKE clusters // ----------------------------------------------------- - int count1,count2,count3,count4; + int count1, count2, count3, count4; count1 = count2 = count3 = count4 = 0; - for (i = 0; i < nlocal; i++) { - if (shake_flag[i] == 1) count1++; - else if (shake_flag[i] == 2) count2++; - else if (shake_flag[i] == 3) count3++; - else if (shake_flag[i] == 4) count4++; + + for(i = 0; i < nlocal; i++) { + if(shake_flag[i] == 1) count1++; + else if(shake_flag[i] == 2) count2++; + else if(shake_flag[i] == 3) count3++; + else if(shake_flag[i] == 4) count4++; } - for(int i=0;iupload(); cu_shake_atom->upload(); cu_shake_type->upload(); - Cuda_FixShakeCuda_Init(&cuda->shared_data,dtv, dtfsq, - cu_shake_flag->dev_data(),cu_shake_atom->dev_data(),cu_shake_type->dev_data(), cu_xshake->dev_data(), - cu_bond_distance->dev_data(),cu_angle_distance->dev_data(),cu_virial->dev_data(), - max_iter,tolerance); + Cuda_FixShakeCuda_Init(&cuda->shared_data, dtv, dtfsq, + cu_shake_flag->dev_data(), cu_shake_atom->dev_data(), cu_shake_type->dev_data(), cu_xshake->dev_data(), + cu_bond_distance->dev_data(), cu_angle_distance->dev_data(), cu_virial->dev_data(), + max_iter, tolerance); } void FixShakeCuda::swap_clusters(int i, int j) { - int tmp; - tmp = shake_flag[i]; shake_flag[i] = shake_flag[j]; shake_flag[j] = tmp; - tmp = shake_atom[i][0]; shake_atom[i][0] = shake_atom[j][0]; shake_atom[j][0] = tmp; - tmp = shake_atom[i][1]; shake_atom[i][1] = shake_atom[j][1]; shake_atom[j][1] = tmp; - tmp = shake_atom[i][2]; shake_atom[i][2] = shake_atom[j][2]; shake_atom[j][2] = tmp; - tmp = shake_atom[i][3]; shake_atom[i][3] = shake_atom[j][3]; shake_atom[j][3] = tmp; - tmp = shake_type[i][0]; shake_type[i][0] = shake_type[j][0]; shake_type[j][0] = tmp; - tmp = shake_type[i][1]; shake_type[i][1] = shake_type[j][1]; shake_type[j][1] = tmp; - tmp = shake_type[i][2]; shake_type[i][2] = shake_type[j][2]; shake_type[j][2] = tmp; + int tmp; + tmp = shake_flag[i]; + shake_flag[i] = shake_flag[j]; + shake_flag[j] = tmp; + tmp = shake_atom[i][0]; + shake_atom[i][0] = shake_atom[j][0]; + shake_atom[j][0] = tmp; + tmp = shake_atom[i][1]; + shake_atom[i][1] = shake_atom[j][1]; + shake_atom[j][1] = tmp; + tmp = shake_atom[i][2]; + shake_atom[i][2] = shake_atom[j][2]; + shake_atom[j][2] = tmp; + tmp = shake_atom[i][3]; + shake_atom[i][3] = shake_atom[j][3]; + shake_atom[j][3] = tmp; + tmp = shake_type[i][0]; + shake_type[i][0] = shake_type[j][0]; + shake_type[j][0] = tmp; + tmp = shake_type[i][1]; + shake_type[i][1] = shake_type[j][1]; + shake_type[j][1] = tmp; + tmp = shake_type[i][2]; + shake_type[i][2] = shake_type[j][2]; + shake_type[j][2] = tmp; } /* ---------------------------------------------------------------------- @@ -1322,8 +1523,9 @@ void FixShakeCuda::swap_clusters(int i, int j) int FixShakeCuda::masscheck(double massone) { - for (int i = 0; i < nmass; i++) - if (fabs(mass_list[i]-massone) <= MASSDELTA) return 1; + for(int i = 0; i < nmass; i++) + if(fabs(mass_list[i] - massone) <= MASSDELTA) return 1; + return 0; } @@ -1335,33 +1537,33 @@ int FixShakeCuda::masscheck(double massone) void FixShakeCuda::unconstrained_update() { - if(cuda->finished_setup) - { + if(cuda->finished_setup) { Cuda_FixShakeCuda_UnconstrainedUpdate(&cuda->shared_data); return; } double dtfmsq; - if (rmass) { - for (int i = 0; i < nlocal; i++) { - if (shake_flag[i]) { + if(rmass) { + for(int i = 0; i < nlocal; i++) { + if(shake_flag[i]) { dtfmsq = dtfsq / rmass[i]; - xshake[i][0] = x[i][0] + dtv*v[i][0] + dtfmsq*f[i][0]; - xshake[i][1] = x[i][1] + dtv*v[i][1] + dtfmsq*f[i][1]; - xshake[i][2] = x[i][2] + dtv*v[i][2] + dtfmsq*f[i][2]; + xshake[i][0] = x[i][0] + dtv * v[i][0] + dtfmsq * f[i][0]; + xshake[i][1] = x[i][1] + dtv * v[i][1] + dtfmsq * f[i][1]; + xshake[i][2] = x[i][2] + dtv * v[i][2] + dtfmsq * f[i][2]; } else xshake[i][2] = xshake[i][1] = xshake[i][0] = 0.0; } } else { - for (int i = 0; i < nlocal; i++) { - if (shake_flag[i]) { + for(int i = 0; i < nlocal; i++) { + if(shake_flag[i]) { dtfmsq = dtfsq / mass[type[i]]; - xshake[i][0] = x[i][0] + dtv*v[i][0] + dtfmsq*f[i][0]; - xshake[i][1] = x[i][1] + dtv*v[i][1] + dtfmsq*f[i][1]; - xshake[i][2] = x[i][2] + dtv*v[i][2] + dtfmsq*f[i][2]; + xshake[i][0] = x[i][0] + dtv * v[i][0] + dtfmsq * f[i][0]; + xshake[i][1] = x[i][1] + dtv * v[i][1] + dtfmsq * f[i][1]; + xshake[i][2] = x[i][2] + dtv * v[i][2] + dtfmsq * f[i][2]; } else xshake[i][2] = xshake[i][1] = xshake[i][0] = 0.0; } } + cu_xshake->upload(); } @@ -1369,9 +1571,9 @@ void FixShakeCuda::unconstrained_update() void FixShakeCuda::shake2(int m) { - int nlist,list[2]; + int nlist, list[2]; double v[6]; - double invmass0,invmass1; + double invmass0, invmass1; // local atom IDs and constraint distances @@ -1397,69 +1599,72 @@ void FixShakeCuda::shake2(int m) // scalar distances between atoms - double r01sq = r01[0]*r01[0] + r01[1]*r01[1] + r01[2]*r01[2]; - double s01sq = s01[0]*s01[0] + s01[1]*s01[1] + s01[2]*s01[2]; + double r01sq = r01[0] * r01[0] + r01[1] * r01[1] + r01[2] * r01[2]; + double s01sq = s01[0] * s01[0] + s01[1] * s01[1] + s01[2] * s01[2]; // a,b,c = coeffs in quadratic equation for lamda - if (rmass) { - invmass0 = 1.0/rmass[i0]; - invmass1 = 1.0/rmass[i1]; + if(rmass) { + invmass0 = 1.0 / rmass[i0]; + invmass1 = 1.0 / rmass[i1]; } else { - invmass0 = 1.0/mass[type[i0]]; - invmass1 = 1.0/mass[type[i1]]; + invmass0 = 1.0 / mass[type[i0]]; + invmass1 = 1.0 / mass[type[i1]]; } - double a = (invmass0+invmass1)*(invmass0+invmass1) * r01sq; - double b = 2.0 * (invmass0+invmass1) * - (s01[0]*r01[0] + s01[1]*r01[1] + s01[2]*r01[2]); - double c = s01sq - bond1*bond1; + double a = (invmass0 + invmass1) * (invmass0 + invmass1) * r01sq; + double b = 2.0 * (invmass0 + invmass1) * + (s01[0] * r01[0] + s01[1] * r01[1] + s01[2] * r01[2]); + double c = s01sq - bond1 * bond1; // error check - double determ = b*b - 4.0*a*c; - if (determ < 0.0) { - error->warning(FLERR,"Shake determinant < 0.0"); + double determ = b * b - 4.0 * a * c; + + if(determ < 0.0) { + error->warning(FLERR, "Shake determinant < 0.0"); determ = 0.0; } // exact quadratic solution for lamda - double lamda,lamda1,lamda2; - lamda1 = (-b+sqrt(determ)) / (2.0*a); - lamda2 = (-b-sqrt(determ)) / (2.0*a); + double lamda, lamda1, lamda2; + lamda1 = (-b + sqrt(determ)) / (2.0 * a); + lamda2 = (-b - sqrt(determ)) / (2.0 * a); - if (fabs(lamda1) <= fabs(lamda2)) lamda = lamda1; + if(fabs(lamda1) <= fabs(lamda2)) lamda = lamda1; else lamda = lamda2; // update forces if atom is owned by this processor lamda /= dtfsq; - if (i0 < nlocal) { - f[i0][0] += lamda*r01[0]; - f[i0][1] += lamda*r01[1]; - f[i0][2] += lamda*r01[2]; + if(i0 < nlocal) { + f[i0][0] += lamda * r01[0]; + f[i0][1] += lamda * r01[1]; + f[i0][2] += lamda * r01[2]; } - if (i1 < nlocal) { - f[i1][0] -= lamda*r01[0]; - f[i1][1] -= lamda*r01[1]; - f[i1][2] -= lamda*r01[2]; + if(i1 < nlocal) { + f[i1][0] -= lamda * r01[0]; + f[i1][1] -= lamda * r01[1]; + f[i1][2] -= lamda * r01[2]; } - if (evflag) { + if(evflag) { nlist = 0; - if (i0 < nlocal) list[nlist++] = i0; - if (i1 < nlocal) list[nlist++] = i1; - v[0] = lamda*r01[0]*r01[0]; - v[1] = lamda*r01[1]*r01[1]; - v[2] = lamda*r01[2]*r01[2]; - v[3] = lamda*r01[0]*r01[1]; - v[4] = lamda*r01[0]*r01[2]; - v[5] = lamda*r01[1]*r01[2]; + if(i0 < nlocal) list[nlist++] = i0; - v_tally(nlist,list,2.0,v); + if(i1 < nlocal) list[nlist++] = i1; + + v[0] = lamda * r01[0] * r01[0]; + v[1] = lamda * r01[1] * r01[1]; + v[2] = lamda * r01[2] * r01[2]; + v[3] = lamda * r01[0] * r01[1]; + v[4] = lamda * r01[0] * r01[2]; + v[5] = lamda * r01[1] * r01[2]; + + v_tally(nlist, list, 2.0, v); } } @@ -1467,9 +1672,9 @@ void FixShakeCuda::shake2(int m) void FixShakeCuda::shake3(int m) { - int nlist,list[3]; + int nlist, list[3]; double v[6]; - double invmass0,invmass1,invmass2; + double invmass0, invmass1, invmass2; // local atom IDs and constraint distances @@ -1509,54 +1714,56 @@ void FixShakeCuda::shake3(int m) // scalar distances between atoms - double r01sq = r01[0]*r01[0] + r01[1]*r01[1] + r01[2]*r01[2]; - double r02sq = r02[0]*r02[0] + r02[1]*r02[1] + r02[2]*r02[2]; - double s01sq = s01[0]*s01[0] + s01[1]*s01[1] + s01[2]*s01[2]; - double s02sq = s02[0]*s02[0] + s02[1]*s02[1] + s02[2]*s02[2]; + double r01sq = r01[0] * r01[0] + r01[1] * r01[1] + r01[2] * r01[2]; + double r02sq = r02[0] * r02[0] + r02[1] * r02[1] + r02[2] * r02[2]; + double s01sq = s01[0] * s01[0] + s01[1] * s01[1] + s01[2] * s01[2]; + double s02sq = s02[0] * s02[0] + s02[1] * s02[1] + s02[2] * s02[2]; // matrix coeffs and rhs for lamda equations - if (rmass) { - invmass0 = 1.0/rmass[i0]; - invmass1 = 1.0/rmass[i1]; - invmass2 = 1.0/rmass[i2]; + if(rmass) { + invmass0 = 1.0 / rmass[i0]; + invmass1 = 1.0 / rmass[i1]; + invmass2 = 1.0 / rmass[i2]; } else { - invmass0 = 1.0/mass[type[i0]]; - invmass1 = 1.0/mass[type[i1]]; - invmass2 = 1.0/mass[type[i2]]; + invmass0 = 1.0 / mass[type[i0]]; + invmass1 = 1.0 / mass[type[i1]]; + invmass2 = 1.0 / mass[type[i2]]; } - double a11 = 2.0 * (invmass0+invmass1) * - (s01[0]*r01[0] + s01[1]*r01[1] + s01[2]*r01[2]); + double a11 = 2.0 * (invmass0 + invmass1) * + (s01[0] * r01[0] + s01[1] * r01[1] + s01[2] * r01[2]); double a12 = 2.0 * invmass0 * - (s01[0]*r02[0] + s01[1]*r02[1] + s01[2]*r02[2]); + (s01[0] * r02[0] + s01[1] * r02[1] + s01[2] * r02[2]); double a21 = 2.0 * invmass0 * - (s02[0]*r01[0] + s02[1]*r01[1] + s02[2]*r01[2]); - double a22 = 2.0 * (invmass0+invmass2) * - (s02[0]*r02[0] + s02[1]*r02[1] + s02[2]*r02[2]); + (s02[0] * r01[0] + s02[1] * r01[1] + s02[2] * r01[2]); + double a22 = 2.0 * (invmass0 + invmass2) * + (s02[0] * r02[0] + s02[1] * r02[1] + s02[2] * r02[2]); // inverse of matrix - double determ = a11*a22 - a12*a21; - if (determ == 0.0) error->one(FLERR,"Shake determinant = 0.0"); - double determinv = 1.0/determ; + double determ = a11 * a22 - a12 * a21; - double a11inv = a22*determinv; - double a12inv = -a12*determinv; - double a21inv = -a21*determinv; - double a22inv = a11*determinv; + if(determ == 0.0) error->one(FLERR, "Shake determinant = 0.0"); + + double determinv = 1.0 / determ; + + double a11inv = a22 * determinv; + double a12inv = -a12 * determinv; + double a21inv = -a21 * determinv; + double a22inv = a11 * determinv; // quadratic correction coeffs - double r0102 = (r01[0]*r02[0] + r01[1]*r02[1] + r01[2]*r02[2]); + double r0102 = (r01[0] * r02[0] + r01[1] * r02[1] + r01[2] * r02[2]); - double quad1_0101 = (invmass0+invmass1)*(invmass0+invmass1) * r01sq; - double quad1_0202 = invmass0*invmass0 * r02sq; - double quad1_0102 = 2.0 * (invmass0+invmass1)*invmass0 * r0102; + double quad1_0101 = (invmass0 + invmass1) * (invmass0 + invmass1) * r01sq; + double quad1_0202 = invmass0 * invmass0 * r02sq; + double quad1_0102 = 2.0 * (invmass0 + invmass1) * invmass0 * r0102; - double quad2_0202 = (invmass0+invmass2)*(invmass0+invmass2) * r02sq; - double quad2_0101 = invmass0*invmass0 * r01sq; - double quad2_0102 = 2.0 * (invmass0+invmass2)*invmass0 * r0102; + double quad2_0202 = (invmass0 + invmass2) * (invmass0 + invmass2) * r02sq; + double quad2_0101 = invmass0 * invmass0 * r01sq; + double quad2_0102 = 2.0 * (invmass0 + invmass2) * invmass0 * r0102; // iterate until converged @@ -1565,23 +1772,25 @@ void FixShakeCuda::shake3(int m) int niter = 0; int done = 0; - double quad1,quad2,b1,b2,lamda01_new,lamda02_new; + double quad1, quad2, b1, b2, lamda01_new, lamda02_new; - while (!done && niter < max_iter) { - quad1 = quad1_0101 * lamda01*lamda01 + quad1_0202 * lamda02*lamda02 + - quad1_0102 * lamda01*lamda02; - quad2 = quad2_0101 * lamda01*lamda01 + quad2_0202 * lamda02*lamda02 + - quad2_0102 * lamda01*lamda02; + while(!done && niter < max_iter) { + quad1 = quad1_0101 * lamda01 * lamda01 + quad1_0202 * lamda02 * lamda02 + + quad1_0102 * lamda01 * lamda02; + quad2 = quad2_0101 * lamda01 * lamda01 + quad2_0202 * lamda02 * lamda02 + + quad2_0102 * lamda01 * lamda02; - b1 = bond1*bond1 - s01sq - quad1; - b2 = bond2*bond2 - s02sq - quad2; + b1 = bond1 * bond1 - s01sq - quad1; + b2 = bond2 * bond2 - s02sq - quad2; - lamda01_new = a11inv*b1 + a12inv*b2; - lamda02_new = a21inv*b1 + a22inv*b2; + lamda01_new = a11inv * b1 + a12inv * b2; + lamda02_new = a21inv * b1 + a22inv * b2; done = 1; - if (fabs(lamda01_new-lamda01) > tolerance) done = 0; - if (fabs(lamda02_new-lamda02) > tolerance) done = 0; + + if(fabs(lamda01_new - lamda01) > tolerance) done = 0; + + if(fabs(lamda02_new - lamda02) > tolerance) done = 0; lamda01 = lamda01_new; lamda02 = lamda02_new; @@ -1590,41 +1799,44 @@ void FixShakeCuda::shake3(int m) // update forces if atom is owned by this processor - lamda01 = lamda01/dtfsq; - lamda02 = lamda02/dtfsq; + lamda01 = lamda01 / dtfsq; + lamda02 = lamda02 / dtfsq; - if (i0 < nlocal) { - f[i0][0] += lamda01*r01[0] + lamda02*r02[0]; - f[i0][1] += lamda01*r01[1] + lamda02*r02[1]; - f[i0][2] += lamda01*r01[2] + lamda02*r02[2]; + if(i0 < nlocal) { + f[i0][0] += lamda01 * r01[0] + lamda02 * r02[0]; + f[i0][1] += lamda01 * r01[1] + lamda02 * r02[1]; + f[i0][2] += lamda01 * r01[2] + lamda02 * r02[2]; } - if (i1 < nlocal) { - f[i1][0] -= lamda01*r01[0]; - f[i1][1] -= lamda01*r01[1]; - f[i1][2] -= lamda01*r01[2]; + if(i1 < nlocal) { + f[i1][0] -= lamda01 * r01[0]; + f[i1][1] -= lamda01 * r01[1]; + f[i1][2] -= lamda01 * r01[2]; } - if (i2 < nlocal) { - f[i2][0] -= lamda02*r02[0]; - f[i2][1] -= lamda02*r02[1]; - f[i2][2] -= lamda02*r02[2]; + if(i2 < nlocal) { + f[i2][0] -= lamda02 * r02[0]; + f[i2][1] -= lamda02 * r02[1]; + f[i2][2] -= lamda02 * r02[2]; } - if (evflag) { + if(evflag) { nlist = 0; - if (i0 < nlocal) list[nlist++] = i0; - if (i1 < nlocal) list[nlist++] = i1; - if (i2 < nlocal) list[nlist++] = i2; - v[0] = lamda01*r01[0]*r01[0] + lamda02*r02[0]*r02[0]; - v[1] = lamda01*r01[1]*r01[1] + lamda02*r02[1]*r02[1]; - v[2] = lamda01*r01[2]*r01[2] + lamda02*r02[2]*r02[2]; - v[3] = lamda01*r01[0]*r01[1] + lamda02*r02[0]*r02[1]; - v[4] = lamda01*r01[0]*r01[2] + lamda02*r02[0]*r02[2]; - v[5] = lamda01*r01[1]*r01[2] + lamda02*r02[1]*r02[2]; + if(i0 < nlocal) list[nlist++] = i0; - v_tally(nlist,list,3.0,v); + if(i1 < nlocal) list[nlist++] = i1; + + if(i2 < nlocal) list[nlist++] = i2; + + v[0] = lamda01 * r01[0] * r01[0] + lamda02 * r02[0] * r02[0]; + v[1] = lamda01 * r01[1] * r01[1] + lamda02 * r02[1] * r02[1]; + v[2] = lamda01 * r01[2] * r01[2] + lamda02 * r02[2] * r02[2]; + v[3] = lamda01 * r01[0] * r01[1] + lamda02 * r02[0] * r02[1]; + v[4] = lamda01 * r01[0] * r01[2] + lamda02 * r02[0] * r02[2]; + v[5] = lamda01 * r01[1] * r01[2] + lamda02 * r02[1] * r02[2]; + + v_tally(nlist, list, 3.0, v); } } @@ -1632,9 +1844,9 @@ void FixShakeCuda::shake3(int m) void FixShakeCuda::shake4(int m) { - int nlist,list[4]; + int nlist, list[4]; double v[6]; - double invmass0,invmass1,invmass2,invmass3; + double invmass0, invmass1, invmass2, invmass3; // local atom IDs and constraint distances @@ -1688,89 +1900,91 @@ void FixShakeCuda::shake4(int m) // scalar distances between atoms - double r01sq = r01[0]*r01[0] + r01[1]*r01[1] + r01[2]*r01[2]; - double r02sq = r02[0]*r02[0] + r02[1]*r02[1] + r02[2]*r02[2]; - double r03sq = r03[0]*r03[0] + r03[1]*r03[1] + r03[2]*r03[2]; - double s01sq = s01[0]*s01[0] + s01[1]*s01[1] + s01[2]*s01[2]; - double s02sq = s02[0]*s02[0] + s02[1]*s02[1] + s02[2]*s02[2]; - double s03sq = s03[0]*s03[0] + s03[1]*s03[1] + s03[2]*s03[2]; + double r01sq = r01[0] * r01[0] + r01[1] * r01[1] + r01[2] * r01[2]; + double r02sq = r02[0] * r02[0] + r02[1] * r02[1] + r02[2] * r02[2]; + double r03sq = r03[0] * r03[0] + r03[1] * r03[1] + r03[2] * r03[2]; + double s01sq = s01[0] * s01[0] + s01[1] * s01[1] + s01[2] * s01[2]; + double s02sq = s02[0] * s02[0] + s02[1] * s02[1] + s02[2] * s02[2]; + double s03sq = s03[0] * s03[0] + s03[1] * s03[1] + s03[2] * s03[2]; // matrix coeffs and rhs for lamda equations - if (rmass) { - invmass0 = 1.0/rmass[i0]; - invmass1 = 1.0/rmass[i1]; - invmass2 = 1.0/rmass[i2]; - invmass3 = 1.0/rmass[i3]; + if(rmass) { + invmass0 = 1.0 / rmass[i0]; + invmass1 = 1.0 / rmass[i1]; + invmass2 = 1.0 / rmass[i2]; + invmass3 = 1.0 / rmass[i3]; } else { - invmass0 = 1.0/mass[type[i0]]; - invmass1 = 1.0/mass[type[i1]]; - invmass2 = 1.0/mass[type[i2]]; - invmass3 = 1.0/mass[type[i3]]; + invmass0 = 1.0 / mass[type[i0]]; + invmass1 = 1.0 / mass[type[i1]]; + invmass2 = 1.0 / mass[type[i2]]; + invmass3 = 1.0 / mass[type[i3]]; } - double a11 = 2.0 * (invmass0+invmass1) * - (s01[0]*r01[0] + s01[1]*r01[1] + s01[2]*r01[2]); + double a11 = 2.0 * (invmass0 + invmass1) * + (s01[0] * r01[0] + s01[1] * r01[1] + s01[2] * r01[2]); double a12 = 2.0 * invmass0 * - (s01[0]*r02[0] + s01[1]*r02[1] + s01[2]*r02[2]); + (s01[0] * r02[0] + s01[1] * r02[1] + s01[2] * r02[2]); double a13 = 2.0 * invmass0 * - (s01[0]*r03[0] + s01[1]*r03[1] + s01[2]*r03[2]); + (s01[0] * r03[0] + s01[1] * r03[1] + s01[2] * r03[2]); double a21 = 2.0 * invmass0 * - (s02[0]*r01[0] + s02[1]*r01[1] + s02[2]*r01[2]); - double a22 = 2.0 * (invmass0+invmass2) * - (s02[0]*r02[0] + s02[1]*r02[1] + s02[2]*r02[2]); + (s02[0] * r01[0] + s02[1] * r01[1] + s02[2] * r01[2]); + double a22 = 2.0 * (invmass0 + invmass2) * + (s02[0] * r02[0] + s02[1] * r02[1] + s02[2] * r02[2]); double a23 = 2.0 * invmass0 * - (s02[0]*r03[0] + s02[1]*r03[1] + s02[2]*r03[2]); + (s02[0] * r03[0] + s02[1] * r03[1] + s02[2] * r03[2]); double a31 = 2.0 * invmass0 * - (s03[0]*r01[0] + s03[1]*r01[1] + s03[2]*r01[2]); + (s03[0] * r01[0] + s03[1] * r01[1] + s03[2] * r01[2]); double a32 = 2.0 * invmass0 * - (s03[0]*r02[0] + s03[1]*r02[1] + s03[2]*r02[2]); - double a33 = 2.0 * (invmass0+invmass3) * - (s03[0]*r03[0] + s03[1]*r03[1] + s03[2]*r03[2]); + (s03[0] * r02[0] + s03[1] * r02[1] + s03[2] * r02[2]); + double a33 = 2.0 * (invmass0 + invmass3) * + (s03[0] * r03[0] + s03[1] * r03[1] + s03[2] * r03[2]); // inverse of matrix; - double determ = a11*a22*a33 + a12*a23*a31 + a13*a21*a32 - - a11*a23*a32 - a12*a21*a33 - a13*a22*a31; - if (determ == 0.0) error->one(FLERR,"Shake determinant = 0.0"); - double determinv = 1.0/determ; + double determ = a11 * a22 * a33 + a12 * a23 * a31 + a13 * a21 * a32 - + a11 * a23 * a32 - a12 * a21 * a33 - a13 * a22 * a31; - double a11inv = determinv * (a22*a33 - a23*a32); - double a12inv = -determinv * (a12*a33 - a13*a32); - double a13inv = determinv * (a12*a23 - a13*a22); - double a21inv = -determinv * (a21*a33 - a23*a31); - double a22inv = determinv * (a11*a33 - a13*a31); - double a23inv = -determinv * (a11*a23 - a13*a21); - double a31inv = determinv * (a21*a32 - a22*a31); - double a32inv = -determinv * (a11*a32 - a12*a31); - double a33inv = determinv * (a11*a22 - a12*a21); + if(determ == 0.0) error->one(FLERR, "Shake determinant = 0.0"); + + double determinv = 1.0 / determ; + + double a11inv = determinv * (a22 * a33 - a23 * a32); + double a12inv = -determinv * (a12 * a33 - a13 * a32); + double a13inv = determinv * (a12 * a23 - a13 * a22); + double a21inv = -determinv * (a21 * a33 - a23 * a31); + double a22inv = determinv * (a11 * a33 - a13 * a31); + double a23inv = -determinv * (a11 * a23 - a13 * a21); + double a31inv = determinv * (a21 * a32 - a22 * a31); + double a32inv = -determinv * (a11 * a32 - a12 * a31); + double a33inv = determinv * (a11 * a22 - a12 * a21); // quadratic correction coeffs - double r0102 = (r01[0]*r02[0] + r01[1]*r02[1] + r01[2]*r02[2]); - double r0103 = (r01[0]*r03[0] + r01[1]*r03[1] + r01[2]*r03[2]); - double r0203 = (r02[0]*r03[0] + r02[1]*r03[1] + r02[2]*r03[2]); + double r0102 = (r01[0] * r02[0] + r01[1] * r02[1] + r01[2] * r02[2]); + double r0103 = (r01[0] * r03[0] + r01[1] * r03[1] + r01[2] * r03[2]); + double r0203 = (r02[0] * r03[0] + r02[1] * r03[1] + r02[2] * r03[2]); - double quad1_0101 = (invmass0+invmass1)*(invmass0+invmass1) * r01sq; - double quad1_0202 = invmass0*invmass0 * r02sq; - double quad1_0303 = invmass0*invmass0 * r03sq; - double quad1_0102 = 2.0 * (invmass0+invmass1)*invmass0 * r0102; - double quad1_0103 = 2.0 * (invmass0+invmass1)*invmass0 * r0103; - double quad1_0203 = 2.0 * invmass0*invmass0 * r0203; + double quad1_0101 = (invmass0 + invmass1) * (invmass0 + invmass1) * r01sq; + double quad1_0202 = invmass0 * invmass0 * r02sq; + double quad1_0303 = invmass0 * invmass0 * r03sq; + double quad1_0102 = 2.0 * (invmass0 + invmass1) * invmass0 * r0102; + double quad1_0103 = 2.0 * (invmass0 + invmass1) * invmass0 * r0103; + double quad1_0203 = 2.0 * invmass0 * invmass0 * r0203; - double quad2_0101 = invmass0*invmass0 * r01sq; - double quad2_0202 = (invmass0+invmass2)*(invmass0+invmass2) * r02sq; - double quad2_0303 = invmass0*invmass0 * r03sq; - double quad2_0102 = 2.0 * (invmass0+invmass2)*invmass0 * r0102; - double quad2_0103 = 2.0 * invmass0*invmass0 * r0103; - double quad2_0203 = 2.0 * (invmass0+invmass2)*invmass0 * r0203; + double quad2_0101 = invmass0 * invmass0 * r01sq; + double quad2_0202 = (invmass0 + invmass2) * (invmass0 + invmass2) * r02sq; + double quad2_0303 = invmass0 * invmass0 * r03sq; + double quad2_0102 = 2.0 * (invmass0 + invmass2) * invmass0 * r0102; + double quad2_0103 = 2.0 * invmass0 * invmass0 * r0103; + double quad2_0203 = 2.0 * (invmass0 + invmass2) * invmass0 * r0203; - double quad3_0101 = invmass0*invmass0 * r01sq; - double quad3_0202 = invmass0*invmass0 * r02sq; - double quad3_0303 = (invmass0+invmass3)*(invmass0+invmass3) * r03sq; - double quad3_0102 = 2.0 * invmass0*invmass0 * r0102; - double quad3_0103 = 2.0 * (invmass0+invmass3)*invmass0 * r0103; - double quad3_0203 = 2.0 * (invmass0+invmass3)*invmass0 * r0203; + double quad3_0101 = invmass0 * invmass0 * r01sq; + double quad3_0202 = invmass0 * invmass0 * r02sq; + double quad3_0303 = (invmass0 + invmass3) * (invmass0 + invmass3) * r03sq; + double quad3_0102 = 2.0 * invmass0 * invmass0 * r0102; + double quad3_0103 = 2.0 * (invmass0 + invmass3) * invmass0 * r0103; + double quad3_0203 = 2.0 * (invmass0 + invmass3) * invmass0 * r0203; // iterate until converged @@ -1780,42 +1994,45 @@ void FixShakeCuda::shake4(int m) int niter = 0; int done = 0; - double quad1,quad2,quad3,b1,b2,b3,lamda01_new,lamda02_new,lamda03_new; + double quad1, quad2, quad3, b1, b2, b3, lamda01_new, lamda02_new, lamda03_new; - while (!done && niter < max_iter) { - quad1 = quad1_0101 * lamda01*lamda01 + - quad1_0202 * lamda02*lamda02 + - quad1_0303 * lamda03*lamda03 + - quad1_0102 * lamda01*lamda02 + - quad1_0103 * lamda01*lamda03 + - quad1_0203 * lamda02*lamda03; + while(!done && niter < max_iter) { + quad1 = quad1_0101 * lamda01 * lamda01 + + quad1_0202 * lamda02 * lamda02 + + quad1_0303 * lamda03 * lamda03 + + quad1_0102 * lamda01 * lamda02 + + quad1_0103 * lamda01 * lamda03 + + quad1_0203 * lamda02 * lamda03; - quad2 = quad2_0101 * lamda01*lamda01 + - quad2_0202 * lamda02*lamda02 + - quad2_0303 * lamda03*lamda03 + - quad2_0102 * lamda01*lamda02 + - quad2_0103 * lamda01*lamda03 + - quad2_0203 * lamda02*lamda03; + quad2 = quad2_0101 * lamda01 * lamda01 + + quad2_0202 * lamda02 * lamda02 + + quad2_0303 * lamda03 * lamda03 + + quad2_0102 * lamda01 * lamda02 + + quad2_0103 * lamda01 * lamda03 + + quad2_0203 * lamda02 * lamda03; - quad3 = quad3_0101 * lamda01*lamda01 + - quad3_0202 * lamda02*lamda02 + - quad3_0303 * lamda03*lamda03 + - quad3_0102 * lamda01*lamda02 + - quad3_0103 * lamda01*lamda03 + - quad3_0203 * lamda02*lamda03; + quad3 = quad3_0101 * lamda01 * lamda01 + + quad3_0202 * lamda02 * lamda02 + + quad3_0303 * lamda03 * lamda03 + + quad3_0102 * lamda01 * lamda02 + + quad3_0103 * lamda01 * lamda03 + + quad3_0203 * lamda02 * lamda03; - b1 = bond1*bond1 - s01sq - quad1; - b2 = bond2*bond2 - s02sq - quad2; - b3 = bond3*bond3 - s03sq - quad3; + b1 = bond1 * bond1 - s01sq - quad1; + b2 = bond2 * bond2 - s02sq - quad2; + b3 = bond3 * bond3 - s03sq - quad3; - lamda01_new = a11inv*b1 + a12inv*b2 + a13inv*b3; - lamda02_new = a21inv*b1 + a22inv*b2 + a23inv*b3; - lamda03_new = a31inv*b1 + a32inv*b2 + a33inv*b3; + lamda01_new = a11inv * b1 + a12inv * b2 + a13inv * b3; + lamda02_new = a21inv * b1 + a22inv * b2 + a23inv * b3; + lamda03_new = a31inv * b1 + a32inv * b2 + a33inv * b3; done = 1; - if (fabs(lamda01_new-lamda01) > tolerance) done = 0; - if (fabs(lamda02_new-lamda02) > tolerance) done = 0; - if (fabs(lamda03_new-lamda03) > tolerance) done = 0; + + if(fabs(lamda01_new - lamda01) > tolerance) done = 0; + + if(fabs(lamda02_new - lamda02) > tolerance) done = 0; + + if(fabs(lamda03_new - lamda03) > tolerance) done = 0; lamda01 = lamda01_new; lamda02 = lamda02_new; @@ -1825,50 +2042,54 @@ void FixShakeCuda::shake4(int m) // update forces if atom is owned by this processor - lamda01 = lamda01/dtfsq; - lamda02 = lamda02/dtfsq; - lamda03 = lamda03/dtfsq; + lamda01 = lamda01 / dtfsq; + lamda02 = lamda02 / dtfsq; + lamda03 = lamda03 / dtfsq; - if (i0 < nlocal) { - f[i0][0] += lamda01*r01[0] + lamda02*r02[0] + lamda03*r03[0]; - f[i0][1] += lamda01*r01[1] + lamda02*r02[1] + lamda03*r03[1]; - f[i0][2] += lamda01*r01[2] + lamda02*r02[2] + lamda03*r03[2]; + if(i0 < nlocal) { + f[i0][0] += lamda01 * r01[0] + lamda02 * r02[0] + lamda03 * r03[0]; + f[i0][1] += lamda01 * r01[1] + lamda02 * r02[1] + lamda03 * r03[1]; + f[i0][2] += lamda01 * r01[2] + lamda02 * r02[2] + lamda03 * r03[2]; } - if (i1 < nlocal) { - f[i1][0] -= lamda01*r01[0]; - f[i1][1] -= lamda01*r01[1]; - f[i1][2] -= lamda01*r01[2]; + if(i1 < nlocal) { + f[i1][0] -= lamda01 * r01[0]; + f[i1][1] -= lamda01 * r01[1]; + f[i1][2] -= lamda01 * r01[2]; } - if (i2 < nlocal) { - f[i2][0] -= lamda02*r02[0]; - f[i2][1] -= lamda02*r02[1]; - f[i2][2] -= lamda02*r02[2]; + if(i2 < nlocal) { + f[i2][0] -= lamda02 * r02[0]; + f[i2][1] -= lamda02 * r02[1]; + f[i2][2] -= lamda02 * r02[2]; } - if (i3 < nlocal) { - f[i3][0] -= lamda03*r03[0]; - f[i3][1] -= lamda03*r03[1]; - f[i3][2] -= lamda03*r03[2]; + if(i3 < nlocal) { + f[i3][0] -= lamda03 * r03[0]; + f[i3][1] -= lamda03 * r03[1]; + f[i3][2] -= lamda03 * r03[2]; } - if (evflag) { + if(evflag) { nlist = 0; - if (i0 < nlocal) list[nlist++] = i0; - if (i1 < nlocal) list[nlist++] = i1; - if (i2 < nlocal) list[nlist++] = i2; - if (i3 < nlocal) list[nlist++] = i3; - v[0] = lamda01*r01[0]*r01[0]+lamda02*r02[0]*r02[0]+lamda03*r03[0]*r03[0]; - v[1] = lamda01*r01[1]*r01[1]+lamda02*r02[1]*r02[1]+lamda03*r03[1]*r03[1]; - v[2] = lamda01*r01[2]*r01[2]+lamda02*r02[2]*r02[2]+lamda03*r03[2]*r03[2]; - v[3] = lamda01*r01[0]*r01[1]+lamda02*r02[0]*r02[1]+lamda03*r03[0]*r03[1]; - v[4] = lamda01*r01[0]*r01[2]+lamda02*r02[0]*r02[2]+lamda03*r03[0]*r03[2]; - v[5] = lamda01*r01[1]*r01[2]+lamda02*r02[1]*r02[2]+lamda03*r03[1]*r03[2]; -//if(i0==7271) printf("%lf %lf %lf %lf %lf %lf\n",v[0],v[1],v[2],v[3],v[4],v[5]); + if(i0 < nlocal) list[nlist++] = i0; - v_tally(nlist,list,4.0,v); + if(i1 < nlocal) list[nlist++] = i1; + + if(i2 < nlocal) list[nlist++] = i2; + + if(i3 < nlocal) list[nlist++] = i3; + + v[0] = lamda01 * r01[0] * r01[0] + lamda02 * r02[0] * r02[0] + lamda03 * r03[0] * r03[0]; + v[1] = lamda01 * r01[1] * r01[1] + lamda02 * r02[1] * r02[1] + lamda03 * r03[1] * r03[1]; + v[2] = lamda01 * r01[2] * r01[2] + lamda02 * r02[2] * r02[2] + lamda03 * r03[2] * r03[2]; + v[3] = lamda01 * r01[0] * r01[1] + lamda02 * r02[0] * r02[1] + lamda03 * r03[0] * r03[1]; + v[4] = lamda01 * r01[0] * r01[2] + lamda02 * r02[0] * r02[2] + lamda03 * r03[0] * r03[2]; + v[5] = lamda01 * r01[1] * r01[2] + lamda02 * r02[1] * r02[2] + lamda03 * r03[1] * r03[2]; + //if(i0==7271) printf("%lf %lf %lf %lf %lf %lf\n",v[0],v[1],v[2],v[3],v[4],v[5]); + + v_tally(nlist, list, 4.0, v); } } @@ -1876,9 +2097,9 @@ void FixShakeCuda::shake4(int m) void FixShakeCuda::shake3angle(int m) { - int nlist,list[3]; + int nlist, list[3]; double v[6]; - double invmass0,invmass1,invmass2; + double invmass0, invmass1, invmass2; // local atom IDs and constraint distances @@ -1931,87 +2152,89 @@ void FixShakeCuda::shake3angle(int m) // scalar distances between atoms - double r01sq = r01[0]*r01[0] + r01[1]*r01[1] + r01[2]*r01[2]; - double r02sq = r02[0]*r02[0] + r02[1]*r02[1] + r02[2]*r02[2]; - double r12sq = r12[0]*r12[0] + r12[1]*r12[1] + r12[2]*r12[2]; - double s01sq = s01[0]*s01[0] + s01[1]*s01[1] + s01[2]*s01[2]; - double s02sq = s02[0]*s02[0] + s02[1]*s02[1] + s02[2]*s02[2]; - double s12sq = s12[0]*s12[0] + s12[1]*s12[1] + s12[2]*s12[2]; + double r01sq = r01[0] * r01[0] + r01[1] * r01[1] + r01[2] * r01[2]; + double r02sq = r02[0] * r02[0] + r02[1] * r02[1] + r02[2] * r02[2]; + double r12sq = r12[0] * r12[0] + r12[1] * r12[1] + r12[2] * r12[2]; + double s01sq = s01[0] * s01[0] + s01[1] * s01[1] + s01[2] * s01[2]; + double s02sq = s02[0] * s02[0] + s02[1] * s02[1] + s02[2] * s02[2]; + double s12sq = s12[0] * s12[0] + s12[1] * s12[1] + s12[2] * s12[2]; // matrix coeffs and rhs for lamda equations - if (rmass) { - invmass0 = 1.0/rmass[i0]; - invmass1 = 1.0/rmass[i1]; - invmass2 = 1.0/rmass[i2]; + if(rmass) { + invmass0 = 1.0 / rmass[i0]; + invmass1 = 1.0 / rmass[i1]; + invmass2 = 1.0 / rmass[i2]; } else { - invmass0 = 1.0/mass[type[i0]]; - invmass1 = 1.0/mass[type[i1]]; - invmass2 = 1.0/mass[type[i2]]; + invmass0 = 1.0 / mass[type[i0]]; + invmass1 = 1.0 / mass[type[i1]]; + invmass2 = 1.0 / mass[type[i2]]; } - double a11 = 2.0 * (invmass0+invmass1) * - (s01[0]*r01[0] + s01[1]*r01[1] + s01[2]*r01[2]); + double a11 = 2.0 * (invmass0 + invmass1) * + (s01[0] * r01[0] + s01[1] * r01[1] + s01[2] * r01[2]); double a12 = 2.0 * invmass0 * - (s01[0]*r02[0] + s01[1]*r02[1] + s01[2]*r02[2]); + (s01[0] * r02[0] + s01[1] * r02[1] + s01[2] * r02[2]); double a13 = - 2.0 * invmass1 * - (s01[0]*r12[0] + s01[1]*r12[1] + s01[2]*r12[2]); + (s01[0] * r12[0] + s01[1] * r12[1] + s01[2] * r12[2]); double a21 = 2.0 * invmass0 * - (s02[0]*r01[0] + s02[1]*r01[1] + s02[2]*r01[2]); - double a22 = 2.0 * (invmass0+invmass2) * - (s02[0]*r02[0] + s02[1]*r02[1] + s02[2]*r02[2]); + (s02[0] * r01[0] + s02[1] * r01[1] + s02[2] * r01[2]); + double a22 = 2.0 * (invmass0 + invmass2) * + (s02[0] * r02[0] + s02[1] * r02[1] + s02[2] * r02[2]); double a23 = 2.0 * invmass2 * - (s02[0]*r12[0] + s02[1]*r12[1] + s02[2]*r12[2]); + (s02[0] * r12[0] + s02[1] * r12[1] + s02[2] * r12[2]); double a31 = - 2.0 * invmass1 * - (s12[0]*r01[0] + s12[1]*r01[1] + s12[2]*r01[2]); + (s12[0] * r01[0] + s12[1] * r01[1] + s12[2] * r01[2]); double a32 = 2.0 * invmass2 * - (s12[0]*r02[0] + s12[1]*r02[1] + s12[2]*r02[2]); - double a33 = 2.0 * (invmass1+invmass2) * - (s12[0]*r12[0] + s12[1]*r12[1] + s12[2]*r12[2]); + (s12[0] * r02[0] + s12[1] * r02[1] + s12[2] * r02[2]); + double a33 = 2.0 * (invmass1 + invmass2) * + (s12[0] * r12[0] + s12[1] * r12[1] + s12[2] * r12[2]); // inverse of matrix - double determ = a11*a22*a33 + a12*a23*a31 + a13*a21*a32 - - a11*a23*a32 - a12*a21*a33 - a13*a22*a31; - if (determ == 0.0) error->one(FLERR,"Shake determinant = 0.0"); - double determinv = 1.0/determ; + double determ = a11 * a22 * a33 + a12 * a23 * a31 + a13 * a21 * a32 - + a11 * a23 * a32 - a12 * a21 * a33 - a13 * a22 * a31; - double a11inv = determinv * (a22*a33 - a23*a32); - double a12inv = -determinv * (a12*a33 - a13*a32); - double a13inv = determinv * (a12*a23 - a13*a22); - double a21inv = -determinv * (a21*a33 - a23*a31); - double a22inv = determinv * (a11*a33 - a13*a31); - double a23inv = -determinv * (a11*a23 - a13*a21); - double a31inv = determinv * (a21*a32 - a22*a31); - double a32inv = -determinv * (a11*a32 - a12*a31); - double a33inv = determinv * (a11*a22 - a12*a21); + if(determ == 0.0) error->one(FLERR, "Shake determinant = 0.0"); + + double determinv = 1.0 / determ; + + double a11inv = determinv * (a22 * a33 - a23 * a32); + double a12inv = -determinv * (a12 * a33 - a13 * a32); + double a13inv = determinv * (a12 * a23 - a13 * a22); + double a21inv = -determinv * (a21 * a33 - a23 * a31); + double a22inv = determinv * (a11 * a33 - a13 * a31); + double a23inv = -determinv * (a11 * a23 - a13 * a21); + double a31inv = determinv * (a21 * a32 - a22 * a31); + double a32inv = -determinv * (a11 * a32 - a12 * a31); + double a33inv = determinv * (a11 * a22 - a12 * a21); // quadratic correction coeffs - double r0102 = (r01[0]*r02[0] + r01[1]*r02[1] + r01[2]*r02[2]); - double r0112 = (r01[0]*r12[0] + r01[1]*r12[1] + r01[2]*r12[2]); - double r0212 = (r02[0]*r12[0] + r02[1]*r12[1] + r02[2]*r12[2]); + double r0102 = (r01[0] * r02[0] + r01[1] * r02[1] + r01[2] * r02[2]); + double r0112 = (r01[0] * r12[0] + r01[1] * r12[1] + r01[2] * r12[2]); + double r0212 = (r02[0] * r12[0] + r02[1] * r12[1] + r02[2] * r12[2]); - double quad1_0101 = (invmass0+invmass1)*(invmass0+invmass1) * r01sq; - double quad1_0202 = invmass0*invmass0 * r02sq; - double quad1_1212 = invmass1*invmass1 * r12sq; - double quad1_0102 = 2.0 * (invmass0+invmass1)*invmass0 * r0102; - double quad1_0112 = - 2.0 * (invmass0+invmass1)*invmass1 * r0112; - double quad1_0212 = - 2.0 * invmass0*invmass1 * r0212; + double quad1_0101 = (invmass0 + invmass1) * (invmass0 + invmass1) * r01sq; + double quad1_0202 = invmass0 * invmass0 * r02sq; + double quad1_1212 = invmass1 * invmass1 * r12sq; + double quad1_0102 = 2.0 * (invmass0 + invmass1) * invmass0 * r0102; + double quad1_0112 = - 2.0 * (invmass0 + invmass1) * invmass1 * r0112; + double quad1_0212 = - 2.0 * invmass0 * invmass1 * r0212; - double quad2_0101 = invmass0*invmass0 * r01sq; - double quad2_0202 = (invmass0+invmass2)*(invmass0+invmass2) * r02sq; - double quad2_1212 = invmass2*invmass2 * r12sq; - double quad2_0102 = 2.0 * (invmass0+invmass2)*invmass0 * r0102; - double quad2_0112 = 2.0 * invmass0*invmass2 * r0112; - double quad2_0212 = 2.0 * (invmass0+invmass2)*invmass2 * r0212; + double quad2_0101 = invmass0 * invmass0 * r01sq; + double quad2_0202 = (invmass0 + invmass2) * (invmass0 + invmass2) * r02sq; + double quad2_1212 = invmass2 * invmass2 * r12sq; + double quad2_0102 = 2.0 * (invmass0 + invmass2) * invmass0 * r0102; + double quad2_0112 = 2.0 * invmass0 * invmass2 * r0112; + double quad2_0212 = 2.0 * (invmass0 + invmass2) * invmass2 * r0212; - double quad3_0101 = invmass1*invmass1 * r01sq; - double quad3_0202 = invmass2*invmass2 * r02sq; - double quad3_1212 = (invmass1+invmass2)*(invmass1+invmass2) * r12sq; - double quad3_0102 = - 2.0 * invmass1*invmass2 * r0102; - double quad3_0112 = - 2.0 * (invmass1+invmass2)*invmass1 * r0112; - double quad3_0212 = 2.0 * (invmass1+invmass2)*invmass2 * r0212; + double quad3_0101 = invmass1 * invmass1 * r01sq; + double quad3_0202 = invmass2 * invmass2 * r02sq; + double quad3_1212 = (invmass1 + invmass2) * (invmass1 + invmass2) * r12sq; + double quad3_0102 = - 2.0 * invmass1 * invmass2 * r0102; + double quad3_0112 = - 2.0 * (invmass1 + invmass2) * invmass1 * r0112; + double quad3_0212 = 2.0 * (invmass1 + invmass2) * invmass2 * r0212; // iterate until converged @@ -2021,42 +2244,45 @@ void FixShakeCuda::shake3angle(int m) int niter = 0; int done = 0; - double quad1,quad2,quad3,b1,b2,b3,lamda01_new,lamda02_new,lamda12_new; + double quad1, quad2, quad3, b1, b2, b3, lamda01_new, lamda02_new, lamda12_new; - while (!done && niter < max_iter) { - quad1 = quad1_0101 * lamda01*lamda01 + - quad1_0202 * lamda02*lamda02 + - quad1_1212 * lamda12*lamda12 + - quad1_0102 * lamda01*lamda02 + - quad1_0112 * lamda01*lamda12 + - quad1_0212 * lamda02*lamda12; + while(!done && niter < max_iter) { + quad1 = quad1_0101 * lamda01 * lamda01 + + quad1_0202 * lamda02 * lamda02 + + quad1_1212 * lamda12 * lamda12 + + quad1_0102 * lamda01 * lamda02 + + quad1_0112 * lamda01 * lamda12 + + quad1_0212 * lamda02 * lamda12; - quad2 = quad2_0101 * lamda01*lamda01 + - quad2_0202 * lamda02*lamda02 + - quad2_1212 * lamda12*lamda12 + - quad2_0102 * lamda01*lamda02 + - quad2_0112 * lamda01*lamda12 + - quad2_0212 * lamda02*lamda12; + quad2 = quad2_0101 * lamda01 * lamda01 + + quad2_0202 * lamda02 * lamda02 + + quad2_1212 * lamda12 * lamda12 + + quad2_0102 * lamda01 * lamda02 + + quad2_0112 * lamda01 * lamda12 + + quad2_0212 * lamda02 * lamda12; - quad3 = quad3_0101 * lamda01*lamda01 + - quad3_0202 * lamda02*lamda02 + - quad3_1212 * lamda12*lamda12 + - quad3_0102 * lamda01*lamda02 + - quad3_0112 * lamda01*lamda12 + - quad3_0212 * lamda02*lamda12; + quad3 = quad3_0101 * lamda01 * lamda01 + + quad3_0202 * lamda02 * lamda02 + + quad3_1212 * lamda12 * lamda12 + + quad3_0102 * lamda01 * lamda02 + + quad3_0112 * lamda01 * lamda12 + + quad3_0212 * lamda02 * lamda12; - b1 = bond1*bond1 - s01sq - quad1; - b2 = bond2*bond2 - s02sq - quad2; - b3 = bond12*bond12 - s12sq - quad3; + b1 = bond1 * bond1 - s01sq - quad1; + b2 = bond2 * bond2 - s02sq - quad2; + b3 = bond12 * bond12 - s12sq - quad3; - lamda01_new = a11inv*b1 + a12inv*b2 + a13inv*b3; - lamda02_new = a21inv*b1 + a22inv*b2 + a23inv*b3; - lamda12_new = a31inv*b1 + a32inv*b2 + a33inv*b3; + lamda01_new = a11inv * b1 + a12inv * b2 + a13inv * b3; + lamda02_new = a21inv * b1 + a22inv * b2 + a23inv * b3; + lamda12_new = a31inv * b1 + a32inv * b2 + a33inv * b3; done = 1; - if (fabs(lamda01_new-lamda01) > tolerance) done = 0; - if (fabs(lamda02_new-lamda02) > tolerance) done = 0; - if (fabs(lamda12_new-lamda12) > tolerance) done = 0; + + if(fabs(lamda01_new - lamda01) > tolerance) done = 0; + + if(fabs(lamda02_new - lamda02) > tolerance) done = 0; + + if(fabs(lamda12_new - lamda12) > tolerance) done = 0; lamda01 = lamda01_new; lamda02 = lamda02_new; @@ -2066,42 +2292,45 @@ void FixShakeCuda::shake3angle(int m) // update forces if atom is owned by this processor - lamda01 = lamda01/dtfsq; - lamda02 = lamda02/dtfsq; - lamda12 = lamda12/dtfsq; + lamda01 = lamda01 / dtfsq; + lamda02 = lamda02 / dtfsq; + lamda12 = lamda12 / dtfsq; - if (i0 < nlocal) { - f[i0][0] += lamda01*r01[0] + lamda02*r02[0]; - f[i0][1] += lamda01*r01[1] + lamda02*r02[1]; - f[i0][2] += lamda01*r01[2] + lamda02*r02[2]; + if(i0 < nlocal) { + f[i0][0] += lamda01 * r01[0] + lamda02 * r02[0]; + f[i0][1] += lamda01 * r01[1] + lamda02 * r02[1]; + f[i0][2] += lamda01 * r01[2] + lamda02 * r02[2]; } - if (i1 < nlocal) { - f[i1][0] -= lamda01*r01[0] - lamda12*r12[0]; - f[i1][1] -= lamda01*r01[1] - lamda12*r12[1]; - f[i1][2] -= lamda01*r01[2] - lamda12*r12[2]; + if(i1 < nlocal) { + f[i1][0] -= lamda01 * r01[0] - lamda12 * r12[0]; + f[i1][1] -= lamda01 * r01[1] - lamda12 * r12[1]; + f[i1][2] -= lamda01 * r01[2] - lamda12 * r12[2]; } - if (i2 < nlocal) { - f[i2][0] -= lamda02*r02[0] + lamda12*r12[0]; - f[i2][1] -= lamda02*r02[1] + lamda12*r12[1]; - f[i2][2] -= lamda02*r02[2] + lamda12*r12[2]; + if(i2 < nlocal) { + f[i2][0] -= lamda02 * r02[0] + lamda12 * r12[0]; + f[i2][1] -= lamda02 * r02[1] + lamda12 * r12[1]; + f[i2][2] -= lamda02 * r02[2] + lamda12 * r12[2]; } - if (evflag) { + if(evflag) { nlist = 0; - if (i0 < nlocal) list[nlist++] = i0; - if (i1 < nlocal) list[nlist++] = i1; - if (i2 < nlocal) list[nlist++] = i2; - v[0] = lamda01*r01[0]*r01[0]+lamda02*r02[0]*r02[0]+lamda12*r12[0]*r12[0]; - v[1] = lamda01*r01[1]*r01[1]+lamda02*r02[1]*r02[1]+lamda12*r12[1]*r12[1]; - v[2] = lamda01*r01[2]*r01[2]+lamda02*r02[2]*r02[2]+lamda12*r12[2]*r12[2]; - v[3] = lamda01*r01[0]*r01[1]+lamda02*r02[0]*r02[1]+lamda12*r12[0]*r12[1]; - v[4] = lamda01*r01[0]*r01[2]+lamda02*r02[0]*r02[2]+lamda12*r12[0]*r12[2]; - v[5] = lamda01*r01[1]*r01[2]+lamda02*r02[1]*r02[2]+lamda12*r12[1]*r12[2]; + if(i0 < nlocal) list[nlist++] = i0; - v_tally(nlist,list,3.0,v); + if(i1 < nlocal) list[nlist++] = i1; + + if(i2 < nlocal) list[nlist++] = i2; + + v[0] = lamda01 * r01[0] * r01[0] + lamda02 * r02[0] * r02[0] + lamda12 * r12[0] * r12[0]; + v[1] = lamda01 * r01[1] * r01[1] + lamda02 * r02[1] * r02[1] + lamda12 * r12[1] * r12[1]; + v[2] = lamda01 * r01[2] * r01[2] + lamda02 * r02[2] * r02[2] + lamda12 * r12[2] * r12[2]; + v[3] = lamda01 * r01[0] * r01[1] + lamda02 * r02[0] * r02[1] + lamda12 * r12[0] * r12[1]; + v[4] = lamda01 * r01[0] * r01[2] + lamda02 * r02[0] * r02[2] + lamda12 * r12[0] * r12[2]; + v[5] = lamda01 * r01[1] * r01[2] + lamda02 * r02[1] * r02[2] + lamda12 * r12[1] * r12[2]; + + v_tally(nlist, list, 3.0, v); } } @@ -2111,21 +2340,22 @@ void FixShakeCuda::shake3angle(int m) void FixShakeCuda::stats() { - int i,j,m,n,iatom,jatom,katom; - double delx,dely,delz; - double r,r1,r2,r3,angle; + int i, j, m, n, iatom, jatom, katom; + double delx, dely, delz; + double r, r1, r2, r3, angle; // zero out accumulators int nb = atom->nbondtypes + 1; int na = atom->nangletypes + 1; - for (i = 0; i < nb; i++) { + for(i = 0; i < nb; i++) { b_count[i] = 0; b_ave[i] = b_max[i] = 0.0; b_min[i] = BIG; } - for (i = 0; i < na; i++) { + + for(i = 0; i < na; i++) { a_count[i] = 0; a_ave[i] = a_max[i] = 0.0; a_min[i] = BIG; @@ -2134,35 +2364,38 @@ void FixShakeCuda::stats() // log stats for each bond & angle // OK to double count since are just averaging - double **x = atom->x; + double** x = atom->x; int nlocal = atom->nlocal; - for (i = 0; i < nlocal; i++) { - if (shake_flag[i] == 0) continue; + for(i = 0; i < nlocal; i++) { + if(shake_flag[i] == 0) continue; // bond stats n = shake_flag[i]; - if (n == 1) n = 3; + + if(n == 1) n = 3; + iatom = atom->map(shake_atom[i][0]); - for (j = 1; j < n; j++) { + + for(j = 1; j < n; j++) { jatom = atom->map(shake_atom[i][j]); delx = x[iatom][0] - x[jatom][0]; dely = x[iatom][1] - x[jatom][1]; delz = x[iatom][2] - x[jatom][2]; - domain->minimum_image(delx,dely,delz); - r = sqrt(delx*delx + dely*dely + delz*delz); + domain->minimum_image(delx, dely, delz); + r = sqrt(delx * delx + dely * dely + delz * delz); - m = shake_type[i][j-1]; + m = shake_type[i][j - 1]; b_count[m]++; b_ave[m] += r; - b_max[m] = MAX(b_max[m],r); - b_min[m] = MIN(b_min[m],r); + b_max[m] = MAX(b_max[m], r); + b_min[m] = MIN(b_min[m], r); } // angle stats - if (shake_flag[i] == 1) { + if(shake_flag[i] == 1) { iatom = atom->map(shake_atom[i][0]); jatom = atom->map(shake_atom[i][1]); katom = atom->map(shake_atom[i][2]); @@ -2170,71 +2403,76 @@ void FixShakeCuda::stats() delx = x[iatom][0] - x[jatom][0]; dely = x[iatom][1] - x[jatom][1]; delz = x[iatom][2] - x[jatom][2]; - domain->minimum_image(delx,dely,delz); - r1 = sqrt(delx*delx + dely*dely + delz*delz); + domain->minimum_image(delx, dely, delz); + r1 = sqrt(delx * delx + dely * dely + delz * delz); delx = x[iatom][0] - x[katom][0]; dely = x[iatom][1] - x[katom][1]; delz = x[iatom][2] - x[katom][2]; - domain->minimum_image(delx,dely,delz); - r2 = sqrt(delx*delx + dely*dely + delz*delz); + domain->minimum_image(delx, dely, delz); + r2 = sqrt(delx * delx + dely * dely + delz * delz); delx = x[jatom][0] - x[katom][0]; dely = x[jatom][1] - x[katom][1]; delz = x[jatom][2] - x[katom][2]; - domain->minimum_image(delx,dely,delz); - r3 = sqrt(delx*delx + dely*dely + delz*delz); + domain->minimum_image(delx, dely, delz); + r3 = sqrt(delx * delx + dely * dely + delz * delz); - angle = acos((r1*r1 + r2*r2 - r3*r3) / (2.0*r1*r2)); - angle *= 180.0/MY_PI; + angle = acos((r1 * r1 + r2 * r2 - r3 * r3) / (2.0 * r1 * r2)); + angle *= 180.0 / MY_PI; m = shake_type[i][2]; a_count[m]++; a_ave[m] += angle; - a_max[m] = MAX(a_max[m],angle); - a_min[m] = MIN(a_min[m],angle); + a_max[m] = MAX(a_max[m], angle); + a_min[m] = MIN(a_min[m], angle); } } // sum across all procs - MPI_Allreduce(b_count,b_count_all,nb,MPI_INT,MPI_SUM,world); - MPI_Allreduce(b_ave,b_ave_all,nb,MPI_DOUBLE,MPI_SUM,world); - MPI_Allreduce(b_max,b_max_all,nb,MPI_DOUBLE,MPI_MAX,world); - MPI_Allreduce(b_min,b_min_all,nb,MPI_DOUBLE,MPI_MIN,world); + MPI_Allreduce(b_count, b_count_all, nb, MPI_INT, MPI_SUM, world); + MPI_Allreduce(b_ave, b_ave_all, nb, MPI_DOUBLE, MPI_SUM, world); + MPI_Allreduce(b_max, b_max_all, nb, MPI_DOUBLE, MPI_MAX, world); + MPI_Allreduce(b_min, b_min_all, nb, MPI_DOUBLE, MPI_MIN, world); - MPI_Allreduce(a_count,a_count_all,na,MPI_INT,MPI_SUM,world); - MPI_Allreduce(a_ave,a_ave_all,na,MPI_DOUBLE,MPI_SUM,world); - MPI_Allreduce(a_max,a_max_all,na,MPI_DOUBLE,MPI_MAX,world); - MPI_Allreduce(a_min,a_min_all,na,MPI_DOUBLE,MPI_MIN,world); + MPI_Allreduce(a_count, a_count_all, na, MPI_INT, MPI_SUM, world); + MPI_Allreduce(a_ave, a_ave_all, na, MPI_DOUBLE, MPI_SUM, world); + MPI_Allreduce(a_max, a_max_all, na, MPI_DOUBLE, MPI_MAX, world); + MPI_Allreduce(a_min, a_min_all, na, MPI_DOUBLE, MPI_MIN, world); // print stats only for non-zero counts - if (me == 0) { - if (screen) { + if(me == 0) { + if(screen) { fprintf(screen, "SHAKE stats (type/ave/delta) on step " BIGINT_FORMAT "\n", update->ntimestep); - for (i = 1; i < nb; i++) - if (b_count_all[i]) - fprintf(screen," %d %g %g\n",i, - b_ave_all[i]/b_count_all[i],b_max_all[i]-b_min_all[i]); - for (i = 1; i < na; i++) - if (a_count_all[i]) - fprintf(screen," %d %g %g\n",i, - a_ave_all[i]/a_count_all[i],a_max_all[i]-a_min_all[i]); + + for(i = 1; i < nb; i++) + if(b_count_all[i]) + fprintf(screen, " %d %g %g\n", i, + b_ave_all[i] / b_count_all[i], b_max_all[i] - b_min_all[i]); + + for(i = 1; i < na; i++) + if(a_count_all[i]) + fprintf(screen, " %d %g %g\n", i, + a_ave_all[i] / a_count_all[i], a_max_all[i] - a_min_all[i]); } - if (logfile) { + + if(logfile) { fprintf(logfile, "SHAKE stats (type/ave/delta) on step " BIGINT_FORMAT "\n", update->ntimestep); - for (i = 0; i < nb; i++) - if (b_count_all[i]) - fprintf(logfile," %d %g %g\n",i, - b_ave_all[i]/b_count_all[i],b_max_all[i]-b_min_all[i]); - for (i = 0; i < na; i++) - if (a_count_all[i]) - fprintf(logfile," %d %g %g\n",i, - a_ave_all[i]/a_count_all[i],a_max_all[i]-a_min_all[i]); + + for(i = 0; i < nb; i++) + if(b_count_all[i]) + fprintf(logfile, " %d %g %g\n", i, + b_ave_all[i] / b_count_all[i], b_max_all[i] - b_min_all[i]); + + for(i = 0; i < na; i++) + if(a_count_all[i]) + fprintf(logfile, " %d %g %g\n", i, + a_ave_all[i] / a_count_all[i], a_max_all[i] - a_min_all[i]); } } @@ -2251,16 +2489,20 @@ void FixShakeCuda::stats() int FixShakeCuda::bondfind(int i, int n1, int n2) { - int *tag = atom->tag; - int **bond_atom = atom->bond_atom; + int* tag = atom->tag; + int** bond_atom = atom->bond_atom; int nbonds = atom->num_bond[i]; int m; - for (m = 0; m < nbonds; m++) { - if (n1 == tag[i] && n2 == bond_atom[i][m]) break; - if (n1 == bond_atom[i][m] && n2 == tag[i]) break; + + for(m = 0; m < nbonds; m++) { + if(n1 == tag[i] && n2 == bond_atom[i][m]) break; + + if(n1 == bond_atom[i][m] && n2 == tag[i]) break; } - if (m < nbonds) return m; + + if(m < nbonds) return m; + return -1; } @@ -2272,16 +2514,20 @@ int FixShakeCuda::bondfind(int i, int n1, int n2) int FixShakeCuda::anglefind(int i, int n1, int n2) { - int **angle_atom1 = atom->angle_atom1; - int **angle_atom3 = atom->angle_atom3; + int** angle_atom1 = atom->angle_atom1; + int** angle_atom3 = atom->angle_atom3; int nangles = atom->num_angle[i]; int m; - for (m = 0; m < nangles; m++) { - if (n1 == angle_atom1[i][m] && n2 == angle_atom3[i][m]) break; - if (n1 == angle_atom3[i][m] && n2 == angle_atom1[i][m]) break; + + for(m = 0; m < nangles; m++) { + if(n1 == angle_atom1[i][m] && n2 == angle_atom3[i][m]) break; + + if(n1 == angle_atom3[i][m] && n2 == angle_atom1[i][m]) break; } - if (m < nangles) return m; + + if(m < nangles) return m; + return -1; } @@ -2293,10 +2539,10 @@ double FixShakeCuda::memory_usage() { int nmax = atom->nmax; double bytes = nmax * sizeof(int); - bytes += nmax*4 * sizeof(int); - bytes += nmax*3 * sizeof(int); - bytes += nmax*3 * sizeof(double); - bytes += maxvatom*6 * sizeof(double); + bytes += nmax * 4 * sizeof(int); + bytes += nmax * 3 * sizeof(int); + bytes += nmax * 3 * sizeof(double); + bytes += maxvatom * 6 * sizeof(double); return bytes; } @@ -2306,24 +2552,29 @@ double FixShakeCuda::memory_usage() void FixShakeCuda::grow_arrays(int nmax) { - memory->grow(shake_flag,nmax,"shake:shake_flag"); - memory->grow(shake_atom,nmax,4,"shake:shake_atom"); - memory->grow(shake_type,nmax,3,"shake:shake_type"); + memory->grow(shake_flag, nmax, "shake:shake_flag"); + memory->grow(shake_atom, nmax, 4, "shake:shake_atom"); + memory->grow(shake_type, nmax, 3, "shake:shake_type"); memory->destroy(xshake); - memory->create(xshake,nmax,3,"shake:xshake"); + memory->create(xshake, nmax, 3, "shake:xshake"); - delete cu_shake_flag; cu_shake_flag = new cCudaData (shake_flag, nmax ); - delete cu_shake_atom; cu_shake_atom = new cCudaData ((int*)shake_atom, nmax, 4); - delete cu_shake_type; cu_shake_type = new cCudaData ((int*)shake_type, nmax, 3); - delete cu_xshake; cu_xshake = new cCudaData ((double*)xshake, nmax, 3); + delete cu_shake_flag; + cu_shake_flag = new cCudaData (shake_flag, nmax); + delete cu_shake_atom; + cu_shake_atom = new cCudaData ((int*)shake_atom, nmax, 4); + delete cu_shake_type; + cu_shake_type = new cCudaData ((int*)shake_type, nmax, 3); + delete cu_xshake; + cu_xshake = new cCudaData ((double*)xshake, nmax, 3); cu_shake_flag->upload(); cu_shake_atom->upload(); cu_shake_type->upload(); + if(cu_bond_distance) - Cuda_FixShakeCuda_Init(&cuda->shared_data,dtv, dtfsq, - cu_shake_flag->dev_data(),cu_shake_atom->dev_data(),cu_shake_type->dev_data(), cu_xshake->dev_data(), - cu_bond_distance->dev_data(),cu_angle_distance->dev_data(),cu_virial->dev_data(), - max_iter,tolerance); + Cuda_FixShakeCuda_Init(&cuda->shared_data, dtv, dtfsq, + cu_shake_flag->dev_data(), cu_shake_atom->dev_data(), cu_shake_type->dev_data(), cu_xshake->dev_data(), + cu_bond_distance->dev_data(), cu_angle_distance->dev_data(), cu_virial->dev_data(), + max_iter, tolerance); } /* ---------------------------------------------------------------------- @@ -2333,24 +2584,25 @@ void FixShakeCuda::grow_arrays(int nmax) void FixShakeCuda::copy_arrays(int i, int j) { int flag = shake_flag[j] = shake_flag[i]; - if (flag == 1) { + + if(flag == 1) { shake_atom[j][0] = shake_atom[i][0]; shake_atom[j][1] = shake_atom[i][1]; shake_atom[j][2] = shake_atom[i][2]; shake_type[j][0] = shake_type[i][0]; shake_type[j][1] = shake_type[i][1]; shake_type[j][2] = shake_type[i][2]; - } else if (flag == 2) { + } else if(flag == 2) { shake_atom[j][0] = shake_atom[i][0]; shake_atom[j][1] = shake_atom[i][1]; shake_type[j][0] = shake_type[i][0]; - } else if (flag == 3) { + } else if(flag == 3) { shake_atom[j][0] = shake_atom[i][0]; shake_atom[j][1] = shake_atom[i][1]; shake_atom[j][2] = shake_atom[i][2]; shake_type[j][0] = shake_type[i][0]; shake_type[j][1] = shake_type[i][1]; - } else if (flag == 4) { + } else if(flag == 4) { shake_atom[j][0] = shake_atom[i][0]; shake_atom[j][1] = shake_atom[i][1]; shake_atom[j][2] = shake_atom[i][2]; @@ -2374,29 +2626,30 @@ void FixShakeCuda::set_arrays(int i) pack values in local atom-based arrays for exchange with another proc ------------------------------------------------------------------------- */ -int FixShakeCuda::pack_exchange(int i, double *buf) +int FixShakeCuda::pack_exchange(int i, double* buf) { int m = 0; buf[m++] = shake_flag[i]; int flag = shake_flag[i]; - if (flag == 1) { + + if(flag == 1) { buf[m++] = shake_atom[i][0]; buf[m++] = shake_atom[i][1]; buf[m++] = shake_atom[i][2]; buf[m++] = shake_type[i][0]; buf[m++] = shake_type[i][1]; buf[m++] = shake_type[i][2]; - } else if (flag == 2) { + } else if(flag == 2) { buf[m++] = shake_atom[i][0]; buf[m++] = shake_atom[i][1]; buf[m++] = shake_type[i][0]; - } else if (flag == 3) { + } else if(flag == 3) { buf[m++] = shake_atom[i][0]; buf[m++] = shake_atom[i][1]; buf[m++] = shake_atom[i][2]; buf[m++] = shake_type[i][0]; buf[m++] = shake_type[i][1]; - } else if (flag == 4) { + } else if(flag == 4) { buf[m++] = shake_atom[i][0]; buf[m++] = shake_atom[i][1]; buf[m++] = shake_atom[i][2]; @@ -2405,6 +2658,7 @@ int FixShakeCuda::pack_exchange(int i, double *buf) buf[m++] = shake_type[i][1]; buf[m++] = shake_type[i][2]; } + return m; } @@ -2412,36 +2666,38 @@ int FixShakeCuda::pack_exchange(int i, double *buf) unpack values in local atom-based arrays from exchange with another proc ------------------------------------------------------------------------- */ -int FixShakeCuda::unpack_exchange(int nlocal, double *buf) +int FixShakeCuda::unpack_exchange(int nlocal, double* buf) { int m = 0; - int flag = shake_flag[nlocal] = static_cast (buf[m++]); - if (flag == 1) { - shake_atom[nlocal][0] = static_cast (buf[m++]); - shake_atom[nlocal][1] = static_cast (buf[m++]); - shake_atom[nlocal][2] = static_cast (buf[m++]); - shake_type[nlocal][0] = static_cast (buf[m++]); - shake_type[nlocal][1] = static_cast (buf[m++]); - shake_type[nlocal][2] = static_cast (buf[m++]); - } else if (flag == 2) { - shake_atom[nlocal][0] = static_cast (buf[m++]); - shake_atom[nlocal][1] = static_cast (buf[m++]); - shake_type[nlocal][0] = static_cast (buf[m++]); - } else if (flag == 3) { - shake_atom[nlocal][0] = static_cast (buf[m++]); - shake_atom[nlocal][1] = static_cast (buf[m++]); - shake_atom[nlocal][2] = static_cast (buf[m++]); - shake_type[nlocal][0] = static_cast (buf[m++]); - shake_type[nlocal][1] = static_cast (buf[m++]); - } else if (flag == 4) { - shake_atom[nlocal][0] = static_cast (buf[m++]); - shake_atom[nlocal][1] = static_cast (buf[m++]); - shake_atom[nlocal][2] = static_cast (buf[m++]); - shake_atom[nlocal][3] = static_cast (buf[m++]); - shake_type[nlocal][0] = static_cast (buf[m++]); - shake_type[nlocal][1] = static_cast (buf[m++]); - shake_type[nlocal][2] = static_cast (buf[m++]); + int flag = shake_flag[nlocal] = static_cast(buf[m++]); + + if(flag == 1) { + shake_atom[nlocal][0] = static_cast(buf[m++]); + shake_atom[nlocal][1] = static_cast(buf[m++]); + shake_atom[nlocal][2] = static_cast(buf[m++]); + shake_type[nlocal][0] = static_cast(buf[m++]); + shake_type[nlocal][1] = static_cast(buf[m++]); + shake_type[nlocal][2] = static_cast(buf[m++]); + } else if(flag == 2) { + shake_atom[nlocal][0] = static_cast(buf[m++]); + shake_atom[nlocal][1] = static_cast(buf[m++]); + shake_type[nlocal][0] = static_cast(buf[m++]); + } else if(flag == 3) { + shake_atom[nlocal][0] = static_cast(buf[m++]); + shake_atom[nlocal][1] = static_cast(buf[m++]); + shake_atom[nlocal][2] = static_cast(buf[m++]); + shake_type[nlocal][0] = static_cast(buf[m++]); + shake_type[nlocal][1] = static_cast(buf[m++]); + } else if(flag == 4) { + shake_atom[nlocal][0] = static_cast(buf[m++]); + shake_atom[nlocal][1] = static_cast(buf[m++]); + shake_atom[nlocal][2] = static_cast(buf[m++]); + shake_atom[nlocal][3] = static_cast(buf[m++]); + shake_type[nlocal][0] = static_cast(buf[m++]); + shake_type[nlocal][1] = static_cast(buf[m++]); + shake_type[nlocal][2] = static_cast(buf[m++]); } + return m; } @@ -2455,12 +2711,12 @@ void FixShakeCuda::post_force_respa(int vflag, int ilevel, int iloop) { // call stats only on outermost level - if (ilevel == nlevels_respa-1 && update->ntimestep == next_output) stats(); + if(ilevel == nlevels_respa - 1 && update->ntimestep == next_output) stats(); // perform SHAKE on every loop iteration of every rRESPA level // except last loop iteration of inner levels - if (ilevel < nlevels_respa-1 && iloop == loop_respa[ilevel]-1) return; + if(ilevel < nlevels_respa - 1 && iloop == loop_respa[ilevel] - 1) return; // xshake = atom coords after next x update in innermost loop // depends on rRESPA level @@ -2468,42 +2724,44 @@ void FixShakeCuda::post_force_respa(int vflag, int ilevel, int iloop) // xshake = predicted position from call to this routine at level N = // x + dt0 (v + dtN/m fN + 1/2 dt(N-1)/m f(N-1) + ... + 1/2 dt0/m f0) - double ***f_level = ((FixRespa *) modify->fix[ifix_respa])->f_level; + double** *f_level = ((FixRespa*) modify->fix[ifix_respa])->f_level; dtfsq = dtf_inner * step_respa[ilevel]; - double invmass,dtfmsq; + double invmass, dtfmsq; int jlevel; - if (rmass) { - for (int i = 0; i < nlocal; i++) { - if (shake_flag[i]) { + if(rmass) { + for(int i = 0; i < nlocal; i++) { + if(shake_flag[i]) { invmass = 1.0 / rmass[i]; dtfmsq = dtfsq * invmass; - xshake[i][0] = x[i][0] + dtv*v[i][0] + dtfmsq*f[i][0]; - xshake[i][1] = x[i][1] + dtv*v[i][1] + dtfmsq*f[i][1]; - xshake[i][2] = x[i][2] + dtv*v[i][2] + dtfmsq*f[i][2]; - for (jlevel = 0; jlevel < ilevel; jlevel++) { + xshake[i][0] = x[i][0] + dtv * v[i][0] + dtfmsq * f[i][0]; + xshake[i][1] = x[i][1] + dtv * v[i][1] + dtfmsq * f[i][1]; + xshake[i][2] = x[i][2] + dtv * v[i][2] + dtfmsq * f[i][2]; + + for(jlevel = 0; jlevel < ilevel; jlevel++) { dtfmsq = dtf_innerhalf * step_respa[jlevel] * invmass; - xshake[i][0] += dtfmsq*f_level[i][jlevel][0]; - xshake[i][1] += dtfmsq*f_level[i][jlevel][1]; - xshake[i][2] += dtfmsq*f_level[i][jlevel][2]; + xshake[i][0] += dtfmsq * f_level[i][jlevel][0]; + xshake[i][1] += dtfmsq * f_level[i][jlevel][1]; + xshake[i][2] += dtfmsq * f_level[i][jlevel][2]; } } else xshake[i][2] = xshake[i][1] = xshake[i][0] = 0.0; } } else { - for (int i = 0; i < nlocal; i++) { - if (shake_flag[i]) { + for(int i = 0; i < nlocal; i++) { + if(shake_flag[i]) { invmass = 1.0 / mass[type[i]]; dtfmsq = dtfsq * invmass; - xshake[i][0] = x[i][0] + dtv*v[i][0] + dtfmsq*f[i][0]; - xshake[i][1] = x[i][1] + dtv*v[i][1] + dtfmsq*f[i][1]; - xshake[i][2] = x[i][2] + dtv*v[i][2] + dtfmsq*f[i][2]; - for (jlevel = 0; jlevel < ilevel; jlevel++) { + xshake[i][0] = x[i][0] + dtv * v[i][0] + dtfmsq * f[i][0]; + xshake[i][1] = x[i][1] + dtv * v[i][1] + dtfmsq * f[i][1]; + xshake[i][2] = x[i][2] + dtv * v[i][2] + dtfmsq * f[i][2]; + + for(jlevel = 0; jlevel < ilevel; jlevel++) { dtfmsq = dtf_innerhalf * step_respa[jlevel] * invmass; - xshake[i][0] += dtfmsq*f_level[i][jlevel][0]; - xshake[i][1] += dtfmsq*f_level[i][jlevel][1]; - xshake[i][2] += dtfmsq*f_level[i][jlevel][2]; + xshake[i][0] += dtfmsq * f_level[i][jlevel][0]; + xshake[i][1] += dtfmsq * f_level[i][jlevel][1]; + xshake[i][2] += dtfmsq * f_level[i][jlevel][2]; } } else xshake[i][2] = xshake[i][1] = xshake[i][0] = 0.0; } @@ -2511,21 +2769,23 @@ void FixShakeCuda::post_force_respa(int vflag, int ilevel, int iloop) // communicate results if necessary - if (nprocs > 1) comm->forward_comm_fix(this); + if(nprocs > 1) comm->forward_comm_fix(this); // virial setup - if (vflag) v_setup(vflag); + if(vflag) v_setup(vflag); else evflag = 0; // loop over clusters int m; - for (int i = 0; i < nlist; i++) { + + for(int i = 0; i < nlist; i++) { m = list[i]; - if (shake_flag[m] == 2) shake2(m); - else if (shake_flag[m] == 3) shake3(m); - else if (shake_flag[m] == 4) shake4(m); + + if(shake_flag[m] == 2) shake2(m); + else if(shake_flag[m] == 3) shake3(m); + else if(shake_flag[m] == 4) shake4(m); else shake3angle(m); } } @@ -2533,68 +2793,70 @@ void FixShakeCuda::post_force_respa(int vflag, int ilevel, int iloop) /* ---------------------------------------------------------------------- */ -int FixShakeCuda::pack_comm(int n, int *list, double *buf, int pbc_flag, int *pbc) +int FixShakeCuda::pack_comm(int n, int* list, double* buf, int pbc_flag, int* pbc) { - if(cuda->finished_setup) - { - int iswap=*list; - if(iswap<0) - { - iswap=-iswap-1; - int first= ((int*) buf)[0]; - Cuda_FixShakeCuda_PackComm_Self(&cuda->shared_data,n,iswap,first,pbc,pbc_flag); - } - else - Cuda_FixShakeCuda_PackComm(&cuda->shared_data,n,iswap,(void*) buf,pbc,pbc_flag); - return 3; + if(cuda->finished_setup) { + int iswap = *list; + + if(iswap < 0) { + iswap = -iswap - 1; + int first = ((int*) buf)[0]; + Cuda_FixShakeCuda_PackComm_Self(&cuda->shared_data, n, iswap, first, pbc, pbc_flag); + } else + Cuda_FixShakeCuda_PackComm(&cuda->shared_data, n, iswap, (void*) buf, pbc, pbc_flag); + + return 3; } - int i,j,m; - double dx,dy,dz; + int i, j, m; + double dx, dy, dz; m = 0; - if (pbc_flag == 0) { - for (i = 0; i < n; i++) { + + if(pbc_flag == 0) { + for(i = 0; i < n; i++) { j = list[i]; buf[m++] = xshake[j][0]; buf[m++] = xshake[j][1]; buf[m++] = xshake[j][2]; } } else { - if (domain->triclinic == 0) { - dx = pbc[0]*domain->xprd; - dy = pbc[1]*domain->yprd; - dz = pbc[2]*domain->zprd; + if(domain->triclinic == 0) { + dx = pbc[0] * domain->xprd; + dy = pbc[1] * domain->yprd; + dz = pbc[2] * domain->zprd; } else { - dx = pbc[0]*domain->xprd + pbc[5]*domain->xy + pbc[4]*domain->xz; - dy = pbc[1]*domain->yprd + pbc[3]*domain->yz; - dz = pbc[2]*domain->zprd; + dx = pbc[0] * domain->xprd + pbc[5] * domain->xy + pbc[4] * domain->xz; + dy = pbc[1] * domain->yprd + pbc[3] * domain->yz; + dz = pbc[2] * domain->zprd; } - for (i = 0; i < n; i++) { + + for(i = 0; i < n; i++) { j = list[i]; buf[m++] = xshake[j][0] + dx; buf[m++] = xshake[j][1] + dy; buf[m++] = xshake[j][2] + dz; } } + return 3; } /* ---------------------------------------------------------------------- */ -void FixShakeCuda::unpack_comm(int n, int first, double *buf) +void FixShakeCuda::unpack_comm(int n, int first, double* buf) { - if(cuda->finished_setup) - { - Cuda_FixShakeCuda_UnpackComm(&cuda->shared_data,n,first,(void*)buf); - return; + if(cuda->finished_setup) { + Cuda_FixShakeCuda_UnpackComm(&cuda->shared_data, n, first, (void*)buf); + return; } - int i,m,last; + int i, m, last; m = 0; last = first + n; - for (i = first; i < last; i++) { + + for(i = first; i < last; i++) { xshake[i][0] = buf[m++]; xshake[i][1] = buf[m++]; xshake[i][2] = buf[m++]; @@ -2605,7 +2867,7 @@ void FixShakeCuda::unpack_comm(int n, int first, double *buf) void FixShakeCuda::reset_dt() { - if (strstr(update->integrate_style,"verlet")) { + if(strstr(update->integrate_style, "verlet")) { dtv = update->dt; dtfsq = update->dt * update->dt * force->ftm2v; } else { @@ -2613,9 +2875,10 @@ void FixShakeCuda::reset_dt() dtf_innerhalf = 0.5 * step_respa[0] * force->ftm2v; dtf_inner = step_respa[0] * force->ftm2v; } + if(cu_shake_atom) - Cuda_FixShakeCuda_Init(&cuda->shared_data,dtv, dtfsq, - cu_shake_flag->dev_data(),cu_shake_atom->dev_data(),cu_shake_type->dev_data(), cu_xshake->dev_data(), - cu_bond_distance->dev_data(),cu_angle_distance->dev_data(),cu_virial->dev_data(), - max_iter,tolerance); + Cuda_FixShakeCuda_Init(&cuda->shared_data, dtv, dtfsq, + cu_shake_flag->dev_data(), cu_shake_atom->dev_data(), cu_shake_type->dev_data(), cu_xshake->dev_data(), + cu_bond_distance->dev_data(), cu_angle_distance->dev_data(), cu_virial->dev_data(), + max_iter, tolerance); }