Cleaned up baseline code, prior to parallelization

2022-06-24 17:02:12 -06:00
parent 14d472d691
commit 2f1d320510
2 changed files with 347 additions and 325 deletions
--- a/examples/snap/compute_snap_dgrad.py
+++ b/examples/snap/compute_snap_dgrad.py
@ -1,8 +1,8 @@
 """
 compute_snap_dgrad.py
 Purpose: Demonstrate extraction of descriptor gradient (dB/dR) array from compute snap.
-         Show that dBi/dRj components summed over neighbors i yields same output as regular compute snap with dgradflag=0.
-         This shows that the dBi/dRj components extracted with dgradflag=1 are correct.
+         Show that dBi/dRj components summed over neighbors i yields same output as regular compute snap with dgradflag = 0.
+         This shows that the dBi/dRj components extracted with dgradflag = 1 are correct.
 Serial syntax:
    python compute_snap_dgrad.py
 Parallel syntax:
@ -22,9 +22,12 @@ me = lmp.extract_setting("world_rank")
 nprocs = lmp.extract_setting("world_size")

 cmds = ["-screen", "none", "-log", "none"]
-lmp = lammps(cmdargs=cmds)
+lmp = lammps(cmdargs = cmds)

 def run_lammps(dgradflag):
+
+    # simulation settings
+    
    lmp.command("clear")
    lmp.command("units metal")
    lmp.command("boundary	p p p")
@ -35,99 +38,121 @@ def run_lammps(dgradflag):
    lmp.command(f"create_atoms	{ntypes} box")
    lmp.command("mass 		* 180.88")
    lmp.command("displace_atoms 	all random 0.01 0.01 0.01 123456")
-    # Pair style
-    snap_options=f'{rcutfac} {rfac0} {twojmax} {radelem1} {radelem2} {wj1} {wj2} rmin0 {rmin0} quadraticflag {quadratic} bzeroflag {bzero} switchflag {switch} bikflag {bikflag} dgradflag {dgradflag}'
+
+    # potential settings
+    
+    snap_options = f'{rcutfac} {rfac0} {twojmax} {radelem1} {radelem2} {wj1} {wj2} rmin0 {rmin0} quadraticflag {quadratic} bzeroflag {bzero} switchflag {switch} bikflag {bikflag} dgradflag {dgradflag}'
    lmp.command(f"pair_style 	zero {rcutfac}")
    lmp.command(f"pair_coeff 	* *")
    lmp.command(f"pair_style 	zbl {zblcutinner} {zblcutouter}")
    lmp.command(f"pair_coeff 	* * {zblz} {zblz}")
-    # set up compute snap generating global array
+
+    # define compute snap
+
    lmp.command(f"compute 	snap all snap {snap_options}")
-    # Run
+
+    # run
+
    lmp.command(f"thermo 		100")
    lmp.command(f"run {nsteps}")

-# Declare simulation/structure variables
-nsteps=0
-nrep=2
-latparam=2.0
-ntypes=2
-nx=nrep
-ny=nrep
-nz=nrep
+# declare simulation/structure variables

-# Declare compute snap variables
-twojmax=8
-m = (twojmax/2)+1
-rcutfac=1.0
-rfac0=0.99363
-rmin0=0
-radelem1=2.3
-radelem2=2.0
-wj1=1.0
-wj2=0.96
-quadratic=0
-bzero=0
-switch=0
-bikflag=1
-dgradflag=1
+nsteps = 0
+nrep = 2
+latparam = 2.0
+ntypes = 2
+nx = nrep
+ny = nrep
+nz = nrep

-# Declare reference potential variables
-zblcutinner=4.0
-zblcutouter=4.8
-zblz=73
+# declare compute snap variables
+
+twojmax = 8
+rcutfac = 1.0
+rfac0 = 0.99363
+rmin0 = 0
+radelem1 = 2.3
+radelem2 = 2.0
+wj1 = 1.0
+wj2 = 0.96
+quadratic = 0
+bzero = 0
+switch = 0
+bikflag = 1
+
+# define reference potential
+
+zblcutinner = 4.0
+zblcutouter = 4.8
+zblz = 73
+
+# number of descriptors

-# Number of descriptors
 if (twojmax % 2 == 0):
+    m  =  twojmax / 2 + 1
    nd = int(m*(m+1)*(2*m+1)/6)
 else:
+    m  =  (twojmax + 1) / 2
    nd = int(m*(m+1)*(m+2)/3)
+
 if me == 0:
    print(f"Number of descriptors based on twojmax : {nd}")

-# Run lammps with dgradflag on
+# run lammps with dgradflag on
+
 if me == 0:
    print("Running with dgradflag on")
-run_lammps(1)

-# Get global snap array
-lmp_snap = lmp.numpy.extract_compute("snap",0, 2)
+dgradflag = 1
+run_lammps(dgradflag)
+
+# get global snap array
+
+lmp_snap = lmp.numpy.extract_compute("snap", LMP_STYLE_GLOBAL, LMP_TYPE_ARRAY)
+
+# extract dBj/dRi (includes dBi/dRi)

-# Extract dBj/dRi (includes dBi/dRi)
 natoms = lmp.get_natoms()
 fref1 = lmp_snap[0:natoms,0:3].flatten()
-eref1 = lmp_snap[-1,0] #lmp_snap[-6,0]
-dbdr_length = np.shape(lmp_snap)[0]-(natoms) - 1 # Length of neighborlist pruned dbdr array
+eref1 = lmp_snap[-1,0]
+dbdr_length = np.shape(lmp_snap)[0]-(natoms) - 1
 dBdR = lmp_snap[natoms:(natoms+dbdr_length),3:(nd+3)]
 force_indices = lmp_snap[natoms:(natoms+dbdr_length),0:3].astype(np.int32)

-# Sum over neighbors j for each atom i, like dgradflag=0 does.
+# sum over atoms i that j is a neighbor of, like dgradflag = 0 does.
+
 array1 = np.zeros((3*natoms,nd))
-a = 0
 for k in range(0,nd):
    for l in range(0,dbdr_length):
-        j = force_indices[l,0]
-        i = force_indices[l,1]
-        array1[3*(i)+a,k] += dBdR[l,k]
-        a = a+1
-        if (a>2):
-            a=0
+        i = force_indices[l,0]
+        j = force_indices[l,1]
+        a = force_indices[l,2]
+        array1[3 * j + a, k] += dBdR[l,k]

-# Run lammps with dgradflag off
-print("Running with dgradflag off")
-run_lammps(0)
+# run lammps with dgradflag off

-# Get global snap array
-lmp_snap = lmp.numpy.extract_compute("snap",0, 2)
+if me == 0:
+    print("Running with dgradflag off")
+
+dgradflag = 0    
+run_lammps(dgradflag)
+
+# get global snap array
+
+lmp_snap = lmp.numpy.extract_compute("snap", LMP_STYLE_GLOBAL, LMP_TYPE_ARRAY)
 natoms = lmp.get_natoms()
 fref2 = lmp_snap[natoms:(natoms+3*natoms),-1]
 eref2 = lmp_snap[0,-1]
 array2 = lmp_snap[natoms:natoms+(3*natoms), nd:-1]

-# Sum the arrays obtained from dgradflag on and off.
-summ = array1 + array2
-#np.savetxt("sum.dat", summ)
+# take difference of arrays obtained from dgradflag on and off.

-print(f"Maximum difference in descriptor sums: {np.max(summ)}")
-print(f"Maximum difference in reference forces: {np.max(fref1-fref2)}")
-print(f"Difference in reference energy: {np.max(eref1-eref2)}")
+diffm = array1 - array2
+difff = fref1 - fref2
+diffe = eref1 - eref2
+
+if me == 0:
+    print(f"Max/min difference in dSum(Bi)/dRj: {np.max(diffm)} {np.min(diffm)}")
+    print(f"Max/min difference in reference forces: {np.max(difff)} {np.min(difff)}")
+    print(f"Difference in reference energy: {diffe}")
--- a/src/ML-SNAP/compute_snap.cpp
+++ b/src/ML-SNAP/compute_snap.cpp
@ -188,6 +188,9 @@ ComputeSnap::ComputeSnap(LAMMPS *lmp, int narg, char **arg) :
  if (dgradflag && !bikflag)
    error->all(FLERR,"Illegal compute snap command: dgradflag=1 requires bikflag=1");

+  if (dgradflag && quadraticflag)
+    error->all(FLERR,"Illegal compute snap command: dgradflag=1 not implemented for quadratic SNAP");
+  
  snaptr = new SNA(lmp, rfac0, twojmax,
                   rmin0, switchflag, bzeroflag,
                   chemflag, bnormflag, wselfallflag,
@ -436,231 +439,152 @@ void ComputeSnap::compute_array()
        snaptr->compute_duidrj(jj);
        snaptr->compute_dbidrj();

-        // Accumulate dBi/dRi, -dBi/dRj
+        // accumulate dBi/dRi, -dBi/dRj

-        double *snadi = snap_peratom[i]+typeoffset_local;
-        double *snadj = snap_peratom[j]+typeoffset_local;
+	if (!dgradflag) {

-        for (int icoeff = 0; icoeff < ncoeff; icoeff++) {
+	  double *snadi = snap_peratom[i]+typeoffset_local;
+	  double *snadj = snap_peratom[j]+typeoffset_local;

-          snadi[icoeff] += snaptr->dblist[icoeff][0];
-          snadi[icoeff+yoffset] += snaptr->dblist[icoeff][1];
-          snadi[icoeff+zoffset] += snaptr->dblist[icoeff][2];
+	  for (int icoeff = 0; icoeff < ncoeff; icoeff++) {

-          snadj[icoeff] -= snaptr->dblist[icoeff][0];
-          snadj[icoeff+yoffset] -= snaptr->dblist[icoeff][1];
-          snadj[icoeff+zoffset] -= snaptr->dblist[icoeff][2];
+	    snadi[icoeff] += snaptr->dblist[icoeff][0];
+	    snadi[icoeff+yoffset] += snaptr->dblist[icoeff][1];
+	    snadi[icoeff+zoffset] += snaptr->dblist[icoeff][2];

+	    snadj[icoeff] -= snaptr->dblist[icoeff][0];
+	    snadj[icoeff+yoffset] -= snaptr->dblist[icoeff][1];
+	    snadj[icoeff+zoffset] -= snaptr->dblist[icoeff][2];
+	  }

-          if (dgradflag){
-            dgrad[dgrad_row_indx+0][icoeff] = snaptr->dblist[icoeff][0];
-            dgrad[dgrad_row_indx+1][icoeff] = snaptr->dblist[icoeff][1];
-            dgrad[dgrad_row_indx+2][icoeff] = snaptr->dblist[icoeff][2];
-            if (icoeff==(ncoeff-1)){
-              dgrad[dgrad_row_indx+0][ncoeff] = atom->tag[i]-1;
-              dgrad[dgrad_row_indx+0][ncoeff+1] = atom->tag[j]-1;
-              dgrad[dgrad_row_indx+0][ncoeff+2] = 0;
-              dgrad[dgrad_row_indx+1][ncoeff] = atom->tag[i]-1;
-              dgrad[dgrad_row_indx+1][ncoeff+1] = atom->tag[j]-1;
-              dgrad[dgrad_row_indx+1][ncoeff+2] = 1;
-              dgrad[dgrad_row_indx+2][ncoeff] = atom->tag[i]-1;
-              dgrad[dgrad_row_indx+2][ncoeff+1] = atom->tag[j]-1;
-              dgrad[dgrad_row_indx+2][ncoeff+2] = 2;
-            }
-            // Accumulate dBi/dRi = sum (-dBi/dRj) for neighbors j of if i
-            dbiri[3*(atom->tag[i]-1)+0][icoeff] -= snaptr->dblist[icoeff][0];
-            dbiri[3*(atom->tag[i]-1)+1][icoeff] -= snaptr->dblist[icoeff][1];
-            dbiri[3*(atom->tag[i]-1)+2][icoeff] -= snaptr->dblist[icoeff][2];
-            // Get last columns which are i, j, and Cartesian index
-            if (icoeff==(ncoeff-1)){
-              dbiri[3*(atom->tag[i]-1)+0][ncoeff] = atom->tag[i]-1;
-              dbiri[3*(atom->tag[i]-1)+0][ncoeff+1] = atom->tag[i]-1;
-              dbiri[3*(atom->tag[i]-1)+0][ncoeff+2] = 0;
+	  if (quadraticflag) {
+	    const int quadraticoffset = ncoeff;
+	    snadi += quadraticoffset;
+	    snadj += quadraticoffset;
+	    int ncount = 0;
+	    for (int icoeff = 0; icoeff < ncoeff; icoeff++) {
+	      double bi = snaptr->blist[icoeff];
+	      double bix = snaptr->dblist[icoeff][0];
+	      double biy = snaptr->dblist[icoeff][1];
+	      double biz = snaptr->dblist[icoeff][2];
+	      
+	      // diagonal elements of quadratic matrix
+	      
+	      double dbxtmp = bi*bix;
+	      double dbytmp = bi*biy;
+	      double dbztmp = bi*biz;
+	      
+	      snadi[ncount] +=         dbxtmp;
+	      snadi[ncount+yoffset] += dbytmp;
+	      snadi[ncount+zoffset] += dbztmp;
+	      snadj[ncount] -=         dbxtmp;
+	      snadj[ncount+yoffset] -= dbytmp;
+	      snadj[ncount+zoffset] -= dbztmp;

-              dbiri[3*(atom->tag[i]-1)+1][ncoeff] = atom->tag[i]-1;
-              dbiri[3*(atom->tag[i]-1)+1][ncoeff+1] = atom->tag[i]-1;
-              dbiri[3*(atom->tag[i]-1)+1][ncoeff+2] = 1;
+	      ncount++;

-              dbiri[3*(atom->tag[i]-1)+2][ncoeff] = atom->tag[i]-1;
-              dbiri[3*(atom->tag[i]-1)+2][ncoeff+1] = atom->tag[i]-1;
-              dbiri[3*(atom->tag[i]-1)+2][ncoeff+2] = 2;
-            }
-          }
+	      // upper-triangular elements of quadratic matrix
+	      
+	      for (int jcoeff = icoeff+1; jcoeff < ncoeff; jcoeff++) {
+		double dbxtmp = bi*snaptr->dblist[jcoeff][0]
+		  + bix*snaptr->blist[jcoeff];
+		double dbytmp = bi*snaptr->dblist[jcoeff][1]
+		  + biy*snaptr->blist[jcoeff];
+		double dbztmp = bi*snaptr->dblist[jcoeff][2]
+		  + biz*snaptr->blist[jcoeff];
+	      
+		snadi[ncount] +=         dbxtmp;
+		snadi[ncount+yoffset] += dbytmp;
+		snadi[ncount+zoffset] += dbztmp;
+		snadj[ncount] -=         dbxtmp;
+		snadj[ncount+yoffset] -= dbytmp;
+		snadj[ncount+zoffset] -= dbztmp;
+		
+		ncount++;
+	      }
+	      
+	    }

+	  }
+	  
+	} else {

-        }
+	  for (int icoeff = 0; icoeff < ncoeff; icoeff++) {

-        if (quadraticflag) {
-          const int quadraticoffset = ncoeff;
-          snadi += quadraticoffset;
-          snadj += quadraticoffset;
-          int ncount = 0;
-          for (int icoeff = 0; icoeff < ncoeff; icoeff++) {
-            double bi = snaptr->blist[icoeff];
-            double bix = snaptr->dblist[icoeff][0];
-            double biy = snaptr->dblist[icoeff][1];
-            double biz = snaptr->dblist[icoeff][2];
+	    // sign convention same as compute snad
+	    
+	    dgrad[dgrad_row_indx+0][icoeff] = -snaptr->dblist[icoeff][0];
+	    dgrad[dgrad_row_indx+1][icoeff] = -snaptr->dblist[icoeff][1];
+	    dgrad[dgrad_row_indx+2][icoeff] = -snaptr->dblist[icoeff][2];

-            // diagonal elements of quadratic matrix
+	    // accumulate dBi/dRi = sum (-dBi/dRj) for neighbors j of if i

-            double dbxtmp = bi*bix;
-            double dbytmp = bi*biy;
-            double dbztmp = bi*biz;
+	    dbiri[3*(atom->tag[i]-1)+0][icoeff] += snaptr->dblist[icoeff][0];
+	    dbiri[3*(atom->tag[i]-1)+1][icoeff] += snaptr->dblist[icoeff][1];
+	    dbiri[3*(atom->tag[i]-1)+2][icoeff] += snaptr->dblist[icoeff][2];
+	    
+	  }

-            snadi[ncount] +=         dbxtmp;
-            snadi[ncount+yoffset] += dbytmp;
-            snadi[ncount+zoffset] += dbztmp;
-            snadj[ncount] -=         dbxtmp;
-            snadj[ncount+yoffset] -= dbytmp;
-            snadj[ncount+zoffset] -= dbztmp;
+	  dgrad[dgrad_row_indx+0][ncoeff] = atom->tag[i]-1;
+	  dgrad[dgrad_row_indx+0][ncoeff+1] = atom->tag[j]-1;
+	  dgrad[dgrad_row_indx+0][ncoeff+2] = 0;
+	  dgrad[dgrad_row_indx+1][ncoeff] = atom->tag[i]-1;
+	  dgrad[dgrad_row_indx+1][ncoeff+1] = atom->tag[j]-1;
+	  dgrad[dgrad_row_indx+1][ncoeff+2] = 1;
+	  dgrad[dgrad_row_indx+2][ncoeff] = atom->tag[i]-1;
+	  dgrad[dgrad_row_indx+2][ncoeff+1] = atom->tag[j]-1;
+	  dgrad[dgrad_row_indx+2][ncoeff+2] = 2;

-            ncount++;
+	  dbiri[3*(atom->tag[i]-1)+0][ncoeff] = atom->tag[i]-1;
+	  dbiri[3*(atom->tag[i]-1)+0][ncoeff+1] = atom->tag[i]-1;
+	  dbiri[3*(atom->tag[i]-1)+0][ncoeff+2] = 0;
+		
+	  dbiri[3*(atom->tag[i]-1)+1][ncoeff] = atom->tag[i]-1;
+	  dbiri[3*(atom->tag[i]-1)+1][ncoeff+1] = atom->tag[i]-1;
+	  dbiri[3*(atom->tag[i]-1)+1][ncoeff+2] = 1;
+		
+	  dbiri[3*(atom->tag[i]-1)+2][ncoeff] = atom->tag[i]-1;
+	  dbiri[3*(atom->tag[i]-1)+2][ncoeff+1] = atom->tag[i]-1;
+	  dbiri[3*(atom->tag[i]-1)+2][ncoeff+2] = 2;

-            // upper-triangular elements of quadratic matrix
-
-            for (int jcoeff = icoeff+1; jcoeff < ncoeff; jcoeff++) {
-              double dbxtmp = bi*snaptr->dblist[jcoeff][0]
-                + bix*snaptr->blist[jcoeff];
-              double dbytmp = bi*snaptr->dblist[jcoeff][1]
-                + biy*snaptr->blist[jcoeff];
-              double dbztmp = bi*snaptr->dblist[jcoeff][2]
-                + biz*snaptr->blist[jcoeff];
-
-              snadi[ncount] +=         dbxtmp;
-              snadi[ncount+yoffset] += dbytmp;
-              snadi[ncount+zoffset] += dbztmp;
-              snadj[ncount] -=         dbxtmp;
-              snadj[ncount+yoffset] -= dbytmp;
-              snadj[ncount+zoffset] -= dbztmp;
-
-              ncount++;
-            }
-          }
-
-        }
+	}
      }

-      // linear contributions
+      // accumulate Bi
+      
+      if (!dgradflag) {

-      int k;
-      if (dgradflag){
-        k = 0;
-        for (int icoeff = 0; icoeff < ncoeff; icoeff++){
-          snap[irow][k+3] += snaptr->blist[icoeff];
-          k = k+1;
-        }
-      }
-      else{
-        k = typeoffset_global;
-        for (int icoeff = 0; icoeff < ncoeff; icoeff++){
+	// linear contributions
+
+        int k = typeoffset_global;
+        for (int icoeff = 0; icoeff < ncoeff; icoeff++)
          snap[irow][k++] += snaptr->blist[icoeff];
-        }
-      }

-      // quadratic contributions
+	// quadratic contributions

-      if (quadraticflag) {
-        for (int icoeff = 0; icoeff < ncoeff; icoeff++) {
-          double bveci = snaptr->blist[icoeff];
-          snap[irow][k++] += 0.5*bveci*bveci;
-          for (int jcoeff = icoeff+1; jcoeff < ncoeff; jcoeff++) {
-            double bvecj = snaptr->blist[jcoeff];
-            snap[irow][k++] += bveci*bvecj;
-          }
-        }
+	if (quadraticflag) {
+	  for (int icoeff = 0; icoeff < ncoeff; icoeff++) {
+	    double bveci = snaptr->blist[icoeff];
+	    snap[irow][k++] += 0.5*bveci*bveci;
+	    for (int jcoeff = icoeff+1; jcoeff < ncoeff; jcoeff++) {
+	      double bvecj = snaptr->blist[jcoeff];
+	      snap[irow][k++] += bveci*bvecj;
+	    }
+	  }
+	}
+
+      } else {
+        int k = 3;
+        for (int icoeff = 0; icoeff < ncoeff; icoeff++)
+          snap[irow][k++] += snaptr->blist[icoeff];
+	numneigh_sum += ninside;
      }
    }
-
-    numneigh_sum += ninside;
-  } // for (int ii = 0; ii < inum; ii++)
-
-  // Accumulate contributions to global array
-
-  if (dgradflag){
-
-    int dbiri_indx;
-    int irow;
-    for (int itype=0; itype<1; itype++){
-      const int typeoffset_local = ndims_peratom*nperdim*itype;
-      const int typeoffset_global = nperdim*itype;
-      for (int icoeff = 0; icoeff < nperdim; icoeff++) {
-        dbiri_indx=0;
-        for (int i = 0; i < atom->nlocal; i++) {
-
-            for (int jj=0; jj<nneighs[i]; jj++){
-
-              int dgrad_row_indx = 3*neighsum[atom->tag[i]-1] + 3*jj;
-              int snap_row_indx = 3*neighsum[atom->tag[i]-1] + 3*(atom->tag[i]-1) + 3*jj;
-              irow = snap_row_indx + bik_rows;
-
-              // x-coordinate
-              snap[irow][icoeff+3] += dgrad[dgrad_row_indx+0][icoeff];
-              if (icoeff==(ncoeff-1)){
-                snap[irow][0] += dgrad[dgrad_row_indx+0][ncoeff];
-                snap[irow][0+1] += dgrad[dgrad_row_indx+0][ncoeff+1];
-                snap[irow][0+2] += dgrad[dgrad_row_indx+0][ncoeff+2];
-              }
-              irow++;
-
-              // y-coordinate
-              snap[irow][icoeff+3] += dgrad[dgrad_row_indx+1][icoeff];
-              if (icoeff==(ncoeff-1)){
-                snap[irow][0] += dgrad[dgrad_row_indx+1][ncoeff];
-                snap[irow][0+1] += dgrad[dgrad_row_indx+1][ncoeff+1];
-                snap[irow][0+2] += dgrad[dgrad_row_indx+1][ncoeff+2];
-              }
-              irow++;
-
-              // z-coordinate
-              snap[irow][icoeff+3]   += dgrad[dgrad_row_indx+2][icoeff];
-              if (icoeff==(ncoeff-1)){
-                snap[irow][0] += dgrad[dgrad_row_indx+2][ncoeff];
-                snap[irow][0+1] += dgrad[dgrad_row_indx+2][ncoeff+1];
-                snap[irow][0+2] += dgrad[dgrad_row_indx+2][ncoeff+2];
-              }
-              dbiri_indx = dbiri_indx+3;
-            }
-
-            // Put dBi/dRi at end of each dBj/dRi chunk.
-
-            irow = dbiri_indx + bik_rows;
-
-            // x-coordinate
-            snap[irow][icoeff+3] += dbiri[3*i+0][icoeff];
-            if (icoeff==(ncoeff-1)){
-              snap[irow][0] += dbiri[3*i+0][ncoeff];
-              snap[irow][0+1] += dbiri[3*i+0][ncoeff+1];
-              snap[irow][0+2] += dbiri[3*i+0][ncoeff+2];
-            }
-            irow++;
-
-            // y-coordinate
-            snap[irow][icoeff+3] += dbiri[3*i+1][icoeff];
-            if (icoeff==(ncoeff-1)){
-              snap[irow][0] += dbiri[3*i+1][ncoeff];
-              snap[irow][0+1] += dbiri[3*i+1][ncoeff+1];
-              snap[irow][0+2] += dbiri[3*i+1][ncoeff+2];
-            }
-            irow++;
-
-            // z-coordinate
-            snap[irow][icoeff+3]   += dbiri[3*i+2][icoeff];
-            if (icoeff==(ncoeff-1)){
-              snap[irow][0] += dbiri[3*i+2][ncoeff];
-              snap[irow][0+1] += dbiri[3*i+2][ncoeff+1];
-              snap[irow][0+2] += dbiri[3*i+2][ncoeff+2];
-            }
-
-            dbiri_indx = dbiri_indx+3;
-        }
-      }
-    }
-
  }

-  else{
+  // accumulate bispectrum force contributions to global array

-    // accumulate bispectrum force contributions to global array
+  if (!dgradflag) {

    for (int itype = 0; itype < atom->ntypes; itype++) {
      const int typeoffset_local = ndims_peratom*nperdim*itype;
@ -676,21 +600,81 @@ void ComputeSnap::compute_array()
        }
      }
    }
+
+  } else {
+
+    int irow = bik_rows;
+    for (int itype = 0; itype < 1; itype++) {
+      const int typeoffset_local = ndims_peratom * nperdim * itype;
+      const int typeoffset_global = nperdim * itype;
+      for (int i = 0; i < atom->nlocal; i++) {
+
+	for (int jj = 0; jj<nneighs[i]; jj++){
+
+	  int dgrad_row_indx = 3 * neighsum[atom->tag[i]-1] + 3 * jj;
+	  int snap_row_indx = 3 * neighsum[atom->tag[i]-1] + 3 * (atom->tag[i]-1) + 3 * jj;
+	  //	  irow = snap_row_indx + bik_rows;
+
+	  // x-coordinate
+	  for (int icoeff = 0; icoeff < nperdim; icoeff++)
+	    snap[irow][icoeff+3] += dgrad[dgrad_row_indx+0][icoeff];
+	  snap[irow][0] += dgrad[dgrad_row_indx+0][ncoeff];
+	  snap[irow][0+1] += dgrad[dgrad_row_indx+0][ncoeff+1];
+	  snap[irow][0+2] += dgrad[dgrad_row_indx+0][ncoeff+2];
+	  irow++;
+
+	  // y-coordinate
+	  for (int icoeff = 0; icoeff < nperdim; icoeff++)
+	    snap[irow][icoeff+3] += dgrad[dgrad_row_indx+1][icoeff];
+	  snap[irow][0] += dgrad[dgrad_row_indx+1][ncoeff];
+	  snap[irow][0+1] += dgrad[dgrad_row_indx+1][ncoeff+1];
+	  snap[irow][0+2] += dgrad[dgrad_row_indx+1][ncoeff+2];
+	  irow++;
+	  
+	  // z-coordinate
+	  for (int icoeff = 0; icoeff < nperdim; icoeff++)
+	    snap[irow][icoeff+3] += dgrad[dgrad_row_indx+2][icoeff];
+	  snap[irow][0] += dgrad[dgrad_row_indx+2][ncoeff];
+	  snap[irow][0+1] += dgrad[dgrad_row_indx+2][ncoeff+1];
+	  snap[irow][0+2] += dgrad[dgrad_row_indx+2][ncoeff+2];
+	  irow++;
+
+	}
+
+	// Put dBi/dRi at end of each dBj/dRi chunk.
+	
+	// x-coordinate
+	for (int icoeff = 0; icoeff < nperdim; icoeff++)
+	  snap[irow][icoeff+3] += dbiri[3*i+0][icoeff];
+	snap[irow][0] += dbiri[3*i+0][ncoeff];
+	snap[irow][0+1] += dbiri[3*i+0][ncoeff+1];
+	snap[irow][0+2] += dbiri[3*i+0][ncoeff+2];
+	irow++;
+
+	// y-coordinate
+	for (int icoeff = 0; icoeff < nperdim; icoeff++)
+	  snap[irow][icoeff+3] += dbiri[3*i+1][icoeff];
+	snap[irow][0] += dbiri[3*i+1][ncoeff];
+	snap[irow][0+1] += dbiri[3*i+1][ncoeff+1];
+	snap[irow][0+2] += dbiri[3*i+1][ncoeff+2];
+	irow++;
+
+	// z-coordinate
+	for (int icoeff = 0; icoeff < nperdim; icoeff++)
+	  snap[irow][icoeff+3]   += dbiri[3*i+2][icoeff];
+	snap[irow][0] += dbiri[3*i+2][ncoeff];
+	snap[irow][0+1] += dbiri[3*i+2][ncoeff+1];
+	snap[irow][0+2] += dbiri[3*i+2][ncoeff+2];
+	irow++;
+	
+      }
+    }
+
  }

 // accumulate forces to global array

- if (dgradflag){
-   // for dgradflag=1, put forces at first 3 columns of bik rows
-   for (int i=0; i<atom->nlocal; i++){
-     int iglobal = atom->tag[i];
-     snap[iglobal-1][0+0] = atom->f[i][0];
-     snap[iglobal-1][0+1] = atom->f[i][1];
-     snap[iglobal-1][0+2] = atom->f[i][2];
-
-   }
- }
- else{
+  if (!dgradflag) {
    for (int i = 0; i < atom->nlocal; i++) {
      int iglobal = atom->tag[i];
      int irow = 3*(iglobal-1)+bik_rows;
@ -698,42 +682,50 @@ void ComputeSnap::compute_array()
      snap[irow++][lastcol] = atom->f[i][1];
      snap[irow][lastcol] = atom->f[i][2];
    }
+    
+  } else {
+   
+    // for dgradflag=1, put forces at first 3 columns of bik rows
+   
+    for (int i=0; i<atom->nlocal; i++){
+      int iglobal = atom->tag[i];
+      snap[iglobal-1][0+0] = atom->f[i][0];
+      snap[iglobal-1][0+1] = atom->f[i][1];
+      snap[iglobal-1][0+2] = atom->f[i][2];
+    }
  }

  // accumulate bispectrum virial contributions to global array

-  if (dgradflag){
-    // no virial terms for dgrad yet
-  }
-  else{
-    dbdotr_compute();
-  }
+  dbdotr_compute();

  // sum up over all processes
+  
  MPI_Allreduce(&snap[0][0],&snapall[0][0],size_array_rows*size_array_cols,MPI_DOUBLE,MPI_SUM,world);
+
  // assign energy to last column
-  if (dgradflag){
+
+  if (!dgradflag) {
+    for (int i = 0; i < bik_rows; i++) snapall[i][lastcol] = 0;
+    int irow = 0;
+    double reference_energy = c_pe->compute_scalar();
+    snapall[irow][lastcol] = reference_energy;
+
+  } else {
+
    // Assign reference energy right after the dgrad rows, first column.
    // Add 3N for the dBi/dRi rows.
    int irow = bik_rows + dgrad_rows + 3*natoms;
    double reference_energy = c_pe->compute_scalar();
    snapall[irow][0] = reference_energy;
  }
-  else{
-    for (int i = 0; i < bik_rows; i++) snapall[i][lastcol] = 0;
-    int irow = 0;
-    double reference_energy = c_pe->compute_scalar();
-    snapall[irow][lastcol] = reference_energy;
-  }

  // assign virial stress to last column
  // switch to Voigt notation

  c_virial->compute_vector();
-  if (dgradflag){
+  if (!dgradflag) {
    // no virial terms for dgrad yet
-  }
-  else{
    c_virial->compute_vector();
    int irow = 3*natoms+bik_rows;
    snapall[irow++][lastcol] = c_virial->vector[0];
@ -743,7 +735,7 @@ void ComputeSnap::compute_array()
    snapall[irow++][lastcol] = c_virial->vector[4];
    snapall[irow][lastcol] = c_virial->vector[3];
  }
-
+  
 }

 /* ----------------------------------------------------------------------
@ -753,6 +745,11 @@ void ComputeSnap::compute_array()

 void ComputeSnap::dbdotr_compute()
 {
+
+  // no virial terms for dgrad yet
+
+  if (dgradflag) return;
+  
  double **x = atom->x;

  int irow0 = bik_rows+ndims_force*natoms;
@ -792,7 +789,9 @@ void ComputeSnap::get_dgrad_length()

  memory->destroy(snap);
  memory->destroy(snapall);
+
  // invoke full neighbor list
+
  neighbor->build_one(list);
  dgrad_rows = 0;
  const int inum = list->inum;
@ -807,19 +806,17 @@ void ComputeSnap::get_dgrad_length()
  memory->create(nneighs, natoms, "snap:nneighs");
  memory->create(icounter, natoms, "snap:icounter");
  memory->create(dbiri, 3*natoms,ncoeff+3, "snap:dbiri");
-  if (atom->nlocal != natoms){
+  if (atom->nlocal != natoms)
    error->all(FLERR,"Compute snap dgradflag=1 does not support parallelism yet.");
-  }
-  for (int ii=0; ii<3*natoms; ii++){
-    for (int icoeff=0; icoeff<ncoeff; icoeff++){
-      dbiri[ii][icoeff]=0.0;
-    }
-  }
+
+  for (int ii = 0; ii < 3 * natoms; ii++)
+    for (int icoeff = 0; icoeff < ncoeff; icoeff++)
+      dbiri[ii][icoeff] = 0.0;
+
  for (int ii = 0; ii < inum; ii++) {
    const int i = ilist[ii];
    if (mask[i] & groupbit) {
-      icounter[i]=0;
-      neighsum[i] = 0;
+      icounter[i] = 0;
      nneighs[i] = 0;
      const double xtmp = x[i][0];
      const double ytmp = x[i][1];
@ -827,7 +824,6 @@ void ComputeSnap::get_dgrad_length()
      const int itype = type[i];
      const int* const jlist = firstneigh[i];
      const int jnum = numneigh[i];
-      int jnum_cutoff = 0;
      for (int jj = 0; jj < jnum; jj++) {
        int j = jlist[jj];
        j &= NEIGHMASK;
@ -835,13 +831,12 @@ void ComputeSnap::get_dgrad_length()
        const double delx = x[j][0] - xtmp;
        const double dely = x[j][1] - ytmp;
        const double delz = x[j][2] - ztmp;
-        const double rsq = delx*delx + dely*dely + delz*delz;
+        const double rsq = delx * delx + dely * dely + delz * delz;
        int jtype = type[j];

-        if (rsq < cutsq[itype][jtype]&&rsq>1e-20) {
-          dgrad_rows += 1; //jnum + 1;
-          jnum_cutoff += 1;
-          nneighs[i]+=1;
+        if (rsq < cutsq[itype][jtype] && rsq>1e-20) {
+          dgrad_rows++;
+          nneighs[i]++;
        }
      }
    }
@ -849,31 +844,33 @@ void ComputeSnap::get_dgrad_length()

  dgrad_rows *= ndims_force;

-  // Loop over all atoms again to calculate neighsum.
-  for (int ii = 0; ii < inum; ii++) {
-    const int i = ilist[ii];
-    if (mask[i] & groupbit) {
-      if (i==0){
-        neighsum[i]=0;
-      }
-      else{
-        for (int jj=0; jj < ii; jj++){
-          const int j = ilist[jj];
-          if (mask[j] & groupbit) {
-            neighsum[i] += nneighs[j];
-          }
-        }
-      }
-    }
-  }
+  // loop over all atoms again to calculate neighsum

-  memory->create(dgrad, dgrad_rows, ncoeff+3, "snap:dgrad");
-  for (int i=0; i<dgrad_rows; i++){
-    for (int j=0; j<ncoeff+3; j++){
-      dgrad[i][j]=0.0;
-    }
+  // for (int ii = 0; ii < inum; ii++) {
+  //   const int i = ilist[ii];
+  //   if (mask[i] & groupbit) {
+  //     for (int jj = 0; jj < ii; jj++) {
+  // 	const int j = ilist[jj];
+  // 	if (mask[j] & groupbit)
+  // 	  neighsum[i] += nneighs[j];
+  //     }
+  //   }
+  // }
+
+  neighsum[0] = 0;  
+  for (int ii = 1; ii < inum; ii++) {
+    const int i = ilist[ii];
+    if (mask[i] & groupbit)
+      neighsum[i] = neighsum[i-1] + nneighs[i-1];
  }
-  // Set size array rows which now depends on dgrad_rows.
+  
+  memory->create(dgrad, dgrad_rows, ncoeff+3, "snap:dgrad");
+  for (int i = 0; i < dgrad_rows; i++)
+    for (int j = 0; j < ncoeff+3; j++)
+      dgrad[i][j] = 0.0;
+  
+  // set size array rows which now depends on dgrad_rows.
+
  size_array_rows = bik_rows + dgrad_rows + 3*atom->nlocal + 1; // Add 3*N for dBi/dRi. and add 1 for reference energy

  memory->create(snap,size_array_rows,size_array_cols, "snap:snap");