// EMT.cpp -- Implements the EMT potential for multiple elements.
// Copyright (C) 2008-2011 Jakob Schiotz and Center for Individual
// Nanoparticle Functionality, Department of Physics, Technical
// University of Denmark. Email: schiotz@fysik.dtu.dk
//
// This file is part of Asap version 3.
// Asap is released under the GNU Lesser Public License (LGPL) version 3.
// However, the parts of Asap distributed within the OpenKIM project
// (including this file) are also released under the Common Development
// and Distribution License (CDDL) version 1.0.
//
// This program is free software: you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public License
// version 3 as published by the Free Software Foundation. Permission
// to use other versions of the GNU Lesser General Public License may
// granted by Jakob Schiotz or the head of department of the
// Department of Physics, Technical University of Denmark, as
// described in section 14 of the GNU General Public License.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// and the GNU Lesser Public License along with this program. If not,
// see .
// Note: The macro ASAP_FOR_KIM is undefined when this file is
// compiled as part of Asap, but is defined when it is part of the
// OpenKIM model driver.
#include "EMT.h"
#include "Asap.h"
#include "EMTParameterProvider.h"
#include "Atoms.h"
#include "Vec.h"
#include "NeighborLocator.h"
#ifndef ASAP_FOR_KIM
#include "NeighborList.h" // Not present in OpenKIM
#endif
#include "Exception.h"
#include "Timing.h"
#include "mass.h"
#include "Debug.h"
#include
#include
#include
#include
#include
#include
#ifdef ASAP_MKL
#include "mkl.h"
#endif //ASAP_MKL
using std::vector;
using std::set;
using std::cerr;
using std::endl;
using std::flush;
using std::sort;
#undef INTERLEAVETHREADS
#if 1
#define VERB(x) if (verbose == 1) cerr << x
#else
#define VERB(x)
#endif
using namespace std;
// Standard mapping of the six independent parts of the stress tensor to
// vector notation
const static int stresscomp[3][3] = {{0, 5, 4}, {5, 1, 3}, {4, 3, 2}};
EMT::EMT(PyObject *self, PyObject *prov, int verbose) : Potential(self, verbose)
{
CONSTRUCTOR;
// Extract the parameter provider from the Python object, and store
// the Python object.
if (prov != NULL)
{
provider = ((PyAsap_EMTParamProvObject *) prov)->cobj;
provider_obj = prov;
Py_INCREF(provider_obj);
}
else
{
provider_obj = NULL;
provider = NULL;
}
DEBUGPRINT;
nblist = NULL;
nblist_obj = NULL;
driftfactor = 0.05; // Drift factor for the neighbor list.
initialized = false;
always_fullnblist = false;
atoms = NULL;
ghostatoms = false;
nAtoms = 0;
nSize = 0;
#ifdef ASAP_FOR_KIM
// Compiling EMT as an OpenKIM model
// OpenKIM assumes that the potential energy is zero at infinite separation.
// subtractE0 = false;
subtractE0 = false;
#else
// Asap defaults to using the fcc crystal as zero for the potential energy.
subtractE0 = true;
#endif
counters.ids = counters.nblist = counters.sigma1 = counters.sigma2 =
counters.energies = counters.forces = counters.virials =
counters.beforeforces = 0;
recalc.ids = recalc.nblist = recalc.sigma1 = recalc.sigma2 =
recalc.energies = recalc.forces = recalc.virials =
recalc.beforeforces = false;
skip_begin = false;
DEBUGPRINT;
}
/// If an EMTParameterProvider was given when constructing the EMT
/// object, it will NOT be deleted, but any default parameter provider
/// and the automatically generated neigbor list will be deleted.
/// XXXX: Handle this with Python refcounting.
EMT::~EMT()
{
DESTRUCTOR;
Py_XDECREF(provider_obj);
Py_XDECREF(nblist_obj);
if (atoms != NULL)
AsapAtoms_DECREF(atoms);
}
string EMT::GetRepresentation() const
{
char buffer[50];
sprintf(buffer, "0x%p", this);
return "GetName() +
") potential object at " + buffer + ">";
}
long EMT::PrintMemory() const
{
long atomsmem = 0;
if (atoms != NULL)
atomsmem = atoms->PrintMemory();
long mem = 0; // Count the big stuff.
for (vector >::const_iterator i = sigma1.begin();
i != sigma1.end(); i++)
mem += i->size() * sizeof(int);
for (vector >::const_iterator i = sigma2.begin();
i != sigma2.end(); i++)
mem += i->size() * sizeof(int);
mem += Ec.size() * sizeof(double);
mem += Eas.size() * sizeof(double);
mem += Epot.size() * sizeof(double);
mem += radius.size() * sizeof(double);
mem += dEds.size() * sizeof(double);
mem += force.size() * sizeof(Vec);
mem += virials.size() * sizeof(SymTensor);
mem += tmp_double.size() * sizeof(double);
mem += id.size() * sizeof(int);
mem = (mem + 512*1024) / (1024*1024);
char buffer[500];
snprintf(buffer, 500,
"*MEM* EMT %ld MB. [ sizeof(int)=%ld sizeof(double)=%ld ]",
mem, (long) sizeof(int), (long) sizeof(double));
cerr << buffer << endl;
if (nblist != NULL)
mem += nblist->PrintMemory();
return mem + atomsmem;
}
void EMT::SetAtoms(PyObject *pyatoms, Atoms* accessobj /* = NULL */)
{
DEBUGPRINT;
VERB("SetAtoms ");
if (atoms != NULL)
{
// SetAtoms should only do anything the first time it is called.
// Subsequent calls should just check for accessobj being NULL.
if (accessobj != NULL && accessobj != atoms)
throw AsapError("EMT::SetAtoms called multiple times with accessobj != NULL");
// If SetAtoms is called multiple times, it should only check that no new element
// is introduced.
set elements_set;
atoms->Begin(pyatoms);
atoms->GetListOfElements(elements_set);
atoms->End();
set parameters_set;
for (int i = 0; i < parameters.size(); i++)
{
parameters_set.insert(parameters[i]->Z);
}
for (set::iterator i = elements_set.begin(); i != elements_set.end(); i++)
if (parameters_set.find(*i) == parameters_set.end())
throw AsapError("You cannot introduce a new element after initializing EMT calculator: Z=") << *i;
return;
}
// The first time SetAtoms is being called some initialization is done.
if (accessobj != NULL)
{
atoms = accessobj;
AsapAtoms_INCREF(atoms);
}
else
atoms = new NormalAtoms();
ASSERT(atoms != NULL);
atoms->Begin(pyatoms);
nAtoms = atoms->GetNumberOfAtoms();
nSize = nAtoms;
InitParameters();
initialized = true;
if (nelements == 1)
{
singleelement = parameters[0];
}
else
{
singleelement = 0;
}
atoms->End();
DEBUGPRINT;
}
void EMT::Allocate()
{
RETURNIFASAPERROR;
USETIMER("EMT::Allocate");
DEBUGPRINT;
VERB(" Allocate[" << nAtoms << "," << nSize << "]" << flush);
// WARNING: Resizing the vector may allocate way too much memory. It
// appears that calling reserve solves this problem. For efficiency,
// reserve is called with 5% extra space. This is only necessary if the
// atoms have ghosts, otherwise no reallocation will happen.
// First, check if reallocation is necessary.
if (nSize != force.size() || nAtoms != Eas.size())
{
DEBUGPRINT;
/* Resize/intialize the internal variables. */
sigma1.resize(nelements);
sigma2.resize(nelements);
// Do the reserve trick if the atoms have ghosts.
if (ghostatoms)
{
if (force.capacity() < nSize)
{
nSizeRes = nSize + nSize/20;
for (int i = 0; i < nelements; i++) {
sigma1[i].reserve(nSizeRes);
sigma2[i].reserve(nSizeRes);
}
id.reserve(nSizeRes);
force.reserve(nSizeRes);
dEds.reserve(nSizeRes);
}
if (Eas.capacity() < nAtoms)
{
nAtomsRes = nAtoms + nAtoms/20;
Eas.reserve(nAtomsRes);
Epot.reserve(nAtomsRes);
Ec.reserve(nAtomsRes);
radius.reserve(nAtomsRes);
}
}
for (int i = 0; i < nelements; i++) {
sigma1[i].resize(nAtoms);
sigma2[i].resize(nAtoms);
}
DEBUGPRINT;
Ec.resize(nAtoms);
Eas.resize(nAtoms);
Epot.resize(nAtoms);
radius.resize(nAtoms);
dEds.resize(nSize);
id.resize(nSize);
force.resize(nSize);
tmp_double.resize(nSize);
ex2.resize(nAtoms);
if (virials.size())
AllocateStress();
// The IDs are set to zero, then they do not have to be calculated if
// there is only one element.
if (nelements == 1)
for (int i = 0; i < nSize; i++)
id[i] = 0;
}
DEBUGPRINT;
}
// Stresses are reallocated if they exist when Allocate is called, or if
// they do not exist, and GetStress is called.
void EMT::AllocateStress()
{
DEBUGPRINT;
if (ghostatoms && (virials.capacity() < nSize))
virials.reserve(nSizeRes);
VERB(" AllocStr[" << nAtoms << "," << nSize << "]"<< flush);
virials.resize(nSize);
DEBUGPRINT;
}
void EMT::InitParameters()
{
DEBUGPRINT;
// Extract the elements from the list of atoms, and set up an array
// of EMT parameters.
set elements_set;
vector elements;
// Extract the elements from the list of atoms.
// elements.clear();
atoms->GetListOfElements(elements_set);
for (set::iterator i = elements_set.begin(); i != elements_set.end();
++i)
elements.push_back(*i);
nelements = elements.size();
ASSERT(nelements == elements_set.size());
sort(elements.begin(), elements.end());
// Get the EMT parameters
parameters.clear();
for (vector::iterator i = elements.begin(); i != elements.end(); ++i)
parameters.push_back(provider->GetParameters(*i));
//ASSERT(nelements == provider->GetNumberOfElements());
// Calculate the quantities that can only be calculated, once the
// elements in the simulations are known.
provider->CalcGammaEtc();
rFermi = provider->GetCutoffDistance();
//rNbCut = 1.04500185048 * rFermi;
rNbCut = provider->GetListCutoffDistance();
cutoffslope = provider->GetCutoffSlope();
chi = provider->GetChi();
if (verbose)
cerr << "EMT::InitParameters: rFermi = " << rFermi << " rNbCut = "
<< rNbCut << " cutoffslope = " << cutoffslope << endl;
DEBUGPRINT;
}
bool EMT::CalcReq_Forces(PyObject *pyatoms)
{
atoms->Begin(pyatoms);
bool required = (counters.forces != atoms->GetPositionsCounter());
atoms->End();
return required;
}
const vector &EMT::GetForces(PyObject *pyatoms)
{
USETIMER("EMT::GetForces")
DEBUGPRINT;
VERB(" Force[");
ASSERT(atoms != NULL);
atoms->Begin(pyatoms);
recalc.nblist = CheckNeighborList();
recalc.forces = (counters.forces != atoms->GetPositionsCounter());
if (recalc.forces)
{
recalc.ids = (counters.ids != atoms->GetPositionsCounter());
recalc.sigma1 = (counters.sigma1 != atoms->GetPositionsCounter());
recalc.beforeforces = (counters.beforeforces !=
atoms->GetPositionsCounter());
DEBUGPRINT;
CalculateForces();
counters.beforeforces = atoms->GetPositionsCounter();
counters.forces = atoms->GetPositionsCounter();
VERB("]" << flush);
}
else
{
VERB("-]");
ASSERT(recalc.nblist == false);
}
DEBUGPRINT;
atoms->End();
MEMORY;
DEBUGPRINT;
return force;
}
void EMT::CalculateForces()
{
CHECKNOASAPERROR;
#ifdef _OPENMP
#pragma omp parallel
#endif // _OPENMP
{
if (recalc.nblist)
UpdateNeighborList();
CalculateIDs();
CalculateSigmas(false); /* Skip sigma2 */
CalculateEnergiesAfterSigmas(false); /* Skip actual energy calc. */
if (nelements > 1)
CalculateForcesAfterEnergies();
else
CalculateForcesAfterEnergiesSingle();
}
PROPAGATEASAPERROR;
}
bool EMT::CalcReq_Virials(PyObject *pyatoms)
{
atoms->Begin(pyatoms);
bool required = (counters.virials != atoms->GetPositionsCounter());
atoms->End();
return required;
}
bool EMT::CalcReq_Virial(PyObject *pyatoms)
{
return CalcReq_Virials(pyatoms);
}
const vector &EMT::GetVirials(PyObject *pyatoms)
{
USETIMER("EMT::GetVirials")
DEBUGPRINT;
VERB(" Virials[");
ASSERT(atoms != NULL);
atoms->Begin(pyatoms);
recalc.nblist = CheckNeighborList();
recalc.virials = (counters.virials != atoms->GetPositionsCounter());
if (recalc.virials)
{
recalc.ids = (counters.ids != atoms->GetPositionsCounter());
recalc.sigma1 = (counters.sigma1 != atoms->GetPositionsCounter());
recalc.beforeforces = (counters.beforeforces !=
atoms->GetPositionsCounter());
recalc.forces = (counters.forces != atoms->GetPositionsCounter());
DEBUGPRINT;
if (this->virials.size() == 0)
AllocateStress();
CalculateVirials();
}
else
{
ASSERT(recalc.nblist == false);
}
VERB("]" << flush);
DEBUGPRINT;
counters.virials = atoms->GetPositionsCounter();
counters.beforeforces = atoms->GetPositionsCounter();
counters.forces = atoms->GetPositionsCounter();
atoms->End();
return virials;
}
void EMT::CalculateVirials()
{
if (recalc.virials)
{
CHECKNOASAPERROR;
#ifdef _OPENMP
#pragma omp parallel
#endif // _OPENMP
{
if (recalc.nblist)
UpdateNeighborList();
CalculateIDs();
CalculateSigmas(false); // Skip sigma2
CalculateEnergiesAfterSigmas(false); // Skip actual energy calc.
if (nelements > 1)
CalculateForcesAfterEnergies();
else
CalculateForcesAfterEnergiesSingle();
}
PROPAGATEASAPERROR;
}
}
void EMT::GetAtomicVolumes(vector &v)
{
v.resize(nAtoms);
const double fourpithird = 4.1887902048; // 4 * PI / 3
for (int i = 0; i < nAtoms; i++)
{
double s0 = parameters[id[i]]->seq;
v[i] = fourpithird * s0 * s0 * s0;
}
}
bool EMT::CalcReq_Energy(PyObject *pyatoms)
{
atoms->Begin(pyatoms);
bool required = (counters.energies != atoms->GetPositionsCounter());
atoms->End();
return required;
}
const vector &EMT::GetPotentialEnergies(PyObject *pyatoms)
{
USETIMER("EMT::GetPotentialEnergies");
DEBUGPRINT;
VERB(" Energies[");
ASSERT(atoms != NULL);
// Atoms may already be open if called from MonteCarloEMT object.
if (!skip_begin)
atoms->Begin(pyatoms);
else
skip_begin = false;
recalc.nblist = CheckNeighborList();
recalc.energies = (counters.energies != atoms->GetPositionsCounter());
if (recalc.energies)
{
recalc.ids = (counters.ids != atoms->GetPositionsCounter());
recalc.sigma1 = (counters.sigma1 != atoms->GetPositionsCounter());
recalc.sigma2 = (counters.sigma2 != atoms->GetPositionsCounter());
recalc.beforeforces = (counters.beforeforces !=
atoms->GetPositionsCounter());
DEBUGPRINT;
CalculateEnergies();
counters.energies = atoms->GetPositionsCounter();
counters.beforeforces = atoms->GetPositionsCounter();
}
else
{
ASSERT(counters.beforeforces == atoms->GetPositionsCounter());
ASSERT(recalc.nblist == false);
if(subtractE0)
for (int i = 0; i < nAtoms; i++)
Epot[i] = Ec[i] + Eas[i] - parameters[id[i]]->e0;
else
for (int i = 0; i < nAtoms; i++)
Epot[i] = Ec[i] + Eas[i];
VERB("-");
}
ASSERT(Epot.size() == nAtoms);
DEBUGPRINT;
VERB("]" << flush);
atoms->End();
return Epot;
}
void EMT::CalculateEnergies()
{
CHECKNOASAPERROR;
#ifdef _OPENMP
#pragma omp parallel
#endif // _OPENMP
{
if (recalc.nblist)
UpdateNeighborList();
CalculateIDs();
CalculateSigmas(true);
CalculateEnergiesAfterSigmas(true);
}
PROPAGATEASAPERROR;
}
double EMT::GetPotentialEnergy(PyObject *pyatoms)
{
USETIMER("EMT::GetPotentialEnergy");
VERB(" Energy[");
DEBUGPRINT;
double etot = 0.0;
const vector &e = GetPotentialEnergies(pyatoms);
for (int i = 0; i < nAtoms; i++)
etot += e[i];
DEBUGPRINT;
VERB("]" << flush);
return etot;
}
bool EMT::CheckNeighborList()
{
RETURNIFASAPERROR2(false);
USETIMER("EMT::CheckNeighborList");
DEBUGPRINT;
ASSERT(atoms != NULL);
bool update = (nblist == NULL) || nblist->IsInvalid(); // Update if invalid
if (!update && (counters.nblist != atoms->GetPositionsCounter()))
{
VERB("n");
update = nblist->CheckNeighborList();
}
// May communicate
update = atoms->UpdateBeforeCalculation(update,
rNbCut * (1 + driftfactor));
counters.nblist = atoms->GetPositionsCounter();
return update;
}
void EMT::UpdateNeighborList()
{
RETURNIFASAPERROR;
USETIMER("EMT::UpdateNeighborList");
DEBUGPRINT;
VERB("N");
DEBUGPRINT;
RETURNIFASAPERROR;
if (nblist)
{
DEBUGPRINT;
nblist->UpdateNeighborList();
RETURNIFASAPERROR;
#ifdef _OPENMP
#pragma omp single
#endif // _OPENMP
{
if ((nAtoms != atoms->GetNumberOfAtoms())
|| (nSize - nAtoms != atoms->GetNumberOfGhostAtoms()))
{
nAtoms = atoms->GetNumberOfAtoms();
nSize = nAtoms + atoms->GetNumberOfGhostAtoms();
ghostatoms = atoms->HasGhostAtoms();
Allocate();
}
}
}
else
{
#ifdef _OPENMP
#pragma omp barrier
#endif // _OPENMP
// First call, create the neighbor list.
DEBUGPRINT;
CreateNeighborList();
RETURNIFASAPERROR;
#ifdef _OPENMP
#pragma omp single
#endif // _OPENMP
{
nAtoms = atoms->GetNumberOfAtoms();
nSize = nAtoms + atoms->GetNumberOfGhostAtoms();
ghostatoms = atoms->HasGhostAtoms();
Allocate();
}
}
DEBUGPRINT;
}
// Will be replaced in MonteCarloEMT
void EMT::CreateNeighborList()
{
#ifdef ASAP_FOR_KIM
// This function is never called when used as an OpenKIM potential
// In that case, the NeighborList object does not exist, and this
// function does not compile.
throw AsapError("Internal inconsistency: EMT::CreateNeighborList called in KIM calculator.");
#else
RETURNIFASAPERROR;
MEMORY;
USETIMER("EMT::CreateNeighborList");
if (!initialized)
THROW_RETURN( AsapError("EMT object has not been initialized!") );
#ifdef _OPENMP
#pragma omp single
#endif // _OPENMP
{
PyAsap_NeighborLocatorObject *nbl;
nbl = PyAsap_NewNeighborList(atoms, rNbCut, driftfactor);
nblist = nbl->cobj;
nblist->verbose = verbose;
nblist_obj = (PyObject *) nbl;
MEMORY;
}
nblist->UpdateNeighborList();
MEMORY;
#endif // not ASAP_FOR_KIM
}
/// IDs are a recoding of the atomic numbers used as indices into
/// various arrays. Atomic numbers are not practical for this as they
/// are not contiguous.
void EMT::CalculateIDs()
{
RETURNIFASAPERROR;
USETIMER("EMT::CalculateIDs");
DEBUGPRINT;
if (!recalc.ids || nelements == 1)
return;
VERB("i");
DEBUGPRINT;
// If there is only one element, all IDs are 0 and do not have to be
// calculated. Otherwise the ID is the offset into the list of elements.
const asap_z_int *RESTRICT z = atoms->GetAtomicNumbers();
int nSize = this->nSize; // These two lines help optimization.
int *RESTRICT id = &(this->id)[0];
for (int i = 0; i < nelements; i++)
{
int zcand = parameters[i]->Z;
#ifdef _OPENMP
#pragma omp for
#endif // _OPENMP
for (int j = 0; j < nSize; j++)
if (z[j] == zcand)
id[j] = i;
}
#ifdef _OPENMP
#pragma omp master
#endif // _OPENMP
counters.ids = atoms->GetPositionsCounter();
DEBUGPRINT;
}
void EMT::CalculateSigmas(bool calculatesigma2)
{
RETURNIFASAPERROR;
USETIMER("EMT::CalculateSigmas");
DEBUGPRINT;
if (!(recalc.sigma1 || (calculatesigma2 && recalc.sigma2)))
return;
#ifdef _OPENMP
#pragma omp master
#endif // _OPENMP
{
if (calculatesigma2)
{
VERB("2");
}
else
{
VERB("1");
}
}
DEBUGPRINT;
int maxnblen = nblist->MaxNeighborListLength();
if (maxnblen > BUFLEN)
{
const Vec *cell = atoms->GetCell();
THROW_RETURN( AsapError("Neighborlist overrun (did you squeeze your atoms?).\n")
<< " Longest neighbor list is " << maxnblen << ".\n"
<< " Cell is " << cell[0] << ", " << cell[1] << ", " << cell[2] );
}
#ifdef ASAP_FOR_KIM
const int *contributes = atoms->GetParticleContributes();
ASSERT(nAtoms == nSize); // In OpenKIM, non-contributing atoms are not always at the end.
#endif
// Buffer data:
TinyMatrix nbatch(nelements,nelements);
TinyMatrix self(nelements,nelements);
TinyMatrix other(nelements,nelements);
TinyMatrix rnb(nelements,nelements);
TinyMatrix sqdist(nelements,nelements);
vector other_buf(BUFLEN);
vector rnb_buf(BUFLEN);
vector sqdist_buf(BUFLEN);
int nSize = this->nSize; // Helps optimization
DEBUGPRINT;
/* Set sigmas to zero */
for (int i = 0; i < nelements; i++)
{
double *RESTRICT s1 = &sigma1[i][0];
double *RESTRICT s2 = &sigma2[i][0];
#ifdef _OPENMP
#pragma omp for nowait
#endif // _OPENMP
for (int j = 0; j < nSize; j++)
{
s1[j] = 0.0;
s2[j] = 0.0;
}
}
#ifdef _OPENMP
#pragma omp barrier
#endif // _OPENMP
/* No atoms in batch pools */
for (int i = 0; i < nelements; i++)
for (int j = 0; j < nelements; j++)
nbatch[i][j] = 0;
// Loop over atoms
#ifdef _OPENMP
#pragma omp for
#endif // _OPENMP
for (int atom = 0; atom < nAtoms; atom++)
{
#ifdef ASAP_FOR_KIM
if (!contributes[atom]) continue; // Skip non-contributing atoms.
#endif
int zself = id[atom];
// Get neighbors and loop over them. Simplest if only one element
if (nelements == 1)
{
int nbat = nbatch[0][0]; // only element
int size = BUFLEN-nbat;
int n;
if (always_fullnblist)
n = nblist->GetFullNeighbors(atom, other[0][0]+nbat,
rnb[0][0]+nbat, sqdist[0][0]+nbat,
size);
else
n = nblist->GetNeighbors(atom, other[0][0]+nbat,
rnb[0][0]+nbat, sqdist[0][0]+nbat,
size);
ASSERT(size >= 0); // REMOVE LATER !!!
for (int i = nbat; i < nbat+n; i++)
self[0][0][i] = atom;
nbatch[0][0] += n;
}
else
{
int size = BUFLEN;
int n;
if (always_fullnblist)
n = nblist->GetFullNeighbors(atom, &other_buf[0], &rnb_buf[0],
&sqdist_buf[0], size);
else
n = nblist->GetNeighbors(atom, &other_buf[0], &rnb_buf[0],
&sqdist_buf[0], size);
ASSERT(size >= 0); // REMOVE LATER !!!
int *RESTRICT id = &(this->id)[0];
for (int i = 0; i < n; i++)
{
int zother = id[other_buf[i]];
int nbat = nbatch[zself][zother]++; // Count this atom
self[zself][zother][nbat] = atom;
other[zself][zother][nbat] = other_buf[i];
sqdist[zself][zother][nbat] = sqdist_buf[i];
}
}
// Now process any full batch
for (int zo = 0; zo < nelements; zo++)
if (nbatch[zself][zo] >= BUFLEN - maxnblen)
{
sigma_batch(self[zself][zo], other[zself][zo],
sqdist[zself][zo], zself, zo, nbatch[zself][zo],
calculatesigma2);
nbatch[zself][zo] = 0;
}
} // Loop over atoms
/* Process the remaining incomplete batches */
for (int zs = 0; zs < nelements; zs++)
for (int zo = 0; zo < nelements; zo++)
if (nbatch[zs][zo])
sigma_batch(self[zs][zo], other[zs][zo],
sqdist[zs][zo], zs, zo, nbatch[zs][zo],
calculatesigma2);
#ifdef _OPENMP
#pragma omp single
#endif // _OPENMP
{
sigma2isvalid = calculatesigma2; /* Remember if it was calculated. */
counters.sigma1 = atoms->GetPositionsCounter();
if (calculatesigma2)
counters.sigma2 = atoms->GetPositionsCounter();
}
}
void EMT::sigma_batch(int *RESTRICT self, int *RESTRICT other,
double *RESTRICT sq_dist, int zs, int zo, int n,
bool calculatesigma2, bool partialupdate /* = false */)
{
USETIMER("EMT::sigma_batch");
#ifdef ASAP_MKL
double *temporary = (double *) mkl_malloc(10 * BUFLEN * sizeof(double), ASAP_MKL_ALIGN);
#else // ASAP_MKL
double *temporary = new double[4*BUFLEN];
#endif //ASAP_MKL
double *RESTRICT dsigma1s = &temporary[0];
double *RESTRICT dsigma2s = dsigma1s + BUFLEN;
double *RESTRICT dsigma1o = dsigma2s + BUFLEN;
double *RESTRICT dsigma2o = dsigma1o + BUFLEN;
//ASSERT(dsigma2o - temporary == (4 - 1) * BUFLEN);
#ifdef ASAP_MKL
double *RESTRICT tmp_a = dsigma2o + BUFLEN;
double *RESTRICT tmp_b = tmp_a + BUFLEN;
double *RESTRICT tmp_c = tmp_b + BUFLEN;
double *RESTRICT tmp_d = tmp_c + BUFLEN;
double *RESTRICT tmp_e = tmp_d + BUFLEN;
double *RESTRICT tmp_f = tmp_e + BUFLEN;
//ASSERT(tmp_f - temporary == (10 - 1) * BUFLEN);
#endif //ASAP_MKL
double cutslopecutdist;
double other_eta2betaseq, self_eta2betaseq;
double other_kappaoverbeta, self_kappaoverbeta;
double other_kappaseq, self_kappaseq;
double *s1s, *s1o, *s2s, *s2o;
const emt_parameters *emtself, *emtother;
ASSERT(n <= BUFLEN);
/* Get EMT parameters REWRITE !!! XXXX */
emtself = parameters[zs];
emtother = parameters[zo];
double cutoffslope = this->cutoffslope;
cutslopecutdist = cutoffslope * rFermi;
other_eta2betaseq = emtother->eta2 * Beta * emtother->seq;
self_eta2betaseq = emtself->eta2 * Beta * emtself->seq;
other_kappaoverbeta = emtother->kappa / Beta;
self_kappaoverbeta = emtself->kappa / Beta;
other_kappaseq = emtother->kappa * emtother->seq;
self_kappaseq = emtself->kappa * emtself->seq;
double other_eta2 = emtother->eta2;
double self_eta2 = emtself->eta2;
ASSERT(n <= BUFLEN);
// We have four cases to handled, controlled by two booleans.
// 1) Whether sigma2 needs to be calculated, 2) Whether we need a
// different calculation for the two atoms interacting. The latter is
// the case if the atomic numbers are different, unless operating in
// full neighborlist mode or doing a partial update for MonteCarloEMT,
// in these cases calculations are not reused.
bool calculateother = (zs != zo) && !always_fullnblist && !partialupdate;
if (!calculateother && !calculatesigma2)
{
#ifdef ASAP_MKL
// dist[] = tmp_a[] = sqrt(sq_dist[])
vdSqrt(n, sq_dist, tmp_a);
for (int i = 0; i < n; i++)
{
tmp_b[i] = cutoffslope * tmp_a[i] - cutslopecutdist;
tmp_c[i] = -other_eta2 * tmp_a[i] + other_eta2betaseq;
}
// tmp_a[] = exp(tmp_b[]) = exp(cutoffslope * dist[] - cutslopecutdist)
vdExp(n, tmp_b, tmp_a);
// tmp_b[] = exp(tmp_c[]) = exp(-other_eta2 * dist[] + other_eta2betaseq)
vdExp(n, tmp_c, tmp_b);
for (int i = 0; i < n; i++)
{
double wght = 1.0 / (1.0 + tmp_a[i]);
dsigma1s[i] = wght * tmp_b[i];
}
#else // ASAP_MKL
for (int i = 0; i < n; i++)
{
/* dist = sqrt(sq_dist), distances between atoms */
double dist = sqrt(sq_dist[i]);
/* Calculate weight factor (cutoff function) */
double wght = 1.0 / (1.0 + exp(cutoffslope * dist
- cutslopecutdist));
/* Contribution to sigma1 */
dsigma1s[i] = wght * exp(-other_eta2 * dist + other_eta2betaseq);
}
#endif // ASAP_MKL
}
else if (calculateother && !calculatesigma2)
{
#ifdef ASAP_MKL
// dist[] = tmp_a[] = sqrt(sq_dist[])
vdSqrt(n, sq_dist, tmp_a);
for (int i = 0; i < n; i++)
{
tmp_b[i] = cutoffslope * tmp_a[i] - cutslopecutdist;
tmp_c[i] = -other_eta2 * tmp_a[i] + other_eta2betaseq;
tmp_d[i] = -self_eta2 * tmp_a[i] + self_eta2betaseq;
}
// tmp_a[] = exp(tmp_b[]) = exp(cutoffslope * dist[] - cutslopecutdist)
vdExp(n, tmp_b, tmp_a);
// tmp_b[] = exp(tmp_c[]) = exp(-other_eta2 * dist[] + other_eta2betaseq)
vdExp(n, tmp_c, tmp_b);
// tmp_c[] = exp(tmp_d[]) = exp(-self_eta2 * dist[] + self_eta2betaseq)
vdExp(n, tmp_d, tmp_c);
for (int i = 0; i < n; i++)
{
double wght = 1.0 / (1.0 + tmp_a[i]);
dsigma1s[i] = wght * tmp_b[i];
dsigma1o[i] = wght * tmp_c[i];
}
#else // ASAP_MKL
for (int i = 0; i < n; i++)
{
/* dist = sqrt(sq_dist), distances between atoms */
double dist = sqrt(sq_dist[i]);
/* Calculate weight factor (cutoff function) */
double wght = 1.0 / (1.0 + exp(cutoffslope * dist
- cutslopecutdist));
/* Contributions to sigma1 */
dsigma1s[i] = wght * exp(-other_eta2 * dist + other_eta2betaseq);
dsigma1o[i] = wght * exp(-self_eta2 * dist + self_eta2betaseq);
}
#endif //ASAP_MKL
}
else if (!calculateother && calculatesigma2)
{
#ifdef ASAP_MKL
// dist[] = tmp_a[] = sqrt(sq_dist[])
vdSqrt(n, sq_dist, tmp_a);
for (int i = 0; i < n; i++)
{
tmp_b[i] = cutoffslope * tmp_a[i] - cutslopecutdist;
tmp_c[i] = -other_eta2 * tmp_a[i] + other_eta2betaseq;
tmp_d[i] = -other_kappaoverbeta * tmp_a[i] + other_kappaseq;
}
// tmp_a[] = exp(tmp_b[]) = exp(cutoffslope * dist[] - cutslopecutdist)
vdExp(n, tmp_b, tmp_a);
// tmp_b[] = exp(tmp_c[]) = exp(-other_eta2 * dist[] + other_eta2betaseq)
vdExp(n, tmp_c, tmp_b);
// tmp_c[] = exp(tmp_d[]) = exp(-other_kappaoverbeta * dist[] + other_kappaseq)
vdExp(n, tmp_d, tmp_c);
for (int i = 0; i < n; i++)
{
double wght = 1.0 / (1.0 + tmp_a[i]);
dsigma1s[i] = wght * tmp_b[i];
dsigma2s[i] = wght * tmp_c[i];
}
#else // ASAP_MKL
for (int i = 0; i < n; i++)
{
/* dist = sqrt(sq_dist), distances between atoms */
double dist = sqrt(sq_dist[i]);
/* Calculate weight factor (cutoff function) */
double wght = 1.0 / (1.0 + exp(cutoffslope * dist
- cutslopecutdist));
/* Contribution to sigma1 */
dsigma1s[i] = wght * exp(-other_eta2 * dist + other_eta2betaseq);
dsigma2s[i] = wght * exp(-other_kappaoverbeta * dist
+ other_kappaseq);
}
#endif //ASAP_MKL
}
else // calculateother && calculatesigma2
{
#ifdef ASAP_MKL
// dist[] = tmp_a[] = sqrt(sq_dist[])
vdSqrt(n, sq_dist, tmp_a);
for (int i = 0; i < n; i++)
{
tmp_b[i] = cutoffslope * tmp_a[i] - cutslopecutdist;
tmp_c[i] = -other_eta2 * tmp_a[i] + other_eta2betaseq;
tmp_d[i] = -self_eta2 * tmp_a[i] + self_eta2betaseq;
tmp_e[i] = -other_kappaoverbeta * tmp_a[i] + other_kappaseq;
tmp_f[i] = -self_kappaoverbeta * tmp_a[i] + self_kappaseq;
}
// tmp_a[] = exp(tmp_b[]) = exp(cutoffslope * dist[] - cutslopecutdist)
vdExp(n, tmp_b, tmp_a);
// tmp_b[] = exp(tmp_c[]) = exp(-other_eta2 * dist[] + other_eta2betaseq)
vdExp(n, tmp_c, tmp_b);
// tmp_c[] = exp(tmp_d[]) = exp(-self_eta2 * dist[] + self_eta2betaseq)
vdExp(n, tmp_d, tmp_c);
// tmp_d[] = exp(tmp_e[]) = exp(-other_kappaoverbeta * dist[] + other_kappaseq)
vdExp(n, tmp_e, tmp_d);
// tmp_e[] = exp(tmp_f[]) = exp(-self_kappaoverbeta * dist[] + self_kappaseq)
vdExp(n, tmp_f, tmp_e);
for (int i = 0; i < n; i++)
{
double wght = 1.0 / (1.0 + tmp_a[i]);
dsigma1s[i] = wght * tmp_b[i];
dsigma1o[i] = wght * tmp_c[i];
dsigma2s[i] = wght * tmp_d[i];
dsigma2o[i] = wght * tmp_e[i];
}
#else // ASAP_MKL
for (int i = 0; i < n; i++)
{
/* dist = sqrt(sq_dist), distances between atoms */
double dist = sqrt(sq_dist[i]);
/* Calculate weight factor (cutoff function) */
double wght = 1.0 / (1.0 + exp(cutoffslope * dist
- cutslopecutdist));
/* Contributions to sigma1 */
dsigma1s[i] = wght * exp(-other_eta2 * dist + other_eta2betaseq);
dsigma1o[i] = wght * exp(-self_eta2 * dist + self_eta2betaseq);
dsigma2s[i] = wght * exp(-other_kappaoverbeta * dist
+ other_kappaseq);
dsigma2o[i] = wght * exp(-self_kappaoverbeta * dist + self_kappaseq);
}
#endif // ASAP_MKL
}
// Distribute the results to the atoms.
if (!partialupdate && !always_fullnblist)
{
// This is the normal branch, both atoms need to be updated.
if (calculatesigma2)
{
/* Distribute contributions to sigma1 and sigma2 */
s1o = &sigma1[zo][0];
s1s = &sigma1[zs][0];
s2o = &sigma2[zo][0];
s2s = &sigma2[zs][0];
#ifdef _OPENMP
#pragma omp critical
#endif // _OPENMP
{
if (calculateother)
{
for (int i = 0; i < n; i++)
{
int s = self[i];
int o = other[i];
s1o[s] += dsigma1s[i];
s2o[s] += dsigma2s[i];
if (o < nAtoms) // Dont add to ghost atoms
{
s1s[o] += dsigma1o[i];
s2s[o] += dsigma2o[i];
}
}
}
else
{
for (int i = 0; i < n; i++)
{
int s = self[i];
int o = other[i];
s1o[s] += dsigma1s[i];
s2o[s] += dsigma2s[i];
if (o < nAtoms) // Dont add to ghost atoms
{
s1s[o] += dsigma1s[i];
s2s[o] += dsigma2s[i];
}
}
}
}
}
else
{
/* Distribute contributions to sigma1. */
s1o = &sigma1[zo][0];
s1s = &sigma1[zs][0];
#ifdef _OPENMP
#pragma omp critical
#endif // _OPENMP
{
if (calculateother)
{
for (int i = 0; i < n; i++)
{
int s = self[i];
int o = other[i];
s1o[s] += dsigma1s[i];
if (o < nAtoms)
s1s[o] += dsigma1o[i];
}
}
else
{
for (int i = 0; i < n; i++)
{
int s = self[i];
int o = other[i];
s1o[s] += dsigma1s[i];
if (o < nAtoms)
s1s[o] += dsigma1s[i];
}
}
}
}
}
else // partialupdate OR always_fullnblist
{
// This branch may be taken by MonteCarloEMT during a partial update,
// or by the normal EMT when not using Minimum Image Convention. Since
// full neighbor lists are used, only update the
// 'self' atom, not the 'other' atom.
// In OpenKIM, this branch is always taken.
if (calculatesigma2)
{
/* Distribute contributions to sigma1 and sigma2 */
s1o = &sigma1[zo][0];
s2o = &sigma2[zo][0];
#ifdef _OPENMP
#pragma omp critical
#endif // _OPENMP
{
for (int i = 0; i < n; i++)
{
int s = self[i];
s1o[s] += dsigma1s[i];
s2o[s] += dsigma2s[i];
}
}
}
else
{
/* Distribute contributions to sigma1 only */
s1o = &sigma1[zo][0];
#ifdef _OPENMP
#pragma omp critical
#endif // _OPENMP
{
for (int i = 0; i < n; i++)
{
int s = self[i];
s1o[s] += dsigma1s[i];
}
}
}
}
#ifdef ASAP_MKL
mkl_free(temporary);
#else
delete[] temporary;
#endif
}
// Calculate the energies if calc_Epot is true, otherwise just calculate the
// derivatives needed for the forces.
void EMT::CalculateEnergiesAfterSigmas(bool calc_Epot)
{
RETURNIFASAPERROR;
USETIMER("EMT::CalculateEnergiesAfterSigmas");
DEBUGPRINT;
//vector sigma(nSize);
double *RESTRICT sigma = &tmp_double[0];
bool dosigmapart = (recalc.beforeforces || (calc_Epot && recalc.energies));
bool doepotpart = (calc_Epot && recalc.energies);
int zs, zo;
double s;
// Better performance if static ???
ASSERT(nelements < NMAXELEMENTS);
double inv12gamma1[NMAXELEMENTS];
double neginvbetaeta2[NMAXELEMENTS];
double neglambda[NMAXELEMENTS];
double lambdaseq[NMAXELEMENTS];
double negkappa[NMAXELEMENTS];
double kappaseq[NMAXELEMENTS];
double nege0lambdalambda[NMAXELEMENTS];
double e0lambdalambdaseq[NMAXELEMENTS];
double neg6v0kappa[NMAXELEMENTS];
double e0lambda[NMAXELEMENTS];
double eccnst[NMAXELEMENTS];
double sixv0[NMAXELEMENTS];
double neghalfv0overgamma2[NMAXELEMENTS];
double seq[NMAXELEMENTS];
int *id = &(this->id)[0];
if (dosigmapart)
{
#ifdef _OPENMP
#pragma omp master
#endif // _OPENMP
{
VERB("b");
DEBUGPRINT;
}
/* Calculate total sigma1 */
#ifdef _OPENMP
#pragma omp for
#endif // _OPENMP
for (int i = 0; i < nAtoms; i++)
{
double s = 0.0;
int zs = id[i];
for (int zo = 0; zo < nelements; zo++)
s += (*chi)[zs][zo] * sigma1[zo][i];
if (s < 1.0e-40)
s = 1.0e-40;
sigma[i] = s;
}
ASSERT(nAtoms == radius.size() && nAtoms == Ec.size() &&
nSize == dEds.size());
}
/* Calculate conbinations of EMT parameters */
for (int i = 0; i < nelements; i++)
{
inv12gamma1[i] = 1.0 / (12.0 * parameters[i]->gamma1);
neginvbetaeta2[i] = -1.0 / (Beta * parameters[i]->eta2);
neglambda[i] = - parameters[i]->lambda;
lambdaseq[i] = parameters[i]->lambda * parameters[i]->seq;
negkappa[i] = - parameters[i]->kappa;
kappaseq[i] = parameters[i]->kappa * parameters[i]->seq;
nege0lambdalambda[i] = - parameters[i]->e0 * parameters[i]->lambda *
parameters[i]->lambda;
e0lambdalambdaseq[i] = parameters[i]->e0 * parameters[i]->lambda *
parameters[i]->lambda * parameters[i]->seq;
neg6v0kappa[i] = - 6.0 * parameters[i]->V0 * parameters[i]->kappa;
e0lambda[i] = parameters[i]->e0 * parameters[i]->lambda;
eccnst[i] = parameters[i]->e0 * (1.0 - parameters[i]->lambda *
parameters[i]->seq);
sixv0[i] = 6.0 * parameters[i]->V0;
neghalfv0overgamma2[i] = -0.5 * parameters[i]->V0 /
parameters[i]->gamma2;
seq[i] = parameters[i]->seq;
}
int nAtoms = this->nAtoms; // Helps optimization.
#define LOCALPTR(XX) double *RESTRICT XX = &(this->XX)[0]
LOCALPTR(Ec);
LOCALPTR(Eas);
LOCALPTR(Epot);
LOCALPTR(radius);
LOCALPTR(dEds);
LOCALPTR(ex2);
if (dosigmapart)
{
#ifdef _OPENMP
#pragma omp for
#endif // _OPENMP
for (int i = 0; i < nAtoms; i++)
{
asap_z_int z = id[i]; // Actually not Z (atomic no) but ID no.
double r = seq[z] + neginvbetaeta2[z] * log(sigma[i] * inv12gamma1[z]);
radius[i] = r;
double ex1 = exp(neglambda[z] * r + lambdaseq[z]);
double e2 = exp(negkappa[z] * r + kappaseq[z]);
ex2[i] = e2;
double tmp = (nege0lambdalambda[z] * r + e0lambdalambdaseq[z]) * ex1
+ neg6v0kappa[z] * e2;
dEds[i] = tmp * neginvbetaeta2[z] / sigma[i];
Ec[i] = (e0lambda[z] * r + eccnst[z]) * ex1;
}
#ifdef _OPENMP
#pragma omp for
#endif // _OPENMP
for (int i = nAtoms; i < nSize; i++)
dEds[i] = 0.0;
}
if (calc_Epot)
{
DEBUGPRINT;
if (doepotpart)
{
#ifdef _OPENMP
#pragma omp master
#endif // _OPENMP
{
VERB("e");
DEBUGPRINT;
}
/* We also need Eas, but only for the real atoms */
ASSERT(sigma2isvalid);
ASSERT(counters.sigma2 == atoms->GetPositionsCounter());
/* Calculate total sigma2 */
#ifdef _OPENMP
#pragma omp for
#endif // _OPENMP
for (int i = 0; i < nAtoms; i++)
{
s = 0.0;
zs = id[i];
for (zo = 0; zo < nelements; zo++)
s += (*chi)[zs][zo] * sigma2[zo][i];
Eas[i] = sixv0[zs] * ex2[i] + neghalfv0overgamma2[zs] * s;
}
}
/* Add the energies */
DEBUGPRINT;
if(subtractE0)
{
#ifdef _OPENMP
#pragma omp for
#endif // _OPENMP
for (int i = 0; i < nAtoms; i++)
Epot[i] = Ec[i] + Eas[i] - parameters[id[i]]->e0;
}
else
{
#ifdef _OPENMP
#pragma omp for
#endif // _OPENMP
for (int i = 0; i < nAtoms; i++)
Epot[i] = Ec[i] + Eas[i];
}
} // if (calc_Epot)
#ifdef ASAP_FOR_KIM
// In OpenKIM, nAtoms == nSize and the non-contributing atoms had
// their energies calculates. Zap them.
const int *contributes = atoms->GetParticleContributes();
for (int i = 0; i < nAtoms; i++)
if (!contributes[i])
Epot[i] = Ec[i] = Eas[i] = dEds[i] = 0;
#endif // ASAP_FOR_KIM
DEBUGPRINT;
}
void EMT::CalculateForcesAfterEnergies()
{
RETURNIFASAPERROR;
if (!(recalc.forces || (virials.size() && recalc.virials)))
return;
#ifdef _OPENMP
#pragma omp master
#endif // _OPENMP
{
VERB("f");
if (virials.size())
{
VERB("s");
}
}
#ifdef _OPENMP
#pragma omp barrier
#endif // _OPENMP
DEBUGPRINT;
int maxnblen = nblist->MaxNeighborListLength();
// Buffer data:
TinyMatrix nbatch(nelements,nelements);
TinyMatrixOfVector > self(nelements,nelements, BUFLEN);
TinyMatrixOfVector > other(nelements,nelements, BUFLEN);
TinyMatrixOfVector > rnb(nelements,nelements, BUFLEN);
TinyMatrixOfVector > sqdist(nelements,nelements, BUFLEN);
TinyMatrixOfVector > dEdss(nelements,nelements, BUFLEN);
TinyMatrixOfVector > dEdso(nelements,nelements, BUFLEN);
vector other_buf(BUFLEN);
vector rnb_buf(BUFLEN);
vector sqdist_buf(BUFLEN);
#ifdef ASAP_FOR_KIM
const int *contributes = atoms->GetParticleContributes();
#endif
// If there is only one element, CalculateForcesAfterEnergiesSingle should
// be used.
ASSERT(nelements > 1);
int nSize = this->nSize; // Helps optimization.
Vec *RESTRICT force = &(this->force)[0];
/* Set forces and virials to zero */
if (virials.size())
{
ASSERT(virials.size() == nSize);
#ifdef _OPENMP
#pragma omp for nowait
#endif // _OPENMP
for (int i = 0; i < nSize; i++)
for (int j = 0; j < 6; j++)
virials[i][j] = 0.0;
}
#ifdef _OPENMP
#pragma omp for
#endif // _OPENMP
for (int i = 0; i < nSize; i++)
force[i].x = force[i].y = force[i].z = 0.0;
/* Calculate forces */
/* No atoms in batch pools */
for (int i = 0; i < nelements; i++)
for (int j = 0; j < nelements; j++)
nbatch[i][j] = 0;
// Loop over atoms
#ifdef _OPENMP
#pragma omp for
#endif // _OPENMP
for (int atom = 0; atom < nAtoms; atom++) {
#ifdef ASAP_FOR_KIM
if (!contributes[atom]) continue; // Skip non-contributing atom in OpenKIM, where nAtoms == nSize.
#endif
int zself = id[atom];
// Get neighbors and loop over them.
int size = BUFLEN;
int n;
if (always_fullnblist)
n = nblist->GetFullNeighbors(atom, &other_buf[0], &rnb_buf[0],
&sqdist_buf[0], size);
else
n = nblist->GetNeighbors(atom, &other_buf[0], &rnb_buf[0],
&sqdist_buf[0], size);
ASSERT(size >= 0); // REMOVE LATER
for (int i = 0; i < n; i++) {
int zother = id[other_buf[i]];
int nbat = nbatch[zself][zother]++; // Count this atom
self[zself][zother][nbat] = atom;
other[zself][zother][nbat] = other_buf[i];
rnb[zself][zother][nbat][0] = rnb_buf[i][0];
rnb[zself][zother][nbat][1] = rnb_buf[i][1];
rnb[zself][zother][nbat][2] = rnb_buf[i][2];
sqdist[zself][zother][nbat] = sqdist_buf[i];
dEdss[zself][zother][nbat] = dEds[atom];
dEdso[zself][zother][nbat] = dEds[other_buf[i]];
}
// Now process any full batch
for (int zo = 0; zo < nelements; zo++)
if (nbatch[zself][zo] >= BUFLEN - maxnblen) {
force_batch(&self[zself][zo][0], &other[zself][zo][0],
&rnb[zself][zo][0],
&sqdist[zself][zo][0], &dEdss[zself][zo][0],
&dEdso[zself][zo][0], zself, zo, nbatch[zself][zo]);
nbatch[zself][zo] = 0;
}
} // Loop over atoms
/* Process the remaining incomplete batches */
for (int zs = 0; zs < nelements; zs++)
for (int zo = 0; zo < nelements; zo++)
if (nbatch[zs][zo])
force_batch(&self[zs][zo][0], &other[zs][zo][0], &rnb[zs][zo][0],
&sqdist[zs][zo][0], &dEdss[zs][zo][0],
&dEdso[zs][zo][0], zs, zo, nbatch[zs][zo]);
DEBUGPRINT;
#ifdef _OPENMP
#pragma omp barrier
#endif // _OPENMP
}
void EMT::CalculateForcesAfterEnergiesSingle()
{
RETURNIFASAPERROR;
USETIMER("EMT::CalculateForcesAfterEnergiesSingle");
if (!(recalc.forces || (virials.size() && recalc.virials)))
return;
#ifdef _OPENMP
#pragma omp master
#endif // _OPENMP
{
VERB("f");
if (virials.size())
{
VERB("s");
}
}
#ifdef _OPENMP
#pragma omp barrier
#endif // _OPENMP
int maxnblen = nblist->MaxNeighborListLength();
DEBUGPRINT;
// Buffer data:
int nbatch;
vector self(BUFLEN);
vector other(BUFLEN);
vector rnb(BUFLEN);
vector sqdist(BUFLEN);
vector dEdss(BUFLEN);
vector dEdso(BUFLEN);
#ifdef ASAP_FOR_KIM
const int *contributes = atoms->GetParticleContributes();
#endif
int nSize = this->nSize; // These three lines help optimization.
int nAtoms = this->nAtoms;
Vec *RESTRICT force = &(this->force)[0];
DEBUGPRINT;
// If there is more than one element, CalculateForcesAfterEnergies should
// be used.
ASSERT(nelements == 1);
/* Set forces and virials to zero */
ASSERT(this->force.size() >= nSize);
if (virials.size())
{
ASSERT(virials.size() == nSize);
#ifdef _OPENMP
#pragma omp for nowait
#endif // _OPENMP
for (int i = 0; i < nSize; i++)
for (int j = 0; j < 6; j++)
virials[i][j] = 0.0;
}
#ifdef _OPENMP
#pragma omp for
#endif // _OPENMP
for (int i = 0; i < nSize; i++)
force[i].x = force[i].y = force[i].z = 0.0;
/* Calculate forces */
/* No atoms in batch pool */
DEBUGPRINT;
nbatch = 0;
// Loop over atoms
DEBUGPRINT;
#ifdef _OPENMP
#pragma omp for
#endif // _OPENMP
for (int atom = 0; atom < nAtoms; atom++) {
#ifdef ASAP_FOR_KIM
if (!contributes[atom]) continue; // Skip non-contributing atom in OpenKIM, where nAtoms == nSize.
#endif
// Get neighbors and loop over them.
int size = BUFLEN-nbatch;
int n;
if (always_fullnblist)
n = nblist->GetFullNeighbors(atom, &other[nbatch], &rnb[nbatch],
&sqdist[nbatch], size);
else
n = nblist->GetNeighbors(atom, &other[nbatch], &rnb[nbatch],
&sqdist[nbatch], size);
double dEdsatom = dEds[atom];
for (int i = nbatch; i < nbatch+n; i++) {
self[i] = atom;
dEdss[i] = dEdsatom;
dEdso[i] = dEds[other[i]];
}
nbatch += n;
// Now process any full batch
if (nbatch >= BUFLEN - maxnblen) {
force_batch(&self[0], &other[0], &rnb[0], &sqdist[0], &dEdss[0],
&dEdso[0], 0, 0, nbatch);
nbatch = 0;
}
} // Loop over atoms
/* Process the remaining incomplete batch */
DEBUGPRINT;
if (nbatch)
force_batch(&self[0], &other[0], &rnb[0], &sqdist[0], &dEdss[0],
&dEdso[0], 0, 0, nbatch);
DEBUGPRINT;
#ifdef _OPENMP
#pragma omp barrier
#endif // _OPENMP
}
// It would be natural to transfer the arrays as vector<> instead of
// as pointes, but that confuses the compiler so it cannot discover
// that various arrays are disjunct, ruining optimization. For the
// same reason, the temporary array is not a vector<>.
void EMT::force_batch(const int *RESTRICT self, const int *RESTRICT other,
const Vec *RESTRICT rnb, const double *RESTRICT sq_dist,
const double *RESTRICT dEdss, const double *RESTRICT dEdso,
int zs, int zo, int n)
{
USETIMER("EMT::force_batch");
#ifdef ASAP_MKL
double *temporary = (double *) mkl_malloc(8 * BUFLEN * sizeof(double), ASAP_MKL_ALIGN);
double *RESTRICT df = temporary;
double *RESTRICT tmp_a = df + BUFLEN;
double *RESTRICT tmp_b = tmp_a + BUFLEN;
double *RESTRICT tmp_c = tmp_b + BUFLEN;
double *RESTRICT tmp_d = tmp_c + BUFLEN;
double *RESTRICT tmp_e = tmp_d + BUFLEN;
double *RESTRICT tmp_f = tmp_e + BUFLEN;
double *RESTRICT tmp_g = tmp_f + BUFLEN;
//ASSERT(tmp_g - temporary == (8 - 1) * BUFLEN);
#else // ASAP_MKL
double *temporary = new double[BUFLEN];
double *RESTRICT df = temporary;
#endif // ASAP_MKL
double cutslopecutdist, other_eta2betaseq, other_kappaoverbeta;
double other_kappaseq, self_eta2betaseq, self_kappaoverbeta;
double self_kappaseq, cnst_s, cnst_o;
const emt_parameters *emtself, *emtother;
//double pairA, exprcut, pairD;
//double *tmp;
ASSERT(n <= BUFLEN);
/* Get EMT parameters */
emtself = parameters[zs];
emtother = parameters[zo];
cutslopecutdist = cutoffslope * rFermi;
other_eta2betaseq = emtother->eta2 * Beta * emtother->seq;
other_kappaoverbeta = emtother->kappa / Beta;
other_kappaseq = emtother->kappa * emtother->seq;
self_eta2betaseq = emtself->eta2 * Beta * emtself->seq;
self_kappaoverbeta = emtself->kappa / Beta;
self_kappaseq = emtself->kappa * emtself->seq;
double other_eta2 = emtother->eta2;
double self_eta2 = emtself->eta2;
cnst_s = -0.5 * emtself->V0 * (*chi)[zs][zo] / emtself->gamma2;
cnst_o = -0.5 * emtother->V0 * (*chi)[zo][zs] / emtother->gamma2;
double chi_zs_zo = (*chi)[zs][zo];
double chi_zo_zs = (*chi)[zo][zs];
// Three cases: 1) The two atoms have the same atomic number,
// 2) they have different atomic number (less reuse of calculations)
// 3) we use full neighbor lists (no reuse).
if ((zs == zo) && !always_fullnblist)
{
#ifdef ASAP_MKL
/* Get the distances from their squares */
// dist[] = tmp_a[] = sqrt(sq_dist[])
vdSqrt(n, sq_dist, tmp_a);
// invdist[] = tmp_e[] = 1.0 / dist[]
vdInv(n, tmp_a, tmp_e);
for (int i = 0; i < n; i++)
{
tmp_b[i] = cutoffslope * tmp_a[i] - cutslopecutdist;
tmp_c[i] = -other_eta2 * tmp_a[i] + other_eta2betaseq;
tmp_d[i] = -other_kappaoverbeta * tmp_a[i] + other_kappaseq;
}
// tmp_a[] = exp(tmp_b[]) = exp(cutoffslope * dist[] - cutslopecutdist)
vdExp(n, tmp_b, tmp_a);
// tmp_b[] = exp(tmp_c[]) = exp(-other_eta2 * dist[] + other_eta2betaseq)
vdExp(n, tmp_c, tmp_b);
// tmp_c[] = exp(tmp_d[]) = exp(-other_kappaoverbeta * dist[] + other_kappaseq)
vdExp(n, tmp_d, tmp_c);
for (int i = 0; i < n; i++)
{
/* Calculate cutoff function (weight factor) */
double wght = 1.0 / (1.0 + tmp_a[i]);
/* Calculate derivative of the cutoff function. */
double dwdr = -cutoffslope * wght * (1 - wght);
double dsigma1dr = (dwdr - wght * other_eta2) * tmp_b[i];
double dsigma2dr = (dwdr + wght * -other_kappaoverbeta) * tmp_c[i];
df[i] = tmp_e[i] * (dsigma1dr * dEdss[i] * chi_zs_zo
+ cnst_s * dsigma2dr
+ dsigma1dr * dEdso[i] * chi_zo_zs
+ cnst_o * dsigma2dr * (other[i] < nAtoms));
}
#else //ASAP_MKL
for (int i = 0; i < n; i++)
{
/* Get the distances from their squares */
double dist = sqrt(sq_dist[i]);
double inv_dist = 1.0 / dist;
/* Calculate cutoff function (weight factor) */
double wght = 1.0 / (1.0 + exp(cutoffslope * dist
- cutslopecutdist));
/* Calculate derivative of the cutoff function. */
double dwdr = -cutoffslope * wght * (1 - wght);
double dsigma1dr = (dwdr - wght * other_eta2) *
exp(-other_eta2 * dist + other_eta2betaseq);
double dsigma2dr = (dwdr + wght * -other_kappaoverbeta) *
exp(-other_kappaoverbeta * dist + other_kappaseq);
df[i] = inv_dist * (dsigma1dr * dEdss[i] * chi_zs_zo
+ cnst_s * dsigma2dr // * (self[i] < nAtoms) always true
+ dsigma1dr * dEdso[i] * chi_zo_zs
+ cnst_o * dsigma2dr * (other[i] < nAtoms));
}
#endif //ASAP_MKL
}
else if (!always_fullnblist)
{
// zs != zo.
#ifdef ASAP_MKL
/* Get the distances from their squares */
// dist[] = tmp_a[] = sqrt(sq_dist[])
vdSqrt(n, sq_dist, tmp_a);
// invdist[] = tmp_g[] = 1.0 / dist[]
vdInv(n, tmp_a, tmp_g);
for (int i = 0; i < n; i++)
{
tmp_b[i] = cutoffslope * tmp_a[i] - cutslopecutdist;
tmp_c[i] = -other_eta2 * tmp_a[i] + other_eta2betaseq;
tmp_d[i] = -other_kappaoverbeta * tmp_a[i] + other_kappaseq;
tmp_e[i] = -self_eta2 * tmp_a[i] + self_eta2betaseq;
tmp_f[i] = -self_kappaoverbeta * tmp_a[i] + self_kappaseq;
}
// tmp_a[] = exp(tmp_b[]) = exp(cutoffslope * dist[] - cutslopecutdist)
vdExp(n, tmp_b, tmp_a);
// tmp_b[] = exp(tmp_c[]) = exp(-other_eta2 * dist[] + other_eta2betaseq)
vdExp(n, tmp_c, tmp_b);
// tmp_c[] = exp(tmp_d[]) = exp(-other_kappaoverbeta * dist[] + other_kappaseq)
vdExp(n, tmp_d, tmp_c);
// tmp_d[] = exp(tmp_e[]) = exp(-self_eta2 * dist[] + self_eta2betaseq)
vdExp(n, tmp_e, tmp_d);
// tmp_e[] = exp(tmp_f[]) = exp(-self_kappaoverbeta * dist[] + self_kappaseq)
vdExp(n, tmp_f, tmp_e);
for (int i = 0; i < n; i++)
{
/* Calculate cutoff function (weight factor) */
double wght = 1.0 / (1.0 + tmp_a[i]);
/* Calculate derivative of the cutoff function. */
double dwdr = -cutoffslope * wght * (1 - wght);
double dsigma1dr_o = (dwdr - wght * other_eta2) * tmp_b[i];
double dsigma2dr_o = (dwdr + wght * -other_kappaoverbeta) * tmp_c[i];
double dsigma1dr_s = (dwdr - wght * self_eta2) * tmp_d[i];
double dsigma2dr_s = (dwdr + wght * -self_kappaoverbeta) * tmp_e[i];
df[i] = tmp_g[i] * (dsigma1dr_o * dEdss[i] * chi_zs_zo
+ cnst_s * dsigma2dr_o
+ dsigma1dr_s * dEdso[i] * chi_zo_zs
+ cnst_o * dsigma2dr_s * (other[i] < nAtoms));
}
#else // ASAP_MKL
for (int i = 0; i < n; i++)
{
/* Get the distances from their squares */
double dist = sqrt(sq_dist[i]);
double inv_dist = 1.0 / dist;
/* Calculate cutoff function (weight factor) */
double wght = 1.0 / (1.0 + exp(cutoffslope * dist -
cutslopecutdist));
/* Calculate derivative of the cutoff function. */
double dwdr = -cutoffslope * wght * (1 - wght);
double dsigma1dr_o = (dwdr - wght * other_eta2) *
exp(-other_eta2 * dist + other_eta2betaseq);
double dsigma2dr_o = (dwdr + wght * -other_kappaoverbeta) *
exp(-other_kappaoverbeta * dist + other_kappaseq);
double dsigma1dr_s = (dwdr - wght * self_eta2) *
exp(-self_eta2 * dist + self_eta2betaseq);
double dsigma2dr_s = (dwdr + wght * -self_kappaoverbeta) *
exp(-self_kappaoverbeta * dist + self_kappaseq);
df[i] = inv_dist * (dsigma1dr_o * dEdss[i] * chi_zs_zo
+ cnst_s * dsigma2dr_o // * (self[i] < nAtoms) always true!
+ dsigma1dr_s * dEdso[i] * chi_zo_zs
+ cnst_o * dsigma2dr_s * (other[i] < nAtoms));
}
#endif //ASAP_MKL
}
else
{
// Using full neighbor lists
#ifdef ASAP_MKL
/* Get the distances from their squares */
// dist[] = tmp_a[] = sqrt(sq_dist[])
vdSqrt(n, sq_dist, tmp_a);
// invdist[] = tmp_e[] = 1.0 / dist[]
vdInv(n, tmp_a, tmp_e);
for (int i = 0; i < n; i++)
{
tmp_b[i] = cutoffslope * tmp_a[i] - cutslopecutdist;
tmp_c[i] = -other_eta2 * tmp_a[i] + other_eta2betaseq;
tmp_d[i] = -other_kappaoverbeta * tmp_a[i] + other_kappaseq;
}
// tmp_a[] = exp(tmp_b[]) = exp(cutoffslope * dist[] - cutslopecutdist)
vdExp(n, tmp_b, tmp_a);
// tmp_b[] = exp(tmp_c[]) = exp(-other_eta2 * dist[] + other_eta2betaseq)
vdExp(n, tmp_c, tmp_b);
// tmp_c[] = exp(tmp_d[]) = exp(-other_kappaoverbeta * dist[] + other_kappaseq)
vdExp(n, tmp_d, tmp_c);
for (int i = 0; i < n; i++)
{
/* Calculate cutoff function (weight factor) */
double wght = 1.0 / (1.0 + tmp_a[i]);
/* Calculate derivative of the cutoff function. */
double dwdr = -cutoffslope * wght * (1 - wght);
double dsigma1dr_o = (dwdr - wght * other_eta2) * tmp_b[i];
double dsigma2dr_o = (dwdr + wght * -other_kappaoverbeta) * tmp_c[i];
df[i] = tmp_e[i] * (dsigma1dr_o * dEdss[i] * chi_zs_zo
+ cnst_s * dsigma2dr_o);
}
#else // ASAP_MKL
for (int i = 0; i < n; i++)
{
/* Get the distances from their squares */
double dist = sqrt(sq_dist[i]);
double inv_dist = 1.0 / dist;
/* Calculate cutoff function (weight factor) */
double wght = 1.0 / (1.0 + exp(cutoffslope * dist -
cutslopecutdist));
/* Calculate derivative of the cutoff function. */
double dwdr = -cutoffslope * wght * (1 - wght);
double dsigma1dr_o = (dwdr - wght * other_eta2) *
exp(-other_eta2 * dist + other_eta2betaseq);
double dsigma2dr_o = (dwdr + wght * -other_kappaoverbeta) *
exp(-other_kappaoverbeta * dist + other_kappaseq);
df[i] = inv_dist * (dsigma1dr_o * dEdss[i] * chi_zs_zo
+ cnst_s * dsigma2dr_o);
}
#endif // ASAP_MKL
}
distribute_force(self, other, df, rnb, n);
#ifdef ASAP_MKL
mkl_free(temporary);
#else
delete[] temporary;
#endif
}
void EMT::distribute_force(const int *RESTRICT self, const int *RESTRICT other,
const double *RESTRICT df, const Vec *RESTRICT rnb, int n)
{
/* Distribute force contributions */
#ifdef _OPENMP
#pragma omp critical
#endif // _OPENMP
{
for (int i = 0; i < n; i++)
for (int j = 0; j < 3; j++)
{
double dfx = df[i] * rnb[i][j];
force[self[i]][j] += dfx;
force[other[i]][j] -= dfx; // force has been extended to nSize.
}
/* Distribute virials contributions */
if (virials.size())
{
for (int i = 0; i < n; i++)
for (int alpha = 0; alpha < 3; alpha++)
for (int beta = alpha; beta < 3; beta++)
{
double dsx = 0.5 * df[i] * rnb[i][alpha] * rnb[i][beta];
int ii = stresscomp[alpha][beta];
virials[self[i]][ii] += dsx;
virials[other[i]][ii] += dsx;
}
}
}
}
double EMT::GetLatticeConstant() const
{
ASSERT(singleelement != 0);
return Beta * singleelement->seq * sqrt(2.0);
}
void EMT::PrintParameters()
{
for (int i = 0; i < nelements; i++)
{
const emt_parameters *p = parameters[i];
cerr << endl;
cerr << "Parameters for element " << i << " (" << p->name << ", Z=" << p->Z << ")" << endl;
cerr << "E0:" << p->e0 << " s0:" << p->seq << " V0:" << p->V0 <<
" eta2:" << p->eta2 << " kappa:" << p->kappa << " lambda:" << p->lambda <<
" rFermi:" << rFermi << " cutSlope" << cutoffslope <<
" gamma1:" << p->gamma1 << " gamma2:" << p->gamma2 << endl <<
endl;
cerr << "Chi:";
for (int j = 0; j < nelements; j++)
cerr << " " << (*chi)[i][j];
cerr << endl;
}
}
void EMT::BoundaryConditionsChanged()
{
if (nblist)
nblist->Invalidate();
}