// EMT.cpp -- Implements the EMT potential for multiple elements. // Copyright (C) 2008-2011 Jakob Schiotz and Center for Individual // Nanoparticle Functionality, Department of Physics, Technical // University of Denmark. Email: schiotz@fysik.dtu.dk // // This file is part of Asap version 3. // Asap is released under the GNU Lesser Public License (LGPL) version 3. // However, the parts of Asap distributed within the OpenKIM project // (including this file) are also released under the Common Development // and Distribution License (CDDL) version 1.0. // // This program is free software: you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public License // version 3 as published by the Free Software Foundation. Permission // to use other versions of the GNU Lesser General Public License may // granted by Jakob Schiotz or the head of department of the // Department of Physics, Technical University of Denmark, as // described in section 14 of the GNU General Public License. // // This program is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // GNU General Public License for more details. // // You should have received a copy of the GNU General Public License // and the GNU Lesser Public License along with this program. If not, // see . // Note: The macro ASAP_FOR_KIM is undefined when this file is // compiled as part of Asap, but is defined when it is part of the // OpenKIM model driver. #include "EMT.h" #include "Asap.h" #include "EMTParameterProvider.h" #include "Atoms.h" #include "Vec.h" #include "NeighborLocator.h" #ifndef ASAP_FOR_KIM #include "NeighborList.h" // Not present in OpenKIM #endif #include "Exception.h" #include "Timing.h" #include "mass.h" #include "Debug.h" #include #include #include #include #include #include #ifdef ASAP_MKL #include "mkl.h" #endif //ASAP_MKL using std::vector; using std::set; using std::cerr; using std::endl; using std::flush; using std::sort; #undef INTERLEAVETHREADS #if 1 #define VERB(x) if (verbose == 1) cerr << x #else #define VERB(x) #endif using namespace std; // Standard mapping of the six independent parts of the stress tensor to // vector notation const static int stresscomp[3][3] = {{0, 5, 4}, {5, 1, 3}, {4, 3, 2}}; EMT::EMT(PyObject *self, PyObject *prov, int verbose) : Potential(self, verbose) { CONSTRUCTOR; // Extract the parameter provider from the Python object, and store // the Python object. if (prov != NULL) { provider = ((PyAsap_EMTParamProvObject *) prov)->cobj; provider_obj = prov; Py_INCREF(provider_obj); } else { provider_obj = NULL; provider = NULL; } DEBUGPRINT; nblist = NULL; nblist_obj = NULL; driftfactor = 0.05; // Drift factor for the neighbor list. initialized = false; always_fullnblist = false; atoms = NULL; ghostatoms = false; nAtoms = 0; nSize = 0; #ifdef ASAP_FOR_KIM // Compiling EMT as an OpenKIM model // OpenKIM assumes that the potential energy is zero at infinite separation. // subtractE0 = false; subtractE0 = false; #else // Asap defaults to using the fcc crystal as zero for the potential energy. subtractE0 = true; #endif counters.ids = counters.nblist = counters.sigma1 = counters.sigma2 = counters.energies = counters.forces = counters.virials = counters.beforeforces = 0; recalc.ids = recalc.nblist = recalc.sigma1 = recalc.sigma2 = recalc.energies = recalc.forces = recalc.virials = recalc.beforeforces = false; skip_begin = false; DEBUGPRINT; } /// If an EMTParameterProvider was given when constructing the EMT /// object, it will NOT be deleted, but any default parameter provider /// and the automatically generated neigbor list will be deleted. /// XXXX: Handle this with Python refcounting. EMT::~EMT() { DESTRUCTOR; Py_XDECREF(provider_obj); Py_XDECREF(nblist_obj); if (atoms != NULL) AsapAtoms_DECREF(atoms); } string EMT::GetRepresentation() const { char buffer[50]; sprintf(buffer, "0x%p", this); return "GetName() + ") potential object at " + buffer + ">"; } long EMT::PrintMemory() const { long atomsmem = 0; if (atoms != NULL) atomsmem = atoms->PrintMemory(); long mem = 0; // Count the big stuff. for (vector >::const_iterator i = sigma1.begin(); i != sigma1.end(); i++) mem += i->size() * sizeof(int); for (vector >::const_iterator i = sigma2.begin(); i != sigma2.end(); i++) mem += i->size() * sizeof(int); mem += Ec.size() * sizeof(double); mem += Eas.size() * sizeof(double); mem += Epot.size() * sizeof(double); mem += radius.size() * sizeof(double); mem += dEds.size() * sizeof(double); mem += force.size() * sizeof(Vec); mem += virials.size() * sizeof(SymTensor); mem += tmp_double.size() * sizeof(double); mem += id.size() * sizeof(int); mem = (mem + 512*1024) / (1024*1024); char buffer[500]; snprintf(buffer, 500, "*MEM* EMT %ld MB. [ sizeof(int)=%ld sizeof(double)=%ld ]", mem, (long) sizeof(int), (long) sizeof(double)); cerr << buffer << endl; if (nblist != NULL) mem += nblist->PrintMemory(); return mem + atomsmem; } void EMT::SetAtoms(PyObject *pyatoms, Atoms* accessobj /* = NULL */) { DEBUGPRINT; VERB("SetAtoms "); if (atoms != NULL) { // SetAtoms should only do anything the first time it is called. // Subsequent calls should just check for accessobj being NULL. if (accessobj != NULL && accessobj != atoms) throw AsapError("EMT::SetAtoms called multiple times with accessobj != NULL"); // If SetAtoms is called multiple times, it should only check that no new element // is introduced. set elements_set; atoms->Begin(pyatoms); atoms->GetListOfElements(elements_set); atoms->End(); set parameters_set; for (int i = 0; i < parameters.size(); i++) { parameters_set.insert(parameters[i]->Z); } for (set::iterator i = elements_set.begin(); i != elements_set.end(); i++) if (parameters_set.find(*i) == parameters_set.end()) throw AsapError("You cannot introduce a new element after initializing EMT calculator: Z=") << *i; return; } // The first time SetAtoms is being called some initialization is done. if (accessobj != NULL) { atoms = accessobj; AsapAtoms_INCREF(atoms); } else atoms = new NormalAtoms(); ASSERT(atoms != NULL); atoms->Begin(pyatoms); nAtoms = atoms->GetNumberOfAtoms(); nSize = nAtoms; InitParameters(); initialized = true; if (nelements == 1) { singleelement = parameters[0]; } else { singleelement = 0; } atoms->End(); DEBUGPRINT; } void EMT::Allocate() { RETURNIFASAPERROR; USETIMER("EMT::Allocate"); DEBUGPRINT; VERB(" Allocate[" << nAtoms << "," << nSize << "]" << flush); // WARNING: Resizing the vector may allocate way too much memory. It // appears that calling reserve solves this problem. For efficiency, // reserve is called with 5% extra space. This is only necessary if the // atoms have ghosts, otherwise no reallocation will happen. // First, check if reallocation is necessary. if (nSize != force.size() || nAtoms != Eas.size()) { DEBUGPRINT; /* Resize/intialize the internal variables. */ sigma1.resize(nelements); sigma2.resize(nelements); // Do the reserve trick if the atoms have ghosts. if (ghostatoms) { if (force.capacity() < nSize) { nSizeRes = nSize + nSize/20; for (int i = 0; i < nelements; i++) { sigma1[i].reserve(nSizeRes); sigma2[i].reserve(nSizeRes); } id.reserve(nSizeRes); force.reserve(nSizeRes); dEds.reserve(nSizeRes); } if (Eas.capacity() < nAtoms) { nAtomsRes = nAtoms + nAtoms/20; Eas.reserve(nAtomsRes); Epot.reserve(nAtomsRes); Ec.reserve(nAtomsRes); radius.reserve(nAtomsRes); } } for (int i = 0; i < nelements; i++) { sigma1[i].resize(nAtoms); sigma2[i].resize(nAtoms); } DEBUGPRINT; Ec.resize(nAtoms); Eas.resize(nAtoms); Epot.resize(nAtoms); radius.resize(nAtoms); dEds.resize(nSize); id.resize(nSize); force.resize(nSize); tmp_double.resize(nSize); ex2.resize(nAtoms); if (virials.size()) AllocateStress(); // The IDs are set to zero, then they do not have to be calculated if // there is only one element. if (nelements == 1) for (int i = 0; i < nSize; i++) id[i] = 0; } DEBUGPRINT; } // Stresses are reallocated if they exist when Allocate is called, or if // they do not exist, and GetStress is called. void EMT::AllocateStress() { DEBUGPRINT; if (ghostatoms && (virials.capacity() < nSize)) virials.reserve(nSizeRes); VERB(" AllocStr[" << nAtoms << "," << nSize << "]"<< flush); virials.resize(nSize); DEBUGPRINT; } void EMT::InitParameters() { DEBUGPRINT; // Extract the elements from the list of atoms, and set up an array // of EMT parameters. set elements_set; vector elements; // Extract the elements from the list of atoms. // elements.clear(); atoms->GetListOfElements(elements_set); for (set::iterator i = elements_set.begin(); i != elements_set.end(); ++i) elements.push_back(*i); nelements = elements.size(); ASSERT(nelements == elements_set.size()); sort(elements.begin(), elements.end()); // Get the EMT parameters parameters.clear(); for (vector::iterator i = elements.begin(); i != elements.end(); ++i) parameters.push_back(provider->GetParameters(*i)); //ASSERT(nelements == provider->GetNumberOfElements()); // Calculate the quantities that can only be calculated, once the // elements in the simulations are known. provider->CalcGammaEtc(); rFermi = provider->GetCutoffDistance(); //rNbCut = 1.04500185048 * rFermi; rNbCut = provider->GetListCutoffDistance(); cutoffslope = provider->GetCutoffSlope(); chi = provider->GetChi(); if (verbose) cerr << "EMT::InitParameters: rFermi = " << rFermi << " rNbCut = " << rNbCut << " cutoffslope = " << cutoffslope << endl; DEBUGPRINT; } bool EMT::CalcReq_Forces(PyObject *pyatoms) { atoms->Begin(pyatoms); bool required = (counters.forces != atoms->GetPositionsCounter()); atoms->End(); return required; } const vector &EMT::GetForces(PyObject *pyatoms) { USETIMER("EMT::GetForces") DEBUGPRINT; VERB(" Force["); ASSERT(atoms != NULL); atoms->Begin(pyatoms); recalc.nblist = CheckNeighborList(); recalc.forces = (counters.forces != atoms->GetPositionsCounter()); if (recalc.forces) { recalc.ids = (counters.ids != atoms->GetPositionsCounter()); recalc.sigma1 = (counters.sigma1 != atoms->GetPositionsCounter()); recalc.beforeforces = (counters.beforeforces != atoms->GetPositionsCounter()); DEBUGPRINT; CalculateForces(); counters.beforeforces = atoms->GetPositionsCounter(); counters.forces = atoms->GetPositionsCounter(); VERB("]" << flush); } else { VERB("-]"); ASSERT(recalc.nblist == false); } DEBUGPRINT; atoms->End(); MEMORY; DEBUGPRINT; return force; } void EMT::CalculateForces() { CHECKNOASAPERROR; #ifdef _OPENMP #pragma omp parallel #endif // _OPENMP { if (recalc.nblist) UpdateNeighborList(); CalculateIDs(); CalculateSigmas(false); /* Skip sigma2 */ CalculateEnergiesAfterSigmas(false); /* Skip actual energy calc. */ if (nelements > 1) CalculateForcesAfterEnergies(); else CalculateForcesAfterEnergiesSingle(); } PROPAGATEASAPERROR; } bool EMT::CalcReq_Virials(PyObject *pyatoms) { atoms->Begin(pyatoms); bool required = (counters.virials != atoms->GetPositionsCounter()); atoms->End(); return required; } bool EMT::CalcReq_Virial(PyObject *pyatoms) { return CalcReq_Virials(pyatoms); } const vector &EMT::GetVirials(PyObject *pyatoms) { USETIMER("EMT::GetVirials") DEBUGPRINT; VERB(" Virials["); ASSERT(atoms != NULL); atoms->Begin(pyatoms); recalc.nblist = CheckNeighborList(); recalc.virials = (counters.virials != atoms->GetPositionsCounter()); if (recalc.virials) { recalc.ids = (counters.ids != atoms->GetPositionsCounter()); recalc.sigma1 = (counters.sigma1 != atoms->GetPositionsCounter()); recalc.beforeforces = (counters.beforeforces != atoms->GetPositionsCounter()); recalc.forces = (counters.forces != atoms->GetPositionsCounter()); DEBUGPRINT; if (this->virials.size() == 0) AllocateStress(); CalculateVirials(); } else { ASSERT(recalc.nblist == false); } VERB("]" << flush); DEBUGPRINT; counters.virials = atoms->GetPositionsCounter(); counters.beforeforces = atoms->GetPositionsCounter(); counters.forces = atoms->GetPositionsCounter(); atoms->End(); return virials; } void EMT::CalculateVirials() { if (recalc.virials) { CHECKNOASAPERROR; #ifdef _OPENMP #pragma omp parallel #endif // _OPENMP { if (recalc.nblist) UpdateNeighborList(); CalculateIDs(); CalculateSigmas(false); // Skip sigma2 CalculateEnergiesAfterSigmas(false); // Skip actual energy calc. if (nelements > 1) CalculateForcesAfterEnergies(); else CalculateForcesAfterEnergiesSingle(); } PROPAGATEASAPERROR; } } void EMT::GetAtomicVolumes(vector &v) { v.resize(nAtoms); const double fourpithird = 4.1887902048; // 4 * PI / 3 for (int i = 0; i < nAtoms; i++) { double s0 = parameters[id[i]]->seq; v[i] = fourpithird * s0 * s0 * s0; } } bool EMT::CalcReq_Energy(PyObject *pyatoms) { atoms->Begin(pyatoms); bool required = (counters.energies != atoms->GetPositionsCounter()); atoms->End(); return required; } const vector &EMT::GetPotentialEnergies(PyObject *pyatoms) { USETIMER("EMT::GetPotentialEnergies"); DEBUGPRINT; VERB(" Energies["); ASSERT(atoms != NULL); // Atoms may already be open if called from MonteCarloEMT object. if (!skip_begin) atoms->Begin(pyatoms); else skip_begin = false; recalc.nblist = CheckNeighborList(); recalc.energies = (counters.energies != atoms->GetPositionsCounter()); if (recalc.energies) { recalc.ids = (counters.ids != atoms->GetPositionsCounter()); recalc.sigma1 = (counters.sigma1 != atoms->GetPositionsCounter()); recalc.sigma2 = (counters.sigma2 != atoms->GetPositionsCounter()); recalc.beforeforces = (counters.beforeforces != atoms->GetPositionsCounter()); DEBUGPRINT; CalculateEnergies(); counters.energies = atoms->GetPositionsCounter(); counters.beforeforces = atoms->GetPositionsCounter(); } else { ASSERT(counters.beforeforces == atoms->GetPositionsCounter()); ASSERT(recalc.nblist == false); if(subtractE0) for (int i = 0; i < nAtoms; i++) Epot[i] = Ec[i] + Eas[i] - parameters[id[i]]->e0; else for (int i = 0; i < nAtoms; i++) Epot[i] = Ec[i] + Eas[i]; VERB("-"); } ASSERT(Epot.size() == nAtoms); DEBUGPRINT; VERB("]" << flush); atoms->End(); return Epot; } void EMT::CalculateEnergies() { CHECKNOASAPERROR; #ifdef _OPENMP #pragma omp parallel #endif // _OPENMP { if (recalc.nblist) UpdateNeighborList(); CalculateIDs(); CalculateSigmas(true); CalculateEnergiesAfterSigmas(true); } PROPAGATEASAPERROR; } double EMT::GetPotentialEnergy(PyObject *pyatoms) { USETIMER("EMT::GetPotentialEnergy"); VERB(" Energy["); DEBUGPRINT; double etot = 0.0; const vector &e = GetPotentialEnergies(pyatoms); for (int i = 0; i < nAtoms; i++) etot += e[i]; DEBUGPRINT; VERB("]" << flush); return etot; } bool EMT::CheckNeighborList() { RETURNIFASAPERROR2(false); USETIMER("EMT::CheckNeighborList"); DEBUGPRINT; ASSERT(atoms != NULL); bool update = (nblist == NULL) || nblist->IsInvalid(); // Update if invalid if (!update && (counters.nblist != atoms->GetPositionsCounter())) { VERB("n"); update = nblist->CheckNeighborList(); } // May communicate update = atoms->UpdateBeforeCalculation(update, rNbCut * (1 + driftfactor)); counters.nblist = atoms->GetPositionsCounter(); return update; } void EMT::UpdateNeighborList() { RETURNIFASAPERROR; USETIMER("EMT::UpdateNeighborList"); DEBUGPRINT; VERB("N"); DEBUGPRINT; RETURNIFASAPERROR; if (nblist) { DEBUGPRINT; nblist->UpdateNeighborList(); RETURNIFASAPERROR; #ifdef _OPENMP #pragma omp single #endif // _OPENMP { if ((nAtoms != atoms->GetNumberOfAtoms()) || (nSize - nAtoms != atoms->GetNumberOfGhostAtoms())) { nAtoms = atoms->GetNumberOfAtoms(); nSize = nAtoms + atoms->GetNumberOfGhostAtoms(); ghostatoms = atoms->HasGhostAtoms(); Allocate(); } } } else { #ifdef _OPENMP #pragma omp barrier #endif // _OPENMP // First call, create the neighbor list. DEBUGPRINT; CreateNeighborList(); RETURNIFASAPERROR; #ifdef _OPENMP #pragma omp single #endif // _OPENMP { nAtoms = atoms->GetNumberOfAtoms(); nSize = nAtoms + atoms->GetNumberOfGhostAtoms(); ghostatoms = atoms->HasGhostAtoms(); Allocate(); } } DEBUGPRINT; } // Will be replaced in MonteCarloEMT void EMT::CreateNeighborList() { #ifdef ASAP_FOR_KIM // This function is never called when used as an OpenKIM potential // In that case, the NeighborList object does not exist, and this // function does not compile. throw AsapError("Internal inconsistency: EMT::CreateNeighborList called in KIM calculator."); #else RETURNIFASAPERROR; MEMORY; USETIMER("EMT::CreateNeighborList"); if (!initialized) THROW_RETURN( AsapError("EMT object has not been initialized!") ); #ifdef _OPENMP #pragma omp single #endif // _OPENMP { PyAsap_NeighborLocatorObject *nbl; nbl = PyAsap_NewNeighborList(atoms, rNbCut, driftfactor); nblist = nbl->cobj; nblist->verbose = verbose; nblist_obj = (PyObject *) nbl; MEMORY; } nblist->UpdateNeighborList(); MEMORY; #endif // not ASAP_FOR_KIM } /// IDs are a recoding of the atomic numbers used as indices into /// various arrays. Atomic numbers are not practical for this as they /// are not contiguous. void EMT::CalculateIDs() { RETURNIFASAPERROR; USETIMER("EMT::CalculateIDs"); DEBUGPRINT; if (!recalc.ids || nelements == 1) return; VERB("i"); DEBUGPRINT; // If there is only one element, all IDs are 0 and do not have to be // calculated. Otherwise the ID is the offset into the list of elements. const asap_z_int *RESTRICT z = atoms->GetAtomicNumbers(); int nSize = this->nSize; // These two lines help optimization. int *RESTRICT id = &(this->id)[0]; for (int i = 0; i < nelements; i++) { int zcand = parameters[i]->Z; #ifdef _OPENMP #pragma omp for #endif // _OPENMP for (int j = 0; j < nSize; j++) if (z[j] == zcand) id[j] = i; } #ifdef _OPENMP #pragma omp master #endif // _OPENMP counters.ids = atoms->GetPositionsCounter(); DEBUGPRINT; } void EMT::CalculateSigmas(bool calculatesigma2) { RETURNIFASAPERROR; USETIMER("EMT::CalculateSigmas"); DEBUGPRINT; if (!(recalc.sigma1 || (calculatesigma2 && recalc.sigma2))) return; #ifdef _OPENMP #pragma omp master #endif // _OPENMP { if (calculatesigma2) { VERB("2"); } else { VERB("1"); } } DEBUGPRINT; int maxnblen = nblist->MaxNeighborListLength(); if (maxnblen > BUFLEN) { const Vec *cell = atoms->GetCell(); THROW_RETURN( AsapError("Neighborlist overrun (did you squeeze your atoms?).\n") << " Longest neighbor list is " << maxnblen << ".\n" << " Cell is " << cell[0] << ", " << cell[1] << ", " << cell[2] ); } #ifdef ASAP_FOR_KIM const int *contributes = atoms->GetParticleContributes(); ASSERT(nAtoms == nSize); // In OpenKIM, non-contributing atoms are not always at the end. #endif // Buffer data: TinyMatrix nbatch(nelements,nelements); TinyMatrix self(nelements,nelements); TinyMatrix other(nelements,nelements); TinyMatrix rnb(nelements,nelements); TinyMatrix sqdist(nelements,nelements); vector other_buf(BUFLEN); vector rnb_buf(BUFLEN); vector sqdist_buf(BUFLEN); int nSize = this->nSize; // Helps optimization DEBUGPRINT; /* Set sigmas to zero */ for (int i = 0; i < nelements; i++) { double *RESTRICT s1 = &sigma1[i][0]; double *RESTRICT s2 = &sigma2[i][0]; #ifdef _OPENMP #pragma omp for nowait #endif // _OPENMP for (int j = 0; j < nSize; j++) { s1[j] = 0.0; s2[j] = 0.0; } } #ifdef _OPENMP #pragma omp barrier #endif // _OPENMP /* No atoms in batch pools */ for (int i = 0; i < nelements; i++) for (int j = 0; j < nelements; j++) nbatch[i][j] = 0; // Loop over atoms #ifdef _OPENMP #pragma omp for #endif // _OPENMP for (int atom = 0; atom < nAtoms; atom++) { #ifdef ASAP_FOR_KIM if (!contributes[atom]) continue; // Skip non-contributing atoms. #endif int zself = id[atom]; // Get neighbors and loop over them. Simplest if only one element if (nelements == 1) { int nbat = nbatch[0][0]; // only element int size = BUFLEN-nbat; int n; if (always_fullnblist) n = nblist->GetFullNeighbors(atom, other[0][0]+nbat, rnb[0][0]+nbat, sqdist[0][0]+nbat, size); else n = nblist->GetNeighbors(atom, other[0][0]+nbat, rnb[0][0]+nbat, sqdist[0][0]+nbat, size); ASSERT(size >= 0); // REMOVE LATER !!! for (int i = nbat; i < nbat+n; i++) self[0][0][i] = atom; nbatch[0][0] += n; } else { int size = BUFLEN; int n; if (always_fullnblist) n = nblist->GetFullNeighbors(atom, &other_buf[0], &rnb_buf[0], &sqdist_buf[0], size); else n = nblist->GetNeighbors(atom, &other_buf[0], &rnb_buf[0], &sqdist_buf[0], size); ASSERT(size >= 0); // REMOVE LATER !!! int *RESTRICT id = &(this->id)[0]; for (int i = 0; i < n; i++) { int zother = id[other_buf[i]]; int nbat = nbatch[zself][zother]++; // Count this atom self[zself][zother][nbat] = atom; other[zself][zother][nbat] = other_buf[i]; sqdist[zself][zother][nbat] = sqdist_buf[i]; } } // Now process any full batch for (int zo = 0; zo < nelements; zo++) if (nbatch[zself][zo] >= BUFLEN - maxnblen) { sigma_batch(self[zself][zo], other[zself][zo], sqdist[zself][zo], zself, zo, nbatch[zself][zo], calculatesigma2); nbatch[zself][zo] = 0; } } // Loop over atoms /* Process the remaining incomplete batches */ for (int zs = 0; zs < nelements; zs++) for (int zo = 0; zo < nelements; zo++) if (nbatch[zs][zo]) sigma_batch(self[zs][zo], other[zs][zo], sqdist[zs][zo], zs, zo, nbatch[zs][zo], calculatesigma2); #ifdef _OPENMP #pragma omp single #endif // _OPENMP { sigma2isvalid = calculatesigma2; /* Remember if it was calculated. */ counters.sigma1 = atoms->GetPositionsCounter(); if (calculatesigma2) counters.sigma2 = atoms->GetPositionsCounter(); } } void EMT::sigma_batch(int *RESTRICT self, int *RESTRICT other, double *RESTRICT sq_dist, int zs, int zo, int n, bool calculatesigma2, bool partialupdate /* = false */) { USETIMER("EMT::sigma_batch"); #ifdef ASAP_MKL double *temporary = (double *) mkl_malloc(10 * BUFLEN * sizeof(double), ASAP_MKL_ALIGN); #else // ASAP_MKL double *temporary = new double[4*BUFLEN]; #endif //ASAP_MKL double *RESTRICT dsigma1s = &temporary[0]; double *RESTRICT dsigma2s = dsigma1s + BUFLEN; double *RESTRICT dsigma1o = dsigma2s + BUFLEN; double *RESTRICT dsigma2o = dsigma1o + BUFLEN; //ASSERT(dsigma2o - temporary == (4 - 1) * BUFLEN); #ifdef ASAP_MKL double *RESTRICT tmp_a = dsigma2o + BUFLEN; double *RESTRICT tmp_b = tmp_a + BUFLEN; double *RESTRICT tmp_c = tmp_b + BUFLEN; double *RESTRICT tmp_d = tmp_c + BUFLEN; double *RESTRICT tmp_e = tmp_d + BUFLEN; double *RESTRICT tmp_f = tmp_e + BUFLEN; //ASSERT(tmp_f - temporary == (10 - 1) * BUFLEN); #endif //ASAP_MKL double cutslopecutdist; double other_eta2betaseq, self_eta2betaseq; double other_kappaoverbeta, self_kappaoverbeta; double other_kappaseq, self_kappaseq; double *s1s, *s1o, *s2s, *s2o; const emt_parameters *emtself, *emtother; ASSERT(n <= BUFLEN); /* Get EMT parameters REWRITE !!! XXXX */ emtself = parameters[zs]; emtother = parameters[zo]; double cutoffslope = this->cutoffslope; cutslopecutdist = cutoffslope * rFermi; other_eta2betaseq = emtother->eta2 * Beta * emtother->seq; self_eta2betaseq = emtself->eta2 * Beta * emtself->seq; other_kappaoverbeta = emtother->kappa / Beta; self_kappaoverbeta = emtself->kappa / Beta; other_kappaseq = emtother->kappa * emtother->seq; self_kappaseq = emtself->kappa * emtself->seq; double other_eta2 = emtother->eta2; double self_eta2 = emtself->eta2; ASSERT(n <= BUFLEN); // We have four cases to handled, controlled by two booleans. // 1) Whether sigma2 needs to be calculated, 2) Whether we need a // different calculation for the two atoms interacting. The latter is // the case if the atomic numbers are different, unless operating in // full neighborlist mode or doing a partial update for MonteCarloEMT, // in these cases calculations are not reused. bool calculateother = (zs != zo) && !always_fullnblist && !partialupdate; if (!calculateother && !calculatesigma2) { #ifdef ASAP_MKL // dist[] = tmp_a[] = sqrt(sq_dist[]) vdSqrt(n, sq_dist, tmp_a); for (int i = 0; i < n; i++) { tmp_b[i] = cutoffslope * tmp_a[i] - cutslopecutdist; tmp_c[i] = -other_eta2 * tmp_a[i] + other_eta2betaseq; } // tmp_a[] = exp(tmp_b[]) = exp(cutoffslope * dist[] - cutslopecutdist) vdExp(n, tmp_b, tmp_a); // tmp_b[] = exp(tmp_c[]) = exp(-other_eta2 * dist[] + other_eta2betaseq) vdExp(n, tmp_c, tmp_b); for (int i = 0; i < n; i++) { double wght = 1.0 / (1.0 + tmp_a[i]); dsigma1s[i] = wght * tmp_b[i]; } #else // ASAP_MKL for (int i = 0; i < n; i++) { /* dist = sqrt(sq_dist), distances between atoms */ double dist = sqrt(sq_dist[i]); /* Calculate weight factor (cutoff function) */ double wght = 1.0 / (1.0 + exp(cutoffslope * dist - cutslopecutdist)); /* Contribution to sigma1 */ dsigma1s[i] = wght * exp(-other_eta2 * dist + other_eta2betaseq); } #endif // ASAP_MKL } else if (calculateother && !calculatesigma2) { #ifdef ASAP_MKL // dist[] = tmp_a[] = sqrt(sq_dist[]) vdSqrt(n, sq_dist, tmp_a); for (int i = 0; i < n; i++) { tmp_b[i] = cutoffslope * tmp_a[i] - cutslopecutdist; tmp_c[i] = -other_eta2 * tmp_a[i] + other_eta2betaseq; tmp_d[i] = -self_eta2 * tmp_a[i] + self_eta2betaseq; } // tmp_a[] = exp(tmp_b[]) = exp(cutoffslope * dist[] - cutslopecutdist) vdExp(n, tmp_b, tmp_a); // tmp_b[] = exp(tmp_c[]) = exp(-other_eta2 * dist[] + other_eta2betaseq) vdExp(n, tmp_c, tmp_b); // tmp_c[] = exp(tmp_d[]) = exp(-self_eta2 * dist[] + self_eta2betaseq) vdExp(n, tmp_d, tmp_c); for (int i = 0; i < n; i++) { double wght = 1.0 / (1.0 + tmp_a[i]); dsigma1s[i] = wght * tmp_b[i]; dsigma1o[i] = wght * tmp_c[i]; } #else // ASAP_MKL for (int i = 0; i < n; i++) { /* dist = sqrt(sq_dist), distances between atoms */ double dist = sqrt(sq_dist[i]); /* Calculate weight factor (cutoff function) */ double wght = 1.0 / (1.0 + exp(cutoffslope * dist - cutslopecutdist)); /* Contributions to sigma1 */ dsigma1s[i] = wght * exp(-other_eta2 * dist + other_eta2betaseq); dsigma1o[i] = wght * exp(-self_eta2 * dist + self_eta2betaseq); } #endif //ASAP_MKL } else if (!calculateother && calculatesigma2) { #ifdef ASAP_MKL // dist[] = tmp_a[] = sqrt(sq_dist[]) vdSqrt(n, sq_dist, tmp_a); for (int i = 0; i < n; i++) { tmp_b[i] = cutoffslope * tmp_a[i] - cutslopecutdist; tmp_c[i] = -other_eta2 * tmp_a[i] + other_eta2betaseq; tmp_d[i] = -other_kappaoverbeta * tmp_a[i] + other_kappaseq; } // tmp_a[] = exp(tmp_b[]) = exp(cutoffslope * dist[] - cutslopecutdist) vdExp(n, tmp_b, tmp_a); // tmp_b[] = exp(tmp_c[]) = exp(-other_eta2 * dist[] + other_eta2betaseq) vdExp(n, tmp_c, tmp_b); // tmp_c[] = exp(tmp_d[]) = exp(-other_kappaoverbeta * dist[] + other_kappaseq) vdExp(n, tmp_d, tmp_c); for (int i = 0; i < n; i++) { double wght = 1.0 / (1.0 + tmp_a[i]); dsigma1s[i] = wght * tmp_b[i]; dsigma2s[i] = wght * tmp_c[i]; } #else // ASAP_MKL for (int i = 0; i < n; i++) { /* dist = sqrt(sq_dist), distances between atoms */ double dist = sqrt(sq_dist[i]); /* Calculate weight factor (cutoff function) */ double wght = 1.0 / (1.0 + exp(cutoffslope * dist - cutslopecutdist)); /* Contribution to sigma1 */ dsigma1s[i] = wght * exp(-other_eta2 * dist + other_eta2betaseq); dsigma2s[i] = wght * exp(-other_kappaoverbeta * dist + other_kappaseq); } #endif //ASAP_MKL } else // calculateother && calculatesigma2 { #ifdef ASAP_MKL // dist[] = tmp_a[] = sqrt(sq_dist[]) vdSqrt(n, sq_dist, tmp_a); for (int i = 0; i < n; i++) { tmp_b[i] = cutoffslope * tmp_a[i] - cutslopecutdist; tmp_c[i] = -other_eta2 * tmp_a[i] + other_eta2betaseq; tmp_d[i] = -self_eta2 * tmp_a[i] + self_eta2betaseq; tmp_e[i] = -other_kappaoverbeta * tmp_a[i] + other_kappaseq; tmp_f[i] = -self_kappaoverbeta * tmp_a[i] + self_kappaseq; } // tmp_a[] = exp(tmp_b[]) = exp(cutoffslope * dist[] - cutslopecutdist) vdExp(n, tmp_b, tmp_a); // tmp_b[] = exp(tmp_c[]) = exp(-other_eta2 * dist[] + other_eta2betaseq) vdExp(n, tmp_c, tmp_b); // tmp_c[] = exp(tmp_d[]) = exp(-self_eta2 * dist[] + self_eta2betaseq) vdExp(n, tmp_d, tmp_c); // tmp_d[] = exp(tmp_e[]) = exp(-other_kappaoverbeta * dist[] + other_kappaseq) vdExp(n, tmp_e, tmp_d); // tmp_e[] = exp(tmp_f[]) = exp(-self_kappaoverbeta * dist[] + self_kappaseq) vdExp(n, tmp_f, tmp_e); for (int i = 0; i < n; i++) { double wght = 1.0 / (1.0 + tmp_a[i]); dsigma1s[i] = wght * tmp_b[i]; dsigma1o[i] = wght * tmp_c[i]; dsigma2s[i] = wght * tmp_d[i]; dsigma2o[i] = wght * tmp_e[i]; } #else // ASAP_MKL for (int i = 0; i < n; i++) { /* dist = sqrt(sq_dist), distances between atoms */ double dist = sqrt(sq_dist[i]); /* Calculate weight factor (cutoff function) */ double wght = 1.0 / (1.0 + exp(cutoffslope * dist - cutslopecutdist)); /* Contributions to sigma1 */ dsigma1s[i] = wght * exp(-other_eta2 * dist + other_eta2betaseq); dsigma1o[i] = wght * exp(-self_eta2 * dist + self_eta2betaseq); dsigma2s[i] = wght * exp(-other_kappaoverbeta * dist + other_kappaseq); dsigma2o[i] = wght * exp(-self_kappaoverbeta * dist + self_kappaseq); } #endif // ASAP_MKL } // Distribute the results to the atoms. if (!partialupdate && !always_fullnblist) { // This is the normal branch, both atoms need to be updated. if (calculatesigma2) { /* Distribute contributions to sigma1 and sigma2 */ s1o = &sigma1[zo][0]; s1s = &sigma1[zs][0]; s2o = &sigma2[zo][0]; s2s = &sigma2[zs][0]; #ifdef _OPENMP #pragma omp critical #endif // _OPENMP { if (calculateother) { for (int i = 0; i < n; i++) { int s = self[i]; int o = other[i]; s1o[s] += dsigma1s[i]; s2o[s] += dsigma2s[i]; if (o < nAtoms) // Dont add to ghost atoms { s1s[o] += dsigma1o[i]; s2s[o] += dsigma2o[i]; } } } else { for (int i = 0; i < n; i++) { int s = self[i]; int o = other[i]; s1o[s] += dsigma1s[i]; s2o[s] += dsigma2s[i]; if (o < nAtoms) // Dont add to ghost atoms { s1s[o] += dsigma1s[i]; s2s[o] += dsigma2s[i]; } } } } } else { /* Distribute contributions to sigma1. */ s1o = &sigma1[zo][0]; s1s = &sigma1[zs][0]; #ifdef _OPENMP #pragma omp critical #endif // _OPENMP { if (calculateother) { for (int i = 0; i < n; i++) { int s = self[i]; int o = other[i]; s1o[s] += dsigma1s[i]; if (o < nAtoms) s1s[o] += dsigma1o[i]; } } else { for (int i = 0; i < n; i++) { int s = self[i]; int o = other[i]; s1o[s] += dsigma1s[i]; if (o < nAtoms) s1s[o] += dsigma1s[i]; } } } } } else // partialupdate OR always_fullnblist { // This branch may be taken by MonteCarloEMT during a partial update, // or by the normal EMT when not using Minimum Image Convention. Since // full neighbor lists are used, only update the // 'self' atom, not the 'other' atom. // In OpenKIM, this branch is always taken. if (calculatesigma2) { /* Distribute contributions to sigma1 and sigma2 */ s1o = &sigma1[zo][0]; s2o = &sigma2[zo][0]; #ifdef _OPENMP #pragma omp critical #endif // _OPENMP { for (int i = 0; i < n; i++) { int s = self[i]; s1o[s] += dsigma1s[i]; s2o[s] += dsigma2s[i]; } } } else { /* Distribute contributions to sigma1 only */ s1o = &sigma1[zo][0]; #ifdef _OPENMP #pragma omp critical #endif // _OPENMP { for (int i = 0; i < n; i++) { int s = self[i]; s1o[s] += dsigma1s[i]; } } } } #ifdef ASAP_MKL mkl_free(temporary); #else delete[] temporary; #endif } // Calculate the energies if calc_Epot is true, otherwise just calculate the // derivatives needed for the forces. void EMT::CalculateEnergiesAfterSigmas(bool calc_Epot) { RETURNIFASAPERROR; USETIMER("EMT::CalculateEnergiesAfterSigmas"); DEBUGPRINT; //vector sigma(nSize); double *RESTRICT sigma = &tmp_double[0]; bool dosigmapart = (recalc.beforeforces || (calc_Epot && recalc.energies)); bool doepotpart = (calc_Epot && recalc.energies); int zs, zo; double s; // Better performance if static ??? ASSERT(nelements < NMAXELEMENTS); double inv12gamma1[NMAXELEMENTS]; double neginvbetaeta2[NMAXELEMENTS]; double neglambda[NMAXELEMENTS]; double lambdaseq[NMAXELEMENTS]; double negkappa[NMAXELEMENTS]; double kappaseq[NMAXELEMENTS]; double nege0lambdalambda[NMAXELEMENTS]; double e0lambdalambdaseq[NMAXELEMENTS]; double neg6v0kappa[NMAXELEMENTS]; double e0lambda[NMAXELEMENTS]; double eccnst[NMAXELEMENTS]; double sixv0[NMAXELEMENTS]; double neghalfv0overgamma2[NMAXELEMENTS]; double seq[NMAXELEMENTS]; int *id = &(this->id)[0]; if (dosigmapart) { #ifdef _OPENMP #pragma omp master #endif // _OPENMP { VERB("b"); DEBUGPRINT; } /* Calculate total sigma1 */ #ifdef _OPENMP #pragma omp for #endif // _OPENMP for (int i = 0; i < nAtoms; i++) { double s = 0.0; int zs = id[i]; for (int zo = 0; zo < nelements; zo++) s += (*chi)[zs][zo] * sigma1[zo][i]; if (s < 1.0e-40) s = 1.0e-40; sigma[i] = s; } ASSERT(nAtoms == radius.size() && nAtoms == Ec.size() && nSize == dEds.size()); } /* Calculate conbinations of EMT parameters */ for (int i = 0; i < nelements; i++) { inv12gamma1[i] = 1.0 / (12.0 * parameters[i]->gamma1); neginvbetaeta2[i] = -1.0 / (Beta * parameters[i]->eta2); neglambda[i] = - parameters[i]->lambda; lambdaseq[i] = parameters[i]->lambda * parameters[i]->seq; negkappa[i] = - parameters[i]->kappa; kappaseq[i] = parameters[i]->kappa * parameters[i]->seq; nege0lambdalambda[i] = - parameters[i]->e0 * parameters[i]->lambda * parameters[i]->lambda; e0lambdalambdaseq[i] = parameters[i]->e0 * parameters[i]->lambda * parameters[i]->lambda * parameters[i]->seq; neg6v0kappa[i] = - 6.0 * parameters[i]->V0 * parameters[i]->kappa; e0lambda[i] = parameters[i]->e0 * parameters[i]->lambda; eccnst[i] = parameters[i]->e0 * (1.0 - parameters[i]->lambda * parameters[i]->seq); sixv0[i] = 6.0 * parameters[i]->V0; neghalfv0overgamma2[i] = -0.5 * parameters[i]->V0 / parameters[i]->gamma2; seq[i] = parameters[i]->seq; } int nAtoms = this->nAtoms; // Helps optimization. #define LOCALPTR(XX) double *RESTRICT XX = &(this->XX)[0] LOCALPTR(Ec); LOCALPTR(Eas); LOCALPTR(Epot); LOCALPTR(radius); LOCALPTR(dEds); LOCALPTR(ex2); if (dosigmapart) { #ifdef _OPENMP #pragma omp for #endif // _OPENMP for (int i = 0; i < nAtoms; i++) { asap_z_int z = id[i]; // Actually not Z (atomic no) but ID no. double r = seq[z] + neginvbetaeta2[z] * log(sigma[i] * inv12gamma1[z]); radius[i] = r; double ex1 = exp(neglambda[z] * r + lambdaseq[z]); double e2 = exp(negkappa[z] * r + kappaseq[z]); ex2[i] = e2; double tmp = (nege0lambdalambda[z] * r + e0lambdalambdaseq[z]) * ex1 + neg6v0kappa[z] * e2; dEds[i] = tmp * neginvbetaeta2[z] / sigma[i]; Ec[i] = (e0lambda[z] * r + eccnst[z]) * ex1; } #ifdef _OPENMP #pragma omp for #endif // _OPENMP for (int i = nAtoms; i < nSize; i++) dEds[i] = 0.0; } if (calc_Epot) { DEBUGPRINT; if (doepotpart) { #ifdef _OPENMP #pragma omp master #endif // _OPENMP { VERB("e"); DEBUGPRINT; } /* We also need Eas, but only for the real atoms */ ASSERT(sigma2isvalid); ASSERT(counters.sigma2 == atoms->GetPositionsCounter()); /* Calculate total sigma2 */ #ifdef _OPENMP #pragma omp for #endif // _OPENMP for (int i = 0; i < nAtoms; i++) { s = 0.0; zs = id[i]; for (zo = 0; zo < nelements; zo++) s += (*chi)[zs][zo] * sigma2[zo][i]; Eas[i] = sixv0[zs] * ex2[i] + neghalfv0overgamma2[zs] * s; } } /* Add the energies */ DEBUGPRINT; if(subtractE0) { #ifdef _OPENMP #pragma omp for #endif // _OPENMP for (int i = 0; i < nAtoms; i++) Epot[i] = Ec[i] + Eas[i] - parameters[id[i]]->e0; } else { #ifdef _OPENMP #pragma omp for #endif // _OPENMP for (int i = 0; i < nAtoms; i++) Epot[i] = Ec[i] + Eas[i]; } } // if (calc_Epot) #ifdef ASAP_FOR_KIM // In OpenKIM, nAtoms == nSize and the non-contributing atoms had // their energies calculates. Zap them. const int *contributes = atoms->GetParticleContributes(); for (int i = 0; i < nAtoms; i++) if (!contributes[i]) Epot[i] = Ec[i] = Eas[i] = dEds[i] = 0; #endif // ASAP_FOR_KIM DEBUGPRINT; } void EMT::CalculateForcesAfterEnergies() { RETURNIFASAPERROR; if (!(recalc.forces || (virials.size() && recalc.virials))) return; #ifdef _OPENMP #pragma omp master #endif // _OPENMP { VERB("f"); if (virials.size()) { VERB("s"); } } #ifdef _OPENMP #pragma omp barrier #endif // _OPENMP DEBUGPRINT; int maxnblen = nblist->MaxNeighborListLength(); // Buffer data: TinyMatrix nbatch(nelements,nelements); TinyMatrixOfVector > self(nelements,nelements, BUFLEN); TinyMatrixOfVector > other(nelements,nelements, BUFLEN); TinyMatrixOfVector > rnb(nelements,nelements, BUFLEN); TinyMatrixOfVector > sqdist(nelements,nelements, BUFLEN); TinyMatrixOfVector > dEdss(nelements,nelements, BUFLEN); TinyMatrixOfVector > dEdso(nelements,nelements, BUFLEN); vector other_buf(BUFLEN); vector rnb_buf(BUFLEN); vector sqdist_buf(BUFLEN); #ifdef ASAP_FOR_KIM const int *contributes = atoms->GetParticleContributes(); #endif // If there is only one element, CalculateForcesAfterEnergiesSingle should // be used. ASSERT(nelements > 1); int nSize = this->nSize; // Helps optimization. Vec *RESTRICT force = &(this->force)[0]; /* Set forces and virials to zero */ if (virials.size()) { ASSERT(virials.size() == nSize); #ifdef _OPENMP #pragma omp for nowait #endif // _OPENMP for (int i = 0; i < nSize; i++) for (int j = 0; j < 6; j++) virials[i][j] = 0.0; } #ifdef _OPENMP #pragma omp for #endif // _OPENMP for (int i = 0; i < nSize; i++) force[i].x = force[i].y = force[i].z = 0.0; /* Calculate forces */ /* No atoms in batch pools */ for (int i = 0; i < nelements; i++) for (int j = 0; j < nelements; j++) nbatch[i][j] = 0; // Loop over atoms #ifdef _OPENMP #pragma omp for #endif // _OPENMP for (int atom = 0; atom < nAtoms; atom++) { #ifdef ASAP_FOR_KIM if (!contributes[atom]) continue; // Skip non-contributing atom in OpenKIM, where nAtoms == nSize. #endif int zself = id[atom]; // Get neighbors and loop over them. int size = BUFLEN; int n; if (always_fullnblist) n = nblist->GetFullNeighbors(atom, &other_buf[0], &rnb_buf[0], &sqdist_buf[0], size); else n = nblist->GetNeighbors(atom, &other_buf[0], &rnb_buf[0], &sqdist_buf[0], size); ASSERT(size >= 0); // REMOVE LATER for (int i = 0; i < n; i++) { int zother = id[other_buf[i]]; int nbat = nbatch[zself][zother]++; // Count this atom self[zself][zother][nbat] = atom; other[zself][zother][nbat] = other_buf[i]; rnb[zself][zother][nbat][0] = rnb_buf[i][0]; rnb[zself][zother][nbat][1] = rnb_buf[i][1]; rnb[zself][zother][nbat][2] = rnb_buf[i][2]; sqdist[zself][zother][nbat] = sqdist_buf[i]; dEdss[zself][zother][nbat] = dEds[atom]; dEdso[zself][zother][nbat] = dEds[other_buf[i]]; } // Now process any full batch for (int zo = 0; zo < nelements; zo++) if (nbatch[zself][zo] >= BUFLEN - maxnblen) { force_batch(&self[zself][zo][0], &other[zself][zo][0], &rnb[zself][zo][0], &sqdist[zself][zo][0], &dEdss[zself][zo][0], &dEdso[zself][zo][0], zself, zo, nbatch[zself][zo]); nbatch[zself][zo] = 0; } } // Loop over atoms /* Process the remaining incomplete batches */ for (int zs = 0; zs < nelements; zs++) for (int zo = 0; zo < nelements; zo++) if (nbatch[zs][zo]) force_batch(&self[zs][zo][0], &other[zs][zo][0], &rnb[zs][zo][0], &sqdist[zs][zo][0], &dEdss[zs][zo][0], &dEdso[zs][zo][0], zs, zo, nbatch[zs][zo]); DEBUGPRINT; #ifdef _OPENMP #pragma omp barrier #endif // _OPENMP } void EMT::CalculateForcesAfterEnergiesSingle() { RETURNIFASAPERROR; USETIMER("EMT::CalculateForcesAfterEnergiesSingle"); if (!(recalc.forces || (virials.size() && recalc.virials))) return; #ifdef _OPENMP #pragma omp master #endif // _OPENMP { VERB("f"); if (virials.size()) { VERB("s"); } } #ifdef _OPENMP #pragma omp barrier #endif // _OPENMP int maxnblen = nblist->MaxNeighborListLength(); DEBUGPRINT; // Buffer data: int nbatch; vector self(BUFLEN); vector other(BUFLEN); vector rnb(BUFLEN); vector sqdist(BUFLEN); vector dEdss(BUFLEN); vector dEdso(BUFLEN); #ifdef ASAP_FOR_KIM const int *contributes = atoms->GetParticleContributes(); #endif int nSize = this->nSize; // These three lines help optimization. int nAtoms = this->nAtoms; Vec *RESTRICT force = &(this->force)[0]; DEBUGPRINT; // If there is more than one element, CalculateForcesAfterEnergies should // be used. ASSERT(nelements == 1); /* Set forces and virials to zero */ ASSERT(this->force.size() >= nSize); if (virials.size()) { ASSERT(virials.size() == nSize); #ifdef _OPENMP #pragma omp for nowait #endif // _OPENMP for (int i = 0; i < nSize; i++) for (int j = 0; j < 6; j++) virials[i][j] = 0.0; } #ifdef _OPENMP #pragma omp for #endif // _OPENMP for (int i = 0; i < nSize; i++) force[i].x = force[i].y = force[i].z = 0.0; /* Calculate forces */ /* No atoms in batch pool */ DEBUGPRINT; nbatch = 0; // Loop over atoms DEBUGPRINT; #ifdef _OPENMP #pragma omp for #endif // _OPENMP for (int atom = 0; atom < nAtoms; atom++) { #ifdef ASAP_FOR_KIM if (!contributes[atom]) continue; // Skip non-contributing atom in OpenKIM, where nAtoms == nSize. #endif // Get neighbors and loop over them. int size = BUFLEN-nbatch; int n; if (always_fullnblist) n = nblist->GetFullNeighbors(atom, &other[nbatch], &rnb[nbatch], &sqdist[nbatch], size); else n = nblist->GetNeighbors(atom, &other[nbatch], &rnb[nbatch], &sqdist[nbatch], size); double dEdsatom = dEds[atom]; for (int i = nbatch; i < nbatch+n; i++) { self[i] = atom; dEdss[i] = dEdsatom; dEdso[i] = dEds[other[i]]; } nbatch += n; // Now process any full batch if (nbatch >= BUFLEN - maxnblen) { force_batch(&self[0], &other[0], &rnb[0], &sqdist[0], &dEdss[0], &dEdso[0], 0, 0, nbatch); nbatch = 0; } } // Loop over atoms /* Process the remaining incomplete batch */ DEBUGPRINT; if (nbatch) force_batch(&self[0], &other[0], &rnb[0], &sqdist[0], &dEdss[0], &dEdso[0], 0, 0, nbatch); DEBUGPRINT; #ifdef _OPENMP #pragma omp barrier #endif // _OPENMP } // It would be natural to transfer the arrays as vector<> instead of // as pointes, but that confuses the compiler so it cannot discover // that various arrays are disjunct, ruining optimization. For the // same reason, the temporary array is not a vector<>. void EMT::force_batch(const int *RESTRICT self, const int *RESTRICT other, const Vec *RESTRICT rnb, const double *RESTRICT sq_dist, const double *RESTRICT dEdss, const double *RESTRICT dEdso, int zs, int zo, int n) { USETIMER("EMT::force_batch"); #ifdef ASAP_MKL double *temporary = (double *) mkl_malloc(8 * BUFLEN * sizeof(double), ASAP_MKL_ALIGN); double *RESTRICT df = temporary; double *RESTRICT tmp_a = df + BUFLEN; double *RESTRICT tmp_b = tmp_a + BUFLEN; double *RESTRICT tmp_c = tmp_b + BUFLEN; double *RESTRICT tmp_d = tmp_c + BUFLEN; double *RESTRICT tmp_e = tmp_d + BUFLEN; double *RESTRICT tmp_f = tmp_e + BUFLEN; double *RESTRICT tmp_g = tmp_f + BUFLEN; //ASSERT(tmp_g - temporary == (8 - 1) * BUFLEN); #else // ASAP_MKL double *temporary = new double[BUFLEN]; double *RESTRICT df = temporary; #endif // ASAP_MKL double cutslopecutdist, other_eta2betaseq, other_kappaoverbeta; double other_kappaseq, self_eta2betaseq, self_kappaoverbeta; double self_kappaseq, cnst_s, cnst_o; const emt_parameters *emtself, *emtother; //double pairA, exprcut, pairD; //double *tmp; ASSERT(n <= BUFLEN); /* Get EMT parameters */ emtself = parameters[zs]; emtother = parameters[zo]; cutslopecutdist = cutoffslope * rFermi; other_eta2betaseq = emtother->eta2 * Beta * emtother->seq; other_kappaoverbeta = emtother->kappa / Beta; other_kappaseq = emtother->kappa * emtother->seq; self_eta2betaseq = emtself->eta2 * Beta * emtself->seq; self_kappaoverbeta = emtself->kappa / Beta; self_kappaseq = emtself->kappa * emtself->seq; double other_eta2 = emtother->eta2; double self_eta2 = emtself->eta2; cnst_s = -0.5 * emtself->V0 * (*chi)[zs][zo] / emtself->gamma2; cnst_o = -0.5 * emtother->V0 * (*chi)[zo][zs] / emtother->gamma2; double chi_zs_zo = (*chi)[zs][zo]; double chi_zo_zs = (*chi)[zo][zs]; // Three cases: 1) The two atoms have the same atomic number, // 2) they have different atomic number (less reuse of calculations) // 3) we use full neighbor lists (no reuse). if ((zs == zo) && !always_fullnblist) { #ifdef ASAP_MKL /* Get the distances from their squares */ // dist[] = tmp_a[] = sqrt(sq_dist[]) vdSqrt(n, sq_dist, tmp_a); // invdist[] = tmp_e[] = 1.0 / dist[] vdInv(n, tmp_a, tmp_e); for (int i = 0; i < n; i++) { tmp_b[i] = cutoffslope * tmp_a[i] - cutslopecutdist; tmp_c[i] = -other_eta2 * tmp_a[i] + other_eta2betaseq; tmp_d[i] = -other_kappaoverbeta * tmp_a[i] + other_kappaseq; } // tmp_a[] = exp(tmp_b[]) = exp(cutoffslope * dist[] - cutslopecutdist) vdExp(n, tmp_b, tmp_a); // tmp_b[] = exp(tmp_c[]) = exp(-other_eta2 * dist[] + other_eta2betaseq) vdExp(n, tmp_c, tmp_b); // tmp_c[] = exp(tmp_d[]) = exp(-other_kappaoverbeta * dist[] + other_kappaseq) vdExp(n, tmp_d, tmp_c); for (int i = 0; i < n; i++) { /* Calculate cutoff function (weight factor) */ double wght = 1.0 / (1.0 + tmp_a[i]); /* Calculate derivative of the cutoff function. */ double dwdr = -cutoffslope * wght * (1 - wght); double dsigma1dr = (dwdr - wght * other_eta2) * tmp_b[i]; double dsigma2dr = (dwdr + wght * -other_kappaoverbeta) * tmp_c[i]; df[i] = tmp_e[i] * (dsigma1dr * dEdss[i] * chi_zs_zo + cnst_s * dsigma2dr + dsigma1dr * dEdso[i] * chi_zo_zs + cnst_o * dsigma2dr * (other[i] < nAtoms)); } #else //ASAP_MKL for (int i = 0; i < n; i++) { /* Get the distances from their squares */ double dist = sqrt(sq_dist[i]); double inv_dist = 1.0 / dist; /* Calculate cutoff function (weight factor) */ double wght = 1.0 / (1.0 + exp(cutoffslope * dist - cutslopecutdist)); /* Calculate derivative of the cutoff function. */ double dwdr = -cutoffslope * wght * (1 - wght); double dsigma1dr = (dwdr - wght * other_eta2) * exp(-other_eta2 * dist + other_eta2betaseq); double dsigma2dr = (dwdr + wght * -other_kappaoverbeta) * exp(-other_kappaoverbeta * dist + other_kappaseq); df[i] = inv_dist * (dsigma1dr * dEdss[i] * chi_zs_zo + cnst_s * dsigma2dr // * (self[i] < nAtoms) always true + dsigma1dr * dEdso[i] * chi_zo_zs + cnst_o * dsigma2dr * (other[i] < nAtoms)); } #endif //ASAP_MKL } else if (!always_fullnblist) { // zs != zo. #ifdef ASAP_MKL /* Get the distances from their squares */ // dist[] = tmp_a[] = sqrt(sq_dist[]) vdSqrt(n, sq_dist, tmp_a); // invdist[] = tmp_g[] = 1.0 / dist[] vdInv(n, tmp_a, tmp_g); for (int i = 0; i < n; i++) { tmp_b[i] = cutoffslope * tmp_a[i] - cutslopecutdist; tmp_c[i] = -other_eta2 * tmp_a[i] + other_eta2betaseq; tmp_d[i] = -other_kappaoverbeta * tmp_a[i] + other_kappaseq; tmp_e[i] = -self_eta2 * tmp_a[i] + self_eta2betaseq; tmp_f[i] = -self_kappaoverbeta * tmp_a[i] + self_kappaseq; } // tmp_a[] = exp(tmp_b[]) = exp(cutoffslope * dist[] - cutslopecutdist) vdExp(n, tmp_b, tmp_a); // tmp_b[] = exp(tmp_c[]) = exp(-other_eta2 * dist[] + other_eta2betaseq) vdExp(n, tmp_c, tmp_b); // tmp_c[] = exp(tmp_d[]) = exp(-other_kappaoverbeta * dist[] + other_kappaseq) vdExp(n, tmp_d, tmp_c); // tmp_d[] = exp(tmp_e[]) = exp(-self_eta2 * dist[] + self_eta2betaseq) vdExp(n, tmp_e, tmp_d); // tmp_e[] = exp(tmp_f[]) = exp(-self_kappaoverbeta * dist[] + self_kappaseq) vdExp(n, tmp_f, tmp_e); for (int i = 0; i < n; i++) { /* Calculate cutoff function (weight factor) */ double wght = 1.0 / (1.0 + tmp_a[i]); /* Calculate derivative of the cutoff function. */ double dwdr = -cutoffslope * wght * (1 - wght); double dsigma1dr_o = (dwdr - wght * other_eta2) * tmp_b[i]; double dsigma2dr_o = (dwdr + wght * -other_kappaoverbeta) * tmp_c[i]; double dsigma1dr_s = (dwdr - wght * self_eta2) * tmp_d[i]; double dsigma2dr_s = (dwdr + wght * -self_kappaoverbeta) * tmp_e[i]; df[i] = tmp_g[i] * (dsigma1dr_o * dEdss[i] * chi_zs_zo + cnst_s * dsigma2dr_o + dsigma1dr_s * dEdso[i] * chi_zo_zs + cnst_o * dsigma2dr_s * (other[i] < nAtoms)); } #else // ASAP_MKL for (int i = 0; i < n; i++) { /* Get the distances from their squares */ double dist = sqrt(sq_dist[i]); double inv_dist = 1.0 / dist; /* Calculate cutoff function (weight factor) */ double wght = 1.0 / (1.0 + exp(cutoffslope * dist - cutslopecutdist)); /* Calculate derivative of the cutoff function. */ double dwdr = -cutoffslope * wght * (1 - wght); double dsigma1dr_o = (dwdr - wght * other_eta2) * exp(-other_eta2 * dist + other_eta2betaseq); double dsigma2dr_o = (dwdr + wght * -other_kappaoverbeta) * exp(-other_kappaoverbeta * dist + other_kappaseq); double dsigma1dr_s = (dwdr - wght * self_eta2) * exp(-self_eta2 * dist + self_eta2betaseq); double dsigma2dr_s = (dwdr + wght * -self_kappaoverbeta) * exp(-self_kappaoverbeta * dist + self_kappaseq); df[i] = inv_dist * (dsigma1dr_o * dEdss[i] * chi_zs_zo + cnst_s * dsigma2dr_o // * (self[i] < nAtoms) always true! + dsigma1dr_s * dEdso[i] * chi_zo_zs + cnst_o * dsigma2dr_s * (other[i] < nAtoms)); } #endif //ASAP_MKL } else { // Using full neighbor lists #ifdef ASAP_MKL /* Get the distances from their squares */ // dist[] = tmp_a[] = sqrt(sq_dist[]) vdSqrt(n, sq_dist, tmp_a); // invdist[] = tmp_e[] = 1.0 / dist[] vdInv(n, tmp_a, tmp_e); for (int i = 0; i < n; i++) { tmp_b[i] = cutoffslope * tmp_a[i] - cutslopecutdist; tmp_c[i] = -other_eta2 * tmp_a[i] + other_eta2betaseq; tmp_d[i] = -other_kappaoverbeta * tmp_a[i] + other_kappaseq; } // tmp_a[] = exp(tmp_b[]) = exp(cutoffslope * dist[] - cutslopecutdist) vdExp(n, tmp_b, tmp_a); // tmp_b[] = exp(tmp_c[]) = exp(-other_eta2 * dist[] + other_eta2betaseq) vdExp(n, tmp_c, tmp_b); // tmp_c[] = exp(tmp_d[]) = exp(-other_kappaoverbeta * dist[] + other_kappaseq) vdExp(n, tmp_d, tmp_c); for (int i = 0; i < n; i++) { /* Calculate cutoff function (weight factor) */ double wght = 1.0 / (1.0 + tmp_a[i]); /* Calculate derivative of the cutoff function. */ double dwdr = -cutoffslope * wght * (1 - wght); double dsigma1dr_o = (dwdr - wght * other_eta2) * tmp_b[i]; double dsigma2dr_o = (dwdr + wght * -other_kappaoverbeta) * tmp_c[i]; df[i] = tmp_e[i] * (dsigma1dr_o * dEdss[i] * chi_zs_zo + cnst_s * dsigma2dr_o); } #else // ASAP_MKL for (int i = 0; i < n; i++) { /* Get the distances from their squares */ double dist = sqrt(sq_dist[i]); double inv_dist = 1.0 / dist; /* Calculate cutoff function (weight factor) */ double wght = 1.0 / (1.0 + exp(cutoffslope * dist - cutslopecutdist)); /* Calculate derivative of the cutoff function. */ double dwdr = -cutoffslope * wght * (1 - wght); double dsigma1dr_o = (dwdr - wght * other_eta2) * exp(-other_eta2 * dist + other_eta2betaseq); double dsigma2dr_o = (dwdr + wght * -other_kappaoverbeta) * exp(-other_kappaoverbeta * dist + other_kappaseq); df[i] = inv_dist * (dsigma1dr_o * dEdss[i] * chi_zs_zo + cnst_s * dsigma2dr_o); } #endif // ASAP_MKL } distribute_force(self, other, df, rnb, n); #ifdef ASAP_MKL mkl_free(temporary); #else delete[] temporary; #endif } void EMT::distribute_force(const int *RESTRICT self, const int *RESTRICT other, const double *RESTRICT df, const Vec *RESTRICT rnb, int n) { /* Distribute force contributions */ #ifdef _OPENMP #pragma omp critical #endif // _OPENMP { for (int i = 0; i < n; i++) for (int j = 0; j < 3; j++) { double dfx = df[i] * rnb[i][j]; force[self[i]][j] += dfx; force[other[i]][j] -= dfx; // force has been extended to nSize. } /* Distribute virials contributions */ if (virials.size()) { for (int i = 0; i < n; i++) for (int alpha = 0; alpha < 3; alpha++) for (int beta = alpha; beta < 3; beta++) { double dsx = 0.5 * df[i] * rnb[i][alpha] * rnb[i][beta]; int ii = stresscomp[alpha][beta]; virials[self[i]][ii] += dsx; virials[other[i]][ii] += dsx; } } } } double EMT::GetLatticeConstant() const { ASSERT(singleelement != 0); return Beta * singleelement->seq * sqrt(2.0); } void EMT::PrintParameters() { for (int i = 0; i < nelements; i++) { const emt_parameters *p = parameters[i]; cerr << endl; cerr << "Parameters for element " << i << " (" << p->name << ", Z=" << p->Z << ")" << endl; cerr << "E0:" << p->e0 << " s0:" << p->seq << " V0:" << p->V0 << " eta2:" << p->eta2 << " kappa:" << p->kappa << " lambda:" << p->lambda << " rFermi:" << rFermi << " cutSlope" << cutoffslope << " gamma1:" << p->gamma1 << " gamma2:" << p->gamma2 << endl << endl; cerr << "Chi:"; for (int j = 0; j < nelements; j++) cerr << " " << (*chi)[i][j]; cerr << endl; } } void EMT::BoundaryConditionsChanged() { if (nblist) nblist->Invalidate(); }