Source code for utils

import os
import numpy as np
import pandas as pd
import re
from ase.io import gen


[docs]def normalize(y,x):
    """
    Takes y, x of data and returns normalized y
    """
    return y/np.trapz(y,x)

[docs]def flatten(l):
    """
    flattens a 2d list
    """
    return [item for sublist in l for item in sublist] 

[docs]def KE(v_tot):
    """
    Returns KE of Ar+ in eV given total velocity
    """
    return 6.24E18 * 0.5 * 1.66E-27*39.95*(v_tot*1E5)**2

[docs]def v_from_KE(E):
    """
    Returns v(z) of Ar+ in eV given KE
    """
    return np.sqrt(E/(6.24E18 * 0.5 * 1.66E-27*39.95))/1E5

[docs]def readStructs(datadir, shallow = True, name = "output"):
    """
        Currently designed for output from single layer directory trees.
        Reads in final adsorption geometries and energy data, returns dataframe with geometry and energy data

        Input:
            datadir: string that points to directory containing the following:
                - convergence: each line i has convergence status of run i
                - energies: each line i has total energy and ads energy from run i
                - output{indices}.gen: final geometries for each index
                
            slabEnergy: energy of slab
            adsorbateEnergy: energy of the adsorbate in the system

        Returns:
            output: pd Dataframe with:
                - index: indices for runs that worked
                - geometry: final geometry of run
                - total energy: raw energy from file
                - adsorption energy: energy as adjusted by adsorbate_energy
    """
    geometries = {}
    if shallow:
        pattern = r"{}(\d+).gen".format(name)
    else:
        pattern = r"{}(\d+-\d+).gen".format(name)
    files = os.listdir(datadir) 

    if "energies" in files and "convergence" in files:
        convergence = pd.read_csv(datadir + "convergence", header = None)
        energies = pd.read_csv(datadir + "energies", header = None)
        output =  pd.concat([energies, convergence], axis = 1)
        output.columns = ["E", "E_ads", "conv"]

        for i in files:
            key = re.search(pattern, i)
            if key:
                if shallow:
                    key = int(key.group(1))
                else:
                    key = key.group(1)
                geometries[key] =  gen.read_gen(datadir + i)
        output['geom'] = pd.Series(geometries)

        output = output[output['conv'] == "Geometry converged"]
        output = output.drop("conv", axis = 1)

    else:
        for i in files:
            key = re.search(pattern, i)
            if key:
                if shallow:
                    key = int(key.group(1))
                else:
                    key = key.group(1)
                geometries[key] =  gen.read_gen(datadir + i)
        output = pd.DataFrame(pd.Series(geometries))
        output.columns = ['geom']
    return output

[docs]def readData(outdir, indir, useRemoval = True, useFrags = True, useBonds = True, wrapStructs = True):
    """
    Specifically made for data with BOTH input and output structures
    Read structures; append removal data, fragment data, and bond data optionally
    Wrap structures optionally

    Returns:
        df of the above with columns (struct, in), (struct, out), 
        optionally, (frags, <fragnames>), (bonds, <bondnames>), (removal, <removedelems>)
        where each <x> corresponds to a column label for a specific subgroup (ie, NH3, N-H, or NH3 resp.)
    """

    # read structures, initialize df

    data = pd.concat([readStructs(indir, shallow = False, name = 'input'),
                         readStructs(outdir, shallow = False)
                        ], axis = 1)
    data.columns = ['in', 'out']
    for i in data['in']:
        i.wrap()

    if useRemoval:
        bombdata, quenchdata, eqdata = postprocessResults(outdir)

        newdata = {}
        for idx in data.index:
            i, j = idx.split("-") # for i-j type output
            i = int(i)
            newdata[idx] = data.loc[idx].append(bombdata[i][j])
        data = pd.DataFrame(newdata).T
        ncols = data.shape[1]
        data.columns = pd.MultiIndex.from_arrays(
            [["struct"] * 2 + ["removal"] * (ncols - 2), data.columns],
            names = ['source', 'data']
        )
    else:
        data.columns = pd.MultiIndex.from_arrays(
            [["struct"] * 2, data.columns],
            names = ['source', 'data']
        )


    if useFrags:
        infrags = pd.read_csv(indir+"fragdata.csv", index_col=0)
        outfrags = pd.read_csv(outdir+"fragdata.csv", index_col=0)

        fragdiffs = outfrags.subtract(infrags, fill_value = 0)
        ncols = fragdiffs.shape[1]
        fragdiffs.columns = pd.MultiIndex.from_arrays(
            [['frags'] * ncols, fragdiffs.columns], names = ['source', 'data']
        )

        data = pd.concat([data, fragdiffs], axis = 1)


    if useBonds:
        outbonds = pd.read_csv(outdir + "bondcounts.csv", index_col=0)

        inbonds = pd.read_csv(indir + "bondcounts.csv", index_col=0)

        bondDiffs = outbonds.subtract(inbonds)

        ncols = bondDiffs.shape[1]
        bondDiffs.columns = pd.MultiIndex.from_arrays(
            [['bonds'] * ncols, bondDiffs.columns], names = ['source', 'data']
        )

        data = pd.concat([data, bondDiffs], axis = 1)
    return data

[docs]def calculateVASPtemp(velos, nfixed, POSCAR):
    """
    Take in a 3N matrix of velocities, subtract of nfixed from DOF
    POSCAR (Atoms object) used for getting masses from atoms
    Return T as calculated by VASP
    """
    masses = [atom.mass for atom in POSCAR]
    KE = 0
    dof = len(velos) * 3 - nfixed   #natoms * 3 - (fixed atoms) * 3
    for i, v in enumerate(velos):
        vx, vy, vz = v
        m = masses[i]
        v = np.sqrt(vx**2 + vy**2  + vz**2)
        KE += 1/2 * m * v**2
    KE *= 103.642697 # amu * Å^2/fs^2 to eV conversion
    return 2 * KE/kb_evK/dof

#############
# constants #
#############
amu2g = 1.66054e-24 #multiply by this
a2cm = 1e-8 # multiply by this
kb_evK = 8.617333262145E-5 #ev/K