Source code for analysis

from ase.neighborlist import NewPrimitiveNeighborList, natural_cutoffs
from utils import readStructs
import matplotlib
matplotlib.use('TkAgg')
import matplotlib.pyplot as plt

import numpy as np
import pandas as pd


[docs]def coordLabeller(atoms, fullCoordinations = {"Si": 4, "N":3, "H":1, "F": 1}, minAngles = {"Si": 90, "N": 109.5, "H": 360, "F": 360}, # 360 for atoms with max 1 bond maxBonds_per_element = {"Si": 6, "N":4, "H":1, "F":1}, angle_tolerance = 0.15, #tolerance for valid bond angles bond_tolerance = 0.25, #tolerance for valid bond angles minz = 0 #minimum height above which to compute ): """ Takes a structure, returns two dictionaries, the keys of which are identical: the index of the atom for which the statistic is calculated relativeCoordinations: -1 if atom i is undercoordinated, 1 if overcoordinated, 0 else bonds: list of bonds for atom i Defaults for angle and bond tolerance based on amorphous SiN """ nl = NewPrimitiveNeighborList( cutoffs = np.array(natural_cutoffs(atoms)) * (bond_tolerance + 1), bothways = True, self_interaction = False) nl.build(pbc = [True, True, False] , cell = atoms.cell, positions = atoms.positions) coordinations = {} relativeCoordinations = {} newBonds = {} for atom in atoms: idx = atom.index if atom.symbol == 'Ar' or atom.position[2] < minz: coordinations[idx] = 0 relativeCoordinations[idx] = 0 newBonds[idx] = [] continue minAngle = minAngles[atom.symbol] * (1 - angle_tolerance) # set minimum required angle to keep bonds = nl.get_neighbors(idx)[0] maxBonds = maxBonds_per_element[atom.symbol] bonds = sorted(bonds, key = lambda b: atoms.get_distance(idx, b, mic = True), reverse = False) #sort bonds list in ascending order of bond length keptBonds = set() skipBonds = set() if len(bonds) == 0: print("No bonds detected for atom %d" % idx) else: keptBonds.add(bonds[0]) if len(bonds) > 1: for i, bond1 in enumerate(bonds[1:]): # iterate over every detected bond except shortest angles = np.array([]) for j, bond2 in enumerate(keptBonds): # compare to bonds we've already seen angles = np.append(angles, atoms.get_angle(bond1, idx, bond2, mic = True)) if np.all(angles > minAngle): # keep if the angle of the new bond is large enough # wrt the bonds we've decided to keep keptBonds.add(bond1) coordinations[idx] = len(keptBonds) if coordinations[idx] < fullCoordinations[atom.symbol]: relativeCoordinations[idx] = -1 elif coordinations[idx] > fullCoordinations[atom.symbol]: relativeCoordinations[idx] = 1 else: relativeCoordinations[idx] = 0 newBonds[idx] = keptBonds return relativeCoordinations, newBonds
[docs]def analyzeFragments(datadir, **kwargs): """ Pass in `name` and `shallow` kwargs if needed for utils.readStruct function """ geometries = readStructs(datadir, **kwargs) analyses = {key: Analysis(item) for key, item in geometries.items()} analyses = pd.Series(analyses) ##################### ### fragmentation ### ##################### fragmentLists = [] for struct in geometries: adjmat = Analysis(struct).adjacency_matrix[0] numnodes = adjmat.shape[0] g = Graph(numnodes) for i in range(numnodes): for j in range(numnodes): if adjmat[i,j]: g.addEdge(i,j) cc = g.connectedComponents() isSmallgraph = np.array([len(i) for i in cc]) < 10 smallgraphs = [] for i, subgraph in enumerate(cc): if isSmallgraph[i]: smallgraphs += [struct[[atom.index for atom in struct if atom.index in subgraph]]] fragmentLists += [smallgraphs] flatten = lambda t: [item for sublist in t for item in sublist] fragmentTypes = np.unique([i.symbols.get_chemical_formula() for i in flatten(fragmentLists)]) fragdict = {i:j for i, j in zip(geometries.keys(), fragmentLists)} fragmentData = pd.DataFrame({key: [0] * len(geometries) for key in fragmentTypes}) fragmentData.index = fragdict.keys() for key, fragmentList in fragdict.items(): for fragment in fragmentList: _symbol = fragment.symbols.get_chemical_formula() fragmentData[_symbol].loc[key] += 1 fragmentData.to_csv(datadir + "fragdata.csv") print(fragmentData.sum(axis = 0)) ################### ### bond counts ### ################### totalbonds = [] bondcounts = {} e1 = "Si" e2 = "N" form = False for key, analysis in analyses.items(): try: totalbonds += [len(analysis.get_bonds(e1, e2, unique = True)[0])] bondcounts[key] = len(analysis.get_bonds(e1, e2, unique = True)[0]) except: print('error on {}'.format(key)) totalbonds = np.array(totalbonds) if form: print('percent runs with {}-{} bond formation = {}'.format(e1, e2, np.sum(totalbonds > 0)/170)) else: print('average number of final {}-{} bonds = {}'.format(e1, e2, np.sum(totalbonds)/170)) # plt.hist(totalbonds, bins = np.arange(0, np.max(totalbonds) + 1)) # plt.hist(totalbonds, bins = np.arange(5, 14)) if form: plt.title('distribution of # of {}-{} bonds formed'.format(e1, e2)); else: plt.title('distribution of # of {}-{} bonds count'.format(e1, e2)); plt.show() from itertools import combinations elems = ["Si", "F", "N", "C", "H", "Ar"] result = {} for e1, e2 in combinations(elems, 2): bondcounts = {} for key, analysis in analyses.items(): bondcounts[key] = len(analysis.get_bonds(e1, e2, unique = True)[0]) bondcounts = pd.Series(bondcounts) result["{}-{}".format(e1, e2)] = bondcounts pd.DataFrame(result).to_csv(datadir+"bondcounts.csv")
[docs]def plotBondDistributionComposition(data, a='Si', b='N'): """ Requires ``data``, a pd df with 'geom', 'coordlabels', and (optionally) 'wantedIndices' Gives overall distribution and percent makeup of elements A and B in that distribution """ if 'wantedIndices' in data.columns: sibonds = pd.Series({idx: [( np.array([data['geom'][idx][i].symbol for i in value]) ) for key, value in pd.Series(data['coordlabels'][idx][1])[data['wantedIndices'][idx]].items() ] for idx in data.index }) else: sibonds = pd.Series({idx: [( np.array([data['geom'][idx][i].symbol for i in value]) ) for key, value in pd.Series(data['coordlabels'][idx][1]).items() ] for idx in data.index }) nfrac = np.zeros(7) sifrac = np.zeros(7) ndatapoints = np.zeros(7) for key, item in sibonds.items(): for i in item: idx = len(i) ndatapoints[idx] += 1 nfrac[idx] += np.sum(np.array(i) == a)/idx sifrac[idx] += np.sum(np.array(i) == b)/idx ndatapoints = np.array([i if i > 0 else 1 for i in ndatapoints]) nfrac /= ndatapoints sifrac /= ndatapoints mask = np.array([i > 10 for i in ndatapoints]) nfrac, sifrac, ndatapoints = nfrac[mask], sifrac[mask], ndatapoints[mask] plt.figure(figsize = (10,5)) plt.hist( [i for sublist in pd.Series({idx: [len( np.array([data['geom'][idx][i].symbol for i in value]) ) for key, value in pd.Series(data['coordlabels'][idx][1])[data['wantedIndices'][idx]].items() ] for idx in data.index }) for i in sublist], bins = np.arange(7), density = True, label = 'total density' ); plt.plot(np.arange(7)[mask], nfrac, marker = 'o', label = '{} fraction'.format(a)) plt.plot(np.arange(7)[mask], sifrac, marker = 'o', label = '{} fraction'.format(b)) plt.title("Normed hist of bond counts with {}/{} fraction".format(a,b)) plt.legend()
[docs]def getBondSubset(data, elem): """ Requires ``data`` pd df that has columns 'geom', 'coordlabels', 'wantedIndices' Returns pd Series with the subsetted bond counts of (A, elem) where A should be set by the input wantedIndices """ result = pd.Series({idx: [np.sum( np.array([data['geom'][idx][i].symbol for i in value]) == elem ) for key, value in pd.Series(data['coordlabels'][idx][1])[data['wantedIndices'][idx]].items() ] for idx in data.index }) return result