import matplotlib
matplotlib.use('TkAgg')  # must run before pyplot is imported
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from ase.geometry.analysis import Analysis
from ase.neighborlist import NewPrimitiveNeighborList, natural_cutoffs

from utils import readStructs
def coordLabeller(atoms, fullCoordinations = {"Si": 4, "N": 3, "H": 1, "F": 1},
                  minAngles = {"Si": 90, "N": 109.5, "H": 360, "F": 360}, # 360 for atoms with max 1 bond
                  maxBonds_per_element = {"Si": 6, "N": 4, "H": 1, "F": 1},
                  angle_tolerance = 0.15, # fractional tolerance for valid bond angles
                  bond_tolerance = 0.25,  # fractional tolerance for valid bond lengths
                  minz = 0 # minimum height above which to compute
                  ):
    """
    Classify the coordination state of every atom in a structure.

    Takes a structure, returns two dictionaries, the keys of which are identical:
    the index of the atom for which the statistic is calculated

    relativeCoordinations: -1 if atom i is undercoordinated, 1 if overcoordinated, 0 else
    newBonds: the kept bond partners (set of atom indices) for atom i;
              an empty list for skipped atoms

    Defaults for angle and bond tolerance based on amorphous SiN.
    NOTE: the default dicts are read-only here and never mutated.
    ``maxBonds_per_element`` is currently unused; it is kept for interface
    compatibility with existing callers.
    """
    # Neighbor list with covalent-radius cutoffs inflated by bond_tolerance.
    nl = NewPrimitiveNeighborList(
        cutoffs = np.array(natural_cutoffs(atoms)) * (bond_tolerance + 1),
        bothways = True,
        self_interaction = False)
    # Slab-style periodicity: periodic in x/y, open along z.
    nl.build(pbc = [True, True, False], cell = atoms.cell, positions = atoms.positions)

    relativeCoordinations = {}
    newBonds = {}
    for atom in atoms:
        idx = atom.index
        # Skip inert Ar and anything below the analysis window.
        if atom.symbol == 'Ar' or atom.position[2] < minz:
            relativeCoordinations[idx] = 0
            newBonds[idx] = []
            continue
        minAngle = minAngles[atom.symbol] * (1 - angle_tolerance) # minimum required angle to keep a bond
        # Candidate bonds in ascending order of bond length
        # (mic = minimum-image convention across periodic boundaries).
        bonds = sorted(nl.get_neighbors(idx)[0],
                       key = lambda b: atoms.get_distance(idx, b, mic = True))
        keptBonds = set()
        if len(bonds) == 0:
            print("No bonds detected for atom %d" % idx)
        else:
            keptBonds.add(bonds[0])  # the shortest bond is always kept
            for bond1 in bonds[1:]:  # every other candidate, shortest first
                # Keep a longer bond only if its angle with every bond we have
                # already kept is large enough; rejects spurious near-parallel bonds.
                if all(atoms.get_angle(bond1, idx, bond2, mic = True) > minAngle
                       for bond2 in keptBonds):
                    keptBonds.add(bond1)
        coordination = len(keptBonds)
        full = fullCoordinations[atom.symbol]
        if coordination < full:
            relativeCoordinations[idx] = -1
        elif coordination > full:
            relativeCoordinations[idx] = 1
        else:
            relativeCoordinations[idx] = 0
        newBonds[idx] = keptBonds
    return relativeCoordinations, newBonds
def analyzeFragments(datadir, **kwargs):
    """
    Analyze fragmentation and element-pair bond counts for every geometry in ``datadir``.

    Pass in `name` and `shallow` kwargs if needed for utils.readStructs function.

    Side effects: writes ``fragdata.csv`` and ``bondcounts.csv`` into ``datadir``
    (datadir is expected to end with a path separator), shows a matplotlib window,
    and prints summary statistics.
    """
    geometries = readStructs(datadir, **kwargs)
    analyses = pd.Series({key: Analysis(item) for key, item in geometries.items()})
    nruns = len(analyses)  # was hard-coded to 170; generalize to the actual run count

    #####################
    ### fragmentation ###
    #####################
    fragmentLists = []
    for key, struct in geometries.items():
        # Reuse the Analysis objects built above instead of re-constructing them.
        adjmat = analyses[key].adjacency_matrix[0]
        numnodes = adjmat.shape[0]
        # NOTE(review): Graph is not imported in this file — assumed to be a
        # project-local connected-components helper; TODO confirm its origin.
        g = Graph(numnodes)
        for i in range(numnodes):
            for j in range(numnodes):
                if adjmat[i, j]:
                    g.addEdge(i, j)
        # A "fragment" is any connected component with fewer than 10 atoms.
        smallgraphs = []
        for subgraph in g.connectedComponents():
            if len(subgraph) < 10:
                members = set(subgraph)
                smallgraphs.append(struct[[atom.index for atom in struct
                                           if atom.index in members]])
        fragmentLists.append(smallgraphs)

    flatten = lambda t: [item for sublist in t for item in sublist]
    fragmentTypes = np.unique([frag.symbols.get_chemical_formula()
                               for frag in flatten(fragmentLists)])
    fragdict = {i: j for i, j in zip(geometries.keys(), fragmentLists)}
    fragmentData = pd.DataFrame({key: [0] * len(geometries) for key in fragmentTypes})
    fragmentData.index = fragdict.keys()
    for key, fragmentList in fragdict.items():
        for fragment in fragmentList:
            _symbol = fragment.symbols.get_chemical_formula()
            # .loc[row, col] avoids pandas chained-indexing assignment.
            fragmentData.loc[key, _symbol] += 1
    fragmentData.to_csv(datadir + "fragdata.csv")
    print(fragmentData.sum(axis = 0))

    ###################
    ### bond counts ###
    ###################
    e1 = "Si"
    e2 = "N"
    form = False  # True => report fraction of runs with any bond; False => average count
    totalbonds = []
    for key, analysis in analyses.items():
        try:
            # Single get_bonds call per key (was called twice).
            totalbonds += [len(analysis.get_bonds(e1, e2, unique = True)[0])]
        except Exception:
            # Narrowed from a bare except; still best-effort per run.
            print('error on {}'.format(key))
    totalbonds = np.array(totalbonds)
    if form:
        print('percent runs with {}-{} bond formation = {}'.format(e1, e2, np.sum(totalbonds > 0) / nruns))
    else:
        print('average number of final {}-{} bonds = {}'.format(e1, e2, np.sum(totalbonds) / nruns))
    if form:
        plt.title('distribution of # of {}-{} bonds formed'.format(e1, e2));
    else:
        plt.title('distribution of # of {}-{} bonds count'.format(e1, e2));
    plt.show()

    from itertools import combinations
    elems = ["Si", "F", "N", "C", "H", "Ar"]
    result = {}
    for e1, e2 in combinations(elems, 2):
        bondcounts = {}
        for key, analysis in analyses.items():
            bondcounts[key] = len(analysis.get_bonds(e1, e2, unique = True)[0])
        result["{}-{}".format(e1, e2)] = pd.Series(bondcounts)
    pd.DataFrame(result).to_csv(datadir + "bondcounts.csv")
def plotBondDistributionComposition(data, a='Si', b='N'):
    """
    Plot the distribution of per-atom bond counts with element composition.

    Requires ``data``, a pd df with 'geom', 'coordlabels', and (optionally) 'wantedIndices'
    Gives overall distribution and percent makeup of elements ``a`` and ``b`` in that
    distribution.

    Fix: the histogram previously indexed 'wantedIndices' unconditionally and
    raised KeyError whenever that column was absent; the symbol lists are now
    computed once and reused for both the fractions and the histogram.
    """
    def _rowBondSymbols(rowIdx):
        # For one dataframe row: map each atom's kept-bond index list
        # (coordlabels[rowIdx][1]) to an array of partner chemical symbols,
        # optionally restricted to the row's wantedIndices.
        bondSeries = pd.Series(data['coordlabels'][rowIdx][1])
        if 'wantedIndices' in data.columns:
            bondSeries = bondSeries[data['wantedIndices'][rowIdx]]
        return [np.array([data['geom'][rowIdx][i].symbol for i in value])
                for value in bondSeries]

    sibonds = pd.Series({rowIdx: _rowBondSymbols(rowIdx) for rowIdx in data.index})

    maxCoord = 7  # bond counts 0..6
    afrac = np.zeros(maxCoord)        # was "nfrac" but accumulates element a
    bfrac = np.zeros(maxCoord)        # was "sifrac" but accumulates element b
    ndatapoints = np.zeros(maxCoord)
    for symbolLists in sibonds:
        for symbols in symbolLists:
            count = len(symbols)
            ndatapoints[count] += 1
            if count:  # guard: zero-bond atoms previously caused ZeroDivisionError
                afrac[count] += np.sum(symbols == a) / count
                bfrac[count] += np.sum(symbols == b) / count
    # Avoid division by zero for bond counts that never occurred.
    denom = np.array([i if i > 0 else 1 for i in ndatapoints])
    afrac /= denom
    bfrac /= denom
    mask = np.array([i > 10 for i in ndatapoints])  # only plot well-sampled counts

    plt.figure(figsize = (10, 5))
    # Normalized histogram of bond counts over all (selected) atoms.
    plt.hist([len(symbols) for symbolLists in sibonds for symbols in symbolLists],
             bins = np.arange(maxCoord), density = True, label = 'total density')
    plt.plot(np.arange(maxCoord)[mask], afrac[mask], marker = 'o', label = '{} fraction'.format(a))
    plt.plot(np.arange(maxCoord)[mask], bfrac[mask], marker = 'o', label = '{} fraction'.format(b))
    plt.title("Normed hist of bond counts with {}/{} fraction".format(a, b))
    plt.legend()
def getBondSubset(data, elem):
    """
    Count ``elem`` neighbors in each wanted atom's bond list.

    Requires ``data`` pd df that has columns 'geom', 'coordlabels', 'wantedIndices'
    Returns pd Series with the subsetted bond counts of (A, elem) where A should be set by the input wantedIndices
    """
    perRowCounts = {}
    for rowKey in data.index:
        geometry = data['geom'][rowKey]
        # Bond lists keyed by atom index, restricted to the wanted atoms.
        bondMap = pd.Series(data['coordlabels'][rowKey][1])
        selected = bondMap[data['wantedIndices'][rowKey]]
        counts = []
        for partnerIndices in selected:
            partnerSymbols = np.array([geometry[i].symbol for i in partnerIndices])
            counts.append(np.sum(partnerSymbols == elem))
        perRowCounts[rowKey] = counts
    return pd.Series(perRowCounts)