"""
Module providing functions for reading the input and other related files
"""
from __future__ import print_function, absolute_import, division
import argparse
import io
import os
import re
import six
import sys
from collections import defaultdict, OrderedDict
import numpy as np
import pandas as pd
from ase.io.vasp import read_vasp
from ase.io.trajectory import Trajectory
if sys.version_info.major == 3:
import configparser as cp
else:
import ConfigParser as cp
[docs]def parse_arguments():
"""
Parse the input/config file name from the command line, parse the config
and return the parameters.
"""
parser = argparse.ArgumentParser()
parser.add_argument(
"command", choices=("harmonic", "anharmonic"), help="choose what to do"
)
parser.add_argument(
"config", help="file with the configuration parameters for thermo"
)
args = parser.parse_args()
if not os.path.exists(args.config):
raise ValueError("Specified file <{}> does not exist".format(args.config))
defaults = {
"Tinitial": "303.15",
"Tfinal": "303.15",
"Tstep": "0.0",
"pressure": "0.1",
"translations": "true",
"rotations": "true",
"pointgroup": "C1",
"phase": "gas",
"code": None,
}
config = cp.ConfigParser(defaults=defaults, allow_no_value=True)
config.read(args.config)
conditions = {"Tinitial": config.getfloat("conditions", "Tinitial")}
conditions["Tfinal"] = config.getfloat("conditions", "Tfinal")
conditions["Tstep"] = config.getfloat("conditions", "Tstep")
conditions["pressure"] = config.getfloat("conditions", "pressure")
job = {"proj_translations": config.getboolean("job", "translations")}
job["proj_rotations"] = config.getboolean("job", "rotations")
job["code"] = config.get("job", "code")
system = {"phase": config.get("system", "phase")}
system["pointgroup"] = config.get("system", "pointgroup")
system["symmetrynumber"] = get_symmetry_number(system["pointgroup"])
return args, conditions, job, system
[docs]def get_symmetry_number(pointgroup):
"""
Return the symmetry number for a given point group
.. seealso::
C. J. Cramer, `Essentials of Computational Chemistry, Theories
and Models`, 2nd Edition, p. 363
Parameters
----------
pointgroup : str
Symbol of the point group
"""
symmetrynumbers = {
"Ci": 1,
"Cs": 1,
"Coov": 1,
"Dooh": 2,
"T": 12,
"Td": 12,
"Oh": 24,
"Ih": 60,
}
cpatt = re.compile(r"C(\d+)[vh]?")
dpatt = re.compile(r"D(\d+)[dh]?")
spatt = re.compile(r"S(\d+)")
if pointgroup in symmetrynumbers:
return symmetrynumbers[pointgroup]
mc = cpatt.match(pointgroup)
md = dpatt.match(pointgroup)
ms = spatt.match(pointgroup)
if mc:
return int(mc.group(1))
elif md:
return 2 * int(md.group(1))
elif ms:
return int(ms.group(1)) // 2
else:
raise ValueError(
'Point group label "{}" unknown, cannot assign '
"a rotational symmetry number".format(pointgroup)
)
[docs]def read_vasp_hessian(
outcar="OUTCAR", symmetrize=True, convert_to_au=True, dof_labels=False
):
"""
Parse the hessian from the VASP ``OUTCAR`` file into a numpy array
Parameters
----------
outcar : str
Name of the VASP output, default is ``OUTCAR``
symmetrize : bool
If ``True`` the hessian will be symmetrized
convert_to_au : bool
If ``True`` convert the hessian to atomic units, in the other
case hessian is returned in [eV/Angstrom**2]
dof_labels : bool, default is False
If ``True`` a list of labels corresponding to the degrees of
freedom will also be returned
Returns
-------
hessian : numpy.array
Hessian matrix
Notes
-----
.. note::
By default VASP prints negative hessian so the elements should be
multiplied by -1 to restore the original hessian, this is done by
default, hessian in the XML file is **NOT** symmetrized by default
"""
from ase.units import Hartree, Bohr
with open(outcar, "r") as foutcar:
for line in foutcar:
if "Degrees of freedom DOF" in line:
dof = int(line.split()[-1])
hessian = np.zeros((dof, dof), dtype=float)
labels = []
break
for line in foutcar:
if "SECOND DERIVATIVES " in line:
for _ in range(2):
line = next(foutcar)
for i in range(dof):
line = next(foutcar)
linesplit = line.split()
labels.append(linesplit[0])
hessian[i] = [float(x) for x in linesplit[1:]]
break
else:
raise ValueError("No hessian found in file: {}".format(outcar))
if symmetrize:
hessian = (hessian + hessian.T) * 0.5
if convert_to_au:
hessian = hessian * Bohr ** 2 / Hartree
if dof_labels:
pat = re.compile(r"(\d+)([XYZ])")
return -1.0 * hessian, [pat.match(x).groups() for x in labels]
else:
return -1.0 * hessian
[docs]def read_vasp_hessian_xml(xml="vasprun.xml", convert_to_au=True, stripmass=True):
"""
Parse the hessian from the VASP ``vasprun.xml`` file into a numpy array
Parameters
----------
xml : str
Name of the VASP output, default is ``vasprun.xml``
convert_to_au : bool
If ``True`` convert the hessian to atomic units, in the other
case hessian is returned in [eV/Angstrom**2]
dof_labels : bool, default is False
If ``True`` a list of labels corresponding to the degrees of
freedom will also be returned
stripmass : bool
If ``True`` use VASP default masses to transform hessian to
non-mass-weighted form
Returns
-------
hessian : numpy.array
Hessian matrix
Notes
-----
.. note::
By default VASP prints negative hessian so the elements should be
multiplied by -1 to restore the original hessian, this is done by
default, hessian in the XML file is symmetrized by default
"""
from lxml.etree import parse
from ase.units import Hartree, Bohr
doc = parse(xml)
root = doc.getroot()
species = [
entry[0].text.strip()
for entry in root.find("atominfo/array[@name='atoms']/set")
]
natoms = int(root.find("atominfo/atoms").text)
dof = 3 * natoms
constblock = root.find('structure[@name="initialpos"]/varray[@name="selective"]')
if constblock is not None:
# partial hessian calculation
selective = np.ones((natoms, 3), dtype=bool)
for i, v in enumerate(constblock):
for j, fixed in enumerate(v.text.split()):
selective[i, j] = fixed == "T"
index = np.flatnonzero(selective.ravel())
hess_size = index.size
else:
# complete hessian calculation
index = np.arange(dof, dtype=int)
hess_size = dof
hessian = np.zeros((hess_size, hess_size), dtype=float)
for i, v in enumerate(root.find('calculation/dynmat/varray[@name="hessian"]')):
hessian[i] = -np.array([float(val) for val in v.text.split()])
if stripmass:
# get the masses that VASP uses by default
vasp_mass = {
element[1].text.strip(): float(element[2].text)
for element in root.find("atominfo/array[@name='atomtypes']/set")
}
vasp_massvec = np.zeros(hess_size, dtype=float)
for i, j in enumerate(np.floor_divide(index, 3)):
vasp_massvec[i] = vasp_mass[species[j]]
hessian *= np.sqrt(np.outer(vasp_massvec, vasp_massvec))
if convert_to_au:
hessian = hessian * Bohr ** 2 / Hartree
return hessian
[docs]def print_modeinfo(mi, output=None):
"""
Print the vibrational population data
Parameters
----------
mi : pandas.DataFrame
output : str
Name of the file to store the printout, if ``None`` stdout will be used
"""
fobj = open(output, "w") if output is not None else None
fmts = {
"HOfreq": "{:12.6f}".format,
"effective_mass": "{:12.6f}".format,
"displacement": "{:12.6f}".format,
"P_stretch": "{:>8.2%}".format,
"P_bend": "{:>8.2%}".format,
"P_torsion": "{:>8.2%}".format,
"P_longrange": "{:>8.2%}".format,
}
print(mi[mi["vibration"]].to_string(formatters=fmts), file=fobj)
if output is not None:
fobj.close()
[docs]def print_mode_thermo(df, info=False):
"""
After calculating all the anharmonic modes print the per mode themochemical
functions
"""
fmts = {
"freq": "{:12.6f}".format,
"zpve": "{:12.6f}".format,
"qvib": "{:14.6e}".format,
"U": "{:12.6f}".format,
"S": "{:14.6e}".format,
}
# header with the units
header = " {0:>12s} {1:>12s} {2:>14s} {3:>12s} {4:>14s}"
print(header.format("[cm^-1]", "[kJ/mol]", " ", "[kJ/mol]", "[kJ/mol*K]"))
print(df.to_string(formatters=fmts))
if info:
print("INFO codes")
print("-" * 10)
print("OK : Succesfully converged the anharmonic eigenproblem")
print("AGTH : Anharmonic frequency greater than the harmonic")
print("MAXITER : Maximum number of iterations exhausted")
# parsers and write methods for legacy EIGEN_HESS files
[docs]def read_bmatdat():
"""
Read the ``bmat.dat`` file with internal coordiantes and the B matrix
produced by the original ``writeBmat`` code
Returns
-------
internals, Bmatrix : tuple
Internal coordiantes and B matrix
"""
with open("bmat.dat", "r") as fdat:
lines = fdat.readlines()
nint, ndof = (int(x) for x in lines[1].split())
coordlno = next(i for i, line in enumerate(lines) if "Coordinates (au):" in line)
bmatlno = next(i for i, line in enumerate(lines) if "Bmatrix(ij):" in line)
internals = np.array(
[tuple(row.split()[:2]) for row in lines[coordlno + 1 : coordlno + nint + 1]],
dtype=[("type", "S4"), ("value", np.float32)],
)
Bmatrix = np.zeros((nint, ndof), dtype=float)
for line in lines[bmatlno + 1 :]:
i, j, val = line.split()
Bmatrix[int(i) - 1, int(j) - 1] = float(val)
return internals, Bmatrix
[docs]def read_em_freq(fname):
"""
Read the file ``fname`` with the frequencies, reduced masses and fitted
fitted coefficients for the potential into a pandas DataFrame.
Parameters
----------
fname : str
Name of the file with PES
"""
cols = ["type", "freq", "mass", "a0", "a1", "a2", "a3", "a4", "a5", "a6"]
data = pd.read_csv(fname, sep=r"\s+", engine="python", names=cols)
data.set_index(np.arange(1, data.shape[0] + 1), inplace=True)
for col in cols[1:]:
data[col] = data[col].astype(float)
return data
[docs]def read_pes(fname):
"""
Parse the file with the potential energy surface (PES) into a dict of
numpy arrays with mode numbers as keys
Parameters
----------
fname : str
Name of the file with PES
"""
with open(fname, "r") as fobj:
data = fobj.read()
pat = re.compile(" Scan along mode # =\s*(\d+)")
parsed = [x for x in pat.split(data) if x != ""]
it = iter(parsed)
parsed = {
int(mode): np.loadtxt(io.StringIO(six.text_type(pes)))
for mode, pes in zip(it, it)
}
return parsed
[docs]def read_poscars(filename):
"""
Read POSCARs file with the displaced structures and return
an OrderedDict with the Atoms objects
"""
pat = re.compile(r"Mode\s*=\s*(\d+)\s*point\s*=\s*(-?\d+)")
if not os.path.exists(filename):
raise OSError('File "{}" does not exist'.format(filename))
with open(filename, "r") as fdata:
poscars = fdata.read()
parsed = [x for x in pat.split(poscars) if x != " "]
images = OrderedDict()
it = iter(parsed)
for mode, point, geometry in zip(it, it, it):
images[tuple([int(mode) - 1, int(point)])] = read_vasp(
io.StringIO(six.text_type(geometry))
)
return images
[docs]def write_modes(filename="POSCARs"):
"""
Convert a file with multiple geometries representing vibrational modes
in ``POSCAR``/``CONTCAR`` format into trajectory files with modes.
"""
pat = re.compile(r"Mode\s*=\s*(\d+)\s*point\s*=\s*(-?\d+)")
if not os.path.exists(filename):
raise OSError('File "{}" does not exist'.format(filename))
with open(filename, "r") as fdata:
poscars = fdata.read()
parsed = [x for x in pat.split(poscars) if x != " "]
it = iter(parsed)
dd = defaultdict(list)
for i, j, item in zip(it, it, it):
dd[i].append(item)
for mode, geometries in dd.items():
traj = Trajectory("mode.{}.traj".format(mode), "w")
for geometry in geometries:
atoms = read_vasp(io.StringIO(six.text_type(geometry)))
traj.write(atoms)
traj.close()