Table Of Contents

Search

Enter search terms or a module, class or function name.

Source code for panthera.io

"""
Module providing functions for reading the input and other related files
"""

from __future__ import print_function, absolute_import, division

import argparse
import io
import os
import re
import six
import sys

from collections import defaultdict, OrderedDict

import numpy as np
import pandas as pd

from ase.io.vasp import read_vasp
from ase.io.trajectory import Trajectory

if sys.version_info.major == 3:
    import configparser as cp
else:
    import ConfigParser as cp


[docs]def parse_arguments(): """ Parse the input/config file name from the command line, parse the config and return the parameters. """ parser = argparse.ArgumentParser() parser.add_argument( "command", choices=("harmonic", "anharmonic"), help="choose what to do" ) parser.add_argument( "config", help="file with the configuration parameters for thermo" ) args = parser.parse_args() if not os.path.exists(args.config): raise ValueError("Specified file <{}> does not exist".format(args.config)) defaults = { "Tinitial": "303.15", "Tfinal": "303.15", "Tstep": "0.0", "pressure": "0.1", "translations": "true", "rotations": "true", "pointgroup": "C1", "phase": "gas", "code": None, } config = cp.ConfigParser(defaults=defaults, allow_no_value=True) config.read(args.config) conditions = {"Tinitial": config.getfloat("conditions", "Tinitial")} conditions["Tfinal"] = config.getfloat("conditions", "Tfinal") conditions["Tstep"] = config.getfloat("conditions", "Tstep") conditions["pressure"] = config.getfloat("conditions", "pressure") job = {"proj_translations": config.getboolean("job", "translations")} job["proj_rotations"] = config.getboolean("job", "rotations") job["code"] = config.get("job", "code") system = {"phase": config.get("system", "phase")} system["pointgroup"] = config.get("system", "pointgroup") system["symmetrynumber"] = get_symmetry_number(system["pointgroup"]) return args, conditions, job, system
[docs]def get_symmetry_number(pointgroup): """ Return the symmetry number for a given point group .. seealso:: C. J. Cramer, `Essentials of Computational Chemistry, Theories and Models`, 2nd Edition, p. 363 Parameters ---------- pointgroup : str Symbol of the point group """ symmetrynumbers = { "Ci": 1, "Cs": 1, "Coov": 1, "Dooh": 2, "T": 12, "Td": 12, "Oh": 24, "Ih": 60, } cpatt = re.compile(r"C(\d+)[vh]?") dpatt = re.compile(r"D(\d+)[dh]?") spatt = re.compile(r"S(\d+)") if pointgroup in symmetrynumbers: return symmetrynumbers[pointgroup] mc = cpatt.match(pointgroup) md = dpatt.match(pointgroup) ms = spatt.match(pointgroup) if mc: return int(mc.group(1)) elif md: return 2 * int(md.group(1)) elif ms: return int(ms.group(1)) // 2 else: raise ValueError( 'Point group label "{}" unknown, cannot assign ' "a rotational symmetry number".format(pointgroup) )
[docs]def read_vasp_hessian( outcar="OUTCAR", symmetrize=True, convert_to_au=True, dof_labels=False ): """ Parse the hessian from the VASP ``OUTCAR`` file into a numpy array Parameters ---------- outcar : str Name of the VASP output, default is ``OUTCAR`` symmetrize : bool If ``True`` the hessian will be symmetrized convert_to_au : bool If ``True`` convert the hessian to atomic units, in the other case hessian is returned in [eV/Angstrom**2] dof_labels : bool, default is False If ``True`` a list of labels corresponding to the degrees of freedom will also be returned Returns ------- hessian : numpy.array Hessian matrix Notes ----- .. note:: By default VASP prints negative hessian so the elements should be multiplied by -1 to restore the original hessian, this is done by default, hessian in the XML file is **NOT** symmetrized by default """ from ase.units import Hartree, Bohr with open(outcar, "r") as foutcar: for line in foutcar: if "Degrees of freedom DOF" in line: dof = int(line.split()[-1]) hessian = np.zeros((dof, dof), dtype=float) labels = [] break for line in foutcar: if "SECOND DERIVATIVES " in line: for _ in range(2): line = next(foutcar) for i in range(dof): line = next(foutcar) linesplit = line.split() labels.append(linesplit[0]) hessian[i] = [float(x) for x in linesplit[1:]] break else: raise ValueError("No hessian found in file: {}".format(outcar)) if symmetrize: hessian = (hessian + hessian.T) * 0.5 if convert_to_au: hessian = hessian * Bohr ** 2 / Hartree if dof_labels: pat = re.compile(r"(\d+)([XYZ])") return -1.0 * hessian, [pat.match(x).groups() for x in labels] else: return -1.0 * hessian
[docs]def read_vasp_hessian_xml(xml="vasprun.xml", convert_to_au=True, stripmass=True): """ Parse the hessian from the VASP ``vasprun.xml`` file into a numpy array Parameters ---------- xml : str Name of the VASP output, default is ``vasprun.xml`` convert_to_au : bool If ``True`` convert the hessian to atomic units, in the other case hessian is returned in [eV/Angstrom**2] dof_labels : bool, default is False If ``True`` a list of labels corresponding to the degrees of freedom will also be returned stripmass : bool If ``True`` use VASP default masses to transform hessian to non-mass-weighted form Returns ------- hessian : numpy.array Hessian matrix Notes ----- .. note:: By default VASP prints negative hessian so the elements should be multiplied by -1 to restore the original hessian, this is done by default, hessian in the XML file is symmetrized by default """ from lxml.etree import parse from ase.units import Hartree, Bohr doc = parse(xml) root = doc.getroot() species = [ entry[0].text.strip() for entry in root.find("atominfo/array[@name='atoms']/set") ] natoms = int(root.find("atominfo/atoms").text) dof = 3 * natoms constblock = root.find('structure[@name="initialpos"]/varray[@name="selective"]') if constblock is not None: # partial hessian calculation selective = np.ones((natoms, 3), dtype=bool) for i, v in enumerate(constblock): for j, fixed in enumerate(v.text.split()): selective[i, j] = fixed == "T" index = np.flatnonzero(selective.ravel()) hess_size = index.size else: # complete hessian calculation index = np.arange(dof, dtype=int) hess_size = dof hessian = np.zeros((hess_size, hess_size), dtype=float) for i, v in enumerate(root.find('calculation/dynmat/varray[@name="hessian"]')): hessian[i] = -np.array([float(val) for val in v.text.split()]) if stripmass: # get the masses that VASP uses by default vasp_mass = { element[1].text.strip(): float(element[2].text) for element in root.find("atominfo/array[@name='atomtypes']/set") } vasp_massvec = np.zeros(hess_size, dtype=float) for i, j in enumerate(np.floor_divide(index, 3)): vasp_massvec[i] = vasp_mass[species[j]] hessian *= np.sqrt(np.outer(vasp_massvec, vasp_massvec)) if convert_to_au: hessian = hessian * Bohr ** 2 / Hartree return hessian
# parsers and write methods for legacy EIGEN_HESS files
[docs]def read_bmatdat(): """ Read the ``bmat.dat`` file with internal coordiantes and the B matrix produced by the original ``writeBmat`` code Returns ------- internals, Bmatrix : tuple Internal coordiantes and B matrix """ with open("bmat.dat", "r") as fdat: lines = fdat.readlines() nint, ndof = (int(x) for x in lines[1].split()) coordlno = next(i for i, line in enumerate(lines) if "Coordinates (au):" in line) bmatlno = next(i for i, line in enumerate(lines) if "Bmatrix(ij):" in line) internals = np.array( [tuple(row.split()[:2]) for row in lines[coordlno + 1 : coordlno + nint + 1]], dtype=[("type", "S4"), ("value", np.float32)], ) Bmatrix = np.zeros((nint, ndof), dtype=float) for line in lines[bmatlno + 1 :]: i, j, val = line.split() Bmatrix[int(i) - 1, int(j) - 1] = float(val) return internals, Bmatrix
[docs]def read_em_freq(fname): """ Read the file ``fname`` with the frequencies, reduced masses and fitted fitted coefficients for the potential into a pandas DataFrame. Parameters ---------- fname : str Name of the file with PES """ cols = ["type", "freq", "mass", "a0", "a1", "a2", "a3", "a4", "a5", "a6"] data = pd.read_csv(fname, sep=r"\s+", engine="python", names=cols) data.set_index(np.arange(1, data.shape[0] + 1), inplace=True) for col in cols[1:]: data[col] = data[col].astype(float) return data
[docs]def read_pes(fname): """ Parse the file with the potential energy surface (PES) into a dict of numpy arrays with mode numbers as keys Parameters ---------- fname : str Name of the file with PES """ with open(fname, "r") as fobj: data = fobj.read() pat = re.compile(" Scan along mode # =\s*(\d+)") parsed = [x for x in pat.split(data) if x != ""] it = iter(parsed) parsed = { int(mode): np.loadtxt(io.StringIO(six.text_type(pes))) for mode, pes in zip(it, it) } return parsed
[docs]def read_poscars(filename): """ Read POSCARs file with the displaced structures and return an OrderedDict with the Atoms objects """ pat = re.compile(r"Mode\s*=\s*(\d+)\s*point\s*=\s*(-?\d+)") if not os.path.exists(filename): raise OSError('File "{}" does not exist'.format(filename)) with open(filename, "r") as fdata: poscars = fdata.read() parsed = [x for x in pat.split(poscars) if x != " "] images = OrderedDict() it = iter(parsed) for mode, point, geometry in zip(it, it, it): images[tuple([int(mode) - 1, int(point)])] = read_vasp( io.StringIO(six.text_type(geometry)) ) return images
[docs]def write_modes(filename="POSCARs"): """ Convert a file with multiple geometries representing vibrational modes in ``POSCAR``/``CONTCAR`` format into trajectory files with modes. """ pat = re.compile(r"Mode\s*=\s*(\d+)\s*point\s*=\s*(-?\d+)") if not os.path.exists(filename): raise OSError('File "{}" does not exist'.format(filename)) with open(filename, "r") as fdata: poscars = fdata.read() parsed = [x for x in pat.split(poscars) if x != " "] it = iter(parsed) dd = defaultdict(list) for i, j, item in zip(it, it, it): dd[i].append(item) for mode, geometries in dd.items(): traj = Trajectory("mode.{}.traj".format(mode), "w") for geometry in geometries: atoms = read_vasp(io.StringIO(six.text_type(geometry))) traj.write(atoms) traj.close()