diff --git a/src/mpmorph/analysis/diffusion.py b/src/mpmorph/analysis/diffusion.py index 879572ad..bb06bd9a 100644 --- a/src/mpmorph/analysis/diffusion.py +++ b/src/mpmorph/analysis/diffusion.py @@ -290,7 +290,7 @@ def plot(self, title=None, annotate=True, el="", **kwargs): self.y, yerr=self.yerr.T, label="Q[{}]: ".format(el) + tx + " K", - **kwargs + **kwargs, ) plt.ylabel("ln(D cm$^2$/s)", fontsize=15) plt.xlabel("1000/T K$^{-1}$", fontsize=15) diff --git a/src/mpmorph/analysis/melting_points.py b/src/mpmorph/analysis/melting_points.py index 2b42e97f..30cfcbec 100644 --- a/src/mpmorph/analysis/melting_points.py +++ b/src/mpmorph/analysis/melting_points.py @@ -1,73 +1,334 @@ import numpy as np from scipy.stats import linregress import matplotlib.pyplot as plt - +from sklearn.cluster import AgglomerativeClustering +from sklearn.metrics import mean_squared_error +import numpy as np +import matplotlib.pyplot as plt import math +from abc import ABC, abstractmethod +import numpy.polynomial.polynomial as poly +from numpy.polynomial import Polynomial as P -class MeltingPointAnalyzer(): +class AbstractMeltingPointEstimator(ABC): - def split_dset(self, pts, split_idx): - return pts[0:split_idx], pts[split_idx:] + def plot(self, ts, vs, plot_title=None): + fig, axs = self._plot_ts_vs(ts, vs) + Tm = self.estimate(ts, vs) + axs.plot([Tm, Tm], [min(vs), max(vs)], color="r") - def get_split_fit(self, xs, ys, split_idx): - leftx, rightx = self.split_dset(xs, split_idx) - lefty, righty = self.split_dset(ys, split_idx) + if plot_title is None: + axs.set_title("Volume vs Temperature by Polynomial Fit") + else: + axs.set_title(plot_title) - leftfit = linregress(leftx, lefty) - lefterr = leftfit.stderr + return fig, axs + + @abstractmethod + def estimate(self, temps, vols): + pass + + def _plot_ts_vs(self, ts, vs): + fig, axs = plt.subplots() + axs.scatter(ts, vs) + axs.set_xlabel("Temperature (K)") + axs.set_ylabel("Volume (A^3)") + return fig, axs + +class MeltingPointEnsembleEstimator(AbstractMeltingPointEstimator): + + def __init__(self, estimators): + self._estimators = estimators + + def estimate(self, temps, vols): + tm_estimates = [e.estimate(temps, vols) for e in self._estimators] + return np.mean(tm_estimates) + + def plot(self, ts, vs, plot_title = None): + _, axs = self._plot_ts_vs(ts, vs) + tm = self.estimate(ts, vs) + axs.plot([tm, tm], [np.min(vs), np.max(vs)]) + + def plot_all_estimates(self, ts, vs): + _, axs = self._plot_ts_vs(ts, vs) + for e in self._estimators: + tm = e.estimate(ts, vs) + axs.plot([tm, tm], [np.min(vs), np.max(vs)], label=e.name) + + avg = self.estimate(ts, vs) + axs.plot([avg , avg ], [np.min(vs), np.max(vs)], label="Mean Estimate") + axs.set_title("Ensemble Tm Estimates") + axs.legend() + + + +class MeltingPointClusterEstimator(AbstractMeltingPointEstimator): + + name: str = "Clustering" + + def plot_clusters(self, ts, vs, plot_title=None): + points = np.array(list(zip(ts, vs))) + cluster1, cluster2 = self._get_clusters(points) + fig, axs = plt.subplots() + axs.scatter(*cluster1) + axs.scatter(*cluster2) + axs.set_xlabel("Temperature (K)") + axs.set_ylabel("Volume (A^3)") + Tm = self.estimate(ts, vs) + axs.plot([Tm, Tm], [min(vs), max(vs)], color="r") + + if plot_title is None: + axs.title("Volume vs Temperature by Clustering") + else: + axs.title(plot_title) - rightfit = linregress(rightx, righty) - righterr = rightfit.stderr + return fig, axs + + def estimate(self, temps, vols): + points = np.array(list(zip(temps, vols))) + cluster1, cluster2 = self._get_clusters(points) + if min(cluster1[0]) < min(cluster2[0]): + solid_range = cluster1[0] + liquid_range = cluster2[0] + else: + solid_range = cluster2[0] + liquid_range = cluster1[0] + + return np.mean([max(solid_range), min(liquid_range)]) + + def _get_clusters(self, points): + clustering = AgglomerativeClustering(n_clusters=2).fit(points) + cluster1 = points[np.argwhere(clustering.labels_ == 1).squeeze()].T + cluster2 = points[np.argwhere(clustering.labels_ == 0).squeeze()].T + return cluster1, cluster2 + +class MeltingPointTrisectionEstimator(AbstractMeltingPointEstimator): + + def _get_fit_error_total(self, xs, ys): + slope, intercept, r_value, p_value, std_err = linregress(xs, ys) + y_pred = intercept + slope * np.array(xs) + err = np.sum(np.abs(y_pred - ys)) + return slope, intercept, err + + def _unzip_pts(self, pts): + ts = [pt[0] for pt in pts] + vs = [pt[1] for pt in pts] + return ts, vs + + def _plot_pts(self, pts): + ts, vs = self._unzip_pts(pts) + plt.scatter(ts,vs, color='grey') + + + def _split_pts(self, pts, x1, x2): + set1 = [pt for pt in pts if pt[0] < x1] + set2 = [pt for pt in pts if pt[0] > x1 and pt[0] < x2] + set3 = [pt for pt in pts if pt[0] > x2] + return set1, set2, set3 + + def _plot_split(self, pts, x1, x2): + set1, set2, set3 = self._plit_pts(pts, x1, x2) + self._plot_pts(set1) + self._plot_pts(set2) + self._plot_pts(set3) + + def _get_linear_ys(self, m, b, xs): + return [m * x + b for x in xs] + + def _plot_fit_line(self, m, b, xs): + fit_ys = self._get_linear_ys(m, b, xs) + plt.plot(xs, fit_ys) + + def _plot_fits(self, pts, x1, x2): + set1, set2, set3 = self._split_pts(pts, x1, x2) + for dset in [set1, set2, set3]: + xs, ys = self._unzip_pts(dset) + m, b, err = self._get_fit_error_total(xs, ys) + self._plot_pts(dset) + self._plot_fit_line(m, b, xs) + + + def _find_best_trisection(self, points, min_x = None, max_x = None, min_window_size = 100, step_size = 50): + lowest_err = math.inf - combined_err = math.sqrt(lefterr ** 2 + righterr ** 2) - combined_err = lefterr + righterr - return leftfit.slope, leftfit.intercept, rightfit.slope, rightfit.intercept, combined_err + xs, ys = self._unzip_pts(points) + if min_x is None: + min_x = math.floor(np.min(xs)) + if max_x is None: + max_x = math.ceil(np.max(xs)) + + for pt1 in range(min_x + min_window_size, max_x - 2 * min_window_size, step_size): + for pt2 in range(pt1 + min_window_size, max_x - min_window_size, step_size): + set1, set2, set3 = self._split_pts(points, pt1, pt2) + errs_total = 0 + errs = [] + try: + for dset in [set1, set2, set3]: + xs, ys = self._unzip_pts(dset) + m, b, err = self._get_fit_error_total(xs, ys) + errs.append(err) + errs_total += err ** 2 + + errs_total = math.sqrt(errs_total) + + if errs_total < lowest_err: + lowest_err = errs_total + best_pt1 = pt1 + best_pt2 = pt2 + except: + print("problem encountered") - def assess_splits(self, xs, ys): + return best_pt1, best_pt2 + + def _estimate_melting_pt(self, points): + pt1, pt2 = self._find_best_trisection(points, step_size=100) + # print(f'Coarse guess: {pt1, pt2}') + pt1, pt2 = self._find_best_trisection(points, pt1 - 150, pt2 + 150, step_size = 5) + # print(f'Fine guess: {pt1, pt2}') + return pt1, pt2, (pt2 - pt1) / 2 + pt1 + + def estimate(self, temps, vols): + pts = list(zip(temps, vols)) + pt1, pt2, tm = self._estimate_melting_pt(pts) + return tm + + def plot(self, ts, vs, plot_title=None): + pts = list(zip(ts, vs)) + pt1, pt2, tm = self._estimate_melting_pt(pts) + self._plot_fits(pts, pt1, pt2) + + +class MeltingPointBisectionEstimator(AbstractMeltingPointEstimator): + + def plot_best_split(self, temps, vols): + split_idx = self.get_best_split(temps, vols) + fig, axs = self._plot_split(temps, vols, split_idx) + Tm = self.estimate(temps, vols) + axs.plot([Tm, Tm], [min(vols), max(vols)], color="r") + return fig, axs + + def estimate(self, temps, vols): + best_split_idx = self.get_best_split(temps, vols) + return np.mean([temps[best_split_idx], temps[best_split_idx - 1]]) + + def _split_dset(self, pts, split_idx): + return pts[0:split_idx], pts[split_idx:] + + def _assess_splits(self, xs, ys): dset_size = len(xs) buffer = max(round(dset_size / 10), 3) pt_idxs = list(range(buffer + 1, len(xs) - buffer - 1)) errs = [] for idx in pt_idxs: - _, _, _, _, total_err = self.get_split_fit(xs, ys, idx) + _, _, _, _, total_err = self._get_split_fit(xs, ys, idx) errs.append(total_err) - + return list(zip(pt_idxs, errs)) - def get_linear_ys(self, m, b, xs): + def _get_linear_ys(self, m, b, xs): return [m * x + b for x in xs] - def plot_split(self, xs, ys, split_idx): - m1, b1, m2, b2, _ = self.get_split_fit(xs, ys, split_idx) - leftxs, rightxs = self.split_dset(xs, split_idx) - leftys, rightys = self.split_dset(ys, split_idx) + def _plot_split(self, xs, ys, split_idx): + m1, b1, m2, b2, _ = self._get_split_fit(xs, ys, split_idx) + leftxs, rightxs = self._split_dset(xs, split_idx) + leftys, rightys = self._split_dset(ys, split_idx) - left_fit_ys = self.get_linear_ys(m1, b1, leftxs) + fig, axs = plt.subplots() - plt.scatter(leftxs, leftys) - plt.plot(leftxs, left_fit_ys) - plt.title("Volume vs Temperature (w/ best fits)") - plt.xlabel("Temperature (K)") - plt.ylabel("Equil. Volume (cubic Angstroms)") + left_fit_ys = self._get_linear_ys(m1, b1, leftxs) + axs.scatter(leftxs, leftys) + axs.plot(leftxs, left_fit_ys) - right_fit_ys = self.get_linear_ys(m2, b2, rightxs) - plt.scatter(rightxs, rightys) - plt.plot(rightxs, right_fit_ys) - + right_fit_ys = self._get_linear_ys(m2, b2, rightxs) + axs.scatter(rightxs, rightys) + axs.plot(rightxs, right_fit_ys) + + axs.title("Volume vs Temperature (w/ best fits by Slope Method") + axs.xlabel("Temperature (K)") + axs.ylabel("Equil. Volume (cubic Angstroms)") + return fig, axs + def get_best_split(self, xs, ys): - split_errs = self.assess_splits(xs, ys) + split_errs = self._assess_splits(xs, ys) errs = [pt[1] for pt in split_errs] idxs = [pt[0] for pt in split_errs] best_split_idx = idxs[np.argmin(errs)] return best_split_idx - - def plot_vol_vs_temp(self, temps, vols): - split_idx = self.get_best_split(temps, vols) - self.plot_split(temps, vols, split_idx) - def estimate_melting_temp(self, temps, vols): - best_split_idx = self.get_best_split(temps, vols) - return temps[best_split_idx] \ No newline at end of file + +class MeltingPointSlopeRMSEEstimator(MeltingPointBisectionEstimator): + + name: str = "RMSE Bisection" + + def _get_fit_error(self, xs, ys): + slope, intercept, r_value, p_value, std_err = linregress(xs, ys) + y_pred = intercept + slope * np.array(xs) + err = mean_squared_error(y_true=ys, y_pred=y_pred, squared=False) + return slope, intercept, err + + def _get_split_fit(self, xs, ys, split_idx): + leftx, rightx = self._split_dset(xs, split_idx) + lefty, righty = self._split_dset(ys, split_idx) + + lslope, lintercept, r_value, p_value, std_err = linregress(leftx, lefty) + left_y_pred = lintercept + lslope * np.array(leftx) + lefterr = mean_squared_error(y_true=lefty, y_pred=left_y_pred, squared=False) + + rslope, rintercept, r_value, p_value, std_err = linregress(rightx, righty) + right_y_pred = rintercept + rslope * np.array(rightx) + righterr = mean_squared_error(y_true=righty, y_pred=right_y_pred, squared=False) + + combined_err = math.sqrt(lefterr**2 + righterr**2) + combined_err = lefterr + righterr + return lslope, lintercept, rslope, rintercept, combined_err + + +class MeltingPointSlopeStdErrEstimator(MeltingPointBisectionEstimator): + + name: str = "StdErr Bisection" + + def _get_split_fit(self, xs, ys, split_idx): + leftx, rightx = self._split_dset(xs, split_idx) + lefty, righty = self._split_dset(ys, split_idx) + + leftfit = linregress(leftx, lefty) + lefterr = leftfit.stderr + + rightfit = linregress(rightx, righty) + righterr = rightfit.stderr + + combined_err = math.sqrt(lefterr**2 + righterr**2) + combined_err = lefterr + righterr + return ( + leftfit.slope, + leftfit.intercept, + rightfit.slope, + rightfit.intercept, + combined_err, + ) + +class MeltingPointPolyFitEstimator(AbstractMeltingPointEstimator): + + name: str = "Polynomial Fit" + + def plot_fit(self, ts, vs, plot_title=None): + fig, axs = super().plot(ts, vs, plot_title=plot_title) + p = self._polyfit(ts, vs) + fit_ts = p(vs) + axs.plot(fit_ts, vs, color='r') + return fig, axs + + def estimate(self, temps, vols): + p = self._polyfit(temps, vols) + second = p.deriv(2) + melting_v = second.roots()[0] + melting_t = p(melting_v) + return melting_t + + def _polyfit(self, temps, vols): + coefs = poly.polyfit(vols, temps, 3) + p = P(coefs) + return p \ No newline at end of file diff --git a/src/mpmorph/analysis/structural_analysis.py b/src/mpmorph/analysis/structural_analysis.py index 2becc70f..ef2884cf 100644 --- a/src/mpmorph/analysis/structural_analysis.py +++ b/src/mpmorph/analysis/structural_analysis.py @@ -47,7 +47,6 @@ def polyhedra_connectivity(structures, pair, cutoff, step_freq=1): polyhedra_list.append(set(current_poly)) for polypair in itertools.combinations(polyhedra_list, 2): - polyhedra_pair_type = (len(polypair[0]), len(polypair[1])) shared_vertices = len(polypair[0].intersection(polypair[1])) @@ -143,7 +142,6 @@ class BondAngleDistribution(object): """ def __init__(self, structures, cutoffs, step_freq=1): - self.bond_angle_distribution = None self.structures = structures self.step_freq = step_freq @@ -251,7 +249,6 @@ def get_bond_angle_distribution(self): # get all pair combinations of neoghbor sites of i: for p in itertools.combinations(neighbors[i], 2): - # check if pairs are within the defined cutoffs if self._cutoff_type == "dict": if self._check_skip_triplet(s_index, i, p[0][2], p[1][2]): diff --git a/src/mpmorph/database.py b/src/mpmorph/database.py index 20684cba..272fff15 100644 --- a/src/mpmorph/database.py +++ b/src/mpmorph/database.py @@ -30,7 +30,7 @@ def __init__( collection="tasks", user=None, password=None, - **kwargs + **kwargs, ): super(VaspMDCalcDb, self).__init__( host, port, database, collection, user, password, **kwargs @@ -75,7 +75,6 @@ def insert_task( # insert structures at each ionic step into GridFS if parse_ionic_steps and "calcs_reversed" in task_doc: - # Convert from ionic steps dictionary to pymatgen.core.trajectory.Trajectory object ionic_steps_dict = task_doc["calcs_reversed"][0]["output"]["ionic_steps"] time_step = task_doc["input"]["incar"]["POTIM"] diff --git a/src/mpmorph/firetasks/dbtasks.py b/src/mpmorph/firetasks/dbtasks.py index 1e07f82d..0f660f99 100644 --- a/src/mpmorph/firetasks/dbtasks.py +++ b/src/mpmorph/firetasks/dbtasks.py @@ -222,7 +222,6 @@ def load_trajectories_from_gfs(runs, mmdb, gfs_keys=None): trajectory = None for i, (fs_id, fs) in enumerate(gfs_keys): - if fs == "trajectories_fs" or fs == "rebuild_trajectories_fs": # Load stored Trajectory print(fs_id, "is stored in trajectories_fs") diff --git a/src/mpmorph/fireworks/powerups.py b/src/mpmorph/fireworks/powerups.py index 22f16257..4d244c33 100644 --- a/src/mpmorph/fireworks/powerups.py +++ b/src/mpmorph/fireworks/powerups.py @@ -46,7 +46,7 @@ def aggregate_trajectory(fw, **kwargs): def add_cont_structure(fw): prev_struct_task = PreviousStructureTask() insert_i = 2 - for (i, task) in enumerate(fw.tasks): + for i, task in enumerate(fw.tasks): if task.fw_name == "{{atomate.vasp.firetasks.run_calc.RunVaspCustodian}}": insert_i = i break @@ -68,7 +68,7 @@ def add_pass_pv(fw, **kwargs): def add_pv_volume_rescale(fw): insert_i = 2 - for (i, task) in enumerate(fw.tasks): + for i, task in enumerate(fw.tasks): if task.fw_name == "{{atomate.vasp.firetasks.run_calc.RunVaspCustodian}}": insert_i = i break @@ -80,7 +80,7 @@ def add_pv_volume_rescale(fw): def add_rescale_volume(fw, **kwargs): rsv_task = RescaleVolumeTask(**kwargs) insert_i = 2 - for (i, task) in enumerate(fw.tasks): + for i, task in enumerate(fw.tasks): if task.fw_name == "{{atomate.vasp.firetasks.run_calc.RunVaspCustodian}}": insert_i = i break diff --git a/src/mpmorph/flows/md_flow.py b/src/mpmorph/flows/md_flow.py index e7bbc016..376fbf89 100644 --- a/src/mpmorph/flows/md_flow.py +++ b/src/mpmorph/flows/md_flow.py @@ -3,6 +3,7 @@ from mpmorph.jobs.core import M3GNetMDMaker from mpmorph.jobs.equilibrate_volume import EquilibriumVolumeSearchMaker +from mpmorph.jobs.lammps_volume import LammpsCalcMaker from pymatgen.core.structure import Structure from mpmorph.jobs.pv_from_calc import PVFromCalc, PVFromM3GNet, PVFromVasp @@ -11,11 +12,17 @@ EQUILIBRATE_VOLUME_FLOW = "EQUILIBRATE_VOLUME_FLOW" M3GNET_MD_FLOW = "M3GNET_MD_FLOW" M3GNET_MD_CONVERGED_VOL_FLOW = "M3GNET_MD_CONVERGED_VOL_FLOW" +LAMMPS_VOL_FLOW = "LAMMPS_VOL_FLOW" -def get_md_flow_m3gnet(structure, temp, steps, converge_first = True, initial_vol_scale = 1): + +# def get_md_temperature_sweeping(structure, temp, steps, converge_first = True, initial_vol_scale = 1, **input_kwargs): + + +def get_md_flow_m3gnet(structure, temp, steps, converge_first = True, initial_vol_scale = 1, **input_kwargs): inputs = M3GNetMDInputs( temperature=temp, - steps=steps + steps=steps, + **input_kwargs ) m3gnet_maker = M3GNetMDMaker(parameters = inputs) @@ -28,6 +35,18 @@ def get_md_flow_m3gnet(structure, temp, steps, converge_first = True, initial_vo initial_vol_scale=initial_vol_scale ) +def get_equil_vol_flow_lammps(structure, + temp, + steps): + vol_maker = LammpsCalcMaker() + vol_job = vol_maker.make( + temp, + steps, + structure + ) + flow = Flow([vol_job], output=vol_job, name=LAMMPS_VOL_FLOW) + return flow + def get_equil_vol_flow(structure, temp, steps): inputs = M3GNetMDInputs( temperature=temp, @@ -76,3 +95,5 @@ def _get_converge_flow(structure: Structure, pv_md_maker: PVFromCalc, production flow = Flow([equil_vol_job, final_md_job], output=final_md_job.output, name=M3GNET_MD_CONVERGED_VOL_FLOW) return flow + + diff --git a/src/mpmorph/flows/vt_flow.py b/src/mpmorph/flows/vt_flow.py index 4b789363..7c1ee123 100644 --- a/src/mpmorph/flows/vt_flow.py +++ b/src/mpmorph/flows/vt_flow.py @@ -1,11 +1,14 @@ from jobflow import Flow, job import json +import uuid -from .md_flow import get_equil_vol_flow +from .md_flow import get_equil_vol_flow, get_equil_vol_flow_lammps from ..jobs.pv_from_calc import m3gnet_calc_to_vol +import pandas as pd VOLUME_TEMPERATURE_SWEEP = "VOLUME_TEMPERATURE_SWEEP" + def get_vt_sweep_flow( structure, lower_bound=100, @@ -14,30 +17,77 @@ def get_vt_sweep_flow( output_name="vt.out", steps=2000, ): - vs = [] volume_jobs = [] temps = list(range(lower_bound, upper_bound, temp_step)) for temp in temps: - job = get_equil_vol_flow( + job = get_equil_vol_flow(structure=structure, temp=temp, steps=steps) + volume_jobs.append(job) + vs.append(job.output.volume) + + collect_job = _collect_vt_results(vs, temps, structure, output_name) + + new_flow = Flow( + [*volume_jobs, collect_job], + output=collect_job.output, + name=VOLUME_TEMPERATURE_SWEEP, + ) + return new_flow + + +def get_vt_sweep_flow_lammps( + structure, + lower_bound=100, + upper_bound=1100, + temp_step=100, + output_name="vt.out", + steps=2000, + mp_id=None, +): + v_outputs = [] + volume_jobs = [] + temps = list(range(lower_bound, upper_bound, temp_step)) + + for temp in temps: + job = get_equil_vol_flow_lammps( structure=structure, temp=temp, - steps=steps + steps=steps, ) volume_jobs.append(job) - vs.append(job.output.volume) + v_outputs.append(job.output.output) - collect_job = _collect_vt_results(vs, temps, structure, output_name) + collect_job = _collect_vt_results(v_outputs, temps, structure, output_name, mp_id) - new_flow = Flow([*volume_jobs, collect_job], output=collect_job.output, name=VOLUME_TEMPERATURE_SWEEP) + flow_name = f"{structure.composition.reduced_formula}-Melting Point" + new_flow = Flow( + [*volume_jobs, collect_job], output=collect_job.output, name=flow_name + ) return new_flow @job -def _collect_vt_results(vs, ts, structure, output_fn): - result = {"structure": structure.as_dict(), "volumes": vs, "temps": ts} +def _collect_vt_results(v_outputs, ts, structure, output_fn, mp_id): + result = { + "structure": structure.as_dict(), + "volumes": [get_converged_vol(v) for v in v_outputs], + "temps": ts, + "mp_id": mp_id, + "reduced_formula": structure.composition.reduced_formula, + "formula": structure.composition.formula, + "uuid": str(uuid.uuid4()), + } with open(output_fn, "+w") as f: f.write(json.dumps(result)) return result + + +def get_converged_vol(v_output): + df = pd.DataFrame.from_dict(v_output) + total_steps = (len(df) - 1) * 10 + avging_window = int(total_steps / 30) + vols = df.iloc[-avging_window::]["vol"] + eq_vol = vols.values.mean() + return float(eq_vol) diff --git a/src/mpmorph/io.py b/src/mpmorph/io.py index d095ecbe..8a43529c 100644 --- a/src/mpmorph/io.py +++ b/src/mpmorph/io.py @@ -17,13 +17,13 @@ def get_string_from_struct( ): format_str = "{{:.{0}f}}".format(significant_figures) - for (si, structure) in enumerate(structures): + for si, structure in enumerate(structures): lines = [system, "1.0", str(structure.lattice)] lines.append(" ".join(self.get_site_symbols(structure))) lines.append(" ".join([str(x) for x in self.get_natoms(structure)])) lines.append("Direct configuration= " + str(si + 1)) - for (i, site) in enumerate(structure): + for i, site in enumerate(structure): coords = site.frac_coords line = " ".join([format_str.format(c) for c in coords]) line += " " + site.species_string @@ -72,9 +72,9 @@ def get_string(self, system="unknown system", significant_figures=6): # positions = np.add(self.trajectory[0].frac_coords, self.trajectory.displacements) atoms = [site.specie.symbol for site in self.trajectory[0]] - for (si, position_array) in enumerate(positions): + for si, position_array in enumerate(positions): lines.append("Direct configuration= " + str(si + 1)) - for (i, coords) in enumerate(position_array): + for i, coords in enumerate(position_array): line = " ".join([format_str.format(c) for c in coords]) line += " " + atoms[i] lines.append(line) diff --git a/src/mpmorph/jobs/core.py b/src/mpmorph/jobs/core.py index 50ef7524..ecb1c828 100644 --- a/src/mpmorph/jobs/core.py +++ b/src/mpmorph/jobs/core.py @@ -38,5 +38,6 @@ def make(self, structure: Structure, **kwargs): """ calc_doc = run_m3gnet(structure, self.parameters, self.name, **kwargs) + return calc_doc diff --git a/src/mpmorph/jobs/equilibrate_volume.py b/src/mpmorph/jobs/equilibrate_volume.py index 646fe719..74533c35 100644 --- a/src/mpmorph/jobs/equilibrate_volume.py +++ b/src/mpmorph/jobs/equilibrate_volume.py @@ -29,7 +29,6 @@ class EquilibriumVolumeSearchMaker(Maker): def make( self, original_structure: Structure, md_pv_data_docs: List[MDPVDataDoc] = None ): - if md_pv_data_docs is not None and len(md_pv_data_docs) > MAX_MD_JOBS: raise RuntimeError( "Maximum number of jobs for equilibrium volume search exceeded" diff --git a/src/mpmorph/jobs/lammps/helpers.py b/src/mpmorph/jobs/lammps/helpers.py new file mode 100644 index 00000000..b5fa38ab --- /dev/null +++ b/src/mpmorph/jobs/lammps/helpers.py @@ -0,0 +1,37 @@ +from ase.io.lammpsrun import read_lammps_dump_text +from pymatgen.io.ase import AseAtomsAdaptor +from pymatgen.core.trajectory import Trajectory +from pymatgen.io.lammps.inputs import LammpsTemplateGen +from pymatgen.io.lammps.data import LammpsData +from subprocess import PIPE, Popen + +def trajectory_from_lammps_dump(dump_path): + with open(dump_path, "r+") as f: + atoms = read_lammps_dump_text(f, index=slice(0, None)) + + structs = [] + + for a in atoms: + structs.append(AseAtomsAdaptor().get_structure(a)) + + return Trajectory.from_structures(structs, constant_lattice=False) + +def run_lammps(structure, template_path, template_opts, lammps_bin): + data_filename: str = "data.lammps" + data = LammpsData.from_structure(structure, atom_style='atomic') + # Write the input files + linp = LammpsTemplateGen().get_input_set(script_template=template_path, + settings=template_opts, + data=data, + data_filename=data_filename) + + linp.write_input(directory=".") + input_name = "in.lammps" + # Run LAMMPS + + lammps_cmd = [lammps_bin, "-in", input_name] + print(f"Running: {' '.join(lammps_cmd)}") + with Popen(lammps_cmd, stdout=PIPE, stderr=PIPE) as p: + (stdout, stderr) = p.communicate() + + print(f"LAMMPS finished running: {stdout} \n {stderr}") \ No newline at end of file diff --git a/src/mpmorph/jobs/lammps/lammps_basic_const_temp.py b/src/mpmorph/jobs/lammps/lammps_basic_const_temp.py new file mode 100644 index 00000000..566fc189 --- /dev/null +++ b/src/mpmorph/jobs/lammps/lammps_basic_const_temp.py @@ -0,0 +1,72 @@ +import os +from jobflow import Maker, job + +import pandas as pd +from pymatgen.core.structure import Structure +from .helpers import run_lammps, trajectory_from_lammps_dump + +from mpmorph.schemas.lammps_calc import LammpsCalc + +from pkg_resources import resource_filename + +class BasicLammpsConstantTempMaker(Maker): + """ + Run LAMMPS directly using m3gnet at a constant temperature. + Required params: + lammsps_cmd (str): lammps command to run sans the input file name. + e.g. 'mpirun -n 4 lmp_mpi' + """ + + name = "LAMMPS_CALCULATION" + + @job(trajectory="trajectory", output_schema=LammpsCalc) + def make(self, temperature: int, ensemble:str, total_steps: int, structure: Structure = None, + barostat:str =None, Pstart:str = None, Pstop:str = None, Pdamp:str = None): + lammps_bin = os.environ.get("LAMMPS_CMD") + m3gnet_path = os.environ.get("M3GNET_PATH") + + chem_sys_str = " ".join(el.symbol for el in structure.composition.elements) + script_options ={"temperature": temperature, + "m3gnet_path": m3gnet_path, + "species": chem_sys_str, + "total_steps": total_steps, + "print_every_n_step": 10} + + if ensemble =='nvt': + script_options.update({ + "ensemble": ensemble + }) + elif ensemble =='npt': + script_options.update({ + "ensemble": ensemble, + "barostat":barostat, + "Pstart":Pstart, + "Pstop":Pstop, + "Pdamp":Pdamp, + } ) + + template_path = resource_filename('mpmorph', 'jobs/lammps/templates/basic_constant_temp.lammps') + + run_lammps(structure, template_path, script_options, lammps_bin) + + trajectory = trajectory_from_lammps_dump("trajectory.lammpstrj") + + df = pd.read_csv( + "step_temp_vol_density.txt", + delimiter=" ", + index_col="step", + skiprows=1, + names=["step", "temp", "vol", "density"], + ) + + metadata = {"temperature": temperature, "total_steps": total_steps} + + output = LammpsCalc( + dir_name=os.getcwd(), + trajectory=trajectory, + composition=structure.composition, + reduced_formula=structure.composition.reduced_formula, + metadata=metadata, + dump_data=df.to_dict(), + ) + return output diff --git a/src/mpmorph/jobs/lammps/lammps_basic_temp_sweep.py b/src/mpmorph/jobs/lammps/lammps_basic_temp_sweep.py new file mode 100644 index 00000000..943fd6a8 --- /dev/null +++ b/src/mpmorph/jobs/lammps/lammps_basic_temp_sweep.py @@ -0,0 +1,65 @@ +import os +from jobflow import Maker, job + +import pandas as pd +from pymatgen.core.structure import Structure + +from mpmorph.schemas.lammps_calc import LammpsCalc +from .helpers import run_lammps, trajectory_from_lammps_dump + +from pkg_resources import resource_filename + +class BasicLammpsTempSweepMaker(Maker): + """ + Run LAMMPS directly using m3gnet sweeping over a range of temperatures. + Required params: + lammsps_cmd (str): lammps command to run sans the input file name. + e.g. 'mpirun -n 4 lmp_mpi' + """ + + name = "LAMMPS_CALCULATION" + + @job(trajectory="trajectory", output_schema=LammpsCalc) + def make(self, temp_initial: int, + temp_final: int, + total_steps: int, + structure: Structure = None): + + lammps_bin = os.environ.get("LAMMPS_CMD") + m3gnet_path = os.environ.get("M3GNET_PATH") + + chem_sys_str = " ".join(el.symbol for el in structure.composition.elements) + + script_options = { + "tempstart": temp_initial, + "tempstop": temp_final, + "m3gnet_path": m3gnet_path, + "species": chem_sys_str, + "total_steps": total_steps, + "print_every_n_step": 10 + } + + + template_path = resource_filename('mpmorph', 'jobs/lammps/templates/basic_temp_sweep.lammps') + + run_lammps(structure, template_path, script_options, lammps_bin) + + trajectory = trajectory_from_lammps_dump("trajectory.lammpstrj") + + # df = pd.read_csv("step_temp_vol_density.txt", delimiter=" ", index_col="step", skiprows=1, names=["step", "temp", "vol", "density"]) + + metadata = { + "temp_initial": temp_initial, + "temp_final": temp_final, + "total_steps": total_steps + } + + output = LammpsCalc( + dir_name=os.getcwd(), + trajectory=trajectory, + composition=structure.composition, + reduced_formula=structure.composition.reduced_formula, + metadata=metadata, + dump_data={} + ) + return output \ No newline at end of file diff --git a/src/mpmorph/jobs/lammps/templates/basic_constant_temp.lammps b/src/mpmorph/jobs/lammps/templates/basic_constant_temp.lammps new file mode 100644 index 00000000..488f6546 --- /dev/null +++ b/src/mpmorph/jobs/lammps/templates/basic_constant_temp.lammps @@ -0,0 +1,39 @@ +# +# This is an example of the driver of `M3GNet' , +# which contains a state-of-the-art Graph Neural Network Potential trained with data of Materials Projects. +# This driver is developed by AdvanceSoft Corp . +# Before you use this driver, you have to install python3 and m3gnet (pip install m3gnet). +# +# NOTE: +# 1) the units must be metal +# 2) the 3D periodic boundary condition must be used +# 3) cannot use MPI parallelization, only OpenMP or GPU +# + +units metal +boundary p p p +atom_style atomic + +read_data data.lammps + +pair_style m3gnet $m3gnet_path +pair_coeff * * MP-2021.2.8-EFS $species + +thermo_style custom step time cpu pe ke etotal temp press vol density +thermo $print_every_n_step + +# Record trajectory +dump myDump all custom 10 trajectory.lammpstrj id element x y z +dump_modify myDump sort id element $species + +variable p1 equal "step" +variable p2 equal "temp" +variable p3 equal "vol" +variable p4 equal "density" +fix def1 all print $print_every_n_step "${p1} ${p2} ${p3} ${p4}" file step_temp_vol_density.txt + + +velocity all create $temperature 12345 +fix myEnse all $ensemble temp $temperature $temperature 0.1 $barostat $Pstart $Pstop $Pdamp +timestep 0.002 +run $total_steps diff --git a/src/mpmorph/jobs/lammps/templates/basic_temp_sweep.lammps b/src/mpmorph/jobs/lammps/templates/basic_temp_sweep.lammps new file mode 100644 index 00000000..4262ee5a --- /dev/null +++ b/src/mpmorph/jobs/lammps/templates/basic_temp_sweep.lammps @@ -0,0 +1,165 @@ +# Script originally made by Oscar Guerrero +# Reference: https://orca.cardiff.ac.uk/id/eprint/101322/1/MRSPaper2.pdf + +##################################################################### +# READ THIS BEFORE MAKING CHANGES +# +# NOTICE: ANY VARIABLES BEGINNING WITH _ ARE INTERNAL TO THE LAMMPS SCRIPT +# VARIABLES FOR USE REPLACEMENT BY TEMPLATE HAVE NO _ IN NAME +# +# Parameters for this template: +# tempstart - The starting temp for the simulation +# tempstop - The ending temp for the simulation +# species - A space-separated list of elements present in the system, e.g. "Y Mn O" +# m3gnet_path - The path to the m3gnet potential installation +# print_every_n_step - The frequency with which info should be printed to stdout +# total_steps - The total number of simulation steps +########################################################################## + + + +units metal +atom_style atomic +boundary p p p +box tilt large + +atom_modify map array + +#define variables + +variable tempstart equal $tempstart +variable tempstop equal $tempstop +variable myseed equal 12345 +variable atomrate equal 1000 +variable time_step equal 0.002 +variable time_eq equal 1000 +#variable tdamp equal 1. + +variable tamp equal "v_time_step*1000" # DO NOT CHANGE +variable pdamp equal "v_time_step*1000" # DO NOT CHANGE +timestep ${time_step} # DO NOT CHANGE + + +#Create structure +read_data data.lammps + + +#Define Interatomic Potential + +pair_style m3gnet $m3gnet_path +pair_coeff * * MP-2021.2.8-EFS $species + +# Equilibration +reset_timestep 0 +velocity all create ${tempstart} ${myseed} mom yes rot no dist gaussian +fix equilibration all npt temp ${tempstart} ${tempstart} $(100.0*dt) iso 1 1 ${pdamp} drag 0.2 + + +variable eq1 equal "step" +variable eq2 equal "pxx" +variable eq3 equal "pyy" +variable eq4 equal "pzz" +variable eq5 equal "lx" +variable eq6 equal "ly" +variable eq7 equal "lz" +variable eq8 equal "vol" +variable eq9 equal "temp" +variable eq10 equal "etotal" + +fix data_equilibration all print 10 "${eq1} ${eq2} ${eq3} ${eq4} ${eq5} ${eq6} ${eq7} ${eq8} ${eq9} ${eq10}" file ${tempstart}K.data +thermo 1000 +thermo_style custom step pxx pyy pzz lx ly lz temp etotal + +# RUN +run 1000 + +# store final volume Vo to calculate V/Vo (reduce units) +variable tmp equal "vol" +variable Vo equal ${tmp} +print "Volume initial is , Vo: ${Vo}" + +#reset +unfix equilibration +unfix data_equilibration + +#----------------------------- Increase temperature------------------------------------ +reset_timestep 0 +fix melting all npt temp ${tempstart} ${tempstop} $(100.0*dt) iso 1 1 ${pdamp} drag 0.2 +# fix melting all nvt temp ${tempstart} ${tempstop} ${tdamp} drag 0.2 + +variable eq1 equal "step" +variable eq2 equal "pxx" +variable eq3 equal "pyy" +variable eq4 equal "pzz" +variable eq5 equal "lx" +variable eq6 equal "ly" +variable eq7 equal "lz" +variable eq8 equal "temp" +variable eq9 equal "vol/v_Vo" +variable eq10 equal "etotal" +run 0 +fix data_melting all print $print_every_n_step "${eq8} ${eq9}" file temp_vs_ref_vol.txt screen no + +dump 1 all cfg 100 step*.cfg mass type xs ys zs id +# What does this do? so the element str can be written to the cfg file +dump_modify 1 element $species + +dump 2 all custom 100 trajectory.lammpstrj id element x y z +dump_modify 2 sort id element $species +# dump all the dump files into one file trajectory.lammpstrj + +# wildcard * is used to dump all the snapshots into invividual files +dump 3 all custom 100 dump.* id type x y z + +# Compute msd command and dump every 10 steps +compute msd all msd com yes +fix msd all ave/time 1 1 10 c_msd[4] file msd.txt + +group lithium type 1 +# group lanthanum type 2 +# group zirconium type 3 +# group oxygen type 4 + +compute mymsd1 lithium msd com yes +# compute ID(mymsd) group-ID(lithium) msd keyword(com)values(yes: If the com option is set to yes +# then the effect of any drift in the center-of-mass of the group of atoms is subtracted out +# before the displacement of each atom is calculated.) +variable msdxLi equal "c_mymsd1[1]" +variable msdyLi equal "c_mymsd1[2]" +variable msdzLi equal "c_mymsd1[3]" +variable msdtotLi equal "c_mymsd1[4]" +fix msdT1 lithium ave/time 1 1 1000 v_msdxLi v_msdyLi v_msdzLi v_msdtotLi file msd_Li.dat +#fix ID group-ID ave/time Nevery Nrepeat Nfreq value1 value2 ... keyword args ... +# For example, if Nevery=2, Nrepeat=6, and Nfreq=100, then values on timesteps 90,92,94,96,98,100 will be used to compute the final average on timestep 100. +# Similarly for timesteps 190,192,194,196,198,200 on timestep 200, etc. +# If Nrepeat=1 and Nfreq = 100, then no time averaging is done; values are simply generated on timesteps 100,200,etc. (v_msdx v_msdy v_msdz v_msdtot) + + +# use velocity auto-correlation function (VACF) to calculate diffusion coefficient +compute 2 all vacf +fix 5 all vector 1 c_2[4] +variable diff equal dt*trap(f_5) +fix vacf all print 1 "${eq1} ${eq8} ${eq9} ${diff}" + +run $total_steps +thermo $print_every_n_step +thermo_style custom step v_diff + + +# print to screen out + +#run 10000 +#reset +unfix melting +unfix data_melting +undump 1 +undump 2 +undump 3 +write_restart restart.equil +write_data data.* +# SAVE THE DATA OF THE CALCULATION OR ELSE YOU NEED TO START OVER = ( OUCH ! +# SIMULATION DONE +clear +print "Simulation done! You have done great job!" + + diff --git a/src/mpmorph/jobs/pv_from_calc.py b/src/mpmorph/jobs/pv_from_calc.py index 518cced7..e82ec028 100644 --- a/src/mpmorph/jobs/pv_from_calc.py +++ b/src/mpmorph/jobs/pv_from_calc.py @@ -30,7 +30,6 @@ def make(self, structure, scale_factor=None): @dataclass class PVFromM3GNet(PVFromCalc): - name: str = "PV_FROM_M3GNET" parameters: M3GNetMDInputs = None @@ -45,6 +44,22 @@ def build_doc(self, m3gnet_calc: M3GNetMDCalculation): return MDPVDataDoc(volume=v_data, pressure=p_data) +@dataclass +class PVFromM3GNetLammps(PVFromCalc): + """Generates a MDPVDataDoc using Lammps run with M3gnet and a npt ensemble.""" + + name: str = "PV_FROM_M3GNET_LAMMPS" + parameters: M3GNetMDInputs = None + + def run_md(self, structure: Structure, **kwargs): + calc_doc = run_m3gnet(structure, self.parameters, self.name, **kwargs) + + return calc_doc + + def build_doc(self, pvdoc: MDPVDataDoc): + return pvdoc + + def m3gnet_calc_to_vol(m3gnet_calc: M3GNetMDCalculation): volume = m3gnet_calc.trajectory[-1].lattice.volume return volume @@ -60,7 +75,6 @@ def m3gnet_calc_to_pressure(m3gnet_calc: M3GNetMDCalculation): @dataclass class PVFromVasp(PVFromCalc): - name: str = "PV_FROM_VASP" md_maker: Maker = MDMaker() diff --git a/src/mpmorph/jobs/tasks/m3gnet_input.py b/src/mpmorph/jobs/tasks/m3gnet_input.py index cf47e6c9..8392eceb 100644 --- a/src/mpmorph/jobs/tasks/m3gnet_input.py +++ b/src/mpmorph/jobs/tasks/m3gnet_input.py @@ -11,7 +11,6 @@ def one_atmosphere(): @dataclass class M3GNetMDInputs: - ensemble: str = "nvt" temperature: float = 2000.0 pressure: float = 1.01325 * units.bar diff --git a/src/mpmorph/jobs/volume_temperature_sweep.py b/src/mpmorph/jobs/volume_temperature_sweep.py index c9a9683b..83c66693 100644 --- a/src/mpmorph/jobs/volume_temperature_sweep.py +++ b/src/mpmorph/jobs/volume_temperature_sweep.py @@ -1,13 +1,9 @@ from jobflow import Flow, Maker, Response, job -import json - from mpmorph.jobs.tasks.m3gnet_input import M3GNetMDInputs -from ..schemas.vt_sweep_doc import VTSweepDoc - import dataclasses -class VolumeTemperatureSweepMaker(Maker): +class VolumeTemperatureSweepMaker(Maker): name: str = "VOLUME_TEMPERATURE_SWEEP" md_parameters: M3GNetMDInputs = None @@ -39,26 +35,3 @@ def make( new_flow = Flow([*volume_jobs, collect_job], output=collect_job.output) return Response(replace=new_flow) - - -@job -def _collect_vt_results(vs, ts, structure, output_fn = None): - filtered_vs = [] - filtered_ts = [] - for v, t in zip(vs, ts): - if v is not None: - filtered_vs.append(v) - filtered_ts.append(t) - - - result = VTSweepDoc( - volumes=filtered_vs, - temps=filtered_ts, - structure=structure - ) - - if output_fn is not None: - with open(output_fn, "+w") as f: - f.write(json.dumps(result)) - - return result diff --git a/src/mpmorph/schemas/lammps_calc.py b/src/mpmorph/schemas/lammps_calc.py new file mode 100644 index 00000000..c76c17d1 --- /dev/null +++ b/src/mpmorph/schemas/lammps_calc.py @@ -0,0 +1,33 @@ +from pydantic import BaseModel, Field +from pymatgen.core.composition import Composition +from pymatgen.core.trajectory import Trajectory as PmgTrajectory + +from mpmorph.utils import datetime_str + + +class LammpsCalc(BaseModel): + task_label: str = Field(None, description="The name of the task.") + dir_name: str = Field( + None, description="The directory where the LAMMPS calculation was run" + ) + last_updated: str = Field( + default_factory=datetime_str, + description="Timestamp of when the document was last updated.", + ) + trajectory: PmgTrajectory = Field( + None, description="The pymatgen Trajectory object stored ad dictionary" + ) + composition: Composition = Field(description="The composition of the structure.") + reduced_formula: str = Field( + description="The reduced formula of the structure's composition." + ) + dump_data: dict = Field( + None, description="Any additional data collected via LAMMPS dump files" + ) + metadata: dict = Field( + None, + description=( + "Important info about the calculation, including ensemble type," + " temperature, etc." + ), + ) diff --git a/src/mpmorph/schemas/m3gnet_md_calc.py b/src/mpmorph/schemas/m3gnet_md_calc.py index 941639a0..586e23c6 100644 --- a/src/mpmorph/schemas/m3gnet_md_calc.py +++ b/src/mpmorph/schemas/m3gnet_md_calc.py @@ -47,7 +47,6 @@ def from_directory( ), **kwargs, ): - """ Create a M3GnetCalculation document from a directory containing output files of a M3GNet MD run. diff --git a/src/mpmorph/schemas/pv_data_doc.py b/src/mpmorph/schemas/pv_data_doc.py index 7219d8ea..b836d8fd 100644 --- a/src/mpmorph/schemas/pv_data_doc.py +++ b/src/mpmorph/schemas/pv_data_doc.py @@ -2,7 +2,6 @@ class MDPVDataDoc(BaseModel): - task_label: str = Field(None, description="The name of the task.") volume: float = Field(None, description="The volume data from the MD run") - pressure: float = Field(None, description="The volume data from the MD run") + pressure: float = Field(None, description="The pressure of the MD run") diff --git a/src/mpmorph/schemas/vt_sweep_doc.py b/src/mpmorph/schemas/vt_sweep_doc.py index 4681ba43..c3b7d61f 100644 --- a/src/mpmorph/schemas/vt_sweep_doc.py +++ b/src/mpmorph/schemas/vt_sweep_doc.py @@ -3,8 +3,11 @@ class VTSweepDoc(BaseModel): - task_label: str = Field(None, description="The name of the task.") volumes: float = Field(description="The volume at each temperature") - temps: float = Field(description="The temperatures at which the volume was equilibrated") - structure: Structure = Field(description="The original structure for which this sweep was performed") + temps: float = Field( + description="The temperatures at which the volume was equilibrated" + ) + structure: Structure = Field( + description="The original structure for which this sweep was performed" + ) diff --git a/src/mpmorph/workflows/quench.py b/src/mpmorph/workflows/quench.py index 21832e2c..6e8066af 100644 --- a/src/mpmorph/workflows/quench.py +++ b/src/mpmorph/workflows/quench.py @@ -33,7 +33,7 @@ def get_quench_wf( hold_args = kwargs.get("hold_args", {"md_params": {"nsteps": 500}}) quench_args = kwargs.get("quench_args", {}) - for (i, structure) in enumerate(structures): + for i, structure in enumerate(structures): _fw_list = [] if quench_type == "slow_quench": for temp in np.arange(