PyPI - vitrum - Versions diffs - 1.0.0__py3-none-any.whl - Mend

vitrum 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (31) hide show

vitrum/__init__.py +15 -0
vitrum/batch_active/__init__.py +0 -0
vitrum/batch_active/database.py +72 -0
vitrum/batch_active/flows.py +118 -0
vitrum/batch_active/get_structures.py +160 -0
vitrum/batch_active/input_writer.py +166 -0
vitrum/batch_active/learning.py +339 -0
vitrum/batch_active/structure_gen.py +96 -0
vitrum/batch_active/workflow.py +147 -0
vitrum/comparison.py +39 -0
vitrum/coordination.py +148 -0
vitrum/diffusion.py +122 -0
vitrum/geometry.py +107 -0
vitrum/glass_atoms.py +291 -0
vitrum/io_helpers.py +46 -0
vitrum/mlip_functions.py +103 -0
vitrum/packing.py +153 -0
vitrum/persistent_homology.py +201 -0
vitrum/rings.py +273 -0
vitrum/scattering.py +462 -0
vitrum/scattering_lengths.csv +84 -0
vitrum/structure_gen.py +193 -0
vitrum/structure_validation.py +124 -0
vitrum/trajectory.py +55 -0
vitrum/volume_estimation.py +110 -0
vitrum/x_ray_scattering_factor_coefficients.csv +99 -0
vitrum-1.0.0.dist-info/METADATA +101 -0
vitrum-1.0.0.dist-info/RECORD +31 -0
vitrum-1.0.0.dist-info/WHEEL +5 -0
vitrum-1.0.0.dist-info/licenses/LICENSE +21 -0
vitrum-1.0.0.dist-info/top_level.txt +1 -0

vitrum/__init__.py ADDED Viewed

@@ -0,0 +1,15 @@
+from vitrum.glass_atoms import GlassAtoms
+from vitrum.coordination import Coordination
+from vitrum.diffusion import Diffusion
+from vitrum.rings import RingAnalysis
+from vitrum.scattering import Scattering
+# Package version string.
+__version__ = "1.0.0"
+# Public names of the package: the five names imported at the top of this module.
+__all__ = [
+    "GlassAtoms",
+    "Coordination",
+    "Diffusion",
+    "RingAnalysis",
+    "Scattering",
+]

vitrum/batch_active/__init__.py ADDED Viewed

File without changes

vitrum/batch_active/database.py ADDED Viewed

@@ -0,0 +1,72 @@
+import numpy as np
+import pandas as pd
+from sklearn.model_selection import train_test_split
+def update_ace_database(
+    wd: str,
+    atoms: list,
+    iteration: int,
+    force_threshold: int = 100,
+    train_test_method: str = "composition",
+    train_test_fraction: float = 0.2,
+    database_paths=None,
+    metadata=None,
+):
+    """
+    Update the ACE database with new structures.
+    Parameters:
+        wd (str): Working directory.
+        atoms (list): List of ASE atoms objects.
+        iteration (int): Current iteration number.
+        force_threshold (int): Force threshold for filtering structures.
+        train_test_method (str): Method for splitting data into train and test sets.
+        train_test_split (float): Fraction of data to be used for testing.
+        database_paths (dict): Paths to the existing train and test databases.
+        metadata (str): Metadata for the structures.
+    """
+    energy = [i.get_total_energy() for i in atoms]
+    force = [i.get_forces().tolist() for i in atoms]
+    stress = [i.get_stress() for i in atoms]
+    data = {"energy": energy, "forces": force, "stress": stress, "ase_atoms": atoms, "iteration": iteration}
+    if metadata:
+        data["sample_type"] = metadata
+    # create a DataFrame
+    df = pd.DataFrame(data)
+    df["stress"] = df["stress"].apply(np.array)
+    print(f"Iteration {iteration} has {len(df)} structures")
+    df = df[~df["forces"].apply(lambda x: np.max(x) > force_threshold)]
+    df = df[~df["forces"].apply(lambda x: np.min(x) < -force_threshold)]
+    print(f"{len(df)} structures remain after force threshold filter")
+    if train_test_method == "random":
+        # Randomly split the data into train and test sets
+        df_new = train_test_split(df, test_size=train_test_fraction, random_state=42)
+    elif train_test_method == "composition":
+        # determine train/test split
+        composition_set = set()
+        for atoms in df["ase_atoms"]:
+            composition_set.add(atoms.get_chemical_formula())
+        # Choose a random sample of the unique compositions
+        composition_list = list(composition_set)
+        np.random.shuffle(composition_list)
+        test_comps = composition_list[: int(len(composition_list) * train_test_fraction)]
+        # Create a mask to filter the DataFrame
+        mask = df["ase_atoms"].apply(lambda atoms: atoms.get_chemical_formula() in test_comps)
+        # Filter the DataFrame
+        df_new = [df[~mask], df[mask]]
+    print(f"{len(df_new[0])} structures added to train set and {len(df_new[1])} structures added to test set")
+    if database_paths:
+        for ind, file in enumerate([database_paths["train"], database_paths["test"]]):
+            df_old = pd.read_pickle(file, compression="gzip")
+            df_concat = pd.concat([df_old] + [df_new[ind]])
+            df_concat.to_pickle(file, compression="gzip", protocol=4)
+    else:
+        df_new[0].to_pickle(f"{wd}/train_data.pckl.gzip", compression="gzip", protocol=4)
+        df_new[1].to_pickle(f"{wd}/test_data.pckl.gzip", compression="gzip", protocol=4)
+        return {"train": f"{wd}/train_data.pckl.gzip", "test": f"{wd}/test_data.pckl.gzip"}

vitrum/batch_active/flows.py ADDED Viewed

@@ -0,0 +1,118 @@
+try:
+    from atomate2.vasp.jobs.core import StaticMaker
+    from atomate2.vasp.jobs.md import MDMaker
+    from atomate2.vasp.sets.core import StaticSetGenerator
+    from atomate2.vasp.sets.core import MDSetGenerator
+except ImportError:
+    raise ImportError("atomate2 is required for flows. Please install vitrum[batch_active].")
+from pymatgen.io.vasp import Kpoints
+def static_flow(structure, name=False, incar_settings={}, kpoint=False, potcar_functional="PBE_54"):
+    if not name:
+        name = structure.reduced_formula
+    num_atoms = len(structure)
+    incar_set = {
+        "EDIFF": (10**-5) * num_atoms,
+        "ENAUG": None,
+        "EDIFFG": None,
+        "ALGO": "Normal",
+        "ENCUT": 520,
+        "ISMEAR": 0,
+        "ISPIN": 1,  # Do not consider magnetism in AIMD simulations
+        "LREAL": "Auto",
+        "LAECHG": False,
+        "LASPH": True,
+        "LCHARG": False,
+        "GGA": None,  # Just let VASP decide based on POTCAR - the default PE
+        "LPLANE": False,  # LPLANE is recommended to be False on Cray machines
+        "LDAUPRINT": 0,
+        "ISIF": 2,
+        "SIGMA": 0.05,
+        "LVTOT": None,
+        "LMIXTAU": None,
+        "NELM": 200,
+        "PREC": "Normal",
+        "NCORE": 16,
+        "NSIM": 32,
+    }
+    incar_set.update(incar_settings)
+    if not kpoint:
+        kpoint = Kpoints()  # Gamma centered, 1x1x1 KPOINTS with no shift
+    run_vasp_kwargs = {"job_type": "direct"}
+    static_maker = StaticMaker(
+        name=name,
+        input_set_generator=StaticSetGenerator(
+            user_incar_settings=incar_set, user_kpoints_settings=kpoint, user_potcar_functional=potcar_functional
+        ),
+        run_vasp_kwargs=run_vasp_kwargs,
+    )
+    return static_maker.make(structure)
+def md_flow(
+    structure,
+    temperature=5000,
+    steps=100,
+    name=False,
+    timestep=1,
+    incar_settings={},
+    kpoint=False,
+    potcar_functional="PBE_54",
+):
+    if not name:
+        name = structure.reduced_formula
+    num_atoms = len(structure)
+    incar_set = {
+        "EDIFF": (10**-5) * num_atoms,
+        "ENAUG": None,
+        "EDIFFG": None,
+        "ALGO": "Normal",
+        "ENCUT": 520,
+        "ISMEAR": 0,
+        "ISPIN": 1,  # Do not consider magnetism in AIMD simulations
+        "LREAL": "Auto",
+        "LAECHG": False,
+        "LASPH": True,
+        "LCHARG": False,
+        "GGA": None,  # Just let VASP decide based on POTCAR - the default PE
+        "LPLANE": False,  # LPLANE is recommended to be False on Cray machines
+        "LDAUPRINT": 0,
+        "ISIF": 2,
+        "SIGMA": 0.05,
+        "LVTOT": None,
+        "LMIXTAU": None,
+        "NELM": 200,
+        "PREC": "Normal",
+        "NCORE": 16,
+        "NSIM": 32,
+    }
+    incar_set.update(incar_settings)
+    if not kpoint:
+        kpoint = Kpoints()  # Gamma centered, 1x1x1 KPOINTS with no shift
+    run_vasp_kwargs = {"job_type": "direct"}
+    aimd_maker = MDMaker(
+        name=name,
+        input_set_generator=MDSetGenerator(
+            ensemble="nvt",
+            start_temp=temperature,
+            end_temp=temperature,
+            nsteps=steps,
+            time_step=timestep,
+            # adapted from MPMorph settings
+            user_incar_settings=incar_set,
+            user_kpoints_settings=kpoint,
+            user_potcar_functional=potcar_functional,
+        ),
+        run_vasp_kwargs=run_vasp_kwargs,
+    )
+    return aimd_maker.make(structure)

vitrum/batch_active/get_structures.py ADDED Viewed

@@ -0,0 +1,160 @@
+import shutil
+import subprocess
+from pathlib import Path
+import numpy as np
+import pandas as pd
+from ase.io import read
+from pymatgen.io.ase import AseAtomsAdaptor
+from vitrum.io_helpers import correct_atom_types, get_LAMMPS_dump_timesteps
+def get_wflow_id_from_run_uuid(lp, run_uuid):
+    wf_ids = [
+        i
+        for i in lp.get_wf_ids()
+        if lp.workflows.find_one({"nodes": i}, projection=["metadata"]).get("metadata", {}).get("uuid") == run_uuid
+    ]
+    return wf_ids
+def get_atoms_from_wfs(lp, run_uuids, high_temp_params, sampling=":", state=None):
+    """
+    Reads all atoms from a workflow given by uuid and returns them.
+    Parameters:
+        run_uuids : list
+            list of uuids of the workflows to read from.
+        sampling : str or list or int, optional
+            If sampling is a string, it is interpreted as a slice string for numpy.
+            If it is an integer, it is interpreted as the number of samples to take.
+            If it is a list, it is interpreted as a list of indices to sample.
+            Defaults to ":".
+    Returns:
+        atoms: list
+            A list of ase atoms objects.
+    """
+    wf_ids = sum([get_wflow_id_from_run_uuid(lp, id) for id in run_uuids], [])
+    atoms = []
+    metadata = []
+    if state == "train_ace_high_temp":
+        sampling = high_temp_params["sampling"]
+    else:
+        sampling = sampling
+    for wf_id in wf_ids:
+        wf = lp.get_wf_by_fw_id(wf_id)
+        launch_dirs = [fw.launches[0].launch_dir if fw.launches else None for fw in wf.fws]
+        for dirs, fw in zip(launch_dirs, wf.fws):
+            if fw.state == "COMPLETED":
+                atoms_fw = read(f"{dirs}/OUTCAR.gz", format="vasp-out", index=":")
+                num_samples = len(atoms_fw)
+                if sampling == ":":
+                    atoms = atoms + atoms_fw
+                    num_samples = len(atoms_fw)
+                elif isinstance(sampling, int):
+                    sample_index = np.linspace(0, num_samples - 1, sampling, dtype=int)
+                    atoms = atoms + [atoms_fw[i] for i in sample_index]
+                    num_samples = len(sample_index)
+                elif isinstance(sampling, list):
+                    atoms = atoms + [atoms_fw[i] for i in sampling]
+                    num_samples = len(sampling)
+                metadata = metadata + [fw.spec["sample_type"]] * num_samples
+    return atoms, metadata
+def get_structures_from_lammps(
+    folder,
+    potential_folder,
+    atom_types,
+    potential,
+    pace_select=True,
+    force_glass_structures=True,
+    use_spaced_timesteps=False,
+    max_gamma_structures=500,
+):
+    select_files = []
+    forced_files = []
+    folder_path = Path(folder)
+    for dirpath in folder_path.rglob("*"):  # Recursively iterate over all directories/files
+        if dirpath.is_dir():  # Ensure it's a directory
+            for file in ["glass.dump", "gamma.dump"]:
+                file_path = dirpath / file  # Use pathlib's `/` operator to join paths
+                if file_path.exists():  # Check if file exists
+                    file_path_str = str(file_path)  # .replace(")", r"\)").replace("(", r"\(")
+                    if pace_select:
+                        if force_glass_structures:
+                            if file == "glass.dump":
+                                forced_files.append(file_path_str)
+                            else:
+                                select_files.append(file_path_str)
+                        else:
+                            select_files.append(file_path_str)
+                    else:
+                        forced_files.append(file_path_str)
+    gamma_file = f"{folder}/gamma_structures.dat"
+    with open(gamma_file, "wb") as wfd:
+        for f in select_files:
+            with open(f, "rb") as fd:
+                shutil.copyfileobj(fd, wfd)
+    atoms_selected = []
+    atoms_forced = []
+    if pace_select is True:
+        print("Running PACE select")
+        atoms_selected += select_structures(
+            potential_folder, atom_types, gamma_file, potential, num_select_structures=max_gamma_structures
+        )
+    for file_path in forced_files:
+        atoms = read(file_path, format="lammps-dump-text", index=":")
+        if len(atoms) == 0:
+            continue
+        symbol_change_map = {i + 1: x for i, x in enumerate(atom_types)}
+        atoms = correct_atom_types(atoms, symbol_change_map)
+        if use_spaced_timesteps is True:
+            timesteps = get_LAMMPS_dump_timesteps(file_path)
+            spaced_timesteps = [0]
+            for ind, time in enumerate(timesteps):
+                if time > timesteps[spaced_timesteps[-1]] + 100:
+                    spaced_timesteps.append(ind)
+            atoms_forced += [atoms[t] for t in spaced_timesteps]
+        else:
+            atoms_forced += atoms
+    print(f"Included {len(atoms_selected)} selected structures and {len(atoms_forced)} forced structures.")
+    metadata = ["gamma"] * len(atoms_selected) + ["manual"] * len(atoms_forced)
+    structures = [AseAtomsAdaptor().get_structure(atom) for atom in atoms_selected] + [
+        AseAtomsAdaptor().get_structure(atom) for atom in atoms_forced
+    ]
+    return structures, metadata
+def select_structures(folder, atom_types, gamma_file, potential, num_select_structures=500):
+    atom_string = " ".join([str(atom) for atom in atom_types])
+    if potential == "pace":
+        subprocess.run(
+            f"pace_select -p {folder}/output_potential.yaml -a "
+            f'{folder}/output_potential.asi -e "{atom_string}"'
+            f" -m {num_select_structures} {gamma_file}",
+            shell=True,
+        )
+    elif potential == "grace":
+        subprocess.run(
+            f"pace_select -p {folder}/FS_model.yaml"
+            f' -a {folder}/FS_model.asi -e "{atom_string}"'
+            f" -m {num_select_structures} {gamma_file}",
+            shell=True,
+        )
+    atoms = pd.read_pickle("selected.pkl.gz", compression="gzip")
+    return [structure for structure in atoms["ase_atoms"]]

vitrum/batch_active/input_writer.py ADDED Viewed

@@ -0,0 +1,166 @@
+import yaml
+import math
+def lammps_input_writer(
+    pot_dir,
+    potential,
+    atoms,
+    max_temp=5000,
+    min_temp=0.01,
+    cooling_rate=10,
+    sample_rate=100000,
+    equilibration_steps=10000,
+    seed=1,
+    c_min=2.5,
+    c_max=30,
+    gamma_sample_rate=5,
+    timestep=2,
+):
+    atom_string = " ".join([str(atom) for atom in atoms])
+    if min_temp == 0:
+        print("Using default min_temp = 0.01, LAMMPS cannot handle temperature = 0")
+        min_temp = 0.01
+    potential_templates = {
+        "pace": (
+            "pair_style  pace/extrapolation\n"
+            "pair_coeff  * * {}/output_potential.yaml {}/output_potential.asi {}\n"
+            "fix gamma all pair 1 pace/extrapolation gamma 1"
+        ),
+        "grace": (
+            "pair_style  grace/fs extrapolation\n"
+            "pair_coeff  * * {}/FS_model.yaml {}/FS_model.asi {}\n"
+            "fix gamma all pair 1 grace/fs gamma 1"
+        ),
+    }
+    if potential not in potential_templates:
+        raise ValueError(f"Unsupported potential type: {potential}")
+    potential_string = potential_templates[potential].format(pot_dir, pot_dir, atom_string)
+    cooling_steps = int(
+        sample_rate * math.ceil(((max_temp - min_temp) * 1000 / cooling_rate / (timestep * 1000)) / sample_rate)
+    )
+    input_script = f"""
+# Initialization
+units           metal
+dimension       3
+boundary        p p p
+atom_style      atomic
+read_data       structure.dat
+# Potential setup
+{potential_string}
+compute max_gamma all reduce max f_gamma
+timestep {timestep}
+# Output settings
+thermo          1000
+thermo_style    custom step temp pe etotal press vol density c_max_gamma
+velocity all create {max_temp} {seed} rot yes dist gaussian
+# Dump settings
+variable dump_skip equal "c_max_gamma < {c_min}"
+dump gamma_dump all custom {gamma_sample_rate} gamma.dump id type x y z f_gamma
+dump_modify gamma_dump skip v_dump_skip
+# Stop condition
+variable max_gamma equal c_max_gamma
+fix extreme_extrapolation all halt 1 v_max_gamma > {c_max}
+# Equilibration
+fix 1 all nvt temp {max_temp} {max_temp} {timestep*100}
+run {equilibration_steps}
+unfix 1
+undump gamma_dump
+reset_timestep 0
+# Cooling and sampling
+dump gamma_dump all custom {gamma_sample_rate} gamma.dump id type x y z f_gamma
+dump_modify gamma_dump append yes skip v_dump_skip
+dump glass_dump all custom {sample_rate} glass.dump id type x y z
+fix 1 all nvt temp {max_temp} {min_temp} {timestep*100}
+run {cooling_steps}
+unfix 1
+"""
+    with open("in.run", "w") as f:
+        f.write(input_script.strip() + "\n")
+def ace_yaml_writer(
+    wd,
+    train_database,
+    test_database,
+    elements,
+    reference_energy="auto",
+    cutoff=6.5,
+    number_of_functions_per_element=250,
+    embeddings={
+        "npot": "FinnisSinclairShiftedScaled",
+        "fs_parameters": [1, 1, 1, 0.5, 1, 0.75, 1, 0.25, 1, 0.125, 1, 0.375, 1, 0.875, 1, 2],
+        "ndensity": 8,
+    },
+    bonds={
+        "radbase": "SBessel",
+        "radparameters": [5.25],
+        "rcut": 8.0,
+        "dcut": 0.01,
+        "NameOfCutoffFunction": "cos",
+    },
+    deltaSplineBins=0.001,
+    nradmax_by_orders=[15, 3, 2, 1],
+    lmax_by_orders=[0, 4, 2, 0],
+    loss={"kappa": 0.05, "L1_coeffs": 1e-8, "L2_coeffs": 1e-8},
+    maxiter=2000,
+    ladder_steps=5,
+    ladder_type="power_order",
+    early_stopping_patience=50,
+    batch_size=100,
+    **kwargs,
+):
+    ace_input = {
+        "cutoff": cutoff,
+        "seed": 42,
+        "potential": {
+            "deltaSplineBins": deltaSplineBins,
+            "elements": elements,
+            "embeddings": {"ALL": embeddings},
+            "bonds": {"ALL": bonds},
+            "functions": {
+                "number_of_functions_per_element": number_of_functions_per_element,
+                "UNARY": {"nradmax_by_orders": nradmax_by_orders, "lmax_by_orders": lmax_by_orders},
+                "BINARY": {"nradmax_by_orders": nradmax_by_orders, "lmax_by_orders": lmax_by_orders},
+                "TERNARY": {"nradmax_by_orders": nradmax_by_orders, "lmax_by_orders": lmax_by_orders},
+                "QUATERNARY": {"nradmax_by_orders": nradmax_by_orders, "lmax_by_orders": lmax_by_orders},
+            },
+        },
+        "data": {"filename": train_database, "test_filename": test_database, "reference_energy": reference_energy},
+        "fit": {
+            "loss": loss,
+            "optimizer": "BFGS",
+            "repulsion": "auto",
+            "maxiter": maxiter,
+            "ladder_step": ladder_steps,
+            "ladder_type": ladder_type,
+            "min_relative_train_loss_per_iter": 5e-5,
+            "min_relative_test_loss_per_iter": 1e-5,
+            "early_stopping_patience": early_stopping_patience,
+        },
+        "backend": {
+            "evaluator": "tensorpot",
+            "batch_size": batch_size,
+            "display_step": 100,
+            "gpu_config": {"gpu_ind": -1, "mem_limit": 0},
+        },
+    }
+    yaml.Dumper.ignore_aliases = lambda *args: True
+    with open(f"{wd}/input.yaml", "w") as f:
+        yaml.dump(ace_input, f)