vitrum 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
vitrum/__init__.py ADDED
@@ -0,0 +1,15 @@
1
+ from vitrum.glass_atoms import GlassAtoms
2
+ from vitrum.coordination import Coordination
3
+ from vitrum.diffusion import Diffusion
4
+ from vitrum.rings import RingAnalysis
5
+ from vitrum.scattering import Scattering
6
+
7
+ __version__ = "1.0.0"
8
+
9
+ __all__ = [
10
+ "GlassAtoms",
11
+ "Coordination",
12
+ "Diffusion",
13
+ "RingAnalysis",
14
+ "Scattering",
15
+ ]
File without changes
@@ -0,0 +1,72 @@
1
+ import numpy as np
2
+ import pandas as pd
3
+ from sklearn.model_selection import train_test_split
4
+
5
+
6
def update_ace_database(
    wd: str,
    atoms: list,
    iteration: int,
    force_threshold: int = 100,
    train_test_method: str = "composition",
    train_test_fraction: float = 0.2,
    database_paths=None,
    metadata=None,
):
    """
    Update the ACE database with new structures.

    Parameters:
        wd (str): Working directory; used for the output files when
            ``database_paths`` is not given.
        atoms (list): List of ASE atoms objects.
        iteration (int): Current iteration number, stored in every row.
        force_threshold (int): Structures with any force component whose
            absolute value exceeds this threshold are dropped.
        train_test_method (str): ``"random"`` (row-wise split) or
            ``"composition"`` (whole chemical formulas are held out).
        train_test_fraction (float): Fraction of rows (``"random"``) or of
            unique compositions (``"composition"``) assigned to the test set.
        database_paths (dict): Paths to existing gzip-pickled databases under
            the keys ``"train"`` and ``"test"``. When given, the new rows are
            appended to these files instead of writing new ones in ``wd``.
        metadata: Value(s) stored in the ``sample_type`` column; the column
            is omitted when this is falsy.

    Returns:
        dict: ``{"train": path, "test": path}`` of the files that were written.

    Raises:
        ValueError: If ``train_test_method`` is not a supported method.
    """
    # Fail early: previously an unknown method only surfaced later as a
    # NameError on the unbound split result.
    if train_test_method not in ("random", "composition"):
        raise ValueError(
            f"Unsupported train_test_method: {train_test_method!r} (expected 'random' or 'composition')"
        )

    data = {
        "energy": [structure.get_total_energy() for structure in atoms],
        "forces": [structure.get_forces().tolist() for structure in atoms],
        "stress": [structure.get_stress() for structure in atoms],
        "ase_atoms": atoms,
        "iteration": iteration,
    }
    if metadata:
        data["sample_type"] = metadata

    df = pd.DataFrame(data)
    df["stress"] = df["stress"].apply(np.array)
    print(f"Iteration {iteration} has {len(df)} structures")

    # max(|f|) > t is equivalent to (max(f) > t or min(f) < -t).
    exceeds = df["forces"].apply(lambda f: bool(np.abs(f).max() > force_threshold))
    df = df[~exceeds.astype(bool)]
    print(f"{len(df)} structures remain after force threshold filter")

    if train_test_method == "random":
        # Randomly split the rows into train and test sets.
        train_df, test_df = train_test_split(df, test_size=train_test_fraction, random_state=42)
    else:
        # Hold out a random sample of the unique compositions.
        formulas = df["ase_atoms"].apply(lambda structure: structure.get_chemical_formula())
        unique_formulas = list(set(formulas))
        np.random.shuffle(unique_formulas)
        n_test = int(len(unique_formulas) * train_test_fraction)
        in_test = formulas.isin(set(unique_formulas[:n_test]))
        train_df, test_df = df[~in_test], df[in_test]

    print(f"{len(train_df)} structures added to train set and {len(test_df)} structures added to test set")

    new_frames = {"train": train_df, "test": test_df}
    if database_paths:
        # Append to the existing databases and report those same files back.
        out_paths = {"train": database_paths["train"], "test": database_paths["test"]}
        for key, path in out_paths.items():
            df_old = pd.read_pickle(path, compression="gzip")
            pd.concat([df_old, new_frames[key]]).to_pickle(path, compression="gzip", protocol=4)
    else:
        out_paths = {"train": f"{wd}/train_data.pckl.gzip", "test": f"{wd}/test_data.pckl.gzip"}
        for key, path in out_paths.items():
            new_frames[key].to_pickle(path, compression="gzip", protocol=4)
    return out_paths
@@ -0,0 +1,118 @@
1
+ try:
2
+ from atomate2.vasp.jobs.core import StaticMaker
3
+ from atomate2.vasp.jobs.md import MDMaker
4
+ from atomate2.vasp.sets.core import StaticSetGenerator
5
+ from atomate2.vasp.sets.core import MDSetGenerator
6
+ except ImportError:
7
+ raise ImportError("atomate2 is required for flows. Please install vitrum[batch_active].")
8
+ from pymatgen.io.vasp import Kpoints
9
+
10
+
11
def static_flow(structure, name=False, incar_settings=None, kpoint=False, potcar_functional="PBE_54"):
    """
    Build a static VASP job for ``structure``.

    Parameters:
        structure: Structure to calculate; must support ``len()`` and expose
            ``reduced_formula`` (used as the job name when ``name`` is falsy).
        name: Job name. Falsy values fall back to the reduced formula.
        incar_settings (dict): INCAR overrides applied on top of the defaults
            defined below. ``None`` means no overrides.
        kpoint: K-point settings handed to the input set generator. Falsy
            values fall back to ``Kpoints()``.
        potcar_functional (str): POTCAR functional passed to the generator.

    Returns:
        The result of ``StaticMaker.make(structure)``.
    """
    if not name:
        name = structure.reduced_formula
    num_atoms = len(structure)

    incar_set = {
        "EDIFF": (10**-5) * num_atoms,  # convergence criterion scales with system size
        "ENAUG": None,
        "EDIFFG": None,
        "ALGO": "Normal",
        "ENCUT": 520,
        "ISMEAR": 0,
        "ISPIN": 1,  # Magnetism is not considered
        "LREAL": "Auto",
        "LAECHG": False,
        "LASPH": True,
        "LCHARG": False,
        "GGA": None,  # Just let VASP decide based on POTCAR - the default PE
        "LPLANE": False,  # LPLANE is recommended to be False on Cray machines
        "LDAUPRINT": 0,
        "ISIF": 2,
        "SIGMA": 0.05,
        "LVTOT": None,
        "LMIXTAU": None,
        "NELM": 200,
        "PREC": "Normal",
        "NCORE": 16,
        "NSIM": 32,
    }
    # User overrides win over the defaults; None replaces the former shared
    # mutable ``{}`` default.
    if incar_settings:
        incar_set.update(incar_settings)

    if not kpoint:
        kpoint = Kpoints()  # Gamma centered, 1x1x1 KPOINTS with no shift

    input_set_generator = StaticSetGenerator(
        user_incar_settings=incar_set,
        user_kpoints_settings=kpoint,
        user_potcar_functional=potcar_functional,
    )
    static_maker = StaticMaker(
        name=name,
        input_set_generator=input_set_generator,
        run_vasp_kwargs={"job_type": "direct"},
    )
    return static_maker.make(structure)
54
+
55
+
56
def md_flow(
    structure,
    temperature=5000,
    steps=100,
    name=False,
    timestep=1,
    incar_settings=None,
    kpoint=False,
    potcar_functional="PBE_54",
):
    """
    Build an NVT molecular-dynamics VASP job for ``structure``.

    Parameters:
        structure: Structure to simulate; must support ``len()`` and expose
            ``reduced_formula`` (used as the job name when ``name`` is falsy).
        temperature: Used as both the start and the end temperature of the run.
        steps: Number of MD steps (``nsteps`` of the input set generator).
        name: Job name. Falsy values fall back to the reduced formula.
        timestep: MD time step (``time_step`` of the input set generator).
        incar_settings (dict): INCAR overrides applied on top of the defaults
            defined below. ``None`` means no overrides.
        kpoint: K-point settings handed to the input set generator. Falsy
            values fall back to ``Kpoints()``.
        potcar_functional (str): POTCAR functional passed to the generator.

    Returns:
        The result of ``MDMaker.make(structure)``.
    """
    if not name:
        name = structure.reduced_formula
    num_atoms = len(structure)

    incar_set = {
        "EDIFF": (10**-5) * num_atoms,  # convergence criterion scales with system size
        "ENAUG": None,
        "EDIFFG": None,
        "ALGO": "Normal",
        "ENCUT": 520,
        "ISMEAR": 0,
        "ISPIN": 1,  # Do not consider magnetism in AIMD simulations
        "LREAL": "Auto",
        "LAECHG": False,
        "LASPH": True,
        "LCHARG": False,
        "GGA": None,  # Just let VASP decide based on POTCAR - the default PE
        "LPLANE": False,  # LPLANE is recommended to be False on Cray machines
        "LDAUPRINT": 0,
        "ISIF": 2,
        "SIGMA": 0.05,
        "LVTOT": None,
        "LMIXTAU": None,
        "NELM": 200,
        "PREC": "Normal",
        "NCORE": 16,
        "NSIM": 32,
    }
    # User overrides win over the defaults; None replaces the former shared
    # mutable ``{}`` default.
    if incar_settings:
        incar_set.update(incar_settings)

    if not kpoint:
        kpoint = Kpoints()  # Gamma centered, 1x1x1 KPOINTS with no shift

    input_set_generator = MDSetGenerator(
        ensemble="nvt",
        start_temp=temperature,
        end_temp=temperature,
        nsteps=steps,
        time_step=timestep,
        # adapted from MPMorph settings
        user_incar_settings=incar_set,
        user_kpoints_settings=kpoint,
        user_potcar_functional=potcar_functional,
    )
    aimd_maker = MDMaker(
        name=name,
        input_set_generator=input_set_generator,
        run_vasp_kwargs={"job_type": "direct"},
    )
    return aimd_maker.make(structure)
@@ -0,0 +1,160 @@
1
+ import shutil
2
+ import subprocess
3
+ from pathlib import Path
4
+
5
+ import numpy as np
6
+ import pandas as pd
7
+ from ase.io import read
8
+ from pymatgen.io.ase import AseAtomsAdaptor
9
+
10
+ from vitrum.io_helpers import correct_atom_types, get_LAMMPS_dump_timesteps
11
+
12
+
13
def get_wflow_id_from_run_uuid(lp, run_uuid):
    """
    Return the workflow ids whose stored metadata ``uuid`` equals ``run_uuid``.

    Parameters:
        lp: Object providing ``get_wf_ids()`` and a ``workflows`` collection
            with ``find_one`` (presumably a FireWorks LaunchPad - confirm
            against the caller).
        run_uuid: The uuid to match against ``metadata["uuid"]``.

    Returns:
        list: Matching ids, in the order returned by ``lp.get_wf_ids()``.
    """
    matching_ids = []
    for wf_id in lp.get_wf_ids():
        document = lp.workflows.find_one({"nodes": wf_id}, projection=["metadata"])
        # find_one yields None when nothing matches, and the metadata field
        # may be absent or null; treat both as "no uuid" instead of crashing.
        workflow_metadata = (document or {}).get("metadata") or {}
        if workflow_metadata.get("uuid") == run_uuid:
            matching_ids.append(wf_id)
    return matching_ids
20
+
21
+
22
def get_atoms_from_wfs(lp, run_uuids, high_temp_params, sampling=":", state=None):
    """
    Reads all atoms from the workflows given by uuid and returns them.

    Parameters:
        lp : object
            Provides ``get_wf_by_fw_id`` and whatever
            ``get_wflow_id_from_run_uuid`` needs (presumably a FireWorks
            LaunchPad - confirm against the caller).
        run_uuids : list
            list of uuids of the workflows to read from.
        high_temp_params : dict
            Only read when ``state == "train_ace_high_temp"``; its
            ``"sampling"`` entry then replaces ``sampling``.
        sampling : str or list or int, optional
            If sampling is a string, it is interpreted as a slice string
            (e.g. ":" or "::2"), or as a single index if it has no colon.
            If it is an integer, it is interpreted as the number of evenly
            spaced samples to take.
            If it is a list, it is interpreted as a list of indices to sample.
            Defaults to ":".
        state : str, optional
            Workflow state label; see ``high_temp_params``.

    Returns:
        atoms: list
            A list of ase atoms objects.
        metadata: list
            One ``fw.spec["sample_type"]`` entry per returned atoms object.

    Raises:
        TypeError: If ``sampling`` is of an unsupported type.
        ValueError: If a slice string has more than three fields.
    """

    def _sample_frames(frames, how):
        # Pick the requested subset of the frames read from one OUTCAR.
        if isinstance(how, str):
            if ":" not in how:
                return [frames[int(how)]]
            fields = how.split(":")
            if len(fields) > 3:
                raise ValueError(f"Invalid slice string for sampling: {how!r}")
            bounds = [int(field) if field.strip() else None for field in fields]
            return list(frames[slice(*bounds)])
        if isinstance(how, (int, np.integer)):
            indices = np.linspace(0, len(frames) - 1, how, dtype=int)
            return [frames[i] for i in indices]
        if isinstance(how, (list, tuple)):
            return [frames[i] for i in how]
        raise TypeError(f"Unsupported sampling specification: {how!r}")

    if state == "train_ace_high_temp":
        sampling = high_temp_params["sampling"]

    wf_ids = [wf_id for run_uuid in run_uuids for wf_id in get_wflow_id_from_run_uuid(lp, run_uuid)]

    atoms = []
    metadata = []
    for wf_id in wf_ids:
        wf = lp.get_wf_by_fw_id(wf_id)
        for fw in wf.fws:
            if fw.state != "COMPLETED":
                continue
            launch_dir = fw.launches[0].launch_dir if fw.launches else None
            frames = read(f"{launch_dir}/OUTCAR.gz", format="vasp-out", index=":")
            if not frames:
                # Nothing to sample from; index-based sampling would fail.
                continue
            sampled = _sample_frames(frames, sampling)
            atoms.extend(sampled)
            # Keep metadata in lockstep with the atoms that were actually added.
            metadata.extend([fw.spec["sample_type"]] * len(sampled))

    return atoms, metadata
68
+
69
+
70
def get_structures_from_lammps(
    folder,
    potential_folder,
    atom_types,
    potential,
    pace_select=True,
    force_glass_structures=True,
    use_spaced_timesteps=False,
    max_gamma_structures=500,
):
    """
    Collect structures from the LAMMPS dump files found below ``folder``.

    Every sub-directory of ``folder`` is searched for ``glass.dump`` and
    ``gamma.dump``. Each file is either "forced" (every frame, or spaced
    frames, is included) or handed to ``select_structures``.

    Parameters:
        folder: Root directory that is searched recursively; the concatenated
            selection input ``gamma_structures.dat`` is written here.
        potential_folder: Directory passed on to ``select_structures``.
        atom_types: Element symbols; position ``i`` maps LAMMPS type ``i + 1``.
        potential: Potential type passed on to ``select_structures``.
        pace_select: When truthy, non-forced files go through
            ``select_structures``; when falsy, every dump file is forced.
        force_glass_structures: When truthy (and ``pace_select`` is truthy),
            ``glass.dump`` files are forced and only ``gamma.dump`` is selected.
        use_spaced_timesteps: When truthy, only frames more than 100
            timesteps after the previously kept frame are taken from forced files.
        max_gamma_structures: Upper bound passed to ``select_structures``.

    Returns:
        tuple: ``(structures, metadata)`` where ``structures`` are pymatgen
        structures (selected ones first) and ``metadata`` holds ``"gamma"``
        for selected and ``"manual"`` for forced structures.
    """
    select_files = []
    forced_files = []

    for dirpath in Path(folder).rglob("*"):
        if not dirpath.is_dir():
            continue
        for dump_name in ("glass.dump", "gamma.dump"):
            file_path = dirpath / dump_name
            if not file_path.exists():
                continue
            is_forced = (not pace_select) or (force_glass_structures and dump_name == "glass.dump")
            (forced_files if is_forced else select_files).append(str(file_path))

    # Concatenate all files to be selected from into a single input file.
    gamma_file = f"{folder}/gamma_structures.dat"
    with open(gamma_file, "wb") as merged:
        for dump_path in select_files:
            with open(dump_path, "rb") as source:
                shutil.copyfileobj(source, merged)

    atoms_selected = []
    atoms_forced = []

    # Same truthiness test as the file classification above, so a truthy
    # non-bool flag cannot collect files that are then never selected.
    if pace_select:
        print("Running PACE select")
        atoms_selected += select_structures(
            potential_folder, atom_types, gamma_file, potential, num_select_structures=max_gamma_structures
        )

    # LAMMPS types are 1-based; map them onto the element symbols once.
    symbol_change_map = {index + 1: symbol for index, symbol in enumerate(atom_types)}

    for file_path in forced_files:
        frames = read(file_path, format="lammps-dump-text", index=":")
        if len(frames) == 0:
            continue
        frames = correct_atom_types(frames, symbol_change_map)

        if use_spaced_timesteps:
            timesteps = get_LAMMPS_dump_timesteps(file_path)
            kept = [0]
            for index, time in enumerate(timesteps):
                if time > timesteps[kept[-1]] + 100:
                    kept.append(index)
            atoms_forced += [frames[index] for index in kept]
        else:
            atoms_forced += frames

    print(f"Included {len(atoms_selected)} selected structures and {len(atoms_forced)} forced structures.")
    metadata = ["gamma"] * len(atoms_selected) + ["manual"] * len(atoms_forced)

    adaptor = AseAtomsAdaptor()
    structures = [adaptor.get_structure(frame) for frame in atoms_selected + atoms_forced]

    return structures, metadata
141
+
142
+
143
def select_structures(folder, atom_types, gamma_file, potential, num_select_structures=500):
    """
    Run ``pace_select`` on ``gamma_file`` and return the selected structures.

    Parameters:
        folder: Directory containing the potential (``output_potential.*``
            for ``"pace"``, ``FS_model.*`` for ``"grace"``).
        atom_types: Element symbols, passed space-separated via ``-e``.
        gamma_file: Dataset file handed to ``pace_select``.
        potential (str): ``"pace"`` or ``"grace"``.
        num_select_structures (int): Value of the ``-m`` option.

    Returns:
        list: The ``ase_atoms`` column of ``selected.pkl.gz``, which is read
        from the current working directory after the command has run.

    Raises:
        ValueError: If ``potential`` is not supported.
        subprocess.CalledProcessError: If ``pace_select`` exits non-zero.
    """
    model_stems = {"pace": "output_potential", "grace": "FS_model"}
    if potential not in model_stems:
        # Previously an unknown potential skipped the selection and silently
        # read whatever selected.pkl.gz happened to be lying around.
        raise ValueError(f"Unsupported potential type: {potential}")
    stem = model_stems[potential]

    # Argument list instead of a shell string: paths containing spaces or
    # parentheses are passed through untouched and nothing is shell-interpreted.
    command = [
        "pace_select",
        "-p",
        f"{folder}/{stem}.yaml",
        "-a",
        f"{folder}/{stem}.asi",
        "-e",
        " ".join(str(atom) for atom in atom_types),
        "-m",
        str(num_select_structures),
        str(gamma_file),
    ]
    # check=True: do not go on to read a stale result file after a failed run.
    subprocess.run(command, check=True)

    selected = pd.read_pickle("selected.pkl.gz", compression="gzip")
    return list(selected["ase_atoms"])
@@ -0,0 +1,166 @@
1
+ import yaml
2
+ import math
3
+
4
+
5
def lammps_input_writer(
    pot_dir,
    potential,
    atoms,
    max_temp=5000,
    min_temp=0.01,
    cooling_rate=10,
    sample_rate=100000,
    equilibration_steps=10000,
    seed=1,
    c_min=2.5,
    c_max=30,
    gamma_sample_rate=5,
    timestep=2,
    output_file="in.run",
):
    """
    Write a LAMMPS melt-quench input script with extrapolation-grade dumps.

    The script equilibrates at ``max_temp``, then cools to ``min_temp`` under
    NVT. Frames whose maximum extrapolation grade is at least ``c_min`` go to
    ``gamma.dump``, and the run halts once the grade exceeds ``c_max``.

    Parameters:
        pot_dir: Directory containing the potential files.
        potential (str): ``"pace"`` or ``"grace"``.
        atoms: Element symbols, in LAMMPS type order.
        max_temp: Starting (and equilibration) temperature.
        min_temp: Final temperature; values <= 0 are replaced by 0.01.
        cooling_rate: Temperature drop per unit of time; together with
            ``timestep`` it determines the number of cooling steps.
        sample_rate: Dump interval of ``glass.dump``; the number of cooling
            steps is rounded up to a multiple of it.
        equilibration_steps: Number of steps run at ``max_temp``.
        seed: Seed for the initial velocities.
        c_min: Frames with a maximum grade below this are not dumped.
        c_max: The run stops when the maximum grade exceeds this.
        gamma_sample_rate: Dump interval of ``gamma.dump``.
        timestep: Value of the LAMMPS ``timestep`` command; the thermostat
            damping parameter is ``timestep * 100``.
            NOTE(review): the script uses ``units metal`` - confirm the time
            unit the default of 2 is meant to be in.
        output_file: Path of the script to write. Defaults to ``"in.run"``
            in the current working directory.

    Raises:
        ValueError: If ``potential`` is not supported.
    """
    atom_string = " ".join([str(atom) for atom in atoms])
    # Negative temperatures are as invalid as zero, so clamp both.
    if min_temp <= 0:
        print("Using default min_temp = 0.01, LAMMPS cannot handle temperature = 0")
        min_temp = 0.01

    potential_templates = {
        "pace": (
            "pair_style pace/extrapolation\n"
            "pair_coeff * * {}/output_potential.yaml {}/output_potential.asi {}\n"
            "fix gamma all pair 1 pace/extrapolation gamma 1"
        ),
        "grace": (
            "pair_style grace/fs extrapolation\n"
            "pair_coeff * * {}/FS_model.yaml {}/FS_model.asi {}\n"
            "fix gamma all pair 1 grace/fs gamma 1"
        ),
    }

    if potential not in potential_templates:
        raise ValueError(f"Unsupported potential type: {potential}")

    potential_string = potential_templates[potential].format(pot_dir, pot_dir, atom_string)

    # Round the number of cooling steps up to a whole number of sample intervals.
    cooling_steps = int(
        sample_rate * math.ceil(((max_temp - min_temp) * 1000 / cooling_rate / (timestep * 1000)) / sample_rate)
    )

    # The script text is kept flush-left so the written file has no stray indentation.
    input_script = f"""
# Initialization
units metal
dimension 3
boundary p p p
atom_style atomic
read_data structure.dat

# Potential setup
{potential_string}
compute max_gamma all reduce max f_gamma

timestep {timestep}

# Output settings
thermo 1000
thermo_style custom step temp pe etotal press vol density c_max_gamma
velocity all create {max_temp} {seed} rot yes dist gaussian

# Dump settings
variable dump_skip equal "c_max_gamma < {c_min}"
dump gamma_dump all custom {gamma_sample_rate} gamma.dump id type x y z f_gamma
dump_modify gamma_dump skip v_dump_skip

# Stop condition
variable max_gamma equal c_max_gamma
fix extreme_extrapolation all halt 1 v_max_gamma > {c_max}

# Equilibration
fix 1 all nvt temp {max_temp} {max_temp} {timestep*100}
run {equilibration_steps}
unfix 1
undump gamma_dump

reset_timestep 0

# Cooling and sampling
dump gamma_dump all custom {gamma_sample_rate} gamma.dump id type x y z f_gamma
dump_modify gamma_dump append yes skip v_dump_skip
dump glass_dump all custom {sample_rate} glass.dump id type x y z

fix 1 all nvt temp {max_temp} {min_temp} {timestep*100}
run {cooling_steps}
unfix 1
"""

    with open(output_file, "w") as f:
        f.write(input_script.strip() + "\n")
95
+
96
+
97
def ace_yaml_writer(
    wd,
    train_database,
    test_database,
    elements,
    reference_energy="auto",
    cutoff=6.5,
    number_of_functions_per_element=250,
    embeddings=None,
    bonds=None,
    deltaSplineBins=0.001,
    nradmax_by_orders=None,
    lmax_by_orders=None,
    loss=None,
    maxiter=2000,
    ladder_steps=5,
    ladder_type="power_order",
    early_stopping_patience=50,
    batch_size=100,
    **kwargs,
):
    """
    Write the ACE fitting input file ``{wd}/input.yaml``.

    Parameters:
        wd: Directory the file is written to.
        train_database: Value of ``data.filename``.
        test_database: Value of ``data.test_filename``.
        elements: Value of ``potential.elements``.
        reference_energy: Value of ``data.reference_energy``.
        cutoff: Top-level ``cutoff`` value.
        number_of_functions_per_element: Stored under ``potential.functions``.
        embeddings (dict): Embedding settings applied to ``ALL`` elements;
            ``None`` selects the defaults defined below.
        bonds (dict): Bond settings applied to ``ALL`` pairs; ``None``
            selects the defaults defined below.
        deltaSplineBins: Value of ``potential.deltaSplineBins``.
        nradmax_by_orders (list): Used for the UNARY, BINARY, TERNARY and
            QUATERNARY function blocks; ``None`` selects ``[15, 3, 2, 1]``.
        lmax_by_orders (list): Used for the same four blocks; ``None``
            selects ``[0, 4, 2, 0]``.
        loss (dict): Value of ``fit.loss``; ``None`` selects the defaults
            defined below.
        maxiter: Value of ``fit.maxiter``.
        ladder_steps: Written as ``fit.ladder_step``.
        ladder_type: Value of ``fit.ladder_type``.
        early_stopping_patience: Value of ``fit.early_stopping_patience``.
        batch_size: Value of ``backend.batch_size``.
        **kwargs: Accepted for call compatibility; not used.
    """

    class _NoAliasDumper(yaml.Dumper):
        # The same lists/dicts appear several times in the document; write
        # them out in full instead of as YAML anchors/aliases. A local
        # subclass keeps this from changing yaml.Dumper for the whole process.
        def ignore_aliases(self, data):
            return True

    if embeddings is None:
        embeddings = {
            "npot": "FinnisSinclairShiftedScaled",
            "fs_parameters": [1, 1, 1, 0.5, 1, 0.75, 1, 0.25, 1, 0.125, 1, 0.375, 1, 0.875, 1, 2],
            "ndensity": 8,
        }
    if bonds is None:
        bonds = {
            "radbase": "SBessel",
            "radparameters": [5.25],
            "rcut": 8.0,
            "dcut": 0.01,
            "NameOfCutoffFunction": "cos",
        }
    if nradmax_by_orders is None:
        nradmax_by_orders = [15, 3, 2, 1]
    if lmax_by_orders is None:
        lmax_by_orders = [0, 4, 2, 0]
    if loss is None:
        loss = {"kappa": 0.05, "L1_coeffs": 1e-8, "L2_coeffs": 1e-8}

    functions = {"number_of_functions_per_element": number_of_functions_per_element}
    for block_name in ("UNARY", "BINARY", "TERNARY", "QUATERNARY"):
        functions[block_name] = {"nradmax_by_orders": nradmax_by_orders, "lmax_by_orders": lmax_by_orders}

    ace_input = {
        "cutoff": cutoff,
        "seed": 42,
        "potential": {
            "deltaSplineBins": deltaSplineBins,
            "elements": elements,
            "embeddings": {"ALL": embeddings},
            "bonds": {"ALL": bonds},
            "functions": functions,
        },
        "data": {"filename": train_database, "test_filename": test_database, "reference_energy": reference_energy},
        "fit": {
            "loss": loss,
            "optimizer": "BFGS",
            "repulsion": "auto",
            "maxiter": maxiter,
            "ladder_step": ladder_steps,
            "ladder_type": ladder_type,
            "min_relative_train_loss_per_iter": 5e-5,
            "min_relative_test_loss_per_iter": 1e-5,
            "early_stopping_patience": early_stopping_patience,
        },
        "backend": {
            "evaluator": "tensorpot",
            "batch_size": batch_size,
            "display_step": 100,
            "gpu_config": {"gpu_ind": -1, "mem_limit": 0},
        },
    }
    with open(f"{wd}/input.yaml", "w") as f:
        yaml.dump(ace_input, f, Dumper=_NoAliasDumper)