sxs 2024.0.44__py3-none-any.whl → 2025.0.2__py3-none-any.whl

This diff compares the contents of two publicly released versions of the package, as they appear in the supported public registries. It is provided for informational purposes only.
sxs/metadata/metric.py CHANGED
@@ -1,152 +1 @@
- from ..utilities.string_converters import *
- import numpy as np
-
- class MetadataMetric:
-     """A metric for comparing metadata.
-
-     This class is designed to be used as a callable object that takes
-     two collections of metadata (`sxs.Metadata`, `dict`, `pd.Series`)
-     and returns a number measuring the distance between the metadata.
-
-     With the default arguments, this will not strictly be a metric, as
-     it does not satisfy the triangle inequality. However, it is
-     intended to be used as a heuristic for sorting and filtering
-     metadata, rather than as a strict metric for clustering or
-     classification.
-
-     Note that calling an object of this class with two metadata
-     collections will return the *squared* distance between them.
-
-     Parameters
-     ----------
-     parameters : list of str, optional
-         The names of the metadata fields to be compared. The defaults
-         are the reference quantities for mass ratio, spin,
-         eccentricity, and mean anomaly. Note that all of these fields
-         *must* be present in *both* metadata collections. (The
-         `Metadata.add_standard_parameters` method may be useful here.)
-     metric : array_like, optional
-         The matrix used to weight the differences in the parameters.
-         The default is a diagonal matrix with ones on the diagonal,
-         except for the mean-anomaly entry, which is 1/pi^2.
-     allow_different_object_types : bool, optional
-         If True, metadata with different object types (BHBH, BHNS,
-         NSNS) will be compared without penalty. If False, metadata
-         with different object types will be assigned an infinite
-         distance.
-     eccentricity_threshold1 : float, optional
-         The threshold eccentricity below which we consider metadata1
-         non-eccentric. Default is 1e-2.
-     eccentricity_threshold2 : float, optional
-         The threshold eccentricity below which we consider metadata2
-         non-eccentric. Default is 1e-3.
-     eccentricity_threshold_penalize_shorter : int, optional
-         The number of orbits below which we penalize metadata2 for
-         having a non-zero eccentricity when metadata1 does not. This
-         is intended to avoid ascribing small distances to systems with
-         shorter inspirals. Default is 20.
-
-     The mean anomaly, if present, is treated specially to account for
-     the fact that a mean anomaly of 0 is equivalent to a mean anomaly
-     of 2π. The difference between the entries in the two metadata
-     collections is "unwrapped" before the metric is applied.
-
-     If the eccentricity of metadata1 is below
-     `eccentricity_threshold1`, then the mean anomaly is ignored. If
-     that is true and the eccentricity of metadata2 is below
-     `eccentricity_threshold2` *and* the number of orbits in metadata2
-     is longer than `eccentricity_threshold_penalize_shorter`, then the
-     eccentricity is also ignored. You may set these arguments to 0 to
-     disable these features.
-
-     """
-     def __init__(
-         self,
-         parameters=[
-             "reference_mass1",
-             "reference_mass2",
-             "reference_dimensionless_spin1",
-             "reference_dimensionless_spin2",
-             "reference_eccentricity",
-             "reference_mean_anomaly",
-         ],
-         metric=np.diag([1, 1, 1, 1, 1, 1, 1, 1, 1, 1/np.pi**2]),
-         allow_different_object_types=False,
-         eccentricity_threshold1=1e-2,
-         eccentricity_threshold2=1e-3,
-         eccentricity_threshold_penalize_shorter=20,
-     ):
-         self.parameters = parameters
-         self.metric = metric
-         self.allow_different_object_types = allow_different_object_types
-         self.eccentricity_threshold1 = eccentricity_threshold1
-         self.eccentricity_threshold2 = eccentricity_threshold2
-         self.eccentricity_threshold_penalize_shorter = eccentricity_threshold_penalize_shorter
-
-     def __call__(self, metadata1, metadata2, debug=False):
-         if not self.allow_different_object_types:
-             type1 = (
-                 metadata1["object_types"]
-                 if "object_types" in metadata1
-                 else "".join(sorted([
-                     metadata1.get("object1", "A").upper(),
-                     metadata1.get("object2", "B").upper()
-                 ]))
-             )
-             type2 = (
-                 metadata2["object_types"]
-                 if "object_types" in metadata2
-                 else "".join(sorted([
-                     metadata2.get("object1", "C").upper(),
-                     metadata2.get("object2", "D").upper()
-                 ]))
-             )
-             if type1 != type2:
-                 return np.inf
-
-         values1 = [metadata1[parameter] for parameter in self.parameters]
-         values2 = [metadata2[parameter] for parameter in self.parameters]
-
-         if debug:
-             print(f"{self.parameters=}")
-             print(f"{values1=}")
-             print(f"{values2=}")
-
-         if "reference_mean_anomaly" in self.parameters:
-             i = self.parameters.index("reference_mean_anomaly")
-             values1[i], values2[i] = np.unwrap([floater(values1[i]), floater(values2[i])])
-
-         if "reference_eccentricity" in self.parameters:
-             # Either way, we first try to make sure that the corresponding entries are floats.
-             i = self.parameters.index("reference_eccentricity")
-             values1[i] = metadata1.get("reference_eccentricity_bound", floaterbound(values1[i]))
-             values2[i] = metadata2.get("reference_eccentricity_bound", floaterbound(values2[i]))
-
-             if values1[i] < self.eccentricity_threshold1:
-                 # Then we consider metadata1 a non-eccentric system...
-
-                 # ...so we ignore the mean anomaly entirely...
-                 if "reference_mean_anomaly" in self.parameters:
-                     i_ma = self.parameters.index("reference_mean_anomaly")
-                     values1[i_ma] = values2[i_ma]
-
-                 # ...and we ignore the eccentricity if metadata2 is also non-eccentric,
-                 # and longer than eccentricity_threshold_penalize_shorter.
-                 if (
-                     values2[i] < self.eccentricity_threshold2
-                     and metadata2.get(
-                         "number_of_orbits",
-                         metadata2.get("number_of_orbits_from_start", 0)
-                     ) > self.eccentricity_threshold_penalize_shorter
-                 ):
-                     values1[i] = values2[i]
-
-         difference = (
-             np.concatenate(list(map(np.atleast_1d, values1)))
-             - np.concatenate(list(map(np.atleast_1d, values2)))
-         )
-
-         if debug:
-             print(f"{difference=}")
-
-         return difference @ self.metric @ difference
+ from sxscatalog.metadata.metric import *
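The module body above now lives in `sxscatalog`, but its public names remain importable from `sxs.metadata.metric` through the star import. Below is a minimal usage sketch based on the docstring above, with hypothetical metadata dicts carrying the default comparison fields (real `sxs.Metadata` objects, `dict`s, or `pd.Series` all work, per the docstring):

```python
import numpy as np
from sxs.metadata.metric import MetadataMetric

# Two hypothetical metadata dicts with the default parameter fields
m1 = {
    "object_types": "BHBH",
    "reference_mass1": 0.6,
    "reference_mass2": 0.4,
    "reference_dimensionless_spin1": [0.0, 0.0, 0.1],
    "reference_dimensionless_spin2": [0.0, 0.0, 0.0],
    "reference_eccentricity": 0.02,
    "reference_mean_anomaly": 0.1,
}
m2 = {**m1, "reference_eccentricity": 0.03, "reference_mean_anomaly": 6.2}

metric = MetadataMetric()
squared_distance = metric(m1, m2)  # note: the call returns the *squared* distance
distance = np.sqrt(squared_distance)
print(distance)
```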
sxs/simulations/local.py CHANGED
@@ -1,227 +1 @@
- from pathlib import Path
- from datetime import datetime, timezone
- from .. import sxs_id, Metadata, sxs_directory
- from ..utilities import sxs_identifier_re
- from ..zenodo import path_to_invenio
-
- def file_upload_allowed(file, directory_listing):
-     """Return True if the file should be uploaded
-
-     A file should be uploaded if
-     * it is named "metadata.json" or "Horizons.h5"
-     * it is named "Strain_*.json" or "ExtraWaveforms.json" and the corresponding
-       ".h5" file is in the directory listing
-     * it is named "Strain_*.h5" or "ExtraWaveforms.h5" and the corresponding
-       ".json" file is in the directory listing
-
-     """
-     # Check `file.name` to ignore the directory
-     if file.name in ["metadata.json", "Horizons.h5"]:
-         return True
-     if file.name.startswith("Strain_") or file.name.startswith("ExtraWaveforms"):
-         # Ensure that both `.h5` and `.json` exist for all such files
-         if file.suffix == ".json":
-             return file.with_suffix(".h5") in directory_listing
-         elif file.suffix == ".h5":
-             return file.with_suffix(".json") in directory_listing
-         else:
-             return False
-     return False
-
-
- def files_to_upload(directory, annex_dir="."):
-     """Return a list of files to upload
-
-     The files to upload are those that are in the directory listing
-     and pass the `file_upload_allowed` function.
-
-     """
-     full_directory = annex_dir / Path(directory)
-     files = []
-     for lev in full_directory.resolve().glob("Lev*"):
-         directory_listing = list(lev.iterdir())
-         files.extend([
-             file for file in directory_listing
-             if file_upload_allowed(file, directory_listing)
-         ])
-     return sorted(files, key=lambda x: str(x).lower())
-
-
- def extract_id_from_common_metadata(file, annex_dir):
-     """Extract the SXS ID from a common-metadata.txt file
-
-     If the ID doesn't exist, return the directory path, relative to
-     the `annex_dir`.
-     """
-     file = Path(file)
-     annex_dir = Path(annex_dir)
-     key = str(file.resolve().parent.relative_to(annex_dir.resolve()))
-     with file.open("r") as f:
-         for line in f.readlines():
-             line = line.strip()
-             if "alternative-names" in line:
-                 if (m := sxs_identifier_re.search(line)):
-                     key = m["sxs_identifier"]
-                     break
-     return key
-
-
- def local_simulations(annex_dir, compute_md5=False, show_progress=False):
-     """
-     Walk the annex directory to find and process all simulations
-
-     For each `common-metadata.txt` file found:
-     - Ensures that at least one directory starting with "Lev"
-       exists; if not, the process is skipped.
-     - Defines a key for the metadata, which is either:
-       - The SXS ID contained in that file's "alternative-names"
-         field, if present.
-       - The directory path relative to `annex_dir`.
-     - Chooses the highest "Lev" directory and extracts the
-       metadata.
-     - Finds all files to upload in the directory; if none are
-       found, the process is skipped.
-     - Adds the "files" dictionary to the metadata, pointing to
-       each file that would be uploaded if the simulation were
-       published.
-
-     Parameters
-     ----------
-     annex_dir : (str or Path)
-         The path to the annex directory to be processed.
-     compute_md5 : bool, optional
-         Whether to compute the MD5 hash of each file. Default is
-         False.
-     show_progress : bool, optional
-         Whether to show a progress bar. Default is False.
-
-     Returns
-     -------
-     dict :
-         A dictionary containing the processed metadata.
-     """
-     from os import walk
-     from ..utilities import md5checksum
-     from tqdm import tqdm
-
-     simulations = {}
-     annex_dir = Path(annex_dir).resolve()
-
-     if show_progress:  # Count the number of common-metadata.txt files
-         num_files = 0
-         for dirpath, dirnames, filenames in walk(annex_dir, topdown=True):
-             if Path(dirpath).name.startswith("."):
-                 dirnames[:] = []
-                 continue
-             if "common-metadata.txt" in filenames:
-                 if not any(d.startswith("Lev") for d in dirnames):
-                     continue
-                 num_files += 1
-                 dirnames[:] = []
-         progress_bar = tqdm(total=num_files, desc="Processing simulations")
-
-     # The `walk` method can be made *much* faster than the `glob` method
-     for dirpath, dirnames, filenames in walk(annex_dir, topdown=True):
-         dirpath = Path(dirpath)
-
-         # Ignore hidden directories
-         if dirpath.name.startswith("."):
-             dirnames[:] = []
-             continue
-
-         if "common-metadata.txt" in filenames:
-             if not any(d.startswith("Lev") for d in dirnames):
-                 continue
-
-             if show_progress:
-                 progress_bar.update(1)
-
-             try:
-                 key = extract_id_from_common_metadata(dirpath / "common-metadata.txt", annex_dir)
-
-                 # Find the highest Lev directory and extract the metadata
-                 highest_lev = sorted(
-                     [d for d in dirnames if d.startswith("Lev")]
-                 )[-1]
-                 metadata = Metadata.load(dirpath / highest_lev / "metadata")
-                 metadata = metadata.add_standard_parameters()
-
-                 metadata["directory"] = str(dirpath.relative_to(annex_dir))
-
-                 simulations[key] = metadata
-
-                 files = files_to_upload(dirpath, annex_dir)
-
-                 metadata["mtime"] = datetime.fromtimestamp(
-                     max(
-                         (
-                             file.resolve().stat().st_mtime
-                             for file in files
-                             if file.exists()
-                         ),
-                         default=0.0,
-                     ),
-                     tz=timezone.utc,
-                 ).isoformat()
-
-                 metadata["files"] = {
-                     path_to_invenio(file.relative_to(dirpath)): {
-                         "link": str(file),
-                         "size": file.stat().st_size,
-                         "checksum": md5checksum(file) if compute_md5 else "",
-                     }
-                     for file in files
-                     if file.exists()
-                 }
-             except KeyboardInterrupt:
-                 raise
-             except Exception as e:
-                 print(f"Error processing {dirpath}: {e}")
-
-             dirnames[:] = []  # Don't keep looking for common-metadata.txt files under this directory
-
-     return simulations
-
-
- def write_local_simulations(annex_dir, output_file=None, compute_md5=False, show_progress=False):
-     """Write the local simulations to a file for use when loading `Simulations`
-
-     This function calls `local_simulations` to obtain the dictionary,
-     but also writes the dictionary to a JSON file.
-
-     Parameters
-     ----------
-     annex_dir : (str or Path)
-         The path to the annex directory to be processed.
-     output_file : (str or Path, optional)
-         The path to the file to be written. By default, the file is
-         written to `sxs_directory("cache") / "local_simulations.json"`.
-         N.B.: If you specify a different file, `sxs.load` will not
-         automatically find it.
-     compute_md5 : bool, optional
-         Whether to compute the MD5 hash of each file. Default is
-         False.
-     show_progress : bool, optional
-         Whether to show a progress bar. Default is False.
-
-     Returns
-     -------
-     dict :
-         A dictionary containing the processed metadata.
-     """
-     from json import dump
-
-     # Process the annex directory to find all simulations
-     simulations = local_simulations(annex_dir, compute_md5=compute_md5, show_progress=show_progress)
-
-     # Write the simulations to file
-     if output_file is not False:  # Test literal identity to allow `None`
-         if output_file is None:
-             output_file = sxs_directory("cache") / "local_simulations.json"
-         else:
-             output_file = Path(output_file)
-         output_file.parent.mkdir(parents=True, exist_ok=True)
-         with output_file.open("w") as f:
-             dump(simulations, f, indent=2, separators=(",", ": "), ensure_ascii=True)
-
-     return simulations
+ from sxscatalog.simulations.local import *
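As with `metric.py`, the implementation above has moved to `sxscatalog`, and its functions keep the same import path in `sxs` via the star import. A short usage sketch, assuming the `sxscatalog` versions keep the signatures documented above (`"/path/to/annex"` is a placeholder):

```python
from sxs.simulations.local import write_local_simulations

# Scan a local annex and cache the result where `sxs.load` looks for it
simulations = write_local_simulations(
    "/path/to/annex",
    compute_md5=False,   # skip the (slow) per-file MD5 checksums
    show_progress=True,  # display a tqdm progress bar
)
print(f"Found {len(simulations)} local simulations")
```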
sxs/simulations/simulation.py CHANGED
@@ -124,9 +124,15 @@ def Simulation(location, *args, **kwargs):
      metadata = Metadata(simulations[simulation_id])
      series = simulations.dataframe.loc[simulation_id]
 
+     # If input_version is not the default, remove "files" from metadata
+     if input_version and input_version != max(metadata.get("DOI_versions", []), default=""):
+         metadata = type(metadata)({
+             key: value for key, value in metadata.items() if key != "files"
+         })
+
      # Check if the specified version exists in the simulation catalog
      if not hasattr(metadata, "DOI_versions"):
-         input_version = "v0.0"
+         input_version = "v0.0"  # A fake version, to signal this sim doesn't know about DOIs
      if input_version != "v0.0" and input_version not in metadata.DOI_versions:
          raise ValueError(f"Version '{input_version}' not found in simulation catalog for '{simulation_id}'")
 
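The intent of the new guard, per its comment: the catalog's "files" entry describes the newest release, so when the caller pins any other version it is dropped (and file information is presumably re-fetched for the requested version later). A self-contained illustration with hypothetical values; the real code preserves the `Metadata` subclass via `type(metadata)(...)`, which a plain dict sketch can omit:

```python
# Hypothetical catalog metadata; "files" refers to the newest release
metadata = {
    "DOI_versions": ["v1.0", "v2.0", "v3.0"],
    "files": {"Strain_N2.h5": {"link": "https://example.org/Strain_N2.h5"}},
}
input_version = "v2.0"  # the caller pinned an older release

if input_version and input_version != max(metadata.get("DOI_versions", []), default=""):
    metadata = {key: value for key, value in metadata.items() if key != "files"}

assert "files" not in metadata
```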
@@ -217,10 +223,14 @@ def Simulation(location, *args, **kwargs):
          sim = Simulation_v1(
              metadata, series, version, sxs_id_stem, sxs_id, url, files, lev_numbers, output_lev_number, location, *args, **kwargs
          )
-     elif 2 <= version_number < 3.0 or version == "v0.0":
+     elif 2 <= version_number < 3.0:
          sim = Simulation_v2(
              metadata, series, version, sxs_id_stem, sxs_id, url, files, lev_numbers, output_lev_number, location, *args, **kwargs
          )
+     elif 3 <= version_number < 4.0 or version == "v0.0":
+         sim = Simulation_v3(
+             metadata, series, version, sxs_id_stem, sxs_id, url, files, lev_numbers, output_lev_number, location, *args, **kwargs
+         )
      else:
          raise ValueError(f"Version '{version}' not yet supported")
      sim.__file__ = str(sxs_directory("cache") / sxs_path_to_system_path(sim.sxs_id))
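A sketch of how the new branch is reached from user code (the ID and version tag below are placeholders; a `vX.Y` suffix on the location string selects a catalog release). `Simulation_v3`, defined later in this diff, currently just inherits `Simulation_v2`'s behavior:

```python
import sxs

sim_latest = sxs.load("SXS:BBH:1234")      # highest available version
sim_pinned = sxs.load("SXS:BBH:1234v2.0")  # pin a specific catalog version
```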
@@ -365,7 +375,7 @@ class SimulationBase:
          dataframe = load("simulations").dataframe
          metadata_metric = metadata_metric or MetadataMetric()
          if drop_deprecated:
-             dataframe = dataframe[~dataframe.deprecated]
+             dataframe = dataframe[~dataframe["deprecated"]]
          return dataframe.apply(
              lambda m: sqrt(metadata_metric(self.metadata, m)),
              axis=1
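The one-line change above swaps attribute access for bracket indexing. This is general pandas behavior rather than anything sxs-specific: attribute access only works when the column name is a valid identifier and does not collide with an existing DataFrame attribute or method, so brackets are the robust form. For example:

```python
import pandas as pd

df = pd.DataFrame({"deprecated": [True, False], "size": [10, 20]})

kept = df[~df["deprecated"]]  # bracket indexing always selects the column
print(df["size"])  # the "size" column
print(df.size)     # 4: pandas' own `size` attribute (element count), not the column
```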
@@ -824,16 +834,21 @@ class Simulation_v2(SimulationBase):
          json_location = self.files.get(json_path)["link"]
          h5_truepath = Path(sxs_path_to_system_path(sxs_id_path / h5_path))
          json_truepath = Path(sxs_path_to_system_path(sxs_id_path / json_path))
+         json_truepath = sxs_directory("cache") / json_truepath
          if not Path(json_location).exists() and not json_truepath.exists():
              if not read_config("download", True):
                  raise ValueError(f"{json_truepath} not found and download is disabled")
-             download_file(json_location, sxs_directory("cache") / json_truepath)
+             download_file(json_location, json_truepath)
          return load(
              h5_location, truepath=h5_truepath, group=group, metadata=self.metadata,
              transform_to_inertial=transform_to_inertial
          )
 
 
+ class Simulation_v3(Simulation_v2):
+     pass
+
+
  def get_file_info(metadata, sxs_id, download=None):
      # TODO: Allow an existing zenodo_metadata.json file to be used
      from .. import load_via_sxs_id
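The `json_truepath` change above fixes a caching bug: previously, the existence check tested a path relative to the working directory while the download wrote under `sxs_directory("cache")`, so a previously downloaded file would typically not be detected and would be fetched again. The pattern the fix establishes, as a sketch with hypothetical helper names:

```python
from pathlib import Path

def ensure_cached(remote_url, cache_root, relative_path, download_file):
    # Compute the absolute cache path once, then use the *same* path
    # for both the existence check and the download target.
    truepath = Path(cache_root) / relative_path
    if not truepath.exists():
        truepath.parent.mkdir(parents=True, exist_ok=True)
        download_file(remote_url, truepath)
    return truepath
```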