sxs 2024.0.44__py3-none-any.whl → 2025.0.2__py3-none-any.whl

This diff compares the contents of two publicly released versions of the package, as they appear in the supported public registries. It is provided for informational purposes only.
sxs/metadata/metric.py CHANGED
@@ -1,152 +1 @@
- from ..utilities.string_converters import *
- import numpy as np
-
- class MetadataMetric:
-     """A metric for comparing metadata.
-
-     This class is designed to be used as a callable object that takes
-     two collections of metadata (`sxs.Metadata`, `dict`, `pd.Series`)
-     and returns a number measuring the distance between the metadata.
-
-     With the default arguments, this will not strictly be a metric, as
-     it does not satisfy the triangle inequality. However, it is
-     intended to be used as a heuristic for sorting and filtering
-     metadata, rather than as a strict metric for clustering or
-     classification.
-
-     Note that calling an object of this class with two metadata
-     collections will return the *squared* distance between them.
-
-     Parameters
-     ----------
-     parameters : list of str, optional
-         The names of the metadata fields to be compared. The defaults
-         are the reference quantities for mass ratio, spin,
-         eccentricity, and mean anomaly. Note that all of these fields
-         *must* be present in *both* metadata collections. (The
-         `Metadata.add_standard_parameters` method may be useful here.)
-     metric : array_like, optional
-         The matrix used to weight the differences in the parameters.
-         The default is a diagonal matrix with ones on the diagonal,
-         except for the mean-anomaly entry, which is 1/pi^2.
-     allow_different_object_types : bool, optional
-         If True, metadata with different object types (BHBH, BHNS,
-         NSNS) will be compared without penalty. If False, metadata
-         with different object types will be assigned an infinite
-         distance.
-     eccentricity_threshold1 : float, optional
-         The threshold eccentricity below which we consider metadata1
-         non-eccentric. Default is 1e-2.
-     eccentricity_threshold2 : float, optional
-         The threshold eccentricity below which we consider metadata2
-         non-eccentric. Default is 1e-3.
-     eccentricity_threshold_penalize_shorter : int, optional
-         The number of orbits below which we penalize metadata2 for
-         having a non-zero eccentricity when metadata1 does not. This
-         is intended to avoid ascribing small distances to systems with
-         shorter inspirals. Default is 20.
-
-     The mean anomaly, if present, is treated specially to account for
-     the fact that a mean anomaly of 0 is equivalent to a mean anomaly
-     of 2π. The difference between the entries in the two metadata
-     collections is "unwrapped" before the metric is applied.
-
-     If the eccentricity of metadata1 is below
-     `eccentricity_threshold1`, then the mean anomaly is ignored. If
-     that is true and the eccentricity of metadata2 is below
-     `eccentricity_threshold2` *and* the number of orbits in metadata2
-     is longer than `eccentricity_threshold_penalize_shorter`, then the
-     eccentricity is also ignored. You may set these arguments to 0 to
-     disable these features.
-
-     """
-     def __init__(
-         self,
-         parameters=[
-             "reference_mass1",
-             "reference_mass2",
-             "reference_dimensionless_spin1",
-             "reference_dimensionless_spin2",
-             "reference_eccentricity",
-             "reference_mean_anomaly",
-         ],
-         metric=np.diag([1, 1, 1, 1, 1, 1, 1, 1, 1, 1/np.pi**2]),
-         allow_different_object_types=False,
-         eccentricity_threshold1=1e-2,
-         eccentricity_threshold2=1e-3,
-         eccentricity_threshold_penalize_shorter=20,
-     ):
-         self.parameters = parameters
-         self.metric = metric
-         self.allow_different_object_types = allow_different_object_types
-         self.eccentricity_threshold1 = eccentricity_threshold1
-         self.eccentricity_threshold2 = eccentricity_threshold2
-         self.eccentricity_threshold_penalize_shorter = eccentricity_threshold_penalize_shorter
-
-     def __call__(self, metadata1, metadata2, debug=False):
-         if not self.allow_different_object_types:
-             type1 = (
-                 metadata1["object_types"]
-                 if "object_types" in metadata1
-                 else "".join(sorted([
-                     metadata1.get("object1", "A").upper(),
-                     metadata1.get("object2", "B").upper()
-                 ]))
-             )
-             type2 = (
-                 metadata2["object_types"]
-                 if "object_types" in metadata2
-                 else "".join(sorted([
-                     metadata2.get("object1", "C").upper(),
-                     metadata2.get("object2", "D").upper()
-                 ]))
-             )
-             if type1 != type2:
-                 return np.inf
-
-         values1 = [metadata1[parameter] for parameter in self.parameters]
-         values2 = [metadata2[parameter] for parameter in self.parameters]
-
-         if debug:
-             print(f"{self.parameters=}")
-             print(f"{values1=}")
-             print(f"{values2=}")
-
-         if "reference_mean_anomaly" in self.parameters:
-             i = self.parameters.index("reference_mean_anomaly")
-             values1[i], values2[i] = np.unwrap([floater(values1[i]), floater(values2[i])])
-
-         if "reference_eccentricity" in self.parameters:
-             # Either way, we first try to make sure that the corresponding entries are floats.
-             i = self.parameters.index("reference_eccentricity")
-             values1[i] = metadata1.get("reference_eccentricity_bound", floaterbound(values1[i]))
-             values2[i] = metadata2.get("reference_eccentricity_bound", floaterbound(values2[i]))
-
-             if values1[i] < self.eccentricity_threshold1:
-                 # Then we consider metadata1 a non-eccentric system...
-
-                 # ...so we ignore the mean anomaly entirely...
-                 if "reference_mean_anomaly" in self.parameters:
-                     i_ma = self.parameters.index("reference_mean_anomaly")
-                     values1[i_ma] = values2[i_ma]
-
-                 # ...and we ignore the eccentricity if metadata2 is also non-eccentric,
-                 # and longer than eccentricity_threshold_penalize_shorter.
-                 if (
-                     values2[i] < self.eccentricity_threshold2
-                     and metadata2.get(
-                         "number_of_orbits",
-                         metadata2.get("number_of_orbits_from_start", 0)
-                     ) > self.eccentricity_threshold_penalize_shorter
-                 ):
-                     values1[i] = values2[i]
-
-         difference = (
-             np.concatenate(list(map(np.atleast_1d, values1)))
-             - np.concatenate(list(map(np.atleast_1d, values2)))
-         )
-
-         if debug:
-             print(f"{difference=}")
-
-         return difference @ self.metric @ difference
+ from sxscatalog.metadata.metric import *
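The module body above now lives in `sxscatalog`, but its public names remain importable from `sxs.metadata.metric` through the star import. Below is a minimal usage sketch based on the docstring above, with hypothetical metadata dicts carrying the default comparison fields (real `sxs.Metadata` objects, `dict`s, or `pd.Series` all work, per the docstring):

```python
import numpy as np
from sxs.metadata.metric import MetadataMetric

# Two hypothetical metadata dicts with the default parameter fields
m1 = {
    "object_types": "BHBH",
    "reference_mass1": 0.6,
    "reference_mass2": 0.4,
    "reference_dimensionless_spin1": [0.0, 0.0, 0.1],
    "reference_dimensionless_spin2": [0.0, 0.0, 0.0],
    "reference_eccentricity": 0.02,
    "reference_mean_anomaly": 0.1,
}
m2 = {**m1, "reference_eccentricity": 0.03, "reference_mean_anomaly": 6.2}

metric = MetadataMetric()
squared_distance = metric(m1, m2)  # note: the call returns the *squared* distance
distance = np.sqrt(squared_distance)
print(distance)
```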
sxs/simulations/local.py CHANGED
@@ -1,227 +1 @@
- from pathlib import Path
- from datetime import datetime, timezone
- from .. import sxs_id, Metadata, sxs_directory
- from ..utilities import sxs_identifier_re
- from ..zenodo import path_to_invenio
-
- def file_upload_allowed(file, directory_listing):
-     """Return True if the file should be uploaded
-
-     A file should be uploaded if
-     * it is named "metadata.json" or "Horizons.h5"
-     * it is named "Strain_*.json" or "ExtraWaveforms.json" and the corresponding
-       ".h5" file is in the directory listing
-     * it is named "Strain_*.h5" or "ExtraWaveforms.h5" and the corresponding
-       ".json" file is in the directory listing
-
-     """
-     # Check `file.name` to ignore the directory
-     if file.name in ["metadata.json", "Horizons.h5"]:
-         return True
-     if file.name.startswith("Strain_") or file.name.startswith("ExtraWaveforms"):
-         # Ensure that both `.h5` and `.json` exist for all such files
-         if file.suffix == ".json":
-             return file.with_suffix(".h5") in directory_listing
-         elif file.suffix == ".h5":
-             return file.with_suffix(".json") in directory_listing
-         else:
-             return False
-     return False
-
-
- def files_to_upload(directory, annex_dir="."):
-     """Return a list of files to upload
-
-     The files to upload are those that are in the directory listing
-     and pass the `file_upload_allowed` function.
-
-     """
-     full_directory = annex_dir / Path(directory)
-     files = []
-     for lev in full_directory.resolve().glob("Lev*"):
-         directory_listing = list(lev.iterdir())
-         files.extend([
-             file for file in directory_listing
-             if file_upload_allowed(file, directory_listing)
-         ])
-     return sorted(files, key=lambda x: str(x).lower())
-
-
- def extract_id_from_common_metadata(file, annex_dir):
-     """Extract the SXS ID from a common-metadata.txt file
-
-     If the ID doesn't exist, return the directory path, relative to
-     the `annex_dir`.
-     """
-     file = Path(file)
-     annex_dir = Path(annex_dir)
-     key = str(file.resolve().parent.relative_to(annex_dir.resolve()))
-     with file.open("r") as f:
-         for line in f.readlines():
-             line = line.strip()
-             if "alternative-names" in line:
-                 if (m := sxs_identifier_re.search(line)):
-                     key = m["sxs_identifier"]
-                     break
-     return key
-
-
- def local_simulations(annex_dir, compute_md5=False, show_progress=False):
-     """
-     Walk the annex directory to find and process all simulations
-
-     For each `common-metadata.txt` file found:
-     - Ensures that at least one directory starting with "Lev"
-       exists; if not, the process is skipped.
-     - Defines a key for the metadata, which is either:
-       - The SXS ID contained in that file's "alternative-names"
-         field, if present.
-       - The directory path relative to `annex_dir`.
-     - Chooses the highest "Lev" directory and extracts the
-       metadata.
-     - Finds all files to upload in the directory; if none are
-       found, the process is skipped.
-     - Adds the "files" dictionary to the metadata, pointing to
-       each file that would be uploaded if the simulation were
-       published.
-
-     Parameters
-     ----------
-     annex_dir : (str or Path)
-         The path to the annex directory to be processed.
-     compute_md5 : bool, optional
-         Whether to compute the MD5 hash of each file. Default is
-         False.
-     show_progress : bool, optional
-         Whether to show a progress bar. Default is False.
-
-     Returns
-     -------
-     dict :
-         A dictionary containing the processed metadata.
-     """
-     from os import walk
-     from ..utilities import md5checksum
-     from tqdm import tqdm
-
-     simulations = {}
-     annex_dir = Path(annex_dir).resolve()
-
-     if show_progress:  # Count the number of common-metadata.txt files
-         num_files = 0
-         for dirpath, dirnames, filenames in walk(annex_dir, topdown=True):
-             if Path(dirpath).name.startswith("."):
-                 dirnames[:] = []
-                 continue
-             if "common-metadata.txt" in filenames:
-                 if not any(d.startswith("Lev") for d in dirnames):
-                     continue
-                 num_files += 1
-                 dirnames[:] = []
-         progress_bar = tqdm(total=num_files, desc="Processing simulations")
-
-     # The `walk` method can be made *much* faster than the `glob` method
-     for dirpath, dirnames, filenames in walk(annex_dir, topdown=True):
-         dirpath = Path(dirpath)
-
-         # Ignore hidden directories
-         if dirpath.name.startswith("."):
-             dirnames[:] = []
-             continue
-
-         if "common-metadata.txt" in filenames:
-             if not any(d.startswith("Lev") for d in dirnames):
-                 continue
-
-             if show_progress:
-                 progress_bar.update(1)
-
-             try:
-                 key = extract_id_from_common_metadata(dirpath / "common-metadata.txt", annex_dir)
-
-                 # Find the highest Lev directory and extract the metadata
-                 highest_lev = sorted(
-                     [d for d in dirnames if d.startswith("Lev")]
-                 )[-1]
-                 metadata = Metadata.load(dirpath / highest_lev / "metadata")
-                 metadata = metadata.add_standard_parameters()
-
-                 metadata["directory"] = str(dirpath.relative_to(annex_dir))
-
-                 simulations[key] = metadata
-
-                 files = files_to_upload(dirpath, annex_dir)
-
-                 metadata["mtime"] = datetime.fromtimestamp(
-                     max(
-                         (
-                             file.resolve().stat().st_mtime
-                             for file in files
-                             if file.exists()
-                         ),
-                         default=0.0,
-                     ),
-                     tz=timezone.utc,
-                 ).isoformat()
-
-                 metadata["files"] = {
-                     path_to_invenio(file.relative_to(dirpath)): {
-                         "link": str(file),
-                         "size": file.stat().st_size,
-                         "checksum": md5checksum(file) if compute_md5 else "",
-                     }
-                     for file in files
-                     if file.exists()
-                 }
-             except KeyboardInterrupt:
-                 raise
-             except Exception as e:
-                 print(f"Error processing {dirpath}: {e}")
-
-             dirnames[:] = []  # Don't keep looking for common-metadata.txt files under this directory
-
-     return simulations
-
-
- def write_local_simulations(annex_dir, output_file=None, compute_md5=False, show_progress=False):
-     """Write the local simulations to a file for use when loading `Simulations`
-
-     This function calls `local_simulations` to obtain the dictionary,
-     but also writes the dictionary to a JSON file.
-
-     Parameters
-     ----------
-     annex_dir : (str or Path)
-         The path to the annex directory to be processed.
-     output_file : (str or Path, optional)
-         The path to the file to be written. By default, the file is
-         written to `sxs_directory("cache") / "local_simulations.json"`.
-         N.B.: If you specify a different file, `sxs.load` will not
-         automatically find it.
-     compute_md5 : bool, optional
-         Whether to compute the MD5 hash of each file. Default is
-         False.
-     show_progress : bool, optional
-         Whether to show a progress bar. Default is False.
-
-     Returns
-     -------
-     dict :
-         A dictionary containing the processed metadata.
-     """
-     from json import dump
-
-     # Process the annex directory to find all simulations
-     simulations = local_simulations(annex_dir, compute_md5=compute_md5, show_progress=show_progress)
-
-     # Write the simulations to file
-     if output_file is not False:  # Test literal identity to allow `None`
-         if output_file is None:
-             output_file = sxs_directory("cache") / "local_simulations.json"
-         else:
-             output_file = Path(output_file)
-         output_file.parent.mkdir(parents=True, exist_ok=True)
-         with output_file.open("w") as f:
-             dump(simulations, f, indent=2, separators=(",", ": "), ensure_ascii=True)
-
-     return simulations
+ from sxscatalog.simulations.local import *
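As with `metric.py`, the implementation above has moved to `sxscatalog`, and its functions keep the same import path in `sxs` via the star import. A short usage sketch, assuming the `sxscatalog` versions keep the signatures documented above (`"/path/to/annex"` is a placeholder):

```python
from sxs.simulations.local import write_local_simulations

# Scan a local annex and cache the result where `sxs.load` looks for it
simulations = write_local_simulations(
    "/path/to/annex",
    compute_md5=False,   # skip the (slow) per-file MD5 checksums
    show_progress=True,  # display a tqdm progress bar
)
print(f"Found {len(simulations)} local simulations")
```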
sxs/simulations/simulation.py CHANGED
@@ -124,9 +124,15 @@ def Simulation(location, *args, **kwargs):
      metadata = Metadata(simulations[simulation_id])
      series = simulations.dataframe.loc[simulation_id]
 
+     # If input_version is not the default, remove "files" from metadata
+     if input_version and input_version != max(metadata.get("DOI_versions", []), default=""):
+         metadata = type(metadata)({
+             key: value for key, value in metadata.items() if key != "files"
+         })
+
      # Check if the specified version exists in the simulation catalog
      if not hasattr(metadata, "DOI_versions"):
-         input_version = "v0.0"
+         input_version = "v0.0"  # A fake version, to signal this sim doesn't know about DOIs
      if input_version != "v0.0" and input_version not in metadata.DOI_versions:
          raise ValueError(f"Version '{input_version}' not found in simulation catalog for '{simulation_id}'")
 
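The intent of the new guard, per its comment: the catalog's "files" entry describes the newest release, so when the caller pins any other version it is dropped (and file information is presumably re-fetched for the requested version later). A self-contained illustration with hypothetical values; the real code preserves the `Metadata` subclass via `type(metadata)(...)`, which a plain dict sketch can omit:

```python
# Hypothetical catalog metadata; "files" refers to the newest release
metadata = {
    "DOI_versions": ["v1.0", "v2.0", "v3.0"],
    "files": {"Strain_N2.h5": {"link": "https://example.org/Strain_N2.h5"}},
}
input_version = "v2.0"  # the caller pinned an older release

if input_version and input_version != max(metadata.get("DOI_versions", []), default=""):
    metadata = {key: value for key, value in metadata.items() if key != "files"}

assert "files" not in metadata
```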
@@ -217,10 +223,14 @@ def Simulation(location, *args, **kwargs):
          sim = Simulation_v1(
              metadata, series, version, sxs_id_stem, sxs_id, url, files, lev_numbers, output_lev_number, location, *args, **kwargs
          )
-     elif 2 <= version_number < 3.0 or version == "v0.0":
+     elif 2 <= version_number < 3.0:
          sim = Simulation_v2(
              metadata, series, version, sxs_id_stem, sxs_id, url, files, lev_numbers, output_lev_number, location, *args, **kwargs
          )
+     elif 3 <= version_number < 4.0 or version == "v0.0":
+         sim = Simulation_v3(
+             metadata, series, version, sxs_id_stem, sxs_id, url, files, lev_numbers, output_lev_number, location, *args, **kwargs
+         )
      else:
          raise ValueError(f"Version '{version}' not yet supported")
      sim.__file__ = str(sxs_directory("cache") / sxs_path_to_system_path(sim.sxs_id))
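A sketch of how the new branch is reached from user code (the ID and version tag below are placeholders; a `vX.Y` suffix on the location string selects a catalog release). `Simulation_v3`, defined later in this diff, currently just inherits `Simulation_v2`'s behavior:

```python
import sxs

sim_latest = sxs.load("SXS:BBH:1234")      # highest available version
sim_pinned = sxs.load("SXS:BBH:1234v2.0")  # pin a specific catalog version
```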
@@ -365,7 +375,7 @@ class SimulationBase:
          dataframe = load("simulations").dataframe
          metadata_metric = metadata_metric or MetadataMetric()
          if drop_deprecated:
-             dataframe = dataframe[~dataframe.deprecated]
+             dataframe = dataframe[~dataframe["deprecated"]]
          return dataframe.apply(
              lambda m: sqrt(metadata_metric(self.metadata, m)),
              axis=1
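The one-line change above swaps attribute access for bracket indexing. This is general pandas behavior rather than anything sxs-specific: attribute access only works when the column name is a valid identifier and does not collide with an existing DataFrame attribute or method, so brackets are the robust form. For example:

```python
import pandas as pd

df = pd.DataFrame({"deprecated": [True, False], "size": [10, 20]})

kept = df[~df["deprecated"]]  # bracket indexing always selects the column
print(df["size"])  # the "size" column
print(df.size)     # 4: pandas' own `size` attribute (element count), not the column
```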
@@ -824,16 +834,21 @@ class Simulation_v2(SimulationBase):
          json_location = self.files.get(json_path)["link"]
          h5_truepath = Path(sxs_path_to_system_path(sxs_id_path / h5_path))
          json_truepath = Path(sxs_path_to_system_path(sxs_id_path / json_path))
+         json_truepath = sxs_directory("cache") / json_truepath
          if not Path(json_location).exists() and not json_truepath.exists():
              if not read_config("download", True):
                  raise ValueError(f"{json_truepath} not found and download is disabled")
-             download_file(json_location, sxs_directory("cache") / json_truepath)
+             download_file(json_location, json_truepath)
          return load(
              h5_location, truepath=h5_truepath, group=group, metadata=self.metadata,
              transform_to_inertial=transform_to_inertial
          )
 
 
+ class Simulation_v3(Simulation_v2):
+     pass
+
+
  def get_file_info(metadata, sxs_id, download=None):
      # TODO: Allow an existing zenodo_metadata.json file to be used
      from .. import load_via_sxs_id
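The `json_truepath` change above fixes a caching bug: previously, the existence check tested a path relative to the working directory while the download wrote under `sxs_directory("cache")`, so a previously downloaded file would typically not be detected and would be fetched again. The pattern the fix establishes, as a sketch with hypothetical helper names:

```python
from pathlib import Path

def ensure_cached(remote_url, cache_root, relative_path, download_file):
    # Compute the absolute cache path once, then use the *same* path
    # for both the existence check and the download target.
    truepath = Path(cache_root) / relative_path
    if not truepath.exists():
        truepath.parent.mkdir(parents=True, exist_ok=True)
        download_file(remote_url, truepath)
    return truepath
```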