timewise 1.0.0a1__py3-none-any.whl → 1.0.0a5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,113 @@
+ #!/usr/bin/env python
+ # -*- coding: utf-8 -*-
+ # File: timewise/ampel/timewise/alert/TimewiseAlertSupplier.py
+ # License: BSD-3-Clause
+ # Author: Jannis Necker <jannis.necker@gmail.com>
+ # Date: 16.09.2025
+ # Last Modified Date: 16.09.2025
+ # Last Modified By: Jannis Necker <jannis.necker@gmail.com>
+
+ import sys
+ from hashlib import blake2b
+ from typing import Literal, List
+
+ import pandas as pd
+
+ from bson import encode
+
+ from ampel.alert.AmpelAlert import AmpelAlert
+ from ampel.alert.BaseAlertSupplier import BaseAlertSupplier
+ from ampel.view.ReadOnlyDict import ReadOnlyDict
+
+
+ class TimewiseAlertSupplier(BaseAlertSupplier):
+     """
+     Iterable class that, for each transient name provided by the underlying
+     alert_loader, returns an AmpelAlert instance.
+     """
+
+     stat_pps: int = 0
+     stat_uls: int = 0
+
+     dpid: Literal["hash", "inc"] = "hash"
+     # external_directory: Optional[ str ]
+     # deserialize: None | Literal["avro", "json"]
+
+     bands: List[str] = ["w1", "w2"]
+
+     def __init__(self, **kwargs) -> None:
+         super().__init__(**kwargs)
+         self.counter = 0 if self.dpid == "hash" else 1
+
+     def __next__(self) -> AmpelAlert:
+         """
+         :returns: an AmpelAlert with a structure that the AlertProcessor understands
+         :raises StopIteration: when the alert_loader runs dry.
+         :raises AttributeError: if alert_loader was not set properly before this method is called
+         """
+         table: pd.DataFrame = self._deserialize(next(self.alert_loader))  # type: ignore
+
+         stock_ids = table["stock_id"].unique()
+         assert len(stock_ids) == 1
+         stock_id = stock_ids[0]
+
+         # turn the table into a list of dictionaries that ampel understands
+         all_ids = b""
+         pps = []
+
+         # remove the _ep suffix from AllWISE MEP column names
+         columns_to_rename = [c for c in table.columns if c.endswith("_ep")]
+         if len(columns_to_rename):
+             rename = {
+                 c: c.replace("_ep", "")
+                 for c in columns_to_rename
+                 if c.replace("_ep", "") not in table.columns
+             }
+             if rename:
+                 # in this case only the allwise column with the _ep suffix exists
+                 # and we can simply rename the columns
+                 table.rename(columns=rename, inplace=True)
+
+             move = {
+                 c: c.replace("_ep", "")
+                 for c in columns_to_rename
+                 if c.replace("_ep", "") in table.columns
+             }
+             if move:
+                 # in this case the column already exists because the neowise data
+                 # is present; we have to insert the values from the columns with
+                 # the _ep suffix into the respective neowise columns
+                 for c, nc in move.items():
+                     na_mask = table[nc].isna()
+                     table.loc[na_mask, nc] = table[c][na_mask]
+                 pd.options.mode.chained_assignment = None
+                 table.drop(columns=list(move), inplace=True)
+                 pd.options.mode.chained_assignment = "warn"
+
+         for i, row in table.iterrows():
+             # convert table row to dict, convert data types from numpy to native python
+             # respect masked fields and convert them to None
+             pp = {k: None if pd.isna(v) else v for k, v in row.to_dict().items()}
+             pp_hash = blake2b(encode(pp), digest_size=7).digest()
+             if self.counter:
+                 pp["candid"] = self.counter
+                 self.counter += 1
+             else:
+                 pp["candid"] = int.from_bytes(pp_hash, byteorder=sys.byteorder)
+
+             all_ids += pp_hash
+             pps.append(ReadOnlyDict(pp))
+
+         if not pps:
+             return self.__next__()
+
+         # Update stats
+         self.stat_pps += len(pps)
+
+         return AmpelAlert(
+             id=int.from_bytes(  # alert id
+                 blake2b(all_ids, digest_size=7).digest(), byteorder=sys.byteorder
+             ),
+             stock=int(stock_id),  # internal ampel id
+             datapoints=tuple(pps),
+         )
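
For orientation, the "hash" dpid mode above derives both the per-datapoint candid and the alert id from BLAKE2b digests of the BSON-encoded payloads. Below is a minimal standalone sketch of that scheme, using only hashlib and pymongo's bson; the example payloads are made up:

    import sys
    from hashlib import blake2b
    from bson import encode  # ships with pymongo

    # hypothetical datapoints, standing in for rows of the deserialized table
    pps = [
        {"mjd": 57000.1, "w1_mag": 15.2, "stock_id": 42},
        {"mjd": 57000.2, "w1_mag": 15.3, "stock_id": 42},
    ]

    all_ids = b""
    for pp in pps:
        # 7-byte digest of the BSON-encoded datapoint, computed before
        # 'candid' is added, as in __next__ above
        pp_hash = blake2b(encode(pp), digest_size=7).digest()
        pp["candid"] = int.from_bytes(pp_hash, byteorder=sys.byteorder)
        all_ids += pp_hash

    # the alert id is the digest of the concatenated datapoint digests
    alert_id = int.from_bytes(
        blake2b(all_ids, digest_size=7).digest(), byteorder=sys.byteorder
    )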
@@ -0,0 +1,118 @@
+ #!/usr/bin/env python
+ # -*- coding: utf-8 -*-
+ # File: timewise/ampel/timewise/alert/load/TimewiseFileLoader.py
+ # License: BSD-3-Clause
+ # Author: Jannis Necker <jannis.necker@gmail.com>
+ # Date: 16.09.2025
+ # Last Modified Date: 16.09.2025
+ # Last Modified By: Jannis Necker <jannis.necker@gmail.com>
+ from typing import Dict, get_args
+
+ import numpy as np
+ import pandas as pd
+ from astropy.table import Table, vstack
+ from ampel.abstract.AbsAlertLoader import AbsAlertLoader
+ from timewise.tables import TableType
+ from timewise.config import TimewiseConfig
+ from timewise.types import TaskID
+
+
+ class TimewiseFileLoader(AbsAlertLoader[Dict]):
+     """
+     Load alerts from one or more files.
+     """
+
+     # path to the timewise download config file
+     timewise_config_file: str
+
+     # name of the id column
+     stock_id_column_name: str
+
+     chunks: list[int] | None = None
+
+     def __init__(self, **kwargs) -> None:
+         super().__init__(**kwargs)
+
+         self.logger.info(f"loading timewise config file {self.timewise_config_file}")
+         timewise_config = TimewiseConfig.from_yaml(self.timewise_config_file)
+         dl = timewise_config.download.build_downloader()
+         self._timewise_backend = dl.backend
+
+         # select the tasks to run
+         _tasks = list(dl.iter_tasks_per_chunk())
+         if self.chunks is not None:
+             self._tasks = [_tasks[i] for i in self.chunks]
+         else:
+             self._tasks = _tasks
+         if self.logger:
+             self.logger.info(
+                 f"Registering {len(self._tasks)} chunk(s) to load: {self._tasks}"
+             )
+
+         self._table_types = get_args(TableType.__origin__)  # type: ignore
+         self._gen = self.iter_stocks()
+
+     @staticmethod
+     def encode_result(res: pd.DataFrame | pd.Series) -> pd.DataFrame:
+         if isinstance(res, pd.Series):
+             return pd.DataFrame([res])
+         return res
+
+     def find_table_from_task(self, task: TaskID) -> TableType:
+         tables = [
+             t for t in self._table_types if t.model_fields["name"].default in str(task)
+         ]
+         assert len(tables) > 0, f"No matching table found for {task}!"
+         assert len(tables) < 2, f"More than one matching table found for {task}!"
+         self.logger.debug(
+             f"{task} is from table {tables[0].model_fields['name'].default}"
+         )
+         return tables[0]
+
+     def iter_stocks(self):
+         # emit all datapoints per stock id
+         # this way ampel runs per object, not per datapoint
+         backend = self._timewise_backend
+         for tasks in self._tasks:
+             data = []
+             for task in tasks:
+                 self.logger.debug(f"reading {task}")
+                 idata = backend.load_data(task)
+
+                 # add the table name
+                 idata["table_name"] = (
+                     self.find_table_from_task(task).model_fields["name"].default
+                 )
+
+                 data.append(idata)
+
+             data = vstack(data).to_pandas()
+
+             # rename the stock id column
+             data.rename(columns={self.stock_id_column_name: "stock_id"}, inplace=True)
+
+             # Find the indices for each stock id. This is much faster than building a
+             # mask in each loop iteration and then accessing the table. Shown below is
+             # a comparison. The top example is the access provided by pandas, which is
+             # again a factor of 3 faster.
+             #
+             # In [45]: %timeit test_df()
+             # 5.62 μs ± 47.2 ns per loop (mean ± std. dev. of 7 runs, 100,000 loops each)
+             #
+             # In [46]: %timeit test_index()
+             # 14.6 μs ± 45 ns per loop (mean ± std. dev. of 7 runs, 100,000 loops each)
+             #
+             # In [47]: %timeit test_mask()
+             # 2.61 ms ± 18 μs per loop (mean ± std. dev. of 7 runs, 100 loops each)
+             data.set_index(data.stock_id, inplace=True)
+
+             # iterate over all stock ids
+             for stock_id in np.unique(data["stock_id"]):
+                 selection = data.loc[stock_id]
+                 yield self.encode_result(selection)
+
+     def __iter__(self):
+         return self
+
+     def __next__(self) -> pd.DataFrame:  # type: ignore
+         return next(self._gen)
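
To illustrate the indexing trick benchmarked in the comment above: setting the stock id as the index turns each per-object selection into a label lookup instead of a full-length boolean mask per iteration. A small self-contained comparison on synthetic data (the column names are hypothetical):

    import numpy as np
    import pandas as pd

    rng = np.random.default_rng(0)
    data = pd.DataFrame({
        "stock_id": rng.integers(0, 1_000, size=100_000),
        "mjd": rng.uniform(56_000, 60_000, size=100_000),
    })

    # slow: build a boolean mask over the full table for every object
    # sel = data[data["stock_id"] == sid]

    # fast: index once, then look up each object by label
    data = data.set_index(data.stock_id)
    for sid in np.unique(data["stock_id"]):
        sel = data.loc[sid]  # a Series if only a single row matches
        if isinstance(sel, pd.Series):
            sel = pd.DataFrame([sel])  # mirrors encode_result above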
@@ -0,0 +1,20 @@
+ #!/usr/bin/env python
+ # File: Ampel-ZTF/ampel/ztf/ingest/ZiCompilerOptions.py
+ # License: BSD-3-Clause
+ # Author: valery brinnel <firstname.lastname@gmail.com>
+ # Date: 14.05.2021
+ # Last Modified Date: 14.05.2021
+ # Last Modified By: valery brinnel <firstname.lastname@gmail.com>
+
+ from typing import Any
+
+ from ampel.model.ingest.CompilerOptions import CompilerOptions
+
+
+ class TiCompilerOptions(CompilerOptions):
+     stock: dict[str, Any] = {"tag": "TIMEWISE"}
+     t0: dict[str, Any] = {"tag": "TIMEWISE"}
+     t1: dict[str, Any] = {"tag": "TIMEWISE"}
+     state_t2: dict[str, Any] = {"tag": "TIMEWISE"}
+     point_t2: dict[str, Any] = {"tag": "TIMEWISE"}
+     stock_t2: dict[str, Any] = {"tag": "TIMEWISE"}
@@ -0,0 +1,91 @@
+ #!/usr/bin/env python
+ # File: timewise/ampel/timewise/ingest/TiDataPointShaper.py
+ # License: BSD-3-Clause
+ # Author: valery brinnel <firstname.lastname@gmail.com>
+ # Date: 14.12.2017
+ # Last Modified Date: 19.09.2025
+ # Last Modified By: Jannis Necker <jannis.necker@gmail.com>
+
+ from collections.abc import Iterable, Sequence
+ from typing import Any
+
+ from bson import encode
+
+ from ampel.abstract.AbsT0Unit import AbsT0Unit
+ from ampel.base.AmpelUnit import AmpelUnit
+ from ampel.content.DataPoint import DataPoint
+ from ampel.types import StockId, Tag
+ from ampel.util.hash import hash_payload
+
+ from ampel.timewise.ingest.tags import tags
+
+
+ class TiDataPointShaperBase(AmpelUnit):
+     """
+     This class 'shapes' datapoints into a format suitable
+     for saving into the ampel database
+     """
+
+     # JD2017 is used to define upper limit primary IDs
+     JD2017: float = 2457754.5
+     #: Byte width of datapoint ids
+     digest_size: int = 8
+
+     # Mandatory implementation
+     def process(self, arg: Iterable[dict[str, Any]], stock: StockId) -> list[DataPoint]:
+         """
+         :param arg: sequence of unshaped pps
+         IMPORTANT:
+         1) This method *modifies* the input dicts (it removes 'candid'),
+         even if the unshaped pps are ReadOnlyDict instances
+         2) 'stock' is not set here on purpose since it would conflict with the $addToSet operation
+         """
+
+         ret_list: list[DataPoint] = []
+         popitem = dict.pop
+
+         for photo_dict in arg:
+             # Photopoint
+             assert photo_dict.get("candid"), "photometry point does not have 'candid'!"
+             ret_list.append(
+                 {  # type: ignore[typeddict-item]
+                     "id": photo_dict["candid"],
+                     "stock": stock,
+                     "tag": tags[photo_dict["table_name"]],
+                     "body": photo_dict,
+                 }
+             )
+
+             popitem(photo_dict, "candid", None)
+
+         return ret_list
+
+     def _create_datapoint(
+         self, stock: StockId, tag: Sequence[Tag], body: dict[str, Any]
+     ) -> DataPoint:
+         """
+         Create a DataPoint from stock, body, and tags, using the hash of the body as id
+         """
+         # ensure that keys are ordered
+         sorted_body = dict(sorted(body.items()))
+         # The following is a comment from the original ampel.ztf.ingest.ZiDataPointShaperBase:
+         # This is not a complete DataPoint as (channel, meta) is missing, set later.
+         # Should these be optional? or added by default?
+         return {  # type: ignore
+             "id": hash_payload(encode(sorted_body), size=-self.digest_size * 8),
+             "stock": stock,
+             "tag": [*tags[body["table_name"]], *tag],
+             "body": sorted_body,
+         }
+
+     def ul_identity(self, uld: dict[str, Any]) -> int:
+         """
+         This should not happen
+         """
+         raise NotImplementedError
+
+
+ class TiDataPointShaper(TiDataPointShaperBase, AbsT0Unit):
+     def process(self, arg: Any, stock: None | StockId = None) -> list[DataPoint]:
+         assert stock is not None
+         return super().process(arg, stock)
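
The key sorting in _create_datapoint is what makes the body hash deterministic, since BSON serialization is order-sensitive: the same body with a different key order encodes, and therefore hashes, differently. A quick standalone check, using blake2b directly in place of ampel's internal hash_payload helper:

    from hashlib import blake2b
    from bson import encode

    a = {"mjd": 57000.1, "ra": 120.5}
    b = {"ra": 120.5, "mjd": 57000.1}  # same content, different insertion order

    assert encode(a) != encode(b)  # BSON preserves key order

    def digest(d: dict) -> bytes:
        # sorting the keys first makes the digest independent of insertion order
        return blake2b(encode(dict(sorted(d.items()))), digest_size=8).digest()

    assert digest(a) == digest(b)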
@@ -0,0 +1,176 @@
+ #!/usr/bin/env python
+ # File: ampel/timewise/ingest/TiMongoMuxer.py
+ # License: BSD-3-Clause
+ # Author: Jannis Necker
+ # Date: 19.09.2025
+ # Last Modified Date: 27.09.2025
+ # Last Modified By: Jannis Necker
+
+ from bisect import bisect_right
+ from contextlib import suppress
+ from typing import Any
+
+
+ from ampel.abstract.AbsT0Muxer import AbsT0Muxer
+ from ampel.content.DataPoint import DataPoint
+ from ampel.types import DataPointId, StockId
+ from ampel.util.mappings import unflatten_dict
+
+
+ class ConcurrentUpdateError(Exception):
+     """
+     Raised when the t0 collection was updated during ingestion
+     """
+
+     ...
+
+
+ class TiMongoMuxer(AbsT0Muxer):
+     """
+     This class compares info between alert and DB so that only the needed info is ingested.
+     It checks for duplicate datapoints.
+     """
+
+     # Standard projection used when checking the DB for existing PPS/ULS
+     projection = {
+         "_id": 0,
+         "id": 1,
+         "tag": 1,
+         "channel": 1,
+         "stock": 1,
+         "body.mjd": 1,
+         "body.w1_flux": 1,
+         "body.w1_flux_error": 1,
+         "body.w1_mag": 1,
+         "body.w1_mag_error": 1,
+         "body.w2_flux": 1,
+         "body.w2_flux_error": 1,
+         "body.w2_mag": 1,
+         "body.w2_mag_error": 1,
+         "body.ra": 1,
+         "body.dec": 1,
+     }
+
+     def __init__(self, **kwargs) -> None:
+         super().__init__(**kwargs)
+
+         # used to check potentially already inserted pps
+         self._photo_col = self.context.db.get_collection("t0")
+         self._projection_spec = unflatten_dict(self.projection)
+
+     def process(
+         self, dps: list[DataPoint], stock_id: None | StockId = None
+     ) -> tuple[None | list[DataPoint], None | list[DataPoint]]:
+         """
+         :param dps: datapoints from alert
+         :param stock_id: stock id from alert
+         Attempt to determine which pps/uls should be inserted into the t0 collection,
+         and which ones should be marked as superseded.
+         """
+         # IPAC occasionally issues multiple subtraction candidates for the same
+         # exposure and source, and these may be received in parallel by two
+         # AlertConsumers.
+         for _ in range(10):
+             with suppress(ConcurrentUpdateError):
+                 return self._process(dps, stock_id)
+         raise ConcurrentUpdateError(
+             f"More than 10 iterations ingesting alert {dps[0]['id']}"
+         )
+
+     # NB: this 1-liner is a separate method to provide a patch point for race condition testing
+     def _get_dps(self, stock_id: None | StockId) -> list[DataPoint]:
+         return list(self._photo_col.find({"stock": stock_id}, self.projection))
+
+     def _process(
+         self, dps: list[DataPoint], stock_id: None | StockId = None
+     ) -> tuple[None | list[DataPoint], None | list[DataPoint]]:
+         """
+         :param dps: datapoints from alert
+         :param stock_id: stock id from alert
+         Attempt to determine which pps/uls should be inserted into the t0 collection,
+         and which ones should be marked as superseded.
+         """
+
+         # Part 1: gather info from DB and alert
+         #######################################
+
+         # New pps/uls lists for db-loaded datapoints
+         dps_db = self._get_dps(stock_id)
+
+         # python set of ids of datapoints from DB
+         ids_dps_db = {el["id"] for el in dps_db}
+
+         # Create set with new datapoint ids from alert
+         new_dps = [dp for dp in dps if dp["id"] not in ids_dps_db]
+         ids_dps_alert = {el["id"] for el in new_dps}
+
+         if len(ids_dps_alert) == 0:
+             self.logger.debug(f"{str(stock_id)}: no new data points")
+             return None, None
+
+         # uniquify photopoints by mjd, ra, and dec
+         # and make sure there are no duplicates
+         unique_dps_ids: dict[tuple[float, float, float], list[DataPointId]] = {}
+
+         for dp in dps_db + new_dps:
+             # jd alone is not enough for matching pps because each time is associated with
+             # two filters! Also, there can be multiple sources within the same frame, which
+             # leads to duplicate MJD and FID. Therefore, check the position in addition.
+             key = (
+                 dp["body"]["mjd"],
+                 dp["body"]["ra"],
+                 dp["body"]["dec"],
+             )
+
+             if target := unique_dps_ids.get(key):
+                 # insert id in order
+                 idx = bisect_right(target, dp["id"])
+                 if idx == 0 or target[idx - 1] != dp["id"]:
+                     target.insert(idx, dp["id"])
+             else:
+                 unique_dps_ids[key] = [dp["id"]]
+
+         # make sure no duplicate datapoints exist
+         for key, simultaneous_dps in unique_dps_ids.items():
+             dps_db_wrong = [dp for dp in dps_db if dp["id"] in simultaneous_dps]
+             dps_wrong = [dp for dp in dps if dp["id"] in simultaneous_dps]
+             msg = (
+                 f"stockID {str(stock_id)}: Duplicate photopoints at {key}!\nDPS from DB:"
+                 f"\n{dps_db_wrong}\nNew DPS:\n{dps_wrong}"
+             )
+             assert len(simultaneous_dps) == 1, msg
+
+         # Part 2: update new datapoints that are already superseded
+         ############################################################
+
+         # Difference between candids from the alert and candids present in DB
+         ids_dps_to_insert = ids_dps_alert - ids_dps_db
+         dps_to_insert = [dp for dp in dps if dp["id"] in ids_dps_to_insert]
+         dps_to_combine = [
+             dp for dp in dps + dps_db if dp["id"] in ids_dps_alert | ids_dps_db
+         ]
+         self.logger.debug(
+             f"Got {len(ids_dps_alert)} datapoints from alerts, "
+             f"found {len(dps_db)} in DB, "
+             f"inserting {len(dps_to_insert)} datapoints, "
+             f"combining {len(dps_to_combine)} datapoints"
+         )
+
+         return dps_to_insert, dps_to_combine
+
+     def _project(self, doc, projection) -> DataPoint:
+         out: dict[str, Any] = {}
+         for key, spec in projection.items():
+             if key not in doc:
+                 continue
+
+             if isinstance(spec, dict):
+                 item = doc[key]
+                 if isinstance(item, list):
+                     out[key] = [self._project(v, spec) for v in item]
+                 elif isinstance(item, dict):
+                     out[key] = self._project(item, spec)
+             else:
+                 out[key] = doc[key]
+
+         return out  # type: ignore[return-value]
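
The ordered-insert pattern in _process keeps each per-position id list sorted and duplicate-free without re-sorting: bisect_right finds the insertion point, and the element immediately to its left reveals whether the id is already present. In isolation, with hypothetical integer ids:

    from bisect import bisect_right

    target: list[int] = []
    for dp_id in [5, 2, 9, 2, 5]:
        idx = bisect_right(target, dp_id)
        # duplicate iff the id already sits immediately left of the insertion point
        if idx == 0 or target[idx - 1] != dp_id:
            target.insert(idx, dp_id)

    print(target)  # [2, 5, 9]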
@@ -0,0 +1,15 @@
+ #!/usr/bin/env python
+ # File: timewise/ampel/timewise/ingest/tags.py
+ # License: BSD-3-Clause
+ # Author: Jannis Necker <jannis.necker@gmail.com>
+ # Date: 19.09.2025
+ # Last Modified Date: 19.09.2025
+ # Last Modified By: Jannis Necker <jannis.necker@gmail.com>
+
+
+ # tags is used by TiDataPointShaper
+ # key: table name
+ tags: dict[str, list[str]] = {
+     "allwise_p3as_mep": ["WISE", "TIMEWISE", "allwise_p3as_mep"],
+     "neowiser_p1bs_psd": ["WISE", "TIMEWISE", "neowiser_p1bs_psd"],
+ }
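
Usage is a plain lookup by table name, as done in TiDataPointShaperBase.process (assuming the package is installed so the module path resolves):

    from ampel.timewise.ingest.tags import tags

    print(tags["neowiser_p1bs_psd"])  # ['WISE', 'TIMEWISE', 'neowiser_p1bs_psd']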