timewise 1.0.0a1__py3-none-any.whl → 1.0.0a5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,113 @@
+ #!/usr/bin/env python
+ # -*- coding: utf-8 -*-
+ # File: timewise/ampel/timewise/alert/TimewiseAlertSupplier.py
+ # License: BSD-3-Clause
+ # Author: Jannis Necker <jannis.necker@gmail.com>
+ # Date: 16.09.2025
+ # Last Modified Date: 16.09.2025
+ # Last Modified By: Jannis Necker <jannis.necker@gmail.com>
+
+ import sys
+ from hashlib import blake2b
+ from typing import Literal, List
+
+ import pandas as pd
+
+ from bson import encode
+
+ from ampel.alert.AmpelAlert import AmpelAlert
+ from ampel.alert.BaseAlertSupplier import BaseAlertSupplier
+ from ampel.view.ReadOnlyDict import ReadOnlyDict
+
+
+ class TimewiseAlertSupplier(BaseAlertSupplier):
+     """
+     Iterable class that, for each transient name provided by the underlying
+     alert_loader, returns an AmpelAlert instance.
+     """
+
+     stat_pps: int = 0
+     stat_uls: int = 0
+
+     dpid: Literal["hash", "inc"] = "hash"
+     # external_directory: Optional[ str ]
+     # deserialize: None | Literal["avro", "json"]
+
+     bands: List[str] = ["w1", "w2"]
+
+     def __init__(self, **kwargs) -> None:
+         super().__init__(**kwargs)
+         self.counter = 0 if self.dpid == "hash" else 1
+
+     def __next__(self) -> AmpelAlert:
+         """
+         :returns: an AmpelAlert with a structure that the AlertProcessor understands
+         :raises StopIteration: when the alert_loader runs dry.
+         :raises AttributeError: if alert_loader was not set properly before this method is called
+         """
+         table: pd.DataFrame = self._deserialize(next(self.alert_loader))  # type: ignore
+
+         stock_ids = table["stock_id"].unique()
+         assert len(stock_ids) == 1
+         stock_id = stock_ids[0]
+
+         # turn the table into a list of dictionaries that ampel understands
+         all_ids = b""
+         pps = []
+
+         # remove the _ep suffix from AllWISE MEP column names
+         columns_to_rename = [c for c in table.columns if c.endswith("_ep")]
+         if len(columns_to_rename):
+             rename = {
+                 c: c.replace("_ep", "")
+                 for c in columns_to_rename
+                 if c.replace("_ep", "") not in table.columns
+             }
+             if rename:
+                 # in this case only the allwise column with the _ep suffix exists
+                 # and we can simply rename the columns
+                 table.rename(columns=rename, inplace=True)
+
+             move = {
+                 c: c.replace("_ep", "")
+                 for c in columns_to_rename
+                 if c.replace("_ep", "") in table.columns
+             }
+             if move:
+                 # in this case the column already exists because the neowise data
+                 # is present; we have to insert the values from the columns with
+                 # the _ep suffix into the respective neowise columns
+                 for c, nc in move.items():
+                     na_mask = table[nc].isna()
+                     table.loc[na_mask, nc] = table[c][na_mask]
+                 pd.options.mode.chained_assignment = None
+                 table.drop(columns=list(move), inplace=True)
+                 pd.options.mode.chained_assignment = "warn"
+
+         for i, row in table.iterrows():
+             # convert table row to dict, convert data types from numpy to native python
+             # respect masked fields and convert them to None
+             pp = {k: None if pd.isna(v) else v for k, v in row.to_dict().items()}
+             pp_hash = blake2b(encode(pp), digest_size=7).digest()
+             if self.counter:
+                 pp["candid"] = self.counter
+                 self.counter += 1
+             else:
+                 pp["candid"] = int.from_bytes(pp_hash, byteorder=sys.byteorder)
+
+             all_ids += pp_hash
+             pps.append(ReadOnlyDict(pp))
+
+         if not pps:
+             return self.__next__()
+
+         # Update stats
+         self.stat_pps += len(pps)
+
+         return AmpelAlert(
+             id=int.from_bytes(  # alert id
+                 blake2b(all_ids, digest_size=7).digest(), byteorder=sys.byteorder
+             ),
+             stock=int(stock_id),  # internal ampel id
+             datapoints=tuple(pps),
+         )
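
For orientation, the "hash" dpid mode above derives both the per-datapoint candid and the alert id from BLAKE2b digests of the BSON-encoded payloads. Below is a minimal standalone sketch of that scheme, using only hashlib and pymongo's bson; the example payloads are made up:

    import sys
    from hashlib import blake2b
    from bson import encode  # ships with pymongo

    # hypothetical datapoints, standing in for rows of the deserialized table
    pps = [
        {"mjd": 57000.1, "w1_mag": 15.2, "stock_id": 42},
        {"mjd": 57000.2, "w1_mag": 15.3, "stock_id": 42},
    ]

    all_ids = b""
    for pp in pps:
        # 7-byte digest of the BSON-encoded datapoint, computed before
        # 'candid' is added, as in __next__ above
        pp_hash = blake2b(encode(pp), digest_size=7).digest()
        pp["candid"] = int.from_bytes(pp_hash, byteorder=sys.byteorder)
        all_ids += pp_hash

    # the alert id is the digest of the concatenated datapoint digests
    alert_id = int.from_bytes(
        blake2b(all_ids, digest_size=7).digest(), byteorder=sys.byteorder
    )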
@@ -0,0 +1,118 @@
+ #!/usr/bin/env python
+ # -*- coding: utf-8 -*-
+ # File: timewise/ampel/timewise/alert/load/TimewiseFileLoader.py
+ # License: BSD-3-Clause
+ # Author: Jannis Necker <jannis.necker@gmail.com>
+ # Date: 16.09.2025
+ # Last Modified Date: 16.09.2025
+ # Last Modified By: Jannis Necker <jannis.necker@gmail.com>
+ from typing import Dict, get_args
+
+ import numpy as np
+ import pandas as pd
+ from astropy.table import Table, vstack
+ from ampel.abstract.AbsAlertLoader import AbsAlertLoader
+ from timewise.tables import TableType
+ from timewise.config import TimewiseConfig
+ from timewise.types import TaskID
+
+
+ class TimewiseFileLoader(AbsAlertLoader[Dict]):
+     """
+     Load alerts from one or more files.
+     """
+
+     # path to the timewise download config file
+     timewise_config_file: str
+
+     # name of the id column
+     stock_id_column_name: str
+
+     chunks: list[int] | None = None
+
+     def __init__(self, **kwargs) -> None:
+         super().__init__(**kwargs)
+
+         self.logger.info(f"loading timewise config file {self.timewise_config_file}")
+         timewise_config = TimewiseConfig.from_yaml(self.timewise_config_file)
+         dl = timewise_config.download.build_downloader()
+         self._timewise_backend = dl.backend
+
+         # select the tasks to run
+         _tasks = list(dl.iter_tasks_per_chunk())
+         if self.chunks is not None:
+             self._tasks = [_tasks[i] for i in self.chunks]
+         else:
+             self._tasks = _tasks
+         if self.logger:
+             self.logger.info(
+                 f"Registering {len(self._tasks)} chunk(s) to load: {self._tasks}"
+             )
+
+         self._table_types = get_args(TableType.__origin__)  # type: ignore
+         self._gen = self.iter_stocks()
+
+     @staticmethod
+     def encode_result(res: pd.DataFrame | pd.Series) -> pd.DataFrame:
+         if isinstance(res, pd.Series):
+             return pd.DataFrame([res])
+         return res
+
+     def find_table_from_task(self, task: TaskID) -> TableType:
+         tables = [
+             t for t in self._table_types if t.model_fields["name"].default in str(task)
+         ]
+         assert len(tables) > 0, f"No matching table found for {task}!"
+         assert len(tables) < 2, f"More than one matching table found for {task}!"
+         self.logger.debug(
+             f"{task} is from table {tables[0].model_fields['name'].default}"
+         )
+         return tables[0]
+
+     def iter_stocks(self):
+         # emit all datapoints per stock id
+         # this way ampel runs per object, not per datapoint
+         backend = self._timewise_backend
+         for tasks in self._tasks:
+             data = []
+             for task in tasks:
+                 self.logger.debug(f"reading {task}")
+                 idata = backend.load_data(task)
+
+                 # add the table name
+                 idata["table_name"] = (
+                     self.find_table_from_task(task).model_fields["name"].default
+                 )
+
+                 data.append(idata)
+
+             data = vstack(data).to_pandas()
+
+             # rename the stock id column
+             data.rename(columns={self.stock_id_column_name: "stock_id"}, inplace=True)
+
+             # Find the indices for each stock id. This is much faster than building a
+             # mask in each loop iteration and then accessing the table. Shown below is
+             # a comparison. The top example is the access provided by pandas, which is
+             # again a factor of 3 faster.
+             #
+             # In [45]: %timeit test_df()
+             # 5.62 μs ± 47.2 ns per loop (mean ± std. dev. of 7 runs, 100,000 loops each)
+             #
+             # In [46]: %timeit test_index()
+             # 14.6 μs ± 45 ns per loop (mean ± std. dev. of 7 runs, 100,000 loops each)
+             #
+             # In [47]: %timeit test_mask()
+             # 2.61 ms ± 18 μs per loop (mean ± std. dev. of 7 runs, 100 loops each)
+             data.set_index(data.stock_id, inplace=True)
+
+             # iterate over all stock ids
+             for stock_id in np.unique(data["stock_id"]):
+                 selection = data.loc[stock_id]
+                 yield self.encode_result(selection)
+
+     def __iter__(self):
+         return self
+
+     def __next__(self) -> pd.DataFrame:  # type: ignore
+         return next(self._gen)
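
To illustrate the indexing trick benchmarked in the comment above: setting the stock id as the index turns each per-object selection into a label lookup instead of a full-length boolean mask per iteration. A small self-contained comparison on synthetic data (the column names are hypothetical):

    import numpy as np
    import pandas as pd

    rng = np.random.default_rng(0)
    data = pd.DataFrame({
        "stock_id": rng.integers(0, 1_000, size=100_000),
        "mjd": rng.uniform(56_000, 60_000, size=100_000),
    })

    # slow: build a boolean mask over the full table for every object
    # sel = data[data["stock_id"] == sid]

    # fast: index once, then look up each object by label
    data = data.set_index(data.stock_id)
    for sid in np.unique(data["stock_id"]):
        sel = data.loc[sid]  # a Series if only a single row matches
        if isinstance(sel, pd.Series):
            sel = pd.DataFrame([sel])  # mirrors encode_result above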
@@ -0,0 +1,20 @@
+ #!/usr/bin/env python
+ # File: Ampel-ZTF/ampel/ztf/ingest/ZiCompilerOptions.py
+ # License: BSD-3-Clause
+ # Author: valery brinnel <firstname.lastname@gmail.com>
+ # Date: 14.05.2021
+ # Last Modified Date: 14.05.2021
+ # Last Modified By: valery brinnel <firstname.lastname@gmail.com>
+
+ from typing import Any
+
+ from ampel.model.ingest.CompilerOptions import CompilerOptions
+
+
+ class TiCompilerOptions(CompilerOptions):
+     stock: dict[str, Any] = {"tag": "TIMEWISE"}
+     t0: dict[str, Any] = {"tag": "TIMEWISE"}
+     t1: dict[str, Any] = {"tag": "TIMEWISE"}
+     state_t2: dict[str, Any] = {"tag": "TIMEWISE"}
+     point_t2: dict[str, Any] = {"tag": "TIMEWISE"}
+     stock_t2: dict[str, Any] = {"tag": "TIMEWISE"}
@@ -0,0 +1,91 @@
+ #!/usr/bin/env python
+ # File: timewise/ampel/timewise/ingest/TiDataPointShaper.py
+ # License: BSD-3-Clause
+ # Author: valery brinnel <firstname.lastname@gmail.com>
+ # Date: 14.12.2017
+ # Last Modified Date: 19.09.2025
+ # Last Modified By: Jannis Necker <jannis.necker@gmail.com>
+
+ from collections.abc import Iterable, Sequence
+ from typing import Any
+
+ from bson import encode
+
+ from ampel.abstract.AbsT0Unit import AbsT0Unit
+ from ampel.base.AmpelUnit import AmpelUnit
+ from ampel.content.DataPoint import DataPoint
+ from ampel.types import StockId, Tag
+ from ampel.util.hash import hash_payload
+
+ from ampel.timewise.ingest.tags import tags
+
+
+ class TiDataPointShaperBase(AmpelUnit):
+     """
+     This class 'shapes' datapoints into a format suitable
+     for saving into the ampel database
+     """
+
+     # JD2017 is used to define upper limit primary IDs
+     JD2017: float = 2457754.5
+     #: Byte width of datapoint ids
+     digest_size: int = 8
+
+     # Mandatory implementation
+     def process(self, arg: Iterable[dict[str, Any]], stock: StockId) -> list[DataPoint]:
+         """
+         :param arg: sequence of unshaped pps
+         IMPORTANT:
+         1) This method *modifies* the input dicts (it removes 'candid'),
+         even if the unshaped pps are ReadOnlyDict instances
+         2) 'stock' is not set here on purpose since it would conflict with the $addToSet operation
+         """
+
+         ret_list: list[DataPoint] = []
+         popitem = dict.pop
+
+         for photo_dict in arg:
+             # Photopoint
+             assert photo_dict.get("candid"), "photometry point does not have 'candid'!"
+             ret_list.append(
+                 {  # type: ignore[typeddict-item]
+                     "id": photo_dict["candid"],
+                     "stock": stock,
+                     "tag": tags[photo_dict["table_name"]],
+                     "body": photo_dict,
+                 }
+             )
+
+             popitem(photo_dict, "candid", None)
+
+         return ret_list
+
+     def _create_datapoint(
+         self, stock: StockId, tag: Sequence[Tag], body: dict[str, Any]
+     ) -> DataPoint:
+         """
+         Create a DataPoint from stock, body, and tags, using the hash of the body as id
+         """
+         # ensure that keys are ordered
+         sorted_body = dict(sorted(body.items()))
+         # The following is a comment from the original ampel.ztf.ingest.ZiDataPointShaperBase:
+         # This is not a complete DataPoint as (channel, meta) is missing, set later.
+         # Should these be optional? or added by default?
+         return {  # type: ignore
+             "id": hash_payload(encode(sorted_body), size=-self.digest_size * 8),
+             "stock": stock,
+             "tag": [*tags[body["table_name"]], *tag],
+             "body": sorted_body,
+         }
+
+     def ul_identity(self, uld: dict[str, Any]) -> int:
+         """
+         This should not happen
+         """
+         raise NotImplementedError
+
+
+ class TiDataPointShaper(TiDataPointShaperBase, AbsT0Unit):
+     def process(self, arg: Any, stock: None | StockId = None) -> list[DataPoint]:
+         assert stock is not None
+         return super().process(arg, stock)
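
The key sorting in _create_datapoint is what makes the body hash deterministic, since BSON serialization is order-sensitive: the same body with a different key order encodes, and therefore hashes, differently. A quick standalone check, using blake2b directly in place of ampel's internal hash_payload helper:

    from hashlib import blake2b
    from bson import encode

    a = {"mjd": 57000.1, "ra": 120.5}
    b = {"ra": 120.5, "mjd": 57000.1}  # same content, different insertion order

    assert encode(a) != encode(b)  # BSON preserves key order

    def digest(d: dict) -> bytes:
        # sorting the keys first makes the digest independent of insertion order
        return blake2b(encode(dict(sorted(d.items()))), digest_size=8).digest()

    assert digest(a) == digest(b)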
@@ -0,0 +1,176 @@
+ #!/usr/bin/env python
+ # File: ampel/timewise/ingest/TiMongoMuxer.py
+ # License: BSD-3-Clause
+ # Author: Jannis Necker
+ # Date: 19.09.2025
+ # Last Modified Date: 27.09.2025
+ # Last Modified By: Jannis Necker
+
+ from bisect import bisect_right
+ from contextlib import suppress
+ from typing import Any
+
+
+ from ampel.abstract.AbsT0Muxer import AbsT0Muxer
+ from ampel.content.DataPoint import DataPoint
+ from ampel.types import DataPointId, StockId
+ from ampel.util.mappings import unflatten_dict
+
+
+ class ConcurrentUpdateError(Exception):
+     """
+     Raised when the t0 collection was updated during ingestion
+     """
+
+     ...
+
+
+ class TiMongoMuxer(AbsT0Muxer):
+     """
+     This class compares info between alert and DB so that only the needed info is ingested.
+     It checks for duplicate datapoints.
+     """
+
+     # Standard projection used when checking the DB for existing PPS/ULS
+     projection = {
+         "_id": 0,
+         "id": 1,
+         "tag": 1,
+         "channel": 1,
+         "stock": 1,
+         "body.mjd": 1,
+         "body.w1_flux": 1,
+         "body.w1_flux_error": 1,
+         "body.w1_mag": 1,
+         "body.w1_mag_error": 1,
+         "body.w2_flux": 1,
+         "body.w2_flux_error": 1,
+         "body.w2_mag": 1,
+         "body.w2_mag_error": 1,
+         "body.ra": 1,
+         "body.dec": 1,
+     }
+
+     def __init__(self, **kwargs) -> None:
+         super().__init__(**kwargs)
+
+         # used to check potentially already inserted pps
+         self._photo_col = self.context.db.get_collection("t0")
+         self._projection_spec = unflatten_dict(self.projection)
+
+     def process(
+         self, dps: list[DataPoint], stock_id: None | StockId = None
+     ) -> tuple[None | list[DataPoint], None | list[DataPoint]]:
+         """
+         :param dps: datapoints from alert
+         :param stock_id: stock id from alert
+         Attempt to determine which pps/uls should be inserted into the t0 collection,
+         and which ones should be marked as superseded.
+         """
+         # IPAC occasionally issues multiple subtraction candidates for the same
+         # exposure and source, and these may be received in parallel by two
+         # AlertConsumers.
+         for _ in range(10):
+             with suppress(ConcurrentUpdateError):
+                 return self._process(dps, stock_id)
+         raise ConcurrentUpdateError(
+             f"More than 10 iterations ingesting alert {dps[0]['id']}"
+         )
+
+     # NB: this 1-liner is a separate method to provide a patch point for race condition testing
+     def _get_dps(self, stock_id: None | StockId) -> list[DataPoint]:
+         return list(self._photo_col.find({"stock": stock_id}, self.projection))
+
+     def _process(
+         self, dps: list[DataPoint], stock_id: None | StockId = None
+     ) -> tuple[None | list[DataPoint], None | list[DataPoint]]:
+         """
+         :param dps: datapoints from alert
+         :param stock_id: stock id from alert
+         Attempt to determine which pps/uls should be inserted into the t0 collection,
+         and which ones should be marked as superseded.
+         """
+
+         # Part 1: gather info from DB and alert
+         #######################################
+
+         # New pps/uls lists for db-loaded datapoints
+         dps_db = self._get_dps(stock_id)
+
+         # python set of ids of datapoints from DB
+         ids_dps_db = {el["id"] for el in dps_db}
+
+         # Create set with new datapoint ids from alert
+         new_dps = [dp for dp in dps if dp["id"] not in ids_dps_db]
+         ids_dps_alert = {el["id"] for el in new_dps}
+
+         if len(ids_dps_alert) == 0:
+             self.logger.debug(f"{str(stock_id)}: no new data points")
+             return None, None
+
+         # uniquify photopoints by mjd, ra, and dec
+         # and make sure there are no duplicates
+         unique_dps_ids: dict[tuple[float, float, float], list[DataPointId]] = {}
+
+         for dp in dps_db + new_dps:
+             # jd alone is not enough for matching pps because each time is associated with
+             # two filters! Also, there can be multiple sources within the same frame, which
+             # leads to duplicate MJD and FID. Therefore, check the position in addition.
+             key = (
+                 dp["body"]["mjd"],
+                 dp["body"]["ra"],
+                 dp["body"]["dec"],
+             )
+
+             if target := unique_dps_ids.get(key):
+                 # insert id in order
+                 idx = bisect_right(target, dp["id"])
+                 if idx == 0 or target[idx - 1] != dp["id"]:
+                     target.insert(idx, dp["id"])
+             else:
+                 unique_dps_ids[key] = [dp["id"]]
+
+         # make sure no duplicate datapoints exist
+         for key, simultaneous_dps in unique_dps_ids.items():
+             dps_db_wrong = [dp for dp in dps_db if dp["id"] in simultaneous_dps]
+             dps_wrong = [dp for dp in dps if dp["id"] in simultaneous_dps]
+             msg = (
+                 f"stockID {str(stock_id)}: Duplicate photopoints at {key}!\nDPS from DB:"
+                 f"\n{dps_db_wrong}\nNew DPS:\n{dps_wrong}"
+             )
+             assert len(simultaneous_dps) == 1, msg
+
+         # Part 2: update new datapoints that are already superseded
+         ############################################################
+
+         # Difference between candids from the alert and candids present in DB
+         ids_dps_to_insert = ids_dps_alert - ids_dps_db
+         dps_to_insert = [dp for dp in dps if dp["id"] in ids_dps_to_insert]
+         dps_to_combine = [
+             dp for dp in dps + dps_db if dp["id"] in ids_dps_alert | ids_dps_db
+         ]
+         self.logger.debug(
+             f"Got {len(ids_dps_alert)} datapoints from alerts, "
+             f"found {len(dps_db)} in DB, "
+             f"inserting {len(dps_to_insert)} datapoints, "
+             f"combining {len(dps_to_combine)} datapoints"
+         )
+
+         return dps_to_insert, dps_to_combine
+
+     def _project(self, doc, projection) -> DataPoint:
+         out: dict[str, Any] = {}
+         for key, spec in projection.items():
+             if key not in doc:
+                 continue
+
+             if isinstance(spec, dict):
+                 item = doc[key]
+                 if isinstance(item, list):
+                     out[key] = [self._project(v, spec) for v in item]
+                 elif isinstance(item, dict):
+                     out[key] = self._project(item, spec)
+             else:
+                 out[key] = doc[key]
+
+         return out  # type: ignore[return-value]
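
The ordered-insert pattern in _process keeps each per-position id list sorted and duplicate-free without re-sorting: bisect_right finds the insertion point, and the element immediately to its left reveals whether the id is already present. In isolation, with hypothetical integer ids:

    from bisect import bisect_right

    target: list[int] = []
    for dp_id in [5, 2, 9, 2, 5]:
        idx = bisect_right(target, dp_id)
        # duplicate iff the id already sits immediately left of the insertion point
        if idx == 0 or target[idx - 1] != dp_id:
            target.insert(idx, dp_id)

    print(target)  # [2, 5, 9]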
@@ -0,0 +1,15 @@
+ #!/usr/bin/env python
+ # File: timewise/ampel/timewise/ingest/tags.py
+ # License: BSD-3-Clause
+ # Author: Jannis Necker <jannis.necker@gmail.com>
+ # Date: 19.09.2025
+ # Last Modified Date: 19.09.2025
+ # Last Modified By: Jannis Necker <jannis.necker@gmail.com>
+
+
+ # tags is used by TiDataPointShaper
+ # key: table name
+ tags: dict[str, list[str]] = {
+     "allwise_p3as_mep": ["WISE", "TIMEWISE", "allwise_p3as_mep"],
+     "neowiser_p1bs_psd": ["WISE", "TIMEWISE", "neowiser_p1bs_psd"],
+ }
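
Usage is a plain lookup by table name, as done in TiDataPointShaperBase.process (assuming the package is installed so the module path resolves):

    from ampel.timewise.ingest.tags import tags

    print(tags["neowiser_p1bs_psd"])  # ['WISE', 'TIMEWISE', 'neowiser_p1bs_psd']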