timewise 0.5.3__py3-none-any.whl → 1.0.0a1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (52)
  1. timewise/__init__.py +1 -5
  2. timewise/backend/__init__.py +6 -0
  3. timewise/backend/base.py +36 -0
  4. timewise/backend/filesystem.py +80 -0
  5. timewise/chunking.py +50 -0
  6. timewise/cli.py +117 -11
  7. timewise/config.py +34 -0
  8. timewise/io/__init__.py +1 -0
  9. timewise/io/config.py +64 -0
  10. timewise/io/download.py +302 -0
  11. timewise/io/stable_tap.py +121 -0
  12. timewise/plot/__init__.py +3 -0
  13. timewise/plot/diagnostic.py +242 -0
  14. timewise/plot/lightcurve.py +112 -0
  15. timewise/plot/panstarrs.py +260 -0
  16. timewise/plot/sdss.py +109 -0
  17. timewise/process/__init__.py +2 -0
  18. timewise/process/config.py +30 -0
  19. timewise/process/interface.py +143 -0
  20. timewise/process/keys.py +10 -0
  21. timewise/process/stacking.py +310 -0
  22. timewise/process/template.yml +49 -0
  23. timewise/query/__init__.py +6 -0
  24. timewise/query/base.py +45 -0
  25. timewise/query/positional.py +40 -0
  26. timewise/tables/__init__.py +10 -0
  27. timewise/tables/allwise_p3as_mep.py +22 -0
  28. timewise/tables/base.py +9 -0
  29. timewise/tables/neowiser_p1bs_psd.py +22 -0
  30. timewise/types.py +30 -0
  31. timewise/util/backoff.py +12 -0
  32. timewise/util/csv_utils.py +12 -0
  33. timewise/util/error_threading.py +70 -0
  34. timewise/util/visits.py +33 -0
  35. timewise-1.0.0a1.dist-info/METADATA +205 -0
  36. timewise-1.0.0a1.dist-info/RECORD +39 -0
  37. {timewise-0.5.3.dist-info → timewise-1.0.0a1.dist-info}/WHEEL +1 -1
  38. timewise-1.0.0a1.dist-info/entry_points.txt +3 -0
  39. timewise/big_parent_sample.py +0 -106
  40. timewise/config_loader.py +0 -157
  41. timewise/general.py +0 -52
  42. timewise/parent_sample_base.py +0 -89
  43. timewise/point_source_utils.py +0 -68
  44. timewise/utils.py +0 -558
  45. timewise/wise_bigdata_desy_cluster.py +0 -1407
  46. timewise/wise_data_base.py +0 -2027
  47. timewise/wise_data_by_visit.py +0 -672
  48. timewise/wise_flux_conversion_correction.dat +0 -19
  49. timewise-0.5.3.dist-info/METADATA +0 -55
  50. timewise-0.5.3.dist-info/RECORD +0 -17
  51. timewise-0.5.3.dist-info/entry_points.txt +0 -3
  52. {timewise-0.5.3.dist-info → timewise-1.0.0a1.dist-info/licenses}/LICENSE +0 -0
timewise/process/stacking.py ADDED
@@ -0,0 +1,310 @@
1
+ import logging
2
+ from typing import cast, Dict, Any
3
+
4
+ from scipy import stats
5
+ import numpy as np
6
+ from numpy import typing as npt
7
+ import pandas as pd
8
+
9
+ from ..util.visits import get_visit_map
10
+ from timewise.process import keys
11
+
12
+
13
+ logger = logging.getLogger(__name__)
14
+
15
+
16
+ # zero points come from https://wise2.ipac.caltech.edu/docs/release/allsky/expsup/sec4_4h.html#conv2flux
17
+ # published in Jarrett et al. (2011): https://ui.adsabs.harvard.edu/abs/2011ApJ...735..112J/abstract
18
+ MAGNITUDE_ZEROPOINTS: Dict[str, float] = {"w1": 20.752, "w2": 19.596}
19
+ # in Jy
20
+ FLUX_ZEROPOINTS = {"w1": 309.54, "w2": 171.787}
21
+
22
+
23
+ def calculate_epochs(
24
+ f: pd.Series,
25
+ e: pd.Series,
26
+ visit_mask: npt.NDArray[np.int64],
27
+ counts: npt.NDArray[np.int64],
28
+ remove_outliers: bool,
29
+ outlier_mask: npt.NDArray[np.bool_] | None = None,
30
+ ) -> tuple[
31
+ npt.NDArray[np.float64],
32
+ npt.NDArray[np.float64],
33
+ npt.NDArray[np.bool_],
34
+ npt.NDArray[np.bool_],
35
+ npt.NDArray[np.bool_],
36
+ npt.NDArray[np.int64],
37
+ ]:
38
+ """
39
+ Calculates per-visit statistics (median flux and uncertainty) of a raw lightcurve, given the visit mapping.
40
+
41
+ :param f: the fluxes
42
+ :type f: pd.Series
43
+ :param e: the flux errors
44
+ :type e: pd.Series
45
+ :param visit_mask: the visit mask
46
+ :type visit_mask: np.array
47
+ :param counts: the counts
48
+ :type counts: np.array
49
+ :param remove_outliers: whether to remove outliers
50
+ :type remove_outliers: bool
51
+ :param outlier_mask: the outlier mask
52
+ :type outlier_mask: np.array
53
+ :return: tuple of (median, uncertainty, upper-limit flag per visit, outlier mask, use mask, number of points per visit)
54
+ :rtype: tuple of np.ndarray
55
+ """
56
+
57
+ if len(f) == 0:
58
+ return (
59
+ np.array([]),
60
+ np.array([]),
61
+ np.array([]),
62
+ np.array([]),
63
+ np.array([]),
64
+ np.array([]),
65
+ )
66
+
67
+ u_lims = pd.isna(e)
68
+ nan_mask = pd.isna(f)
69
+
70
+ # --------------------- remove outliers in the bins ---------------------- #
71
+
72
+ # if we do not want to clean outliers just set the threshold to infinity
73
+ outlier_thresh = np.inf if not remove_outliers else 20
74
+
75
+ # set up empty masks
76
+ outlier_mask = cast(
77
+ npt.NDArray[np.bool_],
78
+ (np.array([False] * len(f)) if outlier_mask is None else outlier_mask),
79
+ )
80
+ median = np.full_like(counts, np.nan, dtype=float)
81
+ u = np.full_like(counts, np.nan, dtype=float)
82
+ use_mask = np.full_like(counts, False, dtype=bool)
83
+ n_points = counts
84
+
85
+ # set up dummy values for number of remaining outliers
86
+ n_remaining_outlier = np.inf
87
+
88
+ # --------------------- flag upper limits ---------------------- #
89
+ bin_n_ulims: npt.NDArray[np.int64] = np.bincount(
90
+ visit_mask, weights=u_lims, minlength=len(counts)
91
+ )
92
+ bin_ulim_bool = cast(npt.NDArray[np.bool_], (counts - bin_n_ulims) == 0)
93
+ use_mask_ul = ~u_lims | (u_lims & bin_ulim_bool[visit_mask])
94
+
95
+ n_loops = 0
96
+
97
+ # recalculate uncertainty and median until no outliers are left
98
+ while n_remaining_outlier > 0:
99
+ # make a mask of values to use
100
+ use_mask = ~outlier_mask & use_mask_ul & ~nan_mask # type: ignore[operator]
101
+ n_points = np.bincount(visit_mask, weights=use_mask)
102
+ zero_points_mask = cast(npt.NDArray[np.bool_], n_points == 0)
103
+
104
+ # ------------------------- calculate median ------------------------- #
105
+ median = np.zeros_like(counts, dtype=float)
106
+ visits_at_least_one_point = np.unique(visit_mask[~zero_points_mask[visit_mask]])
107
+ visits_zero_points = np.unique(visit_mask[zero_points_mask[visit_mask]])
108
+ median[visits_at_least_one_point] = np.array(
109
+ [
110
+ np.median(f[(visit_mask == i) & use_mask])
111
+ for i in visits_at_least_one_point
112
+ ]
113
+ )
114
+ median[visits_zero_points] = np.nan
115
+
116
+ # median is NaN for visits with 0 detections (i.e. the source was detected in one band but not the other)
117
+ # if the median is NaN for any other visit, raise an error
118
+ if np.any(np.isnan(median[n_points > 0])):
119
+ nan_indices = np.where(np.isnan(median))[0]
120
+ msg = ""
121
+ for inan_index in nan_indices:
122
+ nanf = f[visit_mask == inan_index]
123
+ msg += f"median is nan for {inan_index}th bin\n{nanf}\n\n"
124
+ raise ValueError(msg)
125
+
126
+ # --------------------- calculate uncertainty ---------------------- #
127
+ mean_deviation = np.bincount(
128
+ visit_mask[use_mask],
129
+ weights=(f[use_mask] - median[visit_mask[use_mask]]) ** 2,
130
+ minlength=len(counts),
131
+ )
132
+ one_points_mask = n_points <= 1
133
+ # calculate standard deviation
134
+ std = np.zeros_like(counts, dtype=float)
135
+ std[~one_points_mask] = (
136
+ np.sqrt(mean_deviation[~one_points_mask])
137
+ / (n_points[~one_points_mask] - 1)
138
+ * stats.t.interval(0.68, df=n_points[~one_points_mask] - 1)[1]
139
+ # for visits with a small number of detections we correct according to the t-distribution
140
+ )
141
+ std[one_points_mask] = -np.inf
142
+
143
+ # calculate the propagated errors of the single exposure measurements
144
+ single_exp_measurement_errors = np.sqrt(
145
+ np.bincount(
146
+ visit_mask[use_mask],
147
+ weights=e[use_mask] ** 2,
148
+ minlength=len(counts),
149
+ )
150
+ )
151
+ e_meas = np.zeros_like(std, dtype=float)
152
+ e_meas[~zero_points_mask] = (
153
+ single_exp_measurement_errors[n_points > 0] / n_points[n_points > 0]
154
+ )
155
+ e_meas[zero_points_mask] = np.nan
156
+ # take the maximum value of the measured single exposure errors and the standard deviation
157
+ u = np.maximum(std, e_meas)
158
+
159
+ # calculate the 70% confidence interval
160
+ u70 = np.zeros_like(counts, dtype=float)
161
+ u70[one_points_mask] = 1e-10
162
+ visits_at_least_two_point = np.unique(visit_mask[~one_points_mask[visit_mask]])
163
+ u70[visits_at_least_two_point] = np.array(
164
+ [
165
+ np.quantile(
166
+ abs(f[(visit_mask == i) & use_mask] - median[i]),
167
+ 0.7,
168
+ method="interpolated_inverted_cdf",
169
+ )
170
+ for i in visits_at_least_two_point
171
+ ]
172
+ )
173
+
174
+ # --------------------- remove outliers in the bins ---------------------- #
175
+ remaining_outliers = (
176
+ abs(median[visit_mask] - f) > outlier_thresh * u70[visit_mask]
177
+ ) & ~outlier_mask
178
+ outlier_mask |= remaining_outliers
179
+ n_remaining_outlier = sum(remaining_outliers) if remove_outliers else 0
180
+ # when remove_outliers is False, n_remaining_outlier is forced to 0, which exits the while loop
181
+
182
+ n_loops += 1
183
+
184
+ if n_loops > 20:
185
+ raise Exception(f"{n_loops}!")
186
+
187
+ return median, u, bin_ulim_bool, outlier_mask, use_mask, n_points
188
+
189
+
190
+ def stack_visits(lightcurve: pd.DataFrame, clean_outliers: bool = True):
191
+ """
192
+ Combine the data by visits of the satellite of one region in the sky.
193
+ The visits typically consist of some tens of observations. The individual visits are separated by about
194
+ six months.
195
+ The flux for one visit is calculated as the median of the accepted single-exposure measurements.
196
+ The uncertainty on that value is the larger of the t-corrected standard deviation and the propagated single-exposure errors.
197
+ Outliers per visit are identified if they are more than 20 times the 70% confidence interval away from the visit median. These outliers
198
+ are removed from the calculation of the median and the uncertainty if ``clean_outliers`` is True.
199
+
200
+ :param lightcurve: the raw lightcurve
201
+ :type lightcurve: pandas.DataFrame
+ :param clean_outliers: whether to flag and remove outliers (based on the flux)
+ :type clean_outliers: bool
202
+ :return: the stacked lightcurve
203
+ :rtype: pandas.DataFrame
204
+ """
205
+
206
+ # ------------------------- create visit mask -------------------------- #
207
+ visit_map = get_visit_map(lightcurve)
208
+ counts = np.bincount(visit_map)
209
+
210
+ stacked_data: Dict[str, Any] = dict()
211
+
212
+ # ------------------------- calculate mean mjd -------------------------- #
213
+ stacked_data["mean_mjd"] = np.bincount(visit_map, weights=lightcurve.mjd) / counts
214
+
215
+ # ------------------------- loop through bands -------------------------- #
216
+ for b in ["w1", "w2"]:
217
+ # loop through magnitude and flux and save the respective datapoints
218
+
219
+ outlier_masks: Dict[str, Any] = dict()
220
+ use_masks = dict()
221
+ bin_ulim_bools = dict()
222
+
223
+ for lum_ext in [keys.FLUX_EXT, keys.MAG_EXT]:
224
+ f = lightcurve[f"{b}{lum_ext}"]
225
+ e = lightcurve[f"{b}{keys.ERROR_EXT}{lum_ext}"]
226
+
227
+ # we will flag outliers based on the flux only
228
+ remove_outliers = lum_ext == keys.FLUX_EXT and clean_outliers
229
+ outlier_mask = outlier_masks.get(keys.FLUX_EXT, None)
230
+
231
+ mean, u, bin_ulim_bool, outlier_mask, use_mask, n_points = calculate_epochs(
232
+ f,
233
+ e,
234
+ visit_map,
235
+ counts,
236
+ remove_outliers=remove_outliers,
237
+ outlier_mask=outlier_mask,
238
+ )
239
+ n_outliers = np.sum(outlier_mask)
240
+
241
+ if n_outliers > 0:
242
+ logger.debug(
243
+ f"removed {n_outliers} outliers by brightness for {b} {lum_ext}"
244
+ )
245
+
246
+ stacked_data[f"{b}{keys.MEAN}{lum_ext}"] = mean
247
+ stacked_data[f"{b}{lum_ext}{keys.RMS}"] = u
248
+ stacked_data[f"{b}{lum_ext}{keys.UPPER_LIMIT}"] = bin_ulim_bool
249
+ stacked_data[f"{b}{lum_ext}{keys.NPOINTS}"] = n_points
250
+
251
+ outlier_masks[lum_ext] = outlier_mask
252
+ use_masks[lum_ext] = use_mask
253
+ bin_ulim_bools[lum_ext] = bin_ulim_bool
254
+
255
+ # ------- calculate the zeropoints per exposure ------- #
256
+ # this might look wrong since we use the flux mask on the magnitudes, but it is correct:
257
+ # for each flux measurement we need the corresponding magnitude to get the zeropoint
258
+ mags = lightcurve[f"{b}{keys.MAG_EXT}"]
259
+ inst_fluxes = lightcurve[f"{b}{keys.FLUX_EXT}"]
260
+ pos_m = inst_fluxes > 0 # select only positive fluxes, i.e. detections
261
+ zp_mask = pos_m & use_masks[keys.FLUX_EXT]
262
+
263
+ # calculate zero points
264
+ zps = np.zeros_like(inst_fluxes)
265
+ zps[zp_mask] = mags[zp_mask] + 2.5 * np.log10(inst_fluxes[zp_mask])
266
+ # count the valid zeropoints per visit and flag visits that have at least one
267
+ n_valid_zps = np.bincount(visit_map, weights=zp_mask)
268
+ at_least_one_valid_zp = n_valid_zps > 0
269
+ # calculate the median zeropoint for each visit
270
+ zps_median = np.zeros_like(n_valid_zps, dtype=float)
271
+ zps_median[n_valid_zps > 0] = np.array(
272
+ [
273
+ np.median(zps[(visit_map == i) & zp_mask])
274
+ for i in np.unique(visit_map[at_least_one_valid_zp[visit_map]])
275
+ ]
276
+ )
277
+ # if there are only non-detections then fall back to default zeropoint
278
+ zps_median[n_valid_zps == 0] = MAGNITUDE_ZEROPOINTS[b]
279
+ # if the visit only has upper limits then use the fall-back zeropoint
280
+ zps_median[bin_ulim_bools[keys.FLUX_EXT]] = MAGNITUDE_ZEROPOINTS[b]
281
+
282
+ # --------------- calculate flux density from instrument flux ---------------- #
283
+ # get the uncertainty of the instrument flux [digital numbers], i.e. source count
284
+ inst_fluxes_e = lightcurve[f"{b}{keys.ERROR_EXT}{keys.FLUX_EXT}"]
285
+
286
+ # calculate the proportionality constant between flux density and source count
287
+ mag_zp = FLUX_ZEROPOINTS[b] * 1e3 # in mJy
288
+ flux_dens_const = mag_zp * 10 ** (-zps_median / 2.5)
289
+
290
+ # calculate flux densities from instrument counts
291
+ flux_densities = inst_fluxes * flux_dens_const[visit_map]
292
+ flux_densities_e = inst_fluxes_e * flux_dens_const[visit_map]
293
+
294
+ # bin flux densities
295
+ mean_fd, u_fd, ul_fd, outlier_mask_fd, use_mask_fd, n_points_fd = (
296
+ calculate_epochs(
297
+ flux_densities,
298
+ flux_densities_e,
299
+ visit_map,
300
+ counts,
301
+ remove_outliers=False,
302
+ outlier_mask=outlier_masks[keys.FLUX_EXT],
303
+ )
304
+ )
305
+ stacked_data[f"{b}{keys.MEAN}{keys.FLUX_DENSITY_EXT}"] = mean_fd
306
+ stacked_data[f"{b}{keys.FLUX_DENSITY_EXT}{keys.RMS}"] = u_fd
307
+ stacked_data[f"{b}{keys.FLUX_DENSITY_EXT}{keys.UPPER_LIMIT}"] = ul_fd
308
+ stacked_data[f"{b}{keys.FLUX_DENSITY_EXT}{keys.NPOINTS}"] = n_points_fd
309
+
310
+ return pd.DataFrame(stacked_data)
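
Aside (illustrative, not part of the diff): stack_visits expects a raw lightcurve whose column names are assembled from the suffixes defined in timewise.process.keys (their exact string values are not shown in this diff); with the fall-back W1 zeropoint of 20.752, the flux-density conversion constant above works out to roughly 1.5 µJy per count. A minimal sketch of stacking a synthetic two-visit lightcurve, building the column names from the keys module so no suffix values are assumed:

import numpy as np
import pandas as pd

from timewise.process import keys
from timewise.process.stacking import MAGNITUDE_ZEROPOINTS, stack_visits

rng = np.random.default_rng(42)

# two visits of ten single exposures each, separated by roughly half a year
mjd = np.concatenate([58000 + rng.uniform(0, 1, 10), 58180 + rng.uniform(0, 1, 10)])
lightcurve = pd.DataFrame({"mjd": mjd})

for band in ["w1", "w2"]:
    counts = rng.normal(100.0, 5.0, mjd.size)                  # instrumental flux [counts]
    lightcurve[f"{band}{keys.FLUX_EXT}"] = counts
    lightcurve[f"{band}{keys.ERROR_EXT}{keys.FLUX_EXT}"] = 5.0
    # magnitudes consistent with the catalogue zeropoints defined in stacking.py
    lightcurve[f"{band}{keys.MAG_EXT}"] = MAGNITUDE_ZEROPOINTS[band] - 2.5 * np.log10(counts)
    lightcurve[f"{band}{keys.ERROR_EXT}{keys.MAG_EXT}"] = 0.05

stacked = stack_visits(lightcurve)                              # one row per visit
print(stacked[["mean_mjd", f"w1{keys.MEAN}{keys.FLUX_DENSITY_EXT}"]])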
timewise/process/template.yml ADDED
@@ -0,0 +1,49 @@
1
+ channel:
2
+ - access:
3
+ - ZTF_PUB
4
+ name: wise
5
+ policy: []
6
+ version: 0
7
+ mongo:
8
+ prefix: MONGODB_NAME
9
+ reset: true
10
+ name: timewise
11
+ task:
12
+ - config:
13
+ compiler_opts: TiCompilerOptions
14
+ directives:
15
+ - channel: wise
16
+ ingest:
17
+ mux:
18
+ combine:
19
+ - state_t2:
20
+ - unit: T2StackVisits
21
+ unit: T1HDBSCAN
22
+ config:
23
+ original_id_key: ORIGINAL_ID_KEY
24
+ input_mongo_db_name: INPUT_MONGODB_NAME
25
+ plot: true
26
+ unit: TiMongoMuxer
27
+ iter_max: 1000000
28
+ shaper: TiDataPointShaper
29
+ supplier:
30
+ config:
31
+ dpid: hash
32
+ loader:
33
+ config:
34
+ timewise_config_file: TIMEWISE_CONFIG_PATH
35
+ stock_id_column_name: ORIGINAL_ID_KEY
36
+ unit: TimewiseFileLoader
37
+ unit: TimewiseAlertSupplier
38
+ multiplier: 1
39
+ title: t0
40
+ template:
41
+ live:
42
+ - resolve_run_time_aliases
43
+ - hash_t2_config
44
+ unit: AlertConsumer
45
+ - config:
46
+ log_profile: default
47
+ multiplier: 1
48
+ title: t2
49
+ unit: T2Worker
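
Aside (illustrative, not part of the diff): the upper-case tokens in this template (MONGODB_NAME, INPUT_MONGODB_NAME, ORIGINAL_ID_KEY, TIMEWISE_CONFIG_PATH) are placeholders that timewise presumably substitutes before handing the job to AMPEL. A rough sketch of a manual substitution, with all values invented for illustration:

from pathlib import Path
import yaml

template = Path("timewise/process/template.yml").read_text()
substitutions = {
    "MONGODB_NAME": "timewise_demo",
    "INPUT_MONGODB_NAME": "timewise_demo_input",
    "ORIGINAL_ID_KEY": "orig_id",
    "TIMEWISE_CONFIG_PATH": "/path/to/timewise_config.yml",
}
for placeholder, value in substitutions.items():
    template = template.replace(placeholder, value)

job = yaml.safe_load(template)   # plain dict, ready to be passed on as an AMPEL job description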
timewise/query/__init__.py ADDED
@@ -0,0 +1,6 @@
1
+ from pydantic import Field
2
+ from typing import Union, Annotated, TypeAlias
3
+ from .positional import PositionalQuery
4
+
5
+ # Discriminated union of all query types
6
+ QueryType: TypeAlias = Annotated[Union[PositionalQuery], Field(discriminator="type")]
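
Aside (illustrative, not part of the diff): because the union is discriminated on the `type` field, a query can be reconstructed from plain configuration data. A minimal sketch, assuming pydantic v2:

from pydantic import TypeAdapter

from timewise.query import QueryType

config = {
    "type": "positional",                     # discriminator selects PositionalQuery
    "radius_arcsec": 6.0,
    "columns": ["mjd", "w1mpro", "w1sigmpro"],
    "table": {"name": "neowiser_p1bs_psd"},   # tables are discriminated on "name"
}
query = TypeAdapter(QueryType).validate_python(config)
print(type(query).__name__)                   # -> "PositionalQuery"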
timewise/query/base.py ADDED
@@ -0,0 +1,45 @@
1
+ import abc
2
+ from typing import ClassVar, List
3
+ from pydantic import BaseModel
4
+ from hashlib import sha256
5
+
6
+ from ..tables import TableType
7
+
8
+
9
+ class Query(abc.ABC, BaseModel):
10
+ type: str
11
+ upload_name: ClassVar[str] = "mine"
12
+
13
+ original_id_key: str = "orig_id"
14
+ constraints: List[str] = [
15
+ "nb < 2",
16
+ "na < 1",
17
+ "cc_flags like '00%'",
18
+ "qi_fact >= 1",
19
+ "saa_sep >= 5",
20
+ "moon_masked like '00%'",
21
+ ]
22
+ columns: List[str]
23
+ table: TableType
24
+
25
+ @property
26
+ @abc.abstractmethod
27
+ def input_columns(self) -> dict[str, str]: ...
28
+
29
+ @abc.abstractmethod
30
+ def build(self) -> str: ...
31
+
32
+ @property
33
+ def adql(self) -> str:
34
+ """ADQL string computed once per instance."""
35
+ return self.build()
36
+
37
+ @property
38
+ def hash(self) -> str:
39
+ return (
40
+ self.type
41
+ + "_"
42
+ + self.table.name
43
+ + "_"
44
+ + sha256(self.adql.encode()).hexdigest()
45
+ )
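
Aside (illustrative, not part of the diff): concrete queries only have to declare the columns of the uploaded user table and how to build their ADQL. A hypothetical subclass, with the class name and the join column invented for illustration:

from typing import Dict, Literal

from timewise.query.base import Query


class IdQuery(Query):  # hypothetical example, not shipped with timewise
    type: Literal["by_id"] = "by_id"

    @property
    def input_columns(self) -> Dict[str, str]:
        # columns expected in the uploaded user table
        return {"cntr": "int", self.original_id_key: "int"}

    def build(self) -> str:
        cols = ", ".join(f"{self.table.name}.{c}" for c in self.columns)
        return (
            f"SELECT {cols}, mine.{self.original_id_key} "
            f"FROM TAP_UPLOAD.{self.upload_name} AS mine "
            f"JOIN {self.table.name} ON {self.table.name}.cntr = mine.cntr"
        )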
timewise/query/positional.py ADDED
@@ -0,0 +1,40 @@
1
+ import logging
2
+ from typing import Literal, Dict
3
+
4
+ from .base import Query
5
+
6
+ logger = logging.getLogger(__name__)
7
+
8
+
9
+ class PositionalQuery(Query):
10
+ type: Literal["positional"] = "positional"
11
+ radius_arcsec: float
12
+
13
+ @property
14
+ def input_columns(self) -> Dict[str, str]:
15
+ return {"ra": "float", "dec": "float", self.original_id_key: "int"}
16
+
17
+ def build(self) -> str:
18
+ logger.debug(f"constructing positional query for {self.table.name}")
19
+
20
+ q = "SELECT \n\t"
21
+ for k in self.columns:
22
+ q += f"{self.table.name}.{k}, "
23
+ q += f"\n\tmine.{self.original_id_key} \n"
24
+ q += f"FROM\n\tTAP_UPLOAD.{self.upload_name} AS mine \n"
25
+ q += f"RIGHT JOIN\n\t{self.table.name} \n"
26
+ q += "WHERE \n"
27
+ q += (
28
+ f"\tCONTAINS(POINT('J2000',{self.table.name}.{self.table.ra_column},{self.table.name}.{self.table.dec_column}),"
29
+ f"CIRCLE('J2000',mine.ra,mine.dec,{self.radius_arcsec / 3600:.18f}))=1 "
30
+ )
31
+
32
+ if len(self.constraints) > 0:
33
+ q += " AND (\n"
34
+ for c in self.constraints:
35
+ q += f"\t{self.table.name}.{c} AND \n"
36
+ q = q.strip(" AND \n")
37
+ q += "\t)"
38
+
39
+ logger.debug(f"\n{q}")
40
+ return q
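
Aside (illustrative, not part of the diff): a minimal sketch of building the ADQL for a NEOWISE-R single-exposure cone search; the column selection is only an example:

from timewise.query import PositionalQuery
from timewise.tables import neowiser_p1bs_psd

query = PositionalQuery(
    radius_arcsec=6.0,
    columns=["mjd", "w1mpro", "w1sigmpro", "w2mpro", "w2sigmpro"],
    table=neowiser_p1bs_psd(),
)
print(query.adql)   # the generated ADQL, joining the upload table against neowiser_p1bs_psd
print(query.hash)   # "positional_neowiser_p1bs_psd_<sha256 of the ADQL>"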
timewise/tables/__init__.py ADDED
@@ -0,0 +1,10 @@
1
+ from pydantic import Field
2
+ from typing import Union, Annotated
3
+
4
+ from .allwise_p3as_mep import allwise_p3as_mep
5
+ from .neowiser_p1bs_psd import neowiser_p1bs_psd
6
+
7
+
8
+ TableType = Annotated[
9
+ Union[allwise_p3as_mep, neowiser_p1bs_psd], Field(discriminator="name")
10
+ ]
timewise/tables/allwise_p3as_mep.py ADDED
@@ -0,0 +1,22 @@
1
+ from typing import Literal, ClassVar, Type, Dict
2
+ from .base import TableConfig
3
+
4
+
5
+ class allwise_p3as_mep(TableConfig):
6
+ name: Literal["allwise_p3as_mep"] = "allwise_p3as_mep"
7
+ columns_dtypes: ClassVar[Dict[str, Type]] = {
8
+ "ra": float,
9
+ "dec": float,
10
+ "mjd": float,
11
+ "cntr_mf": str,
12
+ "w1mpro_ep": float,
13
+ "w1sigmpro_ep": float,
14
+ "w2mpro_ep": float,
15
+ "w2sigmpro_ep": float,
16
+ "w1flux_ep": float,
17
+ "w1sigflux_ep": float,
18
+ "w2flux_ep": float,
19
+ "w2sigflux_ep": float,
20
+ }
21
+ ra_column: ClassVar[str] = "ra"
22
+ dec_column: ClassVar[str] = "dec"
timewise/tables/base.py ADDED
@@ -0,0 +1,9 @@
1
+ from typing import ClassVar, Dict, Type
2
+ from pydantic import BaseModel
3
+
4
+
5
+ class TableConfig(BaseModel):
6
+ name: str
7
+ columns_dtypes: ClassVar[Dict[str, Type]]
8
+ ra_column: ClassVar[str]
9
+ dec_column: ClassVar[str]
timewise/tables/neowiser_p1bs_psd.py ADDED
@@ -0,0 +1,22 @@
1
+ from typing import Literal, ClassVar, Dict, Type
2
+ from .base import TableConfig
3
+
4
+
5
+ class neowiser_p1bs_psd(TableConfig):
6
+ name: Literal["neowiser_p1bs_psd"] = "neowiser_p1bs_psd"
7
+ columns_dtypes: ClassVar[Dict[str, Type]] = {
8
+ "ra": float,
9
+ "dec": float,
10
+ "mjd": float,
11
+ "allwise_cntr": str,
12
+ "w1mpro": float,
13
+ "w1sigmpro": float,
14
+ "w2mpro": float,
15
+ "w2sigmpro": float,
16
+ "w1flux": float,
17
+ "w1sigflux": float,
18
+ "w2flux": float,
19
+ "w2sigflux": float,
20
+ }
21
+ ra_column: ClassVar[str] = "ra"
22
+ dec_column: ClassVar[str] = "dec"
timewise/types.py ADDED
@@ -0,0 +1,30 @@
1
+ from typing import TypedDict, NamedTuple
2
+
3
+ from timewise.query import QueryType
4
+
5
+
6
+ class TAPJobMeta(TypedDict):
7
+ url: str
8
+ status: str
9
+ submitted: str
10
+ last_checked: str
11
+ input_length: int
12
+ query: str
13
+ query_config: QueryType | dict
14
+ completed_at: str
15
+
16
+
17
+ class TaskID(NamedTuple):
18
+ """
19
+ Generic identifier for a unit of work.
20
+ Can be extended by Downloader/Processor as needed.
21
+ """
22
+
23
+ namespace: str # e.g. "downloader", "processor"
24
+ key: str # unique string, e.g. "chunk_0001_q0" or "mask_2025-01-01"
25
+
26
+ def __str__(self):
27
+ return f"{self.namespace}_{self.key}"
28
+
29
+
30
+ TYPE_MAP = {"float": float, "str": str, "int": int}
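
Aside (illustrative, not part of the diff): TaskID is a plain NamedTuple, so it can be unpacked as usual, and its string form concatenates namespace and key:

from timewise.types import TaskID

task = TaskID(namespace="downloader", key="chunk_0001_q0")
print(str(task))          # -> "downloader_chunk_0001_q0"
namespace, key = task     # regular NamedTuple unpacking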
timewise/util/backoff.py ADDED
@@ -0,0 +1,12 @@
1
+ import logging
2
+
3
+
4
+ logger = logging.getLogger(__name__)
5
+
6
+
7
+ def backoff_hndlr(details):
8
+ logger.info(
9
+ "Backing off {wait:0.1f} seconds after {tries} tries "
10
+ "calling function {target} with args {args} and kwargs "
11
+ "{kwargs}".format(**details)
12
+ )
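
Aside (illustrative, not part of the diff): backoff_hndlr has the handler signature of the `backoff` package (the details dict carries wait, tries, target, args and kwargs for on_backoff handlers), so it can be plugged into a retry decorator. A minimal sketch; the decorated function and its parameters are invented for illustration:

import backoff
import requests

from timewise.util.backoff import backoff_hndlr


@backoff.on_exception(
    backoff.expo,                  # exponential wait between retries
    requests.RequestException,     # retry only on request-level errors
    max_tries=5,
    on_backoff=backoff_hndlr,      # log every retry through the timewise logger
)
def fetch(url: str) -> bytes:
    response = requests.get(url, timeout=30)
    response.raise_for_status()
    return response.content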
timewise/util/csv_utils.py ADDED
@@ -0,0 +1,12 @@
1
+ import numpy as np
2
+ from pathlib import Path
3
+
4
+
5
+ def get_n_rows(path: str | Path):
6
+ chunk = 1024 * 1024 # Process 1 MB at a time.
7
+ f = np.memmap(path)
8
+ num_newlines = sum(
9
+ np.sum(f[i : i + chunk] == ord("\n")) for i in range(0, len(f), chunk)
10
+ )
11
+ del f
12
+ return num_newlines
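
Aside (illustrative, not part of the diff): get_n_rows counts newline characters, so for a well-formed CSV with a trailing newline it returns the number of lines including the header. The file name below is invented:

from timewise.util.csv_utils import get_n_rows

n_lines = get_n_rows("chunk_0001.csv")
n_data_rows = n_lines - 1   # subtract the header line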
timewise/util/error_threading.py ADDED
@@ -0,0 +1,70 @@
1
+ import sys
2
+ from queue import Queue
3
+ from threading import Thread, Event
4
+ from typing import Any, Optional, Callable, Mapping
5
+
6
+
7
+ class ErrorQueue(Queue):
8
+ """Queue subclass whose join() re-raises exceptions from worker threads."""
9
+
10
+ def __init__(self, stop_event: Event, *args, **kwargs):
11
+ super().__init__(*args, **kwargs)
12
+ self.error_queue: Queue = Queue()
13
+ self.stop_event = stop_event
14
+
15
+ def report_error(self, exc_info):
16
+ """Called by workers to push an exception into the error queue."""
17
+ self.error_queue.put(exc_info)
18
+ # Also decrement unfinished_tasks, so join() won't block forever
19
+ with self.all_tasks_done:
20
+ self.unfinished_tasks = max(0, self.unfinished_tasks - 1)
21
+ self.all_tasks_done.notify_all()
22
+
23
+ def join(self):
24
+ """Wait until all tasks are done, or raise if a worker failed."""
25
+ with self.all_tasks_done:
26
+ while self.unfinished_tasks:
27
+ if not self.error_queue.empty():
28
+ exc_info = self.error_queue.get()
29
+ self.stop_event.set()
30
+ raise exc_info[1].with_traceback(exc_info[2])
31
+ self.all_tasks_done.wait()
32
+
33
+ def raise_errors(self):
34
+ """
35
+ Raise the first worker exception, if any.
36
+ """
37
+ if not self.error_queue.empty():
38
+ exc_info = self.error_queue.get()
39
+ raise exc_info[1].with_traceback(exc_info[2])
40
+
41
+
42
+ class ExceptionSafeThread(Thread):
43
+ """Thread subclass that reports uncaught exceptions to the ErrorQueue."""
44
+
45
+ def __init__(
46
+ self,
47
+ error_queue: Any,
48
+ *,
49
+ group: Optional[Any] = None,
50
+ target: Optional[Callable[..., Any]] = None,
51
+ name: Optional[str] = None,
52
+ args: tuple = (),
53
+ kwargs: Optional[Mapping[str, Any]] = None,
54
+ daemon: Optional[bool] = None,
55
+ ):
56
+ super().__init__(
57
+ group=group,
58
+ target=target,
59
+ name=name,
60
+ args=args,
61
+ kwargs=kwargs,
62
+ daemon=daemon,
63
+ )
64
+ self.error_queue = error_queue
65
+
66
+ def run(self):
67
+ try:
68
+ super().run()
69
+ except Exception:
70
+ self.error_queue.report_error(sys.exc_info())
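
Aside (illustrative, not part of the diff): a minimal sketch of wiring the two classes together. A worker raises on a bad item, ExceptionSafeThread reports it, and the main thread surfaces it after joining; the task values are invented:

from threading import Event

from timewise.util.error_threading import ErrorQueue, ExceptionSafeThread

stop_event = Event()
tasks = ErrorQueue(stop_event)


def worker() -> None:
    while not stop_event.is_set():
        item = tasks.get()
        if item < 0:
            # uncaught exceptions are pushed to the error queue by ExceptionSafeThread.run()
            raise ValueError(f"bad item: {item}")
        tasks.task_done()


thread = ExceptionSafeThread(error_queue=tasks, target=worker, daemon=True)
thread.start()

for item in [1, 2, -1]:
    tasks.put(item)

tasks.join()           # returns once all tasks are accounted for (or re-raises a worker error)
tasks.raise_errors()   # re-raises the ValueError if join() returned before seeing it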
timewise/util/visits.py ADDED
@@ -0,0 +1,33 @@
1
+ import numpy as np
2
+ import pandas as pd
3
+ import numpy.typing as npt
4
+
5
+
6
+ def get_visit_map(lightcurve: pd.DataFrame) -> npt.NDArray[np.int64]:
7
+ """
8
+ Create a map datapoint to visit
9
+
10
+ :param lightcurve: the raw lightcurve
11
+ :type lightcurve: pd.DataFrame
12
+ :returns: visit map
13
+ :rtype: npt.ArrayLike
14
+ """
15
+ # ------------------------- find epoch intervals -------------------------- #
16
+ sorted_mjds = np.sort(lightcurve.mjd)
17
+ epoch_bounds_mask = (sorted_mjds[1:] - sorted_mjds[:-1]) > 100
18
+ epoch_bins = np.array(
19
+ [
20
+ lightcurve.mjd.min() * 0.99
21
+ ] # this makes sure that the first datapoint gets selected
22
+ + list(
23
+ ((sorted_mjds[1:] + sorted_mjds[:-1]) / 2)[epoch_bounds_mask]
24
+ ) # finding the middle between
25
+ +
26
+ # two visits
27
+ [
28
+ lightcurve.mjd.max() * 1.01
29
+ ] # this just makes sure that the last datapoint gets selected as well
30
+ )
31
+
32
+ visit_mask = np.digitize(lightcurve.mjd, epoch_bins) - 1
33
+ return visit_mask
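
Aside (illustrative, not part of the diff): gaps larger than 100 days start a new visit, so for two well-separated observation blocks the map assigns indices 0 and 1:

import numpy as np
import pandas as pd

from timewise.util.visits import get_visit_map

# ten exposures around MJD 58000 and ten more about half a year later
mjd = np.concatenate([58000 + np.linspace(0, 1, 10), 58180 + np.linspace(0, 1, 10)])
lightcurve = pd.DataFrame({"mjd": mjd})

visit_map = get_visit_map(lightcurve)   # one visit index per datapoint
print(np.bincount(visit_map))           # -> [10 10]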