timewise 0.5.4__py3-none-any.whl → 1.0.0a2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (52)
  1. timewise/__init__.py +1 -5
  2. timewise/backend/__init__.py +6 -0
  3. timewise/backend/base.py +36 -0
  4. timewise/backend/filesystem.py +80 -0
  5. timewise/chunking.py +50 -0
  6. timewise/cli.py +117 -11
  7. timewise/config.py +34 -0
  8. timewise/io/__init__.py +1 -0
  9. timewise/io/config.py +64 -0
  10. timewise/io/download.py +302 -0
  11. timewise/io/stable_tap.py +121 -0
  12. timewise/plot/__init__.py +3 -0
  13. timewise/plot/diagnostic.py +242 -0
  14. timewise/plot/lightcurve.py +112 -0
  15. timewise/plot/panstarrs.py +260 -0
  16. timewise/plot/sdss.py +109 -0
  17. timewise/process/__init__.py +2 -0
  18. timewise/process/config.py +34 -0
  19. timewise/process/interface.py +143 -0
  20. timewise/process/keys.py +10 -0
  21. timewise/process/stacking.py +322 -0
  22. timewise/process/template.yml +49 -0
  23. timewise/query/__init__.py +6 -0
  24. timewise/query/base.py +45 -0
  25. timewise/query/positional.py +40 -0
  26. timewise/tables/__init__.py +10 -0
  27. timewise/tables/allwise_p3as_mep.py +22 -0
  28. timewise/tables/base.py +9 -0
  29. timewise/tables/neowiser_p1bs_psd.py +22 -0
  30. timewise/types.py +30 -0
  31. timewise/util/backoff.py +12 -0
  32. timewise/util/csv_utils.py +12 -0
  33. timewise/util/error_threading.py +70 -0
  34. timewise/util/visits.py +33 -0
  35. timewise-1.0.0a2.dist-info/METADATA +205 -0
  36. timewise-1.0.0a2.dist-info/RECORD +39 -0
  37. timewise-1.0.0a2.dist-info/entry_points.txt +3 -0
  38. timewise/big_parent_sample.py +0 -106
  39. timewise/config_loader.py +0 -157
  40. timewise/general.py +0 -52
  41. timewise/parent_sample_base.py +0 -89
  42. timewise/point_source_utils.py +0 -68
  43. timewise/utils.py +0 -558
  44. timewise/wise_bigdata_desy_cluster.py +0 -1407
  45. timewise/wise_data_base.py +0 -2027
  46. timewise/wise_data_by_visit.py +0 -672
  47. timewise/wise_flux_conversion_correction.dat +0 -19
  48. timewise-0.5.4.dist-info/METADATA +0 -56
  49. timewise-0.5.4.dist-info/RECORD +0 -17
  50. timewise-0.5.4.dist-info/entry_points.txt +0 -3
  51. {timewise-0.5.4.dist-info → timewise-1.0.0a2.dist-info}/WHEEL +0 -0
  52. {timewise-0.5.4.dist-info → timewise-1.0.0a2.dist-info}/licenses/LICENSE +0 -0
timewise/process/stacking.py ADDED
@@ -0,0 +1,322 @@
+ import logging
+ from typing import cast, Dict, Any
+
+ from scipy import stats
+ import numpy as np
+ from numpy import typing as npt
+ import pandas as pd
+
+ from ..util.visits import get_visit_map
+ from timewise.process import keys
+
+
+ logger = logging.getLogger(__name__)
+
+
+ # zero points come from https://wise2.ipac.caltech.edu/docs/release/allsky/expsup/sec4_4h.html#conv2flux
+ # published in Jarrett et al. (2011): https://ui.adsabs.harvard.edu/abs/2011ApJ...735..112J/abstract
+ MAGNITUDE_ZEROPOINTS: Dict[str, float] = {"w1": 20.752, "w2": 19.596}
+ # in Jy
+ FLUX_ZEROPOINTS = {"w1": 309.54, "w2": 171.787}
+
+
+ def calculate_epochs(
+     f: pd.Series,
+     e: pd.Series,
+     visit_mask: npt.NDArray[np.int64],
+     counts: npt.NDArray[np.int64],
+     remove_outliers: bool,
+     outlier_threshold: float,
+     outlier_quantile: float,
+     outlier_mask: npt.NDArray[np.bool_] | None = None,
+ ) -> tuple[
+     npt.NDArray[np.float64],
+     npt.NDArray[np.float64],
+     npt.NDArray[np.bool_],
+     npt.NDArray[np.bool_],
+     npt.NDArray[np.bool_],
+     npt.NDArray[np.int64],
+ ]:
+     """
+     Calculate the per-visit (epoch) statistics of a raw lightcurve.
+
+     :param f: the fluxes
+     :type f: pd.Series
+     :param e: the flux errors
+     :type e: pd.Series
+     :param visit_mask: the visit index of each datapoint
+     :type visit_mask: np.ndarray
+     :param counts: the number of datapoints per visit
+     :type counts: np.ndarray
+     :param remove_outliers: whether to remove outliers
+     :type remove_outliers: bool
+     :param outlier_threshold: flag datapoints that deviate from the visit median by more than this multiple of the quantile spread
+     :type outlier_threshold: float
+     :param outlier_quantile: quantile used to estimate the spread of the flux within a visit
+     :type outlier_quantile: float
+     :param outlier_mask: a pre-existing outlier mask
+     :type outlier_mask: np.ndarray
+     :return: per-visit median, uncertainty, upper-limit flag, outlier mask, use mask and number of points
+     :rtype: tuple of np.ndarray
+     """
+
+     if len(f) == 0:
+         return (
+             np.array([]),
+             np.array([]),
+             np.array([]),
+             np.array([]),
+             np.array([]),
+             np.array([]),
+         )
+
+     u_lims = pd.isna(e)
+     nan_mask = pd.isna(f)
+
+     # --------------------- remove outliers in the bins ---------------------- #
+
+     # if we do not want to clean outliers just set the threshold to infinity
+     _outlier_threshold = np.inf if not remove_outliers else outlier_threshold
+
+     # set up empty masks
+     outlier_mask = cast(
+         npt.NDArray[np.bool_],
+         (np.array([False] * len(f)) if outlier_mask is None else outlier_mask),
+     )
+     median = np.full_like(counts, np.nan, dtype=float)
+     u = np.full_like(counts, np.nan, dtype=float)
+     use_mask = np.full_like(counts, False, dtype=bool)
+     n_points = counts
+
+     # set up dummy values for number of remaining outliers
+     n_remaining_outlier = np.inf
+
+     # --------------------- flag upper limits ---------------------- #
+     bin_n_ulims: npt.NDArray[np.int64] = np.bincount(
+         visit_mask, weights=u_lims, minlength=len(counts)
+     )
+     bin_ulim_bool = cast(npt.NDArray[np.bool_], (counts - bin_n_ulims) == 0)
+     use_mask_ul = ~u_lims | (u_lims & bin_ulim_bool[visit_mask])
+
+     n_loops = 0
+
+     # recalculate uncertainty and median as long as no outliers left
+     while n_remaining_outlier > 0:
+         # make a mask of values to use
+         use_mask = ~outlier_mask & use_mask_ul & ~nan_mask  # type: ignore[operator]
+         n_points = np.bincount(visit_mask, weights=use_mask)
+         zero_points_mask = cast(npt.NDArray[np.bool_], n_points == 0)
+
+         # ------------------------- calculate median ------------------------- #
+         median = np.zeros_like(counts, dtype=float)
+         visits_at_least_one_point = np.unique(visit_mask[~zero_points_mask[visit_mask]])
+         visits_zero_points = np.unique(visit_mask[zero_points_mask[visit_mask]])
+         median[visits_at_least_one_point] = np.array(
+             [
+                 np.median(f[(visit_mask == i) & use_mask])
+                 for i in visits_at_least_one_point
+             ]
+         )
+         median[visits_zero_points] = np.nan
+
+         # median is NaN for visits with 0 detections (i.e. detections in one band and not the other);
+         # if median is NaN for any other visit raise an error
+         if np.any(np.isnan(median[n_points > 0])):
+             nan_indices = np.where(np.isnan(median))[0]
+             msg = ""
+             for inan_index in nan_indices:
+                 nanf = f[visit_mask == inan_index]
+                 msg += f"median is nan for {inan_index}th bin\n{nanf}\n\n"
+             raise ValueError(msg)
+
+         # --------------------- calculate uncertainty ---------------------- #
+         mean_deviation = np.bincount(
+             visit_mask[use_mask],
+             weights=(f[use_mask] - median[visit_mask[use_mask]]) ** 2,
+             minlength=len(counts),
+         )
+         one_points_mask = n_points <= 1
+         # calculate standard deviation
+         std = np.zeros_like(counts, dtype=float)
+         std[~one_points_mask] = (
+             np.sqrt(mean_deviation[~one_points_mask])
+             / (n_points[~one_points_mask] - 1)
+             * stats.t.interval(0.68, df=n_points[~one_points_mask] - 1)[1]
+             # for visits with a small number of detections we have to correct according to the t distribution
+         )
+         std[one_points_mask] = -np.inf
+
+         # calculate the propagated errors of the single exposure measurements
+         single_exp_measurement_errors = np.sqrt(
+             np.bincount(
+                 visit_mask[use_mask],
+                 weights=e[use_mask] ** 2,
+                 minlength=len(counts),
+             )
+         )
+         e_meas = np.zeros_like(std, dtype=float)
+         e_meas[~zero_points_mask] = (
+             single_exp_measurement_errors[n_points > 0] / n_points[n_points > 0]
+         )
+         e_meas[zero_points_mask] = np.nan
+         # take the maximum value of the measured single exposure errors and the standard deviation
+         u = np.maximum(std, e_meas)
+
+         # Estimate the spread of the flux.
+         # To be robust against outliers, do that with quantiles instead of std.
+         qs = np.zeros_like(counts, dtype=float)
+         qs[one_points_mask] = 1e-10
+         visits_at_least_two_point = np.unique(visit_mask[~one_points_mask[visit_mask]])
+         qs[visits_at_least_two_point] = np.array(
+             [
+                 np.quantile(
+                     abs(f[(visit_mask == i) & use_mask] - median[i]),
+                     outlier_quantile,
+                     method="interpolated_inverted_cdf",
+                 )
+                 for i in visits_at_least_two_point
+             ]
+         )
+
+         # --------------------- remove outliers in the bins ---------------------- #
+         remaining_outliers = (
+             abs(median[visit_mask] - f) > _outlier_threshold * qs[visit_mask]
+         ) & ~outlier_mask
+         outlier_mask |= remaining_outliers
+         n_remaining_outlier = sum(remaining_outliers) if remove_outliers else 0
+         # when outliers are not removed, n_remaining_outlier is set to 0, which exits the while loop
+
+         n_loops += 1
+
+         if n_loops > 20:
+             raise Exception(f"Outlier rejection did not converge after {n_loops} iterations!")
+
+     return median, u, bin_ulim_bool, outlier_mask, use_mask, n_points
+
+
+ def stack_visits(
+     lightcurve: pd.DataFrame,
+     outlier_threshold: float,
+     outlier_quantile: float,
+     clean_outliers: bool = True,
+ ):
+     """
+     Combine the data of one region in the sky by visits of the satellite.
+     The visits typically consist of some tens of observations; individual visits are separated by about
+     six months.
+     The flux for one visit is the median of the data, and the error on it is the maximum of the
+     t-value-corrected standard deviation and the propagated single-exposure errors.
+     Datapoints are flagged as outliers if they deviate from the visit median by more than
+     `outlier_threshold` times the quantile-based spread. These outliers are removed from the
+     calculation of the median and the error if `clean_outliers` is True.
+
+     :param lightcurve: the raw lightcurve
+     :type lightcurve: pandas.DataFrame
+     :param outlier_threshold: threshold for flagging outliers, in units of the quantile-based spread
+     :type outlier_threshold: float
+     :param outlier_quantile: quantile used to estimate the spread of the flux within a visit
+     :type outlier_quantile: float
+     :param clean_outliers: whether to remove flagged outliers when stacking
+     :type clean_outliers: bool
+     :return: the stacked lightcurve
+     :rtype: pandas.DataFrame
+     """
+
+     # ------------------------- create visit mask -------------------------- #
+     visit_map = get_visit_map(lightcurve.mjd)
+     counts = np.bincount(visit_map)
+
+     stacked_data: Dict[str, Any] = dict()
+
+     # ------------------------- calculate mean mjd -------------------------- #
+     stacked_data["mean_mjd"] = np.bincount(visit_map, weights=lightcurve.mjd) / counts
+
+     # ------------------------- loop through bands -------------------------- #
+     for b in ["w1", "w2"]:
+         # loop through magnitude and flux and save the respective datapoints
+
+         outlier_masks: Dict[str, Any] = dict()
+         use_masks = dict()
+         bin_ulim_bools = dict()
+
+         for lum_ext in [keys.FLUX_EXT, keys.MAG_EXT]:
+             f = lightcurve[f"{b}{lum_ext}"]
+             e = lightcurve[f"{b}{keys.ERROR_EXT}{lum_ext}"]
+
+             # we will flag outliers based on the flux only
+             remove_outliers = lum_ext == keys.FLUX_EXT and clean_outliers
+             outlier_mask = outlier_masks.get(keys.FLUX_EXT, None)
+
+             mean, u, bin_ulim_bool, outlier_mask, use_mask, n_points = calculate_epochs(
+                 f,
+                 e,
+                 visit_map,
+                 counts,
+                 remove_outliers=remove_outliers,
+                 outlier_mask=outlier_mask,
+                 outlier_quantile=outlier_quantile,
+                 outlier_threshold=outlier_threshold,
+             )
+             n_outliers = np.sum(outlier_mask)
+
+             if n_outliers > 0:
+                 logger.debug(
+                     f"removed {n_outliers} outliers by brightness for {b} {lum_ext}"
+                 )
+
+             stacked_data[f"{b}{keys.MEAN}{lum_ext}"] = mean
+             stacked_data[f"{b}{lum_ext}{keys.RMS}"] = u
+             stacked_data[f"{b}{lum_ext}{keys.UPPER_LIMIT}"] = bin_ulim_bool
+             stacked_data[f"{b}{lum_ext}{keys.NPOINTS}"] = n_points
+
+             outlier_masks[lum_ext] = outlier_mask
+             use_masks[lum_ext] = use_mask
+             bin_ulim_bools[lum_ext] = bin_ulim_bool
+
+         # ------- calculate the zeropoints per exposure ------- #
+         # this might look wrong since we use the flux mask on the magnitudes but it's right:
+         # for each flux measurement we need the corresponding magnitude to get the zeropoint
+         mags = lightcurve[f"{b}{keys.MAG_EXT}"]
+         inst_fluxes = lightcurve[f"{b}{keys.FLUX_EXT}"]
+         pos_m = inst_fluxes > 0  # select only positive fluxes, i.e. detections
+         zp_mask = pos_m & use_masks[keys.FLUX_EXT]
+
+         # calculate zero points
+         zps = np.zeros_like(inst_fluxes)
+         zps[zp_mask] = mags[zp_mask] + 2.5 * np.log10(inst_fluxes[zp_mask])
+         # find visits with no zeropoints
+         n_valid_zps = np.bincount(visit_map, weights=zp_mask)
+         at_least_one_valid_zp = n_valid_zps > 0
+         # calculate the median zeropoint for each visit
+         zps_median = np.zeros_like(n_valid_zps, dtype=float)
+         zps_median[n_valid_zps > 0] = np.array(
+             [
+                 np.median(zps[(visit_map == i) & zp_mask])
+                 for i in np.unique(visit_map[at_least_one_valid_zp[visit_map]])
+             ]
+         )
+         # if there are only non-detections then fall back to default zeropoint
+         zps_median[n_valid_zps == 0] = MAGNITUDE_ZEROPOINTS[b]
+         # if the visit only has upper limits then use the fall-back zeropoint
+         zps_median[bin_ulim_bools[keys.FLUX_EXT]] = MAGNITUDE_ZEROPOINTS[b]
+
+         # --------------- calculate flux density from instrument flux ---------------- #
+         # get the instrument flux [digital numbers], i.e. source count
+         inst_fluxes_e = lightcurve[f"{b}{keys.ERROR_EXT}{keys.FLUX_EXT}"]
+
+         # calculate the proportionality constant between flux density and source count
+         mag_zp = FLUX_ZEROPOINTS[b] * 1e3  # in mJy
+         flux_dens_const = mag_zp * 10 ** (-zps_median / 2.5)
+
+         # calculate flux densities from instrument counts
+         flux_densities = inst_fluxes * flux_dens_const[visit_map]
+         flux_densities_e = inst_fluxes_e * flux_dens_const[visit_map]
+
+         # bin flux densities
+         mean_fd, u_fd, ul_fd, outlier_mask_fd, use_mask_fd, n_points_fd = (
+             calculate_epochs(
+                 flux_densities,
+                 flux_densities_e,
+                 visit_map,
+                 counts,
+                 remove_outliers=False,
+                 outlier_mask=outlier_masks[keys.FLUX_EXT],
+                 outlier_threshold=outlier_threshold,
+                 outlier_quantile=outlier_quantile,
+             )
+         )
+         stacked_data[f"{b}{keys.MEAN}{keys.FLUX_DENSITY_EXT}"] = mean_fd
+         stacked_data[f"{b}{keys.FLUX_DENSITY_EXT}{keys.RMS}"] = u_fd
+         stacked_data[f"{b}{keys.FLUX_DENSITY_EXT}{keys.UPPER_LIMIT}"] = ul_fd
+         stacked_data[f"{b}{keys.FLUX_DENSITY_EXT}{keys.NPOINTS}"] = n_points_fd
+
+     return pd.DataFrame(stacked_data)
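
The stacking code above operates on a plain pandas DataFrame, so it can be exercised on its own, outside the AMPEL pipeline. A minimal usage sketch (not taken from the package documentation) follows; the column-name suffixes are read from timewise.process.keys so their exact string values need not be assumed, and get_visit_map is expected to split the datapoints into visits from the gaps in mjd.

    # Stack a synthetic two-visit lightcurve; all numbers are made up.
    import numpy as np
    import pandas as pd

    from timewise.process import keys
    from timewise.process.stacking import stack_visits

    rng = np.random.default_rng(42)
    mjd = np.concatenate([56000 + rng.uniform(0, 2, 20), 56180 + rng.uniform(0, 2, 20)])
    lc = pd.DataFrame({"mjd": mjd})
    for band in ["w1", "w2"]:
        lc[f"{band}{keys.FLUX_EXT}"] = rng.normal(100.0, 5.0, mjd.size)      # instrument flux
        lc[f"{band}{keys.ERROR_EXT}{keys.FLUX_EXT}"] = 5.0                   # flux error
        lc[f"{band}{keys.MAG_EXT}"] = rng.normal(15.0, 0.05, mjd.size)       # magnitude
        lc[f"{band}{keys.ERROR_EXT}{keys.MAG_EXT}"] = 0.05                   # magnitude error

    stacked = stack_visits(lc, outlier_threshold=20.0, outlier_quantile=0.75)
    print(stacked[["mean_mjd", f"w1{keys.MEAN}{keys.FLUX_DENSITY_EXT}"]])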
timewise/process/template.yml ADDED
@@ -0,0 +1,49 @@
+ channel:
+ - access:
+   - ZTF_PUB
+   name: wise
+   policy: []
+   version: 0
+ mongo:
+   prefix: MONGODB_NAME
+   reset: true
+ name: timewise
+ task:
+ - config:
+     compiler_opts: TiCompilerOptions
+     directives:
+     - channel: wise
+       ingest:
+         mux:
+           combine:
+           - state_t2:
+             - unit: T2StackVisits
+             unit: T1HDBSCAN
+           config:
+             original_id_key: ORIGINAL_ID_KEY
+             input_mongo_db_name: INPUT_MONGODB_NAME
+             plot: true
+           unit: TiMongoMuxer
+     iter_max: 1000000
+     shaper: TiDataPointShaper
+     supplier:
+       config:
+         dpid: hash
+         loader:
+           config:
+             timewise_config_file: TIMEWISE_CONFIG_PATH
+             stock_id_column_name: ORIGINAL_ID_KEY
+           unit: TimewiseFileLoader
+       unit: TimewiseAlertSupplier
+   multiplier: 1
+   title: t0
+   template:
+     live:
+     - resolve_run_time_aliases
+     - hash_t2_config
+   unit: AlertConsumer
+ - config:
+     log_profile: default
+   multiplier: 1
+   title: t2
+   unit: T2Worker
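
The uppercase tokens in this template (MONGODB_NAME, INPUT_MONGODB_NAME, ORIGINAL_ID_KEY, TIMEWISE_CONFIG_PATH) are placeholders resolved at run time; the actual wiring lives in timewise/process/interface.py, which this release also adds but which is not shown here. The sketch below only illustrates the idea of reading the packaged template and substituting the tokens before handing the job to AMPEL; the placeholder values are made up.

    from importlib import resources

    import yaml

    placeholders = {
        "MONGODB_NAME": "timewise_jobs",          # assumed output database name
        "INPUT_MONGODB_NAME": "timewise_input",   # assumed input database name
        "ORIGINAL_ID_KEY": "orig_id",             # default of Query.original_id_key
        "TIMEWISE_CONFIG_PATH": "timewise.yml",   # path to the timewise config file
    }

    raw = resources.files("timewise.process").joinpath("template.yml").read_text()
    for token, value in placeholders.items():
        raw = raw.replace(token, value)

    job = yaml.safe_load(raw)
    print(job["name"], [task["title"] for task in job["task"]])  # timewise ['t0', 't2']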
timewise/query/__init__.py ADDED
@@ -0,0 +1,6 @@
+ from pydantic import Field
+ from typing import Union, Annotated, TypeAlias
+ from .positional import PositionalQuery
+
+ # Discriminated union of all query types
+ QueryType: TypeAlias = Annotated[Union[PositionalQuery], Field(discriminator="type")]
timewise/query/base.py ADDED
@@ -0,0 +1,45 @@
+ import abc
+ from typing import ClassVar, List
+ from pydantic import BaseModel
+ from hashlib import sha256
+
+ from ..tables import TableType
+
+
+ class Query(abc.ABC, BaseModel):
+     type: str
+     upload_name: ClassVar[str] = "mine"
+
+     original_id_key: str = "orig_id"
+     constraints: List[str] = [
+         "nb < 2",
+         "na < 1",
+         "cc_flags like '00%'",
+         "qi_fact >= 1",
+         "saa_sep >= 5",
+         "moon_masked like '00%'",
+     ]
+     columns: List[str]
+     table: TableType
+
+     @property
+     @abc.abstractmethod
+     def input_columns(self) -> dict[str, str]: ...
+
+     @abc.abstractmethod
+     def build(self) -> str: ...
+
+     @property
+     def adql(self) -> str:
+         """The ADQL string for this query."""
+         return self.build()
+
+     @property
+     def hash(self) -> str:
+         return (
+             self.type
+             + "_"
+             + self.table.name
+             + "_"
+             + sha256(self.adql.encode()).hexdigest()
+         )
timewise/query/positional.py ADDED
@@ -0,0 +1,40 @@
+ import logging
+ from typing import Literal, Dict
+
+ from .base import Query
+
+ logger = logging.getLogger(__name__)
+
+
+ class PositionalQuery(Query):
+     type: Literal["positional"] = "positional"
+     radius_arcsec: float
+
+     @property
+     def input_columns(self) -> Dict[str, str]:
+         return {"ra": "float", "dec": "float", self.original_id_key: "int"}
+
+     def build(self) -> str:
+         logger.debug(f"constructing positional query for {self.table.name}")
+
+         q = "SELECT \n\t"
+         for k in self.columns:
+             q += f"{self.table.name}.{k}, "
+         q += f"\n\tmine.{self.original_id_key} \n"
+         q += f"FROM\n\tTAP_UPLOAD.{self.upload_name} AS mine \n"
+         q += f"RIGHT JOIN\n\t{self.table.name} \n"
+         q += "WHERE \n"
+         q += (
+             f"\tCONTAINS(POINT('J2000',{self.table.name}.{self.table.ra_column},{self.table.name}.{self.table.dec_column}),"
+             f"CIRCLE('J2000',mine.ra,mine.dec,{self.radius_arcsec / 3600:.18f}))=1 "
+         )
+
+         if len(self.constraints) > 0:
+             q += " AND (\n"
+             for c in self.constraints:
+                 q += f"\t{self.table.name}.{c} AND \n"
+             q = q.strip(" AND \n")
+             q += "\t)"
+
+         logger.debug(f"\n{q}")
+         return q
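
Putting the query classes together: the sketch below builds a cone search against the NEOWISE single-exposure table and inspects the generated ADQL as well as the hash that Query derives from it (useful as a cache key). The column selection and radius are illustrative, not defaults from the package.

    from timewise.query import PositionalQuery
    from timewise.tables.neowiser_p1bs_psd import neowiser_p1bs_psd

    query = PositionalQuery(
        radius_arcsec=6.0,
        columns=["mjd", "w1mpro", "w1sigmpro", "w2mpro", "w2sigmpro"],
        table=neowiser_p1bs_psd(),
    )

    print(query.input_columns)  # {'ra': 'float', 'dec': 'float', 'orig_id': 'int'}
    print(query.adql)           # SELECT ... FROM TAP_UPLOAD.mine RIGHT JOIN neowiser_p1bs_psd WHERE CONTAINS(...)=1 AND (...)
    print(query.hash)           # positional_neowiser_p1bs_psd_<sha256 of the ADQL>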
timewise/tables/__init__.py ADDED
@@ -0,0 +1,10 @@
+ from pydantic import Field
+ from typing import Union, Annotated
+
+ from .allwise_p3as_mep import allwise_p3as_mep
+ from .neowiser_p1bs_psd import neowiser_p1bs_psd
+
+
+ TableType = Annotated[
+     Union[allwise_p3as_mep, neowiser_p1bs_psd], Field(discriminator="name")
+ ]
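
Because both table configs carry a Literal "name" field, the discriminated union lets plain data (for instance a parsed YAML section) be resolved to the right table class. A minimal sketch, assuming pydantic v2's TypeAdapter:

    from pydantic import TypeAdapter

    from timewise.tables import TableType

    table = TypeAdapter(TableType).validate_python({"name": "neowiser_p1bs_psd"})
    print(type(table).__name__, table.ra_column, table.dec_column)  # neowiser_p1bs_psd ra dec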
timewise/tables/allwise_p3as_mep.py ADDED
@@ -0,0 +1,22 @@
+ from typing import Literal, ClassVar, Type, Dict
+ from .base import TableConfig
+
+
+ class allwise_p3as_mep(TableConfig):
+     name: Literal["allwise_p3as_mep"] = "allwise_p3as_mep"
+     columns_dtypes: ClassVar[Dict[str, Type]] = {
+         "ra": float,
+         "dec": float,
+         "mjd": float,
+         "cntr_mf": str,
+         "w1mpro_ep": float,
+         "w1sigmpro_ep": float,
+         "w2mpro_ep": float,
+         "w2sigmpro_ep": float,
+         "w1flux_ep": float,
+         "w1sigflux_ep": float,
+         "w2flux_ep": float,
+         "w2sigflux_ep": float,
+     }
+     ra_column: ClassVar[str] = "ra"
+     dec_column: ClassVar[str] = "dec"
timewise/tables/base.py ADDED
@@ -0,0 +1,9 @@
+ from typing import ClassVar, Dict, Type
+ from pydantic import BaseModel
+
+
+ class TableConfig(BaseModel):
+     name: str
+     columns_dtypes: ClassVar[Dict[str, Type]]
+     ra_column: ClassVar[str]
+     dec_column: ClassVar[str]
timewise/tables/neowiser_p1bs_psd.py ADDED
@@ -0,0 +1,22 @@
+ from typing import Literal, ClassVar, Dict, Type
+ from .base import TableConfig
+
+
+ class neowiser_p1bs_psd(TableConfig):
+     name: Literal["neowiser_p1bs_psd"] = "neowiser_p1bs_psd"
+     columns_dtypes: ClassVar[Dict[str, Type]] = {
+         "ra": float,
+         "dec": float,
+         "mjd": float,
+         "allwise_cntr": str,
+         "w1mpro": float,
+         "w1sigmpro": float,
+         "w2mpro": float,
+         "w2sigmpro": float,
+         "w1flux": float,
+         "w1sigflux": float,
+         "w2flux": float,
+         "w2sigflux": float,
+     }
+     ra_column: ClassVar[str] = "ra"
+     dec_column: ClassVar[str] = "dec"
timewise/types.py ADDED
@@ -0,0 +1,30 @@
+ from typing import TypedDict, NamedTuple
+
+ from timewise.query import QueryType
+
+
+ class TAPJobMeta(TypedDict):
+     url: str
+     status: str
+     submitted: str
+     last_checked: str
+     input_length: int
+     query: str
+     query_config: QueryType | dict
+     completed_at: str
+
+
+ class TaskID(NamedTuple):
+     """
+     Generic identifier for a unit of work.
+     Can be extended by Downloader/Processor as needed.
+     """
+
+     namespace: str  # e.g. "downloader", "processor"
+     key: str  # unique string, e.g. "chunk_0001_q0" or "mask_2025-01-01"
+
+     def __str__(self):
+         return f"{self.namespace}_{self.key}"
+
+
+ TYPE_MAP = {"float": float, "str": str, "int": int}
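
As the docstring notes, TaskID is only a namespaced label for a unit of work; a quick illustration (the values are hypothetical):

    from timewise.types import TaskID

    task = TaskID(namespace="downloader", key="chunk_0001_q0")
    print(str(task))                 # downloader_chunk_0001_q0
    print(task.namespace, task.key)  # downloader chunk_0001_q0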
timewise/util/backoff.py ADDED
@@ -0,0 +1,12 @@
+ import logging
+
+
+ logger = logging.getLogger(__name__)
+
+
+ def backoff_hndlr(details):
+     logger.info(
+         "Backing off {wait:0.1f} seconds after {tries} tries "
+         "calling function {target} with args {args} and kwargs "
+         "{kwargs}".format(**details)
+     )
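
backoff_hndlr only formats the details dict that the backoff package passes to on_backoff handlers, so it is presumably meant to be attached to retry decorators around network calls. A hedged sketch of that usage (the decorated function is made up):

    import backoff
    import requests

    from timewise.util.backoff import backoff_hndlr

    @backoff.on_exception(
        backoff.expo,               # exponential wait between retries
        requests.RequestException,  # retry on any requests-level failure
        max_tries=5,
        on_backoff=backoff_hndlr,   # log each retry via the timewise logger
    )
    def fetch(url: str) -> str:
        response = requests.get(url, timeout=60)
        response.raise_for_status()
        return response.text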
timewise/util/csv_utils.py ADDED
@@ -0,0 +1,12 @@
+ import numpy as np
+ from pathlib import Path
+
+
+ def get_n_rows(path: str | Path):
+     chunk = 1024 * 1024  # Process 1 MB at a time.
+     f = np.memmap(path)
+     num_newlines = sum(
+         np.sum(f[i : i + chunk] == ord("\n")) for i in range(0, len(f), chunk)
+     )
+     del f
+     return num_newlines
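
get_n_rows counts newline characters through a memory map, so for a header-bearing CSV the result includes the header line. A trivial usage sketch (the file name is hypothetical):

    from timewise.util.csv_utils import get_n_rows

    n_lines = get_n_rows("chunk_0000.csv")  # hypothetical download chunk
    n_data_rows = n_lines - 1               # subtract the CSV header line
    print(n_data_rows)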
timewise/util/error_threading.py ADDED
@@ -0,0 +1,70 @@
+ import sys
+ from queue import Queue
+ from threading import Thread, Event
+ from typing import Any, Optional, Callable, Mapping
+
+
+ class ErrorQueue(Queue):
+     """Queue subclass whose join() re-raises exceptions from worker threads."""
+
+     def __init__(self, stop_event: Event, *args, **kwargs):
+         super().__init__(*args, **kwargs)
+         self.error_queue: Queue = Queue()
+         self.stop_event = stop_event
+
+     def report_error(self, exc_info):
+         """Called by workers to push an exception into the error queue."""
+         self.error_queue.put(exc_info)
+         # Also decrement unfinished_tasks, so join() won't block forever
+         with self.all_tasks_done:
+             self.unfinished_tasks = max(0, self.unfinished_tasks - 1)
+             self.all_tasks_done.notify_all()
+
+     def join(self):
+         """Wait until all tasks are done, or raise if a worker failed."""
+         with self.all_tasks_done:
+             while self.unfinished_tasks:
+                 if not self.error_queue.empty():
+                     exc_info = self.error_queue.get()
+                     self.stop_event.set()
+                     raise exc_info[1].with_traceback(exc_info[2])
+                 self.all_tasks_done.wait()
+
+     def raise_errors(self):
+         """
+         Raise the first worker exception, if any.
+         """
+         if not self.error_queue.empty():
+             exc_info = self.error_queue.get()
+             raise exc_info[1].with_traceback(exc_info[2])
+
+
+ class ExceptionSafeThread(Thread):
+     """Thread subclass that reports uncaught exceptions to the ErrorQueue."""
+
+     def __init__(
+         self,
+         error_queue: Any,
+         *,
+         group: Optional[Any] = None,
+         target: Optional[Callable[..., Any]] = None,
+         name: Optional[str] = None,
+         args: tuple = (),
+         kwargs: Optional[Mapping[str, Any]] = None,
+         daemon: Optional[bool] = None,
+     ):
+         super().__init__(
+             group=group,
+             target=target,
+             name=name,
+             args=args,
+             kwargs=kwargs,
+             daemon=daemon,
+         )
+         self.error_queue = error_queue
+
+     def run(self):
+         try:
+             super().run()
+         except Exception:
+             self.error_queue.report_error(sys.exc_info())
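
A hedged sketch of the pattern these two classes support (the worker loop and task handler are made up): tasks go onto an ErrorQueue, workers run in ExceptionSafeThreads, and join() on the main thread re-raises the first uncaught worker exception instead of blocking forever.

    from threading import Event

    from timewise.util.error_threading import ErrorQueue, ExceptionSafeThread

    stop_event = Event()
    tasks = ErrorQueue(stop_event)

    def handle(item: int) -> None:
        print("processing", item)  # hypothetical task handler; may raise

    def worker() -> None:
        while not stop_event.is_set():
            item = tasks.get()
            if item is None:   # sentinel: shut this worker down
                tasks.task_done()
                return
            handle(item)
            tasks.task_done()  # reached only on success; a failure propagates to
                               # ExceptionSafeThread, which reports it to the queue

    workers = [ExceptionSafeThread(error_queue=tasks, target=worker) for _ in range(4)]
    for w in workers:
        w.start()
    for item in range(10):
        tasks.put(item)
    for _ in workers:
        tasks.put(None)

    tasks.join()          # re-raises the first worker exception, if any
    tasks.raise_errors()  # or check explicitly after a successful join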