timewise-0.5.4-py3-none-any.whl → timewise-1.0.0a2-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- timewise/__init__.py +1 -5
- timewise/backend/__init__.py +6 -0
- timewise/backend/base.py +36 -0
- timewise/backend/filesystem.py +80 -0
- timewise/chunking.py +50 -0
- timewise/cli.py +117 -11
- timewise/config.py +34 -0
- timewise/io/__init__.py +1 -0
- timewise/io/config.py +64 -0
- timewise/io/download.py +302 -0
- timewise/io/stable_tap.py +121 -0
- timewise/plot/__init__.py +3 -0
- timewise/plot/diagnostic.py +242 -0
- timewise/plot/lightcurve.py +112 -0
- timewise/plot/panstarrs.py +260 -0
- timewise/plot/sdss.py +109 -0
- timewise/process/__init__.py +2 -0
- timewise/process/config.py +34 -0
- timewise/process/interface.py +143 -0
- timewise/process/keys.py +10 -0
- timewise/process/stacking.py +322 -0
- timewise/process/template.yml +49 -0
- timewise/query/__init__.py +6 -0
- timewise/query/base.py +45 -0
- timewise/query/positional.py +40 -0
- timewise/tables/__init__.py +10 -0
- timewise/tables/allwise_p3as_mep.py +22 -0
- timewise/tables/base.py +9 -0
- timewise/tables/neowiser_p1bs_psd.py +22 -0
- timewise/types.py +30 -0
- timewise/util/backoff.py +12 -0
- timewise/util/csv_utils.py +12 -0
- timewise/util/error_threading.py +70 -0
- timewise/util/visits.py +33 -0
- timewise-1.0.0a2.dist-info/METADATA +205 -0
- timewise-1.0.0a2.dist-info/RECORD +39 -0
- timewise-1.0.0a2.dist-info/entry_points.txt +3 -0
- timewise/big_parent_sample.py +0 -106
- timewise/config_loader.py +0 -157
- timewise/general.py +0 -52
- timewise/parent_sample_base.py +0 -89
- timewise/point_source_utils.py +0 -68
- timewise/utils.py +0 -558
- timewise/wise_bigdata_desy_cluster.py +0 -1407
- timewise/wise_data_base.py +0 -2027
- timewise/wise_data_by_visit.py +0 -672
- timewise/wise_flux_conversion_correction.dat +0 -19
- timewise-0.5.4.dist-info/METADATA +0 -56
- timewise-0.5.4.dist-info/RECORD +0 -17
- timewise-0.5.4.dist-info/entry_points.txt +0 -3
- {timewise-0.5.4.dist-info → timewise-1.0.0a2.dist-info}/WHEEL +0 -0
- {timewise-0.5.4.dist-info → timewise-1.0.0a2.dist-info}/licenses/LICENSE +0 -0
timewise/process/stacking.py
ADDED
@@ -0,0 +1,322 @@
import logging
from typing import cast, Dict, Any

from scipy import stats
import numpy as np
from numpy import typing as npt
import pandas as pd

from ..util.visits import get_visit_map
from timewise.process import keys


logger = logging.getLogger(__name__)


# zero points come from https://wise2.ipac.caltech.edu/docs/release/allsky/expsup/sec4_4h.html#conv2flux
# published in Jarrett et al. (2011): https://ui.adsabs.harvard.edu/abs/2011ApJ...735..112J/abstract
MAGNITUDE_ZEROPOINTS: Dict[str, float] = {"w1": 20.752, "w2": 19.596}
# in Jy
FLUX_ZEROPOINTS = {"w1": 309.54, "w2": 171.787}


def calculate_epochs(
    f: pd.Series,
    e: pd.Series,
    visit_mask: npt.NDArray[np.int64],
    counts: npt.NDArray[np.int64],
    remove_outliers: bool,
    outlier_threshold: float,
    outlier_quantile: float,
    outlier_mask: npt.NDArray[np.bool_] | None = None,
) -> tuple[
    npt.NDArray[np.float64],
    npt.NDArray[np.float64],
    npt.NDArray[np.bool_],
    npt.NDArray[np.bool_],
    npt.NDArray[np.bool_],
    npt.NDArray[np.int64],
]:
    """
    Calculates the visits within a raw lightcurve.

    :param f: the fluxes
    :type f: np.array
    :param e: the flux errors
    :type e: np.array
    :param visit_mask: the visit mask
    :type visit_mask: np.array
    :param counts: the counts
    :type counts: np.array
    :param remove_outliers: whether to remove outliers
    :type remove_outliers: bool
    :param outlier_mask: the outlier mask
    :type outlier_mask: np.array
    :return: the median flux per visit, its uncertainty, the upper-limit flags, the outlier mask, the mask of used datapoints and the number of points per visit
    :rtype: tuple of np.array
    """

    if len(f) == 0:
        return (
            np.array([]),
            np.array([]),
            np.array([]),
            np.array([]),
            np.array([]),
            np.array([]),
        )

    u_lims = pd.isna(e)
    nan_mask = pd.isna(f)

    # --------------------- remove outliers in the bins ---------------------- #

    # if we do not want to clean outliers just set the threshold to infinity
    _outlier_threshold = np.inf if not remove_outliers else outlier_threshold

    # set up empty masks
    outlier_mask = cast(
        npt.NDArray[np.bool_],
        (np.array([False] * len(f)) if outlier_mask is None else outlier_mask),
    )
    median = np.full_like(counts, np.nan, dtype=float)
    u = np.full_like(counts, np.nan, dtype=float)
    use_mask = np.full_like(counts, False, dtype=bool)
    n_points = counts

    # set up dummy values for number of remaining outliers
    n_remaining_outlier = np.inf

    # --------------------- flag upper limits ---------------------- #
    bin_n_ulims: npt.NDArray[np.int64] = np.bincount(
        visit_mask, weights=u_lims, minlength=len(counts)
    )
    bin_ulim_bool = cast(npt.NDArray[np.bool_], (counts - bin_n_ulims) == 0)
    use_mask_ul = ~u_lims | (u_lims & bin_ulim_bool[visit_mask])

    n_loops = 0

    # recalculate uncertainty and median as long as no outliers left
    while n_remaining_outlier > 0:
        # make a mask of values to use
        use_mask = ~outlier_mask & use_mask_ul & ~nan_mask  # type: ignore[operator]
        n_points = np.bincount(visit_mask, weights=use_mask)
        zero_points_mask = cast(npt.NDArray[np.bool_], n_points == 0)

        # ------------------------- calculate median ------------------------- #
        median = np.zeros_like(counts, dtype=float)
        visits_at_least_one_point = np.unique(visit_mask[~zero_points_mask[visit_mask]])
        visits_zero_points = np.unique(visit_mask[zero_points_mask[visit_mask]])
        median[visits_at_least_one_point] = np.array(
            [
                np.median(f[(visit_mask == i) & use_mask])
                for i in visits_at_least_one_point
            ]
        )
        median[visits_zero_points] = np.nan

        # median is NaN for visits with 0 detections (i.e. detections in one band and not the other)
        # if median is NaN for other visits raise Error
        if np.any(np.isnan(median[n_points > 0])):
            nan_indices = np.where(np.isnan(median))[0]
            msg = ""
            for inan_index in nan_indices:
                nanf = f[visit_mask == inan_index]
                msg += f"median is nan for {inan_index}th bin\n{nanf}\n\n"
            raise ValueError(msg)

        # --------------------- calculate uncertainty ---------------------- #
        mean_deviation = np.bincount(
            visit_mask[use_mask],
            weights=(f[use_mask] - median[visit_mask[use_mask]]) ** 2,
            minlength=len(counts),
        )
        one_points_mask = n_points <= 1
        # calculate standard deviation
        std = np.zeros_like(counts, dtype=float)
        std[~one_points_mask] = (
            np.sqrt(mean_deviation[~one_points_mask])
            / (n_points[~one_points_mask] - 1)
            * stats.t.interval(0.68, df=n_points[~one_points_mask] - 1)[1]
            # for visits with small number of detections we have to correct according to the t distribution
        )
        std[one_points_mask] = -np.inf

        # calculate the propagated errors of the single exposure measurements
        single_exp_measurement_errors = np.sqrt(
            np.bincount(
                visit_mask[use_mask],
                weights=e[use_mask] ** 2,
                minlength=len(counts),
            )
        )
        e_meas = np.zeros_like(std, dtype=float)
        e_meas[~zero_points_mask] = (
            single_exp_measurement_errors[n_points > 0] / n_points[n_points > 0]
        )
        e_meas[zero_points_mask] = np.nan
        # take the maximum value of the measured single exposure errors and the standard deviation
        u = np.maximum(std, e_meas)

        # Estimate the spread of the flux.
        # To be robust against outliers, do that with quantiles instead of std
        qs = np.zeros_like(counts, dtype=float)
        qs[one_points_mask] = 1e-10
        visits_at_least_two_point = np.unique(visit_mask[~one_points_mask[visit_mask]])
        qs[visits_at_least_two_point] = np.array(
            [
                np.quantile(
                    abs(f[(visit_mask == i) & use_mask] - median[i]),
                    outlier_quantile,
                    method="interpolated_inverted_cdf",
                )
                for i in visits_at_least_two_point
            ]
        )

        # --------------------- remove outliers in the bins ---------------------- #
        remaining_outliers = (
            abs(median[visit_mask] - f) > _outlier_threshold * qs[visit_mask]
        ) & ~outlier_mask
        outlier_mask |= remaining_outliers
        n_remaining_outlier = sum(remaining_outliers) if remove_outliers else 0
        # setting remaining_outliers to 0 will exit the while loop

        n_loops += 1

        if n_loops > 20:
            raise Exception(f"{n_loops}!")

    return median, u, bin_ulim_bool, outlier_mask, use_mask, n_points


def stack_visits(
    lightcurve: pd.DataFrame,
    outlier_threshold: float,
    outlier_quantile: float,
    clean_outliers: bool = True,
):
    """
    Combine the data by visits of the satellite of one region in the sky.
    The visits typically consist of some tens of observations. The individual visits are separated by about
    six months.
    The mean flux for one visit is calculated by the weighted mean of the data.
    The error on that mean is calculated by the root-mean-squared and corrected by the t-value.
    Outliers per visit are identified if they are more than outlier_threshold times the flux spread away from
    the median and are removed from the calculation of the mean and the error if clean_outliers is True.

    :param lightcurve: the raw lightcurve
    :type lightcurve: pandas.DataFrame
    :return: the stacked lightcurve
    :rtype: pandas.DataFrame
    """

    # ------------------------- create visit mask -------------------------- #
    visit_map = get_visit_map(lightcurve.mjd)
    counts = np.bincount(visit_map)

    stacked_data: Dict[str, Any] = dict()

    # ------------------------- calculate mean mjd -------------------------- #
    stacked_data["mean_mjd"] = np.bincount(visit_map, weights=lightcurve.mjd) / counts

    # ------------------------- loop through bands -------------------------- #
    for b in ["w1", "w2"]:
        # loop through magnitude and flux and save the respective datapoints

        outlier_masks: Dict[str, Any] = dict()
        use_masks = dict()
        bin_ulim_bools = dict()

        for lum_ext in [keys.FLUX_EXT, keys.MAG_EXT]:
            f = lightcurve[f"{b}{lum_ext}"]
            e = lightcurve[f"{b}{keys.ERROR_EXT}{lum_ext}"]

            # we will flag outliers based on the flux only
            remove_outliers = lum_ext == keys.FLUX_EXT and clean_outliers
            outlier_mask = outlier_masks.get(keys.FLUX_EXT, None)

            mean, u, bin_ulim_bool, outlier_mask, use_mask, n_points = calculate_epochs(
                f,
                e,
                visit_map,
                counts,
                remove_outliers=remove_outliers,
                outlier_mask=outlier_mask,
                outlier_quantile=outlier_quantile,
                outlier_threshold=outlier_threshold,
            )
            n_outliers = np.sum(outlier_mask)

            if n_outliers > 0:
                logger.debug(
                    f"removed {n_outliers} outliers by brightness for {b} {lum_ext}"
                )

            stacked_data[f"{b}{keys.MEAN}{lum_ext}"] = mean
            stacked_data[f"{b}{lum_ext}{keys.RMS}"] = u
            stacked_data[f"{b}{lum_ext}{keys.UPPER_LIMIT}"] = bin_ulim_bool
            stacked_data[f"{b}{lum_ext}{keys.NPOINTS}"] = n_points

            outlier_masks[lum_ext] = outlier_mask
            use_masks[lum_ext] = use_mask
            bin_ulim_bools[lum_ext] = bin_ulim_bool

        # ------- calculate the zeropoints per exposure ------- #
        # this might look wrong since we use the flux mask on the magnitudes but it is right:
        # for each flux measurement we need the corresponding magnitude to get the zeropoint
        mags = lightcurve[f"{b}{keys.MAG_EXT}"]
        inst_fluxes = lightcurve[f"{b}{keys.FLUX_EXT}"]
        pos_m = inst_fluxes > 0  # select only positive fluxes, i.e. detections
        zp_mask = pos_m & use_masks[keys.FLUX_EXT]

        # calculate zero points
        zps = np.zeros_like(inst_fluxes)
        zps[zp_mask] = mags[zp_mask] + 2.5 * np.log10(inst_fluxes[zp_mask])
        # find visits with no zeropoints
        n_valid_zps = np.bincount(visit_map, weights=zp_mask)
        at_least_one_valid_zp = n_valid_zps > 0
        # calculate the median zeropoint for each visit
        zps_median = np.zeros_like(n_valid_zps, dtype=float)
        zps_median[n_valid_zps > 0] = np.array(
            [
                np.median(zps[(visit_map == i) & zp_mask])
                for i in np.unique(visit_map[at_least_one_valid_zp[visit_map]])
            ]
        )
        # if there are only non-detections then fall back to default zeropoint
        zps_median[n_valid_zps == 0] = MAGNITUDE_ZEROPOINTS[b]
        # if the visit only has upper limits then use the fall-back zeropoint
        zps_median[bin_ulim_bools[keys.FLUX_EXT]] = MAGNITUDE_ZEROPOINTS[b]

        # --------------- calculate flux density from instrument flux ---------------- #
        # get the instrument flux [digital numbers], i.e. source count
        inst_fluxes_e = lightcurve[f"{b}{keys.ERROR_EXT}{keys.FLUX_EXT}"]

        # calculate the proportionality constant between flux density and source count
        mag_zp = FLUX_ZEROPOINTS[b] * 1e3  # in mJy
        flux_dens_const = mag_zp * 10 ** (-zps_median / 2.5)

        # calculate flux densities from instrument counts
        flux_densities = inst_fluxes * flux_dens_const[visit_map]
        flux_densities_e = inst_fluxes_e * flux_dens_const[visit_map]

        # bin flux densities
        mean_fd, u_fd, ul_fd, outlier_mask_fd, use_mask_fd, n_points_fd = (
            calculate_epochs(
                flux_densities,
                flux_densities_e,
                visit_map,
                counts,
                remove_outliers=False,
                outlier_mask=outlier_masks[keys.FLUX_EXT],
                outlier_threshold=outlier_threshold,
                outlier_quantile=outlier_quantile,
            )
        )
        stacked_data[f"{b}{keys.MEAN}{keys.FLUX_DENSITY_EXT}"] = mean_fd
        stacked_data[f"{b}{keys.FLUX_DENSITY_EXT}{keys.RMS}"] = u_fd
        stacked_data[f"{b}{keys.FLUX_DENSITY_EXT}{keys.UPPER_LIMIT}"] = ul_fd
        stacked_data[f"{b}{keys.FLUX_DENSITY_EXT}{keys.NPOINTS}"] = n_points_fd

    return pd.DataFrame(stacked_data)
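For orientation, the flux-density conversion above is the standard WISE zeropoint relation. The short sketch below is not part of the package diff; it only evaluates the constants defined in this file to show what the per-visit conversion constant amounts to:

# Illustrative sketch (not package code): the Vega magnitude m in band b corresponds to
# a flux density S = FLUX_ZEROPOINTS[b] * 10 ** (-m / 2.5) in Jy, and flux_dens_const in
# stack_visits is this relation evaluated at the per-visit median zeropoint, in mJy per count.
FLUX_ZEROPOINTS = {"w1": 309.54, "w2": 171.787}  # Jy
MAGNITUDE_ZEROPOINTS = {"w1": 20.752, "w2": 19.596}

m_w1 = 15.0  # example W1 magnitude
flux_density_mjy = FLUX_ZEROPOINTS["w1"] * 1e3 * 10 ** (-m_w1 / 2.5)
print(f"W1 = {m_w1} mag -> {flux_density_mjy:.3f} mJy")  # ~0.310 mJy

# fall-back conversion constant used when a visit has no valid zeropoint:
default_const = FLUX_ZEROPOINTS["w1"] * 1e3 * 10 ** (-MAGNITUDE_ZEROPOINTS["w1"] / 2.5)
print(f"{default_const * 1e3:.2f} uJy per instrumental count in W1")  # ~1.55 uJy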
timewise/process/template.yml
ADDED
@@ -0,0 +1,49 @@
channel:
- access:
  - ZTF_PUB
  name: wise
  policy: []
  version: 0
mongo:
  prefix: MONGODB_NAME
  reset: true
name: timewise
task:
- config:
    compiler_opts: TiCompilerOptions
    directives:
    - channel: wise
      ingest:
        mux:
          combine:
          - state_t2:
            - unit: T2StackVisits
            unit: T1HDBSCAN
          config:
            original_id_key: ORIGINAL_ID_KEY
            input_mongo_db_name: INPUT_MONGODB_NAME
            plot: true
          unit: TiMongoMuxer
    iter_max: 1000000
    shaper: TiDataPointShaper
    supplier:
      config:
        dpid: hash
        loader:
          config:
            timewise_config_file: TIMEWISE_CONFIG_PATH
            stock_id_column_name: ORIGINAL_ID_KEY
          unit: TimewiseFileLoader
      unit: TimewiseAlertSupplier
  multiplier: 1
  title: t0
  template:
    live:
    - resolve_run_time_aliases
    - hash_t2_config
  unit: AlertConsumer
- config:
    log_profile: default
  multiplier: 1
  title: t2
  unit: T2Worker
timewise/query/base.py
ADDED
@@ -0,0 +1,45 @@
import abc
from typing import ClassVar, List
from pydantic import BaseModel
from hashlib import sha256

from ..tables import TableType


class Query(abc.ABC, BaseModel):
    type: str
    upload_name: ClassVar[str] = "mine"

    original_id_key: str = "orig_id"
    constraints: List[str] = [
        "nb < 2",
        "na < 1",
        "cc_flags like '00%'",
        "qi_fact >= 1",
        "saa_sep >= 5",
        "moon_masked like '00%'",
    ]
    columns: List[str]
    table: TableType

    @property
    @abc.abstractmethod
    def input_columns(self) -> dict[str, str]: ...

    @abc.abstractmethod
    def build(self) -> str: ...

    @property
    def adql(self) -> str:
        """ADQL string computed once per instance."""
        return self.build()

    @property
    def hash(self) -> str:
        return (
            self.type
            + "_"
            + self.table.name
            + "_"
            + sha256(self.adql.encode()).hexdigest()
        )
timewise/query/positional.py
ADDED
@@ -0,0 +1,40 @@
import logging
from typing import Literal, Dict

from .base import Query

logger = logging.getLogger(__name__)


class PositionalQuery(Query):
    type: Literal["positional"] = "positional"
    radius_arcsec: float

    @property
    def input_columns(self) -> Dict[str, str]:
        return {"ra": "float", "dec": "float", self.original_id_key: "int"}

    def build(self) -> str:
        logger.debug(f"constructing positional query for {self.table.name}")

        q = "SELECT \n\t"
        for k in self.columns:
            q += f"{self.table.name}.{k}, "
        q += f"\n\tmine.{self.original_id_key} \n"
        q += f"FROM\n\tTAP_UPLOAD.{self.upload_name} AS mine \n"
        q += f"RIGHT JOIN\n\t{self.table.name} \n"
        q += "WHERE \n"
        q += (
            f"\tCONTAINS(POINT('J2000',{self.table.name}.{self.table.ra_column},{self.table.name}.{self.table.dec_column}),"
            f"CIRCLE('J2000',mine.ra,mine.dec,{self.radius_arcsec / 3600:.18f}))=1 "
        )

        if len(self.constraints) > 0:
            q += " AND (\n"
            for c in self.constraints:
                q += f"\t{self.table.name}.{c} AND \n"
            q = q.strip(" AND \n")
            q += "\t)"

        logger.debug(f"\n{q}")
        return q
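As an illustration of how the query classes fit together (not part of the diff; the columns are taken from the neowiser_p1bs_psd table definition further below), a PositionalQuery can be instantiated and its ADQL and hash inspected like this:

# Illustrative sketch (not package code)
from timewise.query.positional import PositionalQuery
from timewise.tables import neowiser_p1bs_psd

query = PositionalQuery(
    radius_arcsec=6.0,
    columns=["mjd", "w1mpro", "w1sigmpro", "w2mpro", "w2sigmpro"],
    table=neowiser_p1bs_psd(),
)
print(query.adql)  # ADQL right-joining the upload table TAP_UPLOAD.mine to neowiser_p1bs_psd within radius_arcsec
print(query.hash)  # "positional_neowiser_p1bs_psd_<sha256 of the ADQL string>"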
timewise/tables/__init__.py
ADDED
@@ -0,0 +1,10 @@
from pydantic import Field
from typing import Union, Annotated

from .allwise_p3as_mep import allwise_p3as_mep
from .neowiser_p1bs_psd import neowiser_p1bs_psd


TableType = Annotated[
    Union[allwise_p3as_mep, neowiser_p1bs_psd], Field(discriminator="name")
]
timewise/tables/allwise_p3as_mep.py
ADDED
@@ -0,0 +1,22 @@
from typing import Literal, ClassVar, Type, Dict
from .base import TableConfig


class allwise_p3as_mep(TableConfig):
    name: Literal["allwise_p3as_mep"] = "allwise_p3as_mep"
    columns_dtypes: ClassVar[Dict[str, Type]] = {
        "ra": float,
        "dec": float,
        "mjd": float,
        "cntr_mf": str,
        "w1mpro_ep": float,
        "w1sigmpro_ep": float,
        "w2mpro_ep": float,
        "w2sigmpro_ep": float,
        "w1flux_ep": float,
        "w1sigflux_ep": float,
        "w2flux_ep": float,
        "w2sigflux_ep": float,
    }
    ra_column: ClassVar[str] = "ra"
    dec_column: ClassVar[str] = "dec"
timewise/tables/neowiser_p1bs_psd.py
ADDED
@@ -0,0 +1,22 @@
from typing import Literal, ClassVar, Dict, Type
from .base import TableConfig


class neowiser_p1bs_psd(TableConfig):
    name: Literal["neowiser_p1bs_psd"] = "neowiser_p1bs_psd"
    columns_dtypes: ClassVar[Dict[str, Type]] = {
        "ra": float,
        "dec": float,
        "mjd": float,
        "allwise_cntr": str,
        "w1mpro": float,
        "w1sigmpro": float,
        "w2mpro": float,
        "w2sigmpro": float,
        "w1flux": float,
        "w1sigflux": float,
        "w2flux": float,
        "w2sigflux": float,
    }
    ra_column: ClassVar[str] = "ra"
    dec_column: ClassVar[str] = "dec"
timewise/types.py
ADDED
@@ -0,0 +1,30 @@
from typing import TypedDict, NamedTuple

from timewise.query import QueryType


class TAPJobMeta(TypedDict):
    url: str
    status: str
    submitted: str
    last_checked: str
    input_length: int
    query: str
    query_config: QueryType | dict
    completed_at: str


class TaskID(NamedTuple):
    """
    Generic identifier for a unit of work.
    Can be extended by Downloader/Processor as needed.
    """

    namespace: str  # e.g. "downloader", "processor"
    key: str  # unique string, e.g. "chunk_0001_q0" or "mask_2025-01-01"

    def __str__(self):
        return f"{self.namespace}_{self.key}"


TYPE_MAP = {"float": float, "str": str, "int": int}
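A tiny usage sketch (not part of the diff; the key strings mirror the examples in the comments above):

from timewise.types import TaskID

tid = TaskID(namespace="downloader", key="chunk_0001_q0")
print(str(tid))  # -> "downloader_chunk_0001_q0"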
timewise/util/backoff.py
ADDED
@@ -0,0 +1,12 @@
import logging


logger = logging.getLogger(__name__)


def backoff_hndlr(details):
    logger.info(
        "Backing off {wait:0.1f} seconds after {tries} tries "
        "calling function {target} with args {args} and kwargs "
        "{kwargs}".format(**details)
    )
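This handler is shaped for the third-party backoff package's on_backoff hook. A minimal sketch of how it would plug in (not part of the diff; assumes the backoff and requests libraries, and the fetch function is hypothetical):

import backoff
import requests

from timewise.util.backoff import backoff_hndlr


@backoff.on_exception(
    backoff.expo, requests.RequestException, max_tries=5, on_backoff=backoff_hndlr
)
def fetch(url: str) -> bytes:
    # hypothetical helper: retry transient HTTP failures with exponential backoff,
    # logging each retry through backoff_hndlr
    resp = requests.get(url, timeout=30)
    resp.raise_for_status()
    return resp.content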
timewise/util/csv_utils.py
ADDED
@@ -0,0 +1,12 @@
import numpy as np
from pathlib import Path


def get_n_rows(path: str | Path):
    chunk = 1024 * 1024  # Process 1 MB at a time.
    f = np.memmap(path)
    num_newlines = sum(
        np.sum(f[i : i + chunk] == ord("\n")) for i in range(0, len(f), chunk)
    )
    del f
    return num_newlines
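Usage sketch (not part of the diff; the file name is hypothetical). Note that the function counts newline characters, so for a newline-terminated CSV the result includes the header line:

from pathlib import Path
from timewise.util.csv_utils import get_n_rows

csv_path = Path("chunk_0000.csv")  # hypothetical input file
print(get_n_rows(csv_path))  # number of lines, header included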
timewise/util/error_threading.py
ADDED
@@ -0,0 +1,70 @@
import sys
from queue import Queue
from threading import Thread, Event
from typing import Any, Optional, Callable, Mapping


class ErrorQueue(Queue):
    """Queue subclass whose join() re-raises exceptions from worker threads."""

    def __init__(self, stop_event: Event, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.error_queue: Queue = Queue()
        self.stop_event = stop_event

    def report_error(self, exc_info):
        """Called by workers to push an exception into the error queue."""
        self.error_queue.put(exc_info)
        # Also decrement unfinished_tasks, so join() won't block forever
        with self.all_tasks_done:
            self.unfinished_tasks = max(0, self.unfinished_tasks - 1)
            self.all_tasks_done.notify_all()

    def join(self):
        """Wait until all tasks are done, or raise if a worker failed."""
        with self.all_tasks_done:
            while self.unfinished_tasks:
                if not self.error_queue.empty():
                    exc_info = self.error_queue.get()
                    self.stop_event.set()
                    raise exc_info[1].with_traceback(exc_info[2])
                self.all_tasks_done.wait()

    def raise_errors(self):
        """
        Raise the first worker exception, if any.
        """
        if not self.error_queue.empty():
            exc_info = self.error_queue.get()
            raise exc_info[1].with_traceback(exc_info[2])


class ExceptionSafeThread(Thread):
    """Thread subclass that reports uncaught exceptions to the ErrorQueue."""

    def __init__(
        self,
        error_queue: Any,
        *,
        group: Optional[Any] = None,
        target: Optional[Callable[..., Any]] = None,
        name: Optional[str] = None,
        args: tuple = (),
        kwargs: Optional[Mapping[str, Any]] = None,
        daemon: Optional[bool] = None,
    ):
        super().__init__(
            group=group,
            target=target,
            name=name,
            args=args,
            kwargs=kwargs,
            daemon=daemon,
        )
        self.error_queue = error_queue

    def run(self):
        try:
            super().run()
        except Exception:
            self.error_queue.report_error(sys.exc_info())
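A sketch of the producer/consumer pattern these two classes support (not part of the diff; everything apart from ErrorQueue and ExceptionSafeThread is illustrative): workers call task_done() per item, uncaught worker exceptions are forwarded to the error queue, and join() re-raises them in the main thread.

import threading

from timewise.util.error_threading import ErrorQueue, ExceptionSafeThread


def process(item: int) -> None:
    # hypothetical per-item work; raising here propagates to tasks.join()
    if item < 0:
        raise ValueError(f"bad item {item}")


stop_event = threading.Event()
tasks = ErrorQueue(stop_event)


def worker() -> None:
    while not stop_event.is_set():
        item = tasks.get()
        if item is None:  # sentinel: shut this worker down
            tasks.task_done()
            break
        process(item)  # on exception, ExceptionSafeThread.run() reports it
        tasks.task_done()


threads = [
    ExceptionSafeThread(error_queue=tasks, target=worker, daemon=True) for _ in range(4)
]
for t in threads:
    t.start()

for item in range(10):
    tasks.put(item)
for _ in threads:
    tasks.put(None)  # one sentinel per worker

tasks.join()          # blocks until all items are done, or re-raises a worker error
tasks.raise_errors()  # explicit check for any remaining recorded errors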