timewise-0.5.4-py3-none-any.whl → timewise-1.0.0a2-py3-none-any.whl
This diff shows the content of publicly available package versions as released to their respective public registries, and is provided for informational purposes only.
- timewise/__init__.py +1 -5
- timewise/backend/__init__.py +6 -0
- timewise/backend/base.py +36 -0
- timewise/backend/filesystem.py +80 -0
- timewise/chunking.py +50 -0
- timewise/cli.py +117 -11
- timewise/config.py +34 -0
- timewise/io/__init__.py +1 -0
- timewise/io/config.py +64 -0
- timewise/io/download.py +302 -0
- timewise/io/stable_tap.py +121 -0
- timewise/plot/__init__.py +3 -0
- timewise/plot/diagnostic.py +242 -0
- timewise/plot/lightcurve.py +112 -0
- timewise/plot/panstarrs.py +260 -0
- timewise/plot/sdss.py +109 -0
- timewise/process/__init__.py +2 -0
- timewise/process/config.py +34 -0
- timewise/process/interface.py +143 -0
- timewise/process/keys.py +10 -0
- timewise/process/stacking.py +322 -0
- timewise/process/template.yml +49 -0
- timewise/query/__init__.py +6 -0
- timewise/query/base.py +45 -0
- timewise/query/positional.py +40 -0
- timewise/tables/__init__.py +10 -0
- timewise/tables/allwise_p3as_mep.py +22 -0
- timewise/tables/base.py +9 -0
- timewise/tables/neowiser_p1bs_psd.py +22 -0
- timewise/types.py +30 -0
- timewise/util/backoff.py +12 -0
- timewise/util/csv_utils.py +12 -0
- timewise/util/error_threading.py +70 -0
- timewise/util/visits.py +33 -0
- timewise-1.0.0a2.dist-info/METADATA +205 -0
- timewise-1.0.0a2.dist-info/RECORD +39 -0
- timewise-1.0.0a2.dist-info/entry_points.txt +3 -0
- timewise/big_parent_sample.py +0 -106
- timewise/config_loader.py +0 -157
- timewise/general.py +0 -52
- timewise/parent_sample_base.py +0 -89
- timewise/point_source_utils.py +0 -68
- timewise/utils.py +0 -558
- timewise/wise_bigdata_desy_cluster.py +0 -1407
- timewise/wise_data_base.py +0 -2027
- timewise/wise_data_by_visit.py +0 -672
- timewise/wise_flux_conversion_correction.dat +0 -19
- timewise-0.5.4.dist-info/METADATA +0 -56
- timewise-0.5.4.dist-info/RECORD +0 -17
- timewise-0.5.4.dist-info/entry_points.txt +0 -3
- {timewise-0.5.4.dist-info → timewise-1.0.0a2.dist-info}/WHEEL +0 -0
- {timewise-0.5.4.dist-info → timewise-1.0.0a2.dist-info}/licenses/LICENSE +0 -0
timewise/wise_data_base.py
DELETED
@@ -1,2027 +0,0 @@
|
|
|
1
|
-
import abc
|
|
2
|
-
import sys
|
|
3
|
-
|
|
4
|
-
import backoff
|
|
5
|
-
import copy
|
|
6
|
-
import json
|
|
7
|
-
import logging
|
|
8
|
-
import multiprocessing as mp
|
|
9
|
-
import os
|
|
10
|
-
import queue
|
|
11
|
-
import requests
|
|
12
|
-
import subprocess
|
|
13
|
-
import threading
|
|
14
|
-
import time
|
|
15
|
-
import tqdm
|
|
16
|
-
from pathlib import Path
|
|
17
|
-
|
|
18
|
-
import astropy.units as u
|
|
19
|
-
import matplotlib.pyplot as plt
|
|
20
|
-
import numpy as np
|
|
21
|
-
import pandas as pd
|
|
22
|
-
import pyvo as vo
|
|
23
|
-
from collections.abc import Sequence
|
|
24
|
-
from astropy import constants
|
|
25
|
-
from astropy.cosmology import Planck18
|
|
26
|
-
from astropy.io import ascii
|
|
27
|
-
from astropy.table import Table
|
|
28
|
-
from astropy.coordinates.angle_utilities import angular_separation, position_angle
|
|
29
|
-
from sklearn.cluster import HDBSCAN
|
|
30
|
-
|
|
31
|
-
from timewise.general import get_directories, logger_format, backoff_hndlr
|
|
32
|
-
from timewise.utils import StableAsyncTAPJob, StableTAPService
|
|
33
|
-
|
|
34
|
-
logger = logging.getLogger(__name__)
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
class WISEDataBase(abc.ABC):
|
|
38
|
-
"""
|
|
39
|
-
Base class for WISE Data
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
:param parent_sample_class: class for parent sample
|
|
43
|
-
:type parent_sample_class: `ParentSample` class
|
|
44
|
-
:param base_name: unique name to determine storage directories
|
|
45
|
-
:type base_name: str
|
|
46
|
-
:param min_sep: query region around source for positional query
|
|
47
|
-
:type min_sep: astropy.units.Quantity
|
|
48
|
-
:param whitelist_region: region around source where all datapoints are accepted in positional query
|
|
49
|
-
:type whitelist_region: astropy.units.Quantity
|
|
50
|
-
:param n_chunks: number of chunks in declination
|
|
51
|
-
:type n_chunks: int
|
|
52
|
-
:param parent_wise_source_id_key: key for the WISE source ID in the parent sample
|
|
53
|
-
:type parent_wise_source_id_key: str
|
|
54
|
-
:param parent_sample_wise_skysep_key: key for the angular separation to the WISE source in the parent sample
|
|
55
|
-
:type parent_sample_wise_skysep_key: str
|
|
56
|
-
:param parent_sample_default_entries: default entries for the parent sample
|
|
57
|
-
:type parent_sample_default_entries: dict
|
|
58
|
-
:param cache_dir: directory for cached data
|
|
59
|
-
:type cache_dir: Path
|
|
60
|
-
:param cluster_dir: directory for cluster data
|
|
61
|
-
:param cluster_log_dir: directory for cluster logs
|
|
62
|
-
:type cluster_dir: Path
|
|
63
|
-
:param output_dir: directory for output data
|
|
64
|
-
:type output_dir: Path
|
|
65
|
-
:param lightcurve_dir: directory for lightcurve data
|
|
66
|
-
:type lightcurve_dir: Path
|
|
67
|
-
:param plots_dir: directory for plots
|
|
68
|
-
:type plots_dir: Path
|
|
69
|
-
:param submit_file: file for cluster submission
|
|
70
|
-
:type submit_file: Path
|
|
71
|
-
:param tap_jobs: TAP job URLs
|
|
72
|
-
:type tap_jobs: list[str]
|
|
73
|
-
:param queue: queue for cluster jobs
|
|
74
|
-
:type queue: multiprocessing.Queue
|
|
75
|
-
:param clear_unbinned_photometry_when_binning: whether to clear unbinned photometry when binning
|
|
76
|
-
:type clear_unbinned_photometry_when_binning: bool
|
|
77
|
-
:param chunk_map: map of chunks
|
|
78
|
-
:type chunk_map: np.ndarray
|
|
79
|
-
:param service_url: URL of the TAP service
|
|
80
|
-
:type service_url: str
|
|
81
|
-
:param service: custom TAP service, making sure that the TAP jobs are stable
|
|
82
|
-
:type service: `timewise.utils.StableTAPService`
|
|
83
|
-
:param active_tap_phases: phases of TAP jobs that are still active
|
|
84
|
-
:type active_tap_phases: set
|
|
85
|
-
:param running_tap_phases: phases of TAP jobs that are still running
|
|
86
|
-
:type running_tap_phases: list
|
|
87
|
-
:param done_tap_phases: phases of TAP jobs that are done
|
|
88
|
-
:type done_tap_phases: set
|
|
89
|
-
:param query_types: query types
|
|
90
|
-
:type query_types: list
|
|
91
|
-
:param table_names: map nice and program table names of WISE data tables
|
|
92
|
-
:type table_names: pd.DataFrame
|
|
93
|
-
:param bands: WISE bands
|
|
94
|
-
:type bands: list
|
|
95
|
-
:param flux_key_ext: key extension for flux keys
|
|
96
|
-
:type flux_key_ext: str
|
|
97
|
-
:param flux_density_key_ext: key extension for flux density keys
|
|
98
|
-
:type flux_density_key_ext: str
|
|
99
|
-
:param mag_key_ext: key extension for magnitude keys
|
|
100
|
-
:type mag_key_ext: str
|
|
101
|
-
:param luminosity_key_ext: key extension for luminosity keys
|
|
102
|
-
:type luminosity_key_ext: str
|
|
103
|
-
:param error_key_ext: key extension for error keys
|
|
104
|
-
:type error_key_ext: str
|
|
105
|
-
:param band_plot_colors: plot colors for bands
|
|
106
|
-
:type band_plot_colors: dict
|
|
107
|
-
:param photometry_table_keymap:
|
|
108
|
-
keymap for photometry tables, listing the column names for flux, mag etc for the different WISE data tables
|
|
109
|
-
:type photometry_table_keymap: dict
|
|
110
|
-
:param magnitude_zeropoints: magnitude zeropoints from `here <https://wise2.ipac.caltech.edu/docs/release/allsky/expsup/sec4_4h.html#conv2flux>`_
|
|
111
|
-
:type magnitude_zeropoints: dict
|
|
112
|
-
:param constraints: constraints for TAP queries selecting good datapoints as explained in the explanatory supplements
|
|
113
|
-
:type constraints: list
|
|
114
|
-
:param parent_wise_source_id_key: key for the WISE source ID in the parent sample
|
|
115
|
-
:type parent_wise_source_id_key: str
|
|
116
|
-
:param parent_sample_wise_skysep_key: key for the angular separation to the WISE source in the parent sample
|
|
117
|
-
:type parent_sample_wise_skysep_key: str
|
|
118
|
-
"""
|
|
119
|
-
|
|
120
|
-
service_url = 'https://irsa.ipac.caltech.edu/TAP'
|
|
121
|
-
service = StableTAPService(service_url)
|
|
122
|
-
active_tap_phases = {"QUEUED", "EXECUTING", "RUN", "COMPLETED", "ERROR", "UNKNOWN"}
|
|
123
|
-
running_tap_phases = ["QUEUED", "EXECUTING", "RUN"]
|
|
124
|
-
done_tap_phases = {"COMPLETED", "ABORTED", "ERROR"}
|
|
125
|
-
|
|
126
|
-
query_types = ['positional', 'by_allwise_id']
|
|
127
|
-
|
|
128
|
-
table_names = pd.DataFrame([
|
|
129
|
-
('AllWISE Multiepoch Photometry Table', 'allwise_p3as_mep'),
|
|
130
|
-
('AllWISE Source Catalog', 'allwise_p3as_psd'),
|
|
131
|
-
('WISE 3-Band Cryo Single Exposure (L1b) Source Table', 'allsky_3band_p1bs_psd'),
|
|
132
|
-
('NEOWISE-R Single Exposure (L1b) Source Table', 'neowiser_p1bs_psd'),
|
|
133
|
-
('WISE All-Sky Source Catalog', 'allsky_4band_p3as_psd')
|
|
134
|
-
], columns=['nice_table_name', 'table_name'])
|
|
135
|
-
|
|
136
|
-
bands = ['W1', 'W2']
|
|
137
|
-
flux_key_ext = "_flux"
|
|
138
|
-
flux_density_key_ext = "_flux_density"
|
|
139
|
-
mag_key_ext = "_mag"
|
|
140
|
-
luminosity_key_ext = "_luminosity"
|
|
141
|
-
error_key_ext = "_error"
|
|
142
|
-
band_plot_colors = {'W1': 'r', 'W2': 'b'}
|
|
143
|
-
|
|
144
|
-
photometry_table_keymap = {
|
|
145
|
-
'AllWISE Multiepoch Photometry Table': {
|
|
146
|
-
'flux': {
|
|
147
|
-
'w1flux_ep': f'W1{flux_key_ext}',
|
|
148
|
-
'w1sigflux_ep': f'W1{flux_key_ext}{error_key_ext}',
|
|
149
|
-
'w2flux_ep': f'W2{flux_key_ext}',
|
|
150
|
-
'w2sigflux_ep': f'W2{flux_key_ext}{error_key_ext}'
|
|
151
|
-
},
|
|
152
|
-
'mag': {
|
|
153
|
-
'w1mpro_ep': f'W1{mag_key_ext}',
|
|
154
|
-
'w1sigmpro_ep': f'W1{mag_key_ext}{error_key_ext}',
|
|
155
|
-
'w2mpro_ep': f'W2{mag_key_ext}',
|
|
156
|
-
'w2sigmpro_ep': f'W2{mag_key_ext}{error_key_ext}'
|
|
157
|
-
}
|
|
158
|
-
},
|
|
159
|
-
'NEOWISE-R Single Exposure (L1b) Source Table': {
|
|
160
|
-
'flux': {
|
|
161
|
-
'w1flux': f'W1{flux_key_ext}',
|
|
162
|
-
'w1sigflux': f'W1{flux_key_ext}{error_key_ext}',
|
|
163
|
-
'w2flux': f'W2{flux_key_ext}',
|
|
164
|
-
'w2sigflux': f'W2{flux_key_ext}{error_key_ext}'
|
|
165
|
-
},
|
|
166
|
-
'mag': {
|
|
167
|
-
'w1mpro': f'W1{mag_key_ext}',
|
|
168
|
-
'w1sigmpro': f'W1{mag_key_ext}{error_key_ext}',
|
|
169
|
-
'w2mpro': f'W2{mag_key_ext}',
|
|
170
|
-
'w2sigmpro': f'W2{mag_key_ext}{error_key_ext}'
|
|
171
|
-
}
|
|
172
|
-
}
|
|
173
|
-
}
|
|
174
|
-
|
|
175
|
-
# zero points come from https://wise2.ipac.caltech.edu/docs/release/allsky/expsup/sec4_4h.html#conv2flux
|
|
176
|
-
# published in Jarret et al. (2011): https://ui.adsabs.harvard.edu/abs/2011ApJ...735..112J/abstract
|
|
177
|
-
magnitude_zeropoints = {
|
|
178
|
-
'F_nu': {
|
|
179
|
-
'W1': 309.54 * u.Jy,
|
|
180
|
-
'W2': 171.787 * u.Jy
|
|
181
|
-
},
|
|
182
|
-
'Fstar_nu': {
|
|
183
|
-
'W1': 306.682 * u.Jy,
|
|
184
|
-
'W2': 170.663 * u.Jy
|
|
185
|
-
},
|
|
186
|
-
'Mag': {
|
|
187
|
-
'W1': 20.752,
|
|
188
|
-
'W2': 19.596
|
|
189
|
-
}
|
|
190
|
-
}
|
|
191
|
-
|
|
192
|
-
aperture_corrections = {
|
|
193
|
-
'W1': 0.222,
|
|
194
|
-
'W2': 0.280
|
|
195
|
-
}
|
|
196
|
-
|
|
197
|
-
_this_dir = Path(__file__).absolute().parent
|
|
198
|
-
magnitude_zeropoints_corrections = ascii.read(
|
|
199
|
-
_this_dir / 'wise_flux_conversion_correction.dat',
|
|
200
|
-
delimiter='\t'
|
|
201
|
-
).to_pandas()
|
|
202
|
-
|
|
203
|
-
band_wavelengths = {
|
|
204
|
-
'W1': 3.368 * 1e-6 * u.m,
|
|
205
|
-
'W2': 4.618 * 1e-6 * u.m
|
|
206
|
-
}
|
|
207
|
-
|
|
208
|
-
constraints = [
|
|
209
|
-
"nb < 2",
|
|
210
|
-
"na < 1",
|
|
211
|
-
"cc_flags like '00%'",
|
|
212
|
-
"qi_fact >= 1",
|
|
213
|
-
"saa_sep >= 5",
|
|
214
|
-
"moon_masked like '00%'"
|
|
215
|
-
]
|
|
216
|
-
|
|
217
|
-
parent_wise_source_id_key = 'AllWISE_id'
|
|
218
|
-
parent_sample_wise_skysep_key = 'sep_to_WISE_source'
|
|
219
|
-
|
|
220
|
-
def __init__(self,
|
|
221
|
-
base_name: str,
|
|
222
|
-
parent_sample_class,
|
|
223
|
-
min_sep_arcsec,
|
|
224
|
-
n_chunks):
|
|
225
|
-
"""
|
|
226
|
-
Base class for WISE Data
|
|
227
|
-
|
|
228
|
-
:param base_name: unique name to determine storage directories
|
|
229
|
-
:type base_name: str
|
|
230
|
-
:param parent_sample_class: class for parent sample
|
|
231
|
-
:type parent_sample_class: `ParentSample` class
|
|
232
|
-
:param min_sep_arcsec: query region around source for positional query
|
|
233
|
-
:type min_sep_arcsec: float
|
|
234
|
-
:param n_chunks: number of chunks in declination
|
|
235
|
-
:type n_chunks: int
|
|
236
|
-
:param tap_url_cache_name: TAP job URLs are stored here to be able to resume them
|
|
237
|
-
:type tap_url_cache_name: str
|
|
238
|
-
"""
|
|
239
|
-
|
|
240
|
-
#######################################################################################
|
|
241
|
-
# START SET-UP #
|
|
242
|
-
#########################
|
|
243
|
-
|
|
244
|
-
self.parent_sample_class = parent_sample_class
|
|
245
|
-
self.base_name = base_name
|
|
246
|
-
self.min_sep = min_sep_arcsec * u.arcsec
|
|
247
|
-
self.whitelist_region = 1 * u.arcsec
|
|
248
|
-
self._n_chunks = n_chunks
|
|
249
|
-
|
|
250
|
-
# --------------------------- vvvv set up parent sample vvvv --------------------------- #
|
|
251
|
-
self.parent_wise_source_id_key = WISEDataBase.parent_wise_source_id_key
|
|
252
|
-
self.parent_sample_wise_skysep_key = WISEDataBase.parent_sample_wise_skysep_key
|
|
253
|
-
self.parent_sample_default_entries = {
|
|
254
|
-
self.parent_wise_source_id_key: "",
|
|
255
|
-
self.parent_sample_wise_skysep_key: np.inf
|
|
256
|
-
}
|
|
257
|
-
|
|
258
|
-
self._parent_sample = None
|
|
259
|
-
self._no_allwise_source = None
|
|
260
|
-
# --------------------------- ^^^^ set up parent sample ^^^^ --------------------------- #
|
|
261
|
-
|
|
262
|
-
# set up directories
|
|
263
|
-
directories = get_directories() # type: dict[str, Path]
|
|
264
|
-
self.cache_dir = directories['cache_dir'] / base_name
|
|
265
|
-
self._cache_photometry_dir = self.cache_dir / "photometry"
|
|
266
|
-
self.cluster_dir = self.cache_dir / 'cluster'
|
|
267
|
-
self.cluster_log_dir = self.cluster_dir / 'logs'
|
|
268
|
-
self.output_dir = directories["output_dir"] / base_name
|
|
269
|
-
self.lightcurve_dir = self.output_dir / "lightcurves"
|
|
270
|
-
self.plots_dir = directories["plots_dir"] / base_name
|
|
271
|
-
self.tap_jobs_cache_dir = self.cache_dir / 'tap_cache'
|
|
272
|
-
|
|
273
|
-
for d in [self.cache_dir, self._cache_photometry_dir, self.cluster_dir, self.cluster_log_dir,
|
|
274
|
-
self.output_dir, self.lightcurve_dir, self.plots_dir]:
|
|
275
|
-
d.mkdir(parents=True, exist_ok=True)
|
|
276
|
-
|
|
277
|
-
file_handler = logging.FileHandler(filename=self.cache_dir / 'log.err', mode="a")
|
|
278
|
-
file_handler.setLevel("WARNING")
|
|
279
|
-
file_handler.setFormatter(logger_format)
|
|
280
|
-
logger.addHandler(file_handler)
|
|
281
|
-
|
|
282
|
-
self.submit_file = self.cluster_dir / 'submit.txt'
|
|
283
|
-
|
|
284
|
-
# set up result attributes
|
|
285
|
-
self._split_chunk_key = '__chunk'
|
|
286
|
-
self._cached_raw_photometry_prefix = 'raw_photometry'
|
|
287
|
-
self.tap_jobs = None
|
|
288
|
-
self.queue = None
|
|
289
|
-
self.clear_unbinned_photometry_when_binning = False
|
|
290
|
-
self._cached_final_products = {
|
|
291
|
-
'lightcurves': dict(),
|
|
292
|
-
'metadata': dict()
|
|
293
|
-
}
|
|
294
|
-
|
|
295
|
-
self._tap_wise_id_key = 'wise_id'
|
|
296
|
-
self._tap_orig_id_key = 'orig_id'
|
|
297
|
-
|
|
298
|
-
# Any class that wants to implement cluster operation has to use this variable
|
|
299
|
-
# It specifies which chunks will be processed by which jobs
|
|
300
|
-
self.cluster_jobID_map = None
|
|
301
|
-
|
|
302
|
-
#########################
|
|
303
|
-
# END SET-UP #
|
|
304
|
-
#######################################################################################
|
|
305
|
-
|
|
306
|
-
#######################################################################################
|
|
307
|
-
# START CHUNK MASK #
|
|
308
|
-
#########################
|
|
309
|
-
|
|
310
|
-
self._chunk_map = None
|
|
311
|
-
self.n_chunks = self._n_chunks
|
|
312
|
-
|
|
313
|
-
@property
|
|
314
|
-
def parent_sample(self):
|
|
315
|
-
if self.parent_sample_class is None:
|
|
316
|
-
raise ValueError("Can not load ParentSample because no parent sample class was given!")
|
|
317
|
-
|
|
318
|
-
if self._parent_sample is None:
|
|
319
|
-
self._parent_sample = self.parent_sample_class()
|
|
320
|
-
for k, default in self.parent_sample_default_entries.items():
|
|
321
|
-
if k not in self._parent_sample.df.columns:
|
|
322
|
-
self.parent_sample.df[k] = default
|
|
323
|
-
|
|
324
|
-
self._no_allwise_source = self._parent_sample.df[self.parent_sample_wise_skysep_key] == np.inf
|
|
325
|
-
|
|
326
|
-
return self._parent_sample
|
|
327
|
-
|
|
328
|
-
@property
|
|
329
|
-
def n_chunks(self):
|
|
330
|
-
return self._n_chunks
|
|
331
|
-
|
|
332
|
-
@n_chunks.setter
|
|
333
|
-
def n_chunks(self, value):
|
|
334
|
-
"""Sets the private variable _n_chunks"""
|
|
335
|
-
self._n_chunks = value
|
|
336
|
-
# if a new value is set, set _chunk_map to None to trigger re-evaluation
|
|
337
|
-
if self._n_chunks != value:
|
|
338
|
-
self._chunk_map = None
|
|
339
|
-
|
|
340
|
-
@property
|
|
341
|
-
def chunk_map(self):
|
|
342
|
-
|
|
343
|
-
if self.parent_sample_class is None:
|
|
344
|
-
raise ValueError("No parent sample given! Can not calculate chunk map!")
|
|
345
|
-
|
|
346
|
-
if self._chunk_map is None:
|
|
347
|
-
self._chunk_map = np.zeros(len(self.parent_sample.df))
|
|
348
|
-
n_in_chunk = int(round(len(self._chunk_map) / self._n_chunks))
|
|
349
|
-
for i in range(self._n_chunks):
|
|
350
|
-
start_ind = i * n_in_chunk
|
|
351
|
-
end_ind = start_ind + n_in_chunk
|
|
352
|
-
self._chunk_map[start_ind:end_ind] = int(i)
|
|
353
|
-
|
|
354
|
-
return self._chunk_map
|
|
355
|
-
|
|
356
|
-
def _get_chunk_number(self, wise_id=None, parent_sample_index=None):
|
|
357
|
-
if isinstance(wise_id, type(None)) and isinstance(parent_sample_index, type(None)):
|
|
358
|
-
raise Exception
|
|
359
|
-
|
|
360
|
-
if not isinstance(wise_id, type(None)):
|
|
361
|
-
parent_sample_index = np.where(self.parent_sample.df[self.parent_wise_source_id_key] == int(wise_id))[0]
|
|
362
|
-
logger.debug(f"wise ID {wise_id} at index {parent_sample_index}")
|
|
363
|
-
|
|
364
|
-
loc = self.parent_sample.df.loc[int(parent_sample_index)].name
|
|
365
|
-
iloc = self.parent_sample.df.index.get_loc(loc)
|
|
366
|
-
_chunk_number = int(self.chunk_map[int(iloc)])
|
|
367
|
-
logger.debug(f"chunk number is {_chunk_number} for {parent_sample_index}")
|
|
368
|
-
return _chunk_number
|
|
369
|
-
|
|
370
|
-
#########################
|
|
371
|
-
# END CHUNK MASK #
|
|
372
|
-
#######################################################################################
|
|
373
|
-
|
|
374
|
-
def _start_data_product(self, parent_sample_indices):
|
|
375
|
-
|
|
376
|
-
# get all rows in this chunk and columns, specified in the keymap
|
|
377
|
-
parent_sample_sel = self.parent_sample.df.loc[
|
|
378
|
-
parent_sample_indices,
|
|
379
|
-
list(self.parent_sample.default_keymap.values())
|
|
380
|
-
]
|
|
381
|
-
|
|
382
|
-
# invert the keymap to rename the columns
|
|
383
|
-
inverse_keymap = {v: k for k, v in self.parent_sample.default_keymap.items()}
|
|
384
|
-
parent_sample_sel.rename(columns=inverse_keymap, inplace=True)
|
|
385
|
-
parent_sample_sel.set_index(parent_sample_sel.index.astype(str), inplace=True)
|
|
386
|
-
|
|
387
|
-
# save to data_product
|
|
388
|
-
data_product = parent_sample_sel.to_dict(orient="index")
|
|
389
|
-
|
|
390
|
-
return data_product
|
|
391
|
-
|
|
392
|
-
@staticmethod
|
|
393
|
-
def get_db_name(table_name, nice=False):
|
|
394
|
-
"""
|
|
395
|
-
Get the right table name
|
|
396
|
-
|
|
397
|
-
:param table_name: str, table name
|
|
398
|
-
:param nice: bool, whether to get the nice table name
|
|
399
|
-
:return: str
|
|
400
|
-
"""
|
|
401
|
-
source_column = 'nice_table_name' if not nice else 'table_name'
|
|
402
|
-
target_column = 'table_name' if not nice else 'nice_table_name'
|
|
403
|
-
|
|
404
|
-
m = WISEDataBase.table_names[source_column] == table_name
|
|
405
|
-
if np.any(m):
|
|
406
|
-
table_name = WISEDataBase.table_names[target_column][m].iloc[0]
|
|
407
|
-
else:
|
|
408
|
-
logger.debug(f"{table_name} not in Table. Assuming it is the right name already.")
|
|
409
|
-
return table_name
|
|
410
|
-
|
|
411
|
-
###########################################################################################################
|
|
412
|
-
# START MATCH PARENT SAMPLE TO WISE SOURCES #
|
|
413
|
-
#####################################################
|
|
414
|
-
|
|
415
|
-
def match_all_chunks(self,
|
|
416
|
-
table_name="AllWISE Source Catalog",
|
|
417
|
-
save_when_done=True,
|
|
418
|
-
additional_columns=None):
|
|
419
|
-
"""
|
|
420
|
-
Match the parent sample to a WISE catalogue and add the result to the parent sample.
|
|
421
|
-
|
|
422
|
-
:param table_name: The name of the table you want to match against
|
|
423
|
-
:type table_name: str
|
|
424
|
-
:param save_when_done: save the parent sample dataframe with the matching info when done
|
|
425
|
-
:type save_when_done: bool
|
|
426
|
-
:param additional_columns: optional, additional columns to add to the matching table
|
|
427
|
-
:type additional_columns: list
|
|
428
|
-
:return:
|
|
429
|
-
"""
|
|
430
|
-
|
|
431
|
-
logger.info(f'matching all chunks to {table_name}')
|
|
432
|
-
|
|
433
|
-
if additional_columns is None:
|
|
434
|
-
additional_columns = []
|
|
435
|
-
|
|
436
|
-
for i in range(self.n_chunks):
|
|
437
|
-
self._match_single_chunk(i, table_name, additional_columns)
|
|
438
|
-
|
|
439
|
-
_dupe_mask = self._get_dubplicated_wise_id_mask()
|
|
440
|
-
|
|
441
|
-
self._no_allwise_source = self.parent_sample.df[self.parent_sample_wise_skysep_key] == np.inf
|
|
442
|
-
if np.any(self._no_allwise_source):
|
|
443
|
-
logger.warning(f"{len(self.parent_sample.df[self._no_allwise_source])} of {len(self.parent_sample.df)} "
|
|
444
|
-
f"entries without match!")
|
|
445
|
-
|
|
446
|
-
if np.any(self._get_dubplicated_wise_id_mask()):
|
|
447
|
-
logger.warning(self.parent_sample.df[self._get_dubplicated_wise_id_mask()])
|
|
448
|
-
|
|
449
|
-
if save_when_done:
|
|
450
|
-
self.parent_sample.save_local()
|
|
451
|
-
|
|
452
|
-
def _run_gator_match(self, in_file, out_file, table_name,
|
|
453
|
-
one_to_one=True, minsep_arcsec=None, additional_keys='', silent=False, constraints=None):
|
|
454
|
-
_one_to_one = '-F one_to_one=1 ' if one_to_one else ''
|
|
455
|
-
_minsep_arcsec = self.min_sep.to("arcsec").value if minsep_arcsec is None else minsep_arcsec
|
|
456
|
-
_db_name = self.get_db_name(table_name)
|
|
457
|
-
_silent = "-s " if silent else ""
|
|
458
|
-
_constraints = '-F constraints="' + " and ".join(constraints).replace('%', '%%') + '" ' if constraints else ""
|
|
459
|
-
|
|
460
|
-
if _db_name == "allwise_p3as_mep":
|
|
461
|
-
_sigpos = _source_id = _des = ""
|
|
462
|
-
_id_key = "cntr_mf,cntr"
|
|
463
|
-
elif _db_name == "allsky_4band_p3as_psd":
|
|
464
|
-
_sigpos = 'sigra,sigdec,'
|
|
465
|
-
_source_id = "source_id,"
|
|
466
|
-
_des = 'designation,'
|
|
467
|
-
_id_key = 'cntr'
|
|
468
|
-
else:
|
|
469
|
-
_sigpos = 'sigra,sigdec,'
|
|
470
|
-
_source_id = "source_id,"
|
|
471
|
-
_des = 'designation,' if 'allwise' in _db_name else ''
|
|
472
|
-
_id_key = 'cntr' if 'allwise' in _db_name else 'allwise_cntr,cntr'
|
|
473
|
-
|
|
474
|
-
submit_cmd = f'curl ' \
|
|
475
|
-
f'--connect-timeout 3600 ' \
|
|
476
|
-
f'--max-time 3600 ' \
|
|
477
|
-
f'{_silent}' \
|
|
478
|
-
f'-o {out_file} ' \
|
|
479
|
-
f'-F filename=@{in_file} ' \
|
|
480
|
-
f'-F catalog={_db_name} ' \
|
|
481
|
-
f'-F spatial=Upload ' \
|
|
482
|
-
f'-F uradius={_minsep_arcsec} ' \
|
|
483
|
-
f'-F outfmt=1 ' \
|
|
484
|
-
f'{_one_to_one}' \
|
|
485
|
-
f'{_constraints}' \
|
|
486
|
-
f'-F selcols={_des}{_source_id}ra,dec,{_sigpos}{_id_key}{additional_keys} ' \
|
|
487
|
-
f'"https://irsa.ipac.caltech.edu/cgi-bin/Gator/nph-query"'
|
|
488
|
-
|
|
489
|
-
logger.debug(f'submit command: {submit_cmd}')
|
|
490
|
-
N_tries = 10
|
|
491
|
-
while True:
|
|
492
|
-
try:
|
|
493
|
-
process = subprocess.Popen(submit_cmd, stdout=subprocess.PIPE, shell=True)
|
|
494
|
-
break
|
|
495
|
-
except OSError as e:
|
|
496
|
-
if N_tries < 1:
|
|
497
|
-
raise OSError(e)
|
|
498
|
-
logger.warning(f"{e}, retry")
|
|
499
|
-
N_tries -= 1
|
|
500
|
-
|
|
501
|
-
out_msg, err_msg = process.communicate()
|
|
502
|
-
if out_msg:
|
|
503
|
-
logger.info(out_msg.decode())
|
|
504
|
-
if err_msg:
|
|
505
|
-
logger.error(err_msg.decode())
|
|
506
|
-
process.terminate()
|
|
507
|
-
if Path(out_file).is_file():
|
|
508
|
-
return 1
|
|
509
|
-
else:
|
|
510
|
-
return 0
|
|
511
|
-
|
|
512
|
-
def _match_to_wise(
|
|
513
|
-
self,
|
|
514
|
-
in_filename,
|
|
515
|
-
out_filename,
|
|
516
|
-
mask,
|
|
517
|
-
table_name,
|
|
518
|
-
N_retries=10,
|
|
519
|
-
**gator_kwargs
|
|
520
|
-
):
|
|
521
|
-
ra_key = self.parent_sample.default_keymap["ra"]
|
|
522
|
-
dec_key = self.parent_sample.default_keymap["dec"]
|
|
523
|
-
selected_parent_sample = copy.copy(self.parent_sample.df.loc[mask, [ra_key, dec_key]])
|
|
524
|
-
selected_parent_sample.rename(columns={dec_key: 'dec', ra_key: 'ra'}, inplace=True)
|
|
525
|
-
logger.debug(f"{len(selected_parent_sample)} selected")
|
|
526
|
-
|
|
527
|
-
# write to IPAC formatted table
|
|
528
|
-
_selected_parent_sample_astrotab = Table.from_pandas(selected_parent_sample, index=True)
|
|
529
|
-
logger.debug(f"writing {len(_selected_parent_sample_astrotab)} "
|
|
530
|
-
f"objects to {in_filename}")
|
|
531
|
-
_selected_parent_sample_astrotab.write(in_filename, format='ipac', overwrite=True)
|
|
532
|
-
_done = False
|
|
533
|
-
|
|
534
|
-
while True:
|
|
535
|
-
if N_retries == 0:
|
|
536
|
-
raise RuntimeError('Failed with retries')
|
|
537
|
-
|
|
538
|
-
try:
|
|
539
|
-
# use Gator to query IRSA
|
|
540
|
-
success = self._run_gator_match(in_filename, out_filename, table_name, **gator_kwargs)
|
|
541
|
-
|
|
542
|
-
if not success:
|
|
543
|
-
# if not successful try again
|
|
544
|
-
logger.warning("no success, try again")
|
|
545
|
-
continue
|
|
546
|
-
|
|
547
|
-
# load the result file
|
|
548
|
-
gator_res = Table.read(out_filename, format='ipac')
|
|
549
|
-
logger.debug(f"found {len(gator_res)} results")
|
|
550
|
-
return gator_res
|
|
551
|
-
|
|
552
|
-
except ValueError:
|
|
553
|
-
# this will happen if the gator match returns an output containing the error message
|
|
554
|
-
# read and display error message, then try again
|
|
555
|
-
with open(out_filename, 'r') as f:
|
|
556
|
-
err_msg = f.read()
|
|
557
|
-
logger.warning(f"{err_msg}: try again")
|
|
558
|
-
|
|
559
|
-
finally:
|
|
560
|
-
N_retries -= 1
|
|
561
|
-
|
|
562
|
-
def _match_single_chunk(self, chunk_number, table_name, additional_columns=None):
|
|
563
|
-
"""
|
|
564
|
-
Match the parent sample to WISE
|
|
565
|
-
|
|
566
|
-
:param chunk_number: number of the declination chunk
|
|
567
|
-
:type chunk_number: int
|
|
568
|
-
:param table_name: optional, WISE table to match to, default is AllWISE Source Catalog
|
|
569
|
-
:type table_name: str,
|
|
570
|
-
:param additional_columns: optional, additional columns to be added to the parent sample
|
|
571
|
-
:type additional_columns: list
|
|
572
|
-
"""
|
|
573
|
-
|
|
574
|
-
dec_intervall_mask = self.chunk_map == chunk_number
|
|
575
|
-
logger.debug(f"Any selected: {np.any(dec_intervall_mask)}")
|
|
576
|
-
_parent_sample_declination_band_file = self.cache_dir / f"parent_sample_chunk{chunk_number}.xml"
|
|
577
|
-
_output_file = self.cache_dir / f"parent_sample_chunk{chunk_number}.tbl"
|
|
578
|
-
|
|
579
|
-
additional_keys = (
|
|
580
|
-
"," + ",".join(additional_columns)
|
|
581
|
-
if (additional_columns is not None) and (len(additional_columns) > 0)
|
|
582
|
-
else ""
|
|
583
|
-
)
|
|
584
|
-
|
|
585
|
-
gator_res = self._match_to_wise(
|
|
586
|
-
in_filename=_parent_sample_declination_band_file,
|
|
587
|
-
out_filename=_output_file,
|
|
588
|
-
mask=dec_intervall_mask,
|
|
589
|
-
table_name=table_name,
|
|
590
|
-
additional_keys=additional_keys,
|
|
591
|
-
)
|
|
592
|
-
|
|
593
|
-
for fn in [_parent_sample_declination_band_file, _output_file]:
|
|
594
|
-
try:
|
|
595
|
-
logger.debug(f"removing {fn}")
|
|
596
|
-
os.remove(fn)
|
|
597
|
-
except FileNotFoundError:
|
|
598
|
-
logger.warning(f"No File!!")
|
|
599
|
-
|
|
600
|
-
# insert the corresponding separation to the WISE source into the parent sample
|
|
601
|
-
self.parent_sample.df.loc[
|
|
602
|
-
dec_intervall_mask,
|
|
603
|
-
self.parent_sample_wise_skysep_key
|
|
604
|
-
] = list(gator_res["dist_x"])
|
|
605
|
-
|
|
606
|
-
# insert the corresponding WISE IDs into the parent sample
|
|
607
|
-
self.parent_sample.df.loc[
|
|
608
|
-
dec_intervall_mask,
|
|
609
|
-
self.parent_wise_source_id_key
|
|
610
|
-
] = list(gator_res["cntr"])
|
|
611
|
-
|
|
612
|
-
if len(additional_columns) > 0:
|
|
613
|
-
for col in additional_columns:
|
|
614
|
-
logger.debug(f"inserting {col}")
|
|
615
|
-
|
|
616
|
-
if col not in self.parent_sample.df.columns:
|
|
617
|
-
self.parent_sample.df[col] = np.nan
|
|
618
|
-
|
|
619
|
-
self.parent_sample.df.loc[
|
|
620
|
-
dec_intervall_mask,
|
|
621
|
-
col
|
|
622
|
-
] = list(gator_res[col])
|
|
623
|
-
|
|
624
|
-
_no_match_mask = self.parent_sample.df[self.parent_sample_wise_skysep_key].isna() & dec_intervall_mask
|
|
625
|
-
for k, default in self.parent_sample_default_entries.items():
|
|
626
|
-
self.parent_sample.df.loc[_no_match_mask, k] = default
|
|
627
|
-
|
|
628
|
-
def _get_dubplicated_wise_id_mask(self):
|
|
629
|
-
idf_sorted_sep = self.parent_sample.df.sort_values(self.parent_sample_wise_skysep_key)
|
|
630
|
-
idf_sorted_sep['duplicate'] = idf_sorted_sep[self.parent_wise_source_id_key].duplicated(keep='first')
|
|
631
|
-
idf_sorted_sep.sort_index(inplace=True)
|
|
632
|
-
_inf_mask = idf_sorted_sep[self.parent_sample_wise_skysep_key] < np.inf
|
|
633
|
-
_dupe_mask = idf_sorted_sep['duplicate'] & (_inf_mask)
|
|
634
|
-
if np.any(_dupe_mask):
|
|
635
|
-
_N_dupe = len(self.parent_sample.df[_dupe_mask])
|
|
636
|
-
logger.info(f"{_N_dupe} duplicated entries in parent sample")
|
|
637
|
-
return _dupe_mask
|
|
638
|
-
|
|
639
|
-
###################################################
|
|
640
|
-
# END MATCH PARENT SAMPLE TO WISE SOURCES #
|
|
641
|
-
###########################################################################################################
|
|
642
|
-
|
|
643
|
-
###########################################################################################################
|
|
644
|
-
# START GET PHOTOMETRY DATA #
|
|
645
|
-
###################################
|
|
646
|
-
|
|
647
|
-
def get_photometric_data(self, tables=None, perc=1, service=None, nthreads=100,
|
|
648
|
-
chunks=None, overwrite=True, remove_chunks=False, query_type='positional',
|
|
649
|
-
skip_download=False, mask_by_position=False):
|
|
650
|
-
"""
|
|
651
|
-
Load photometric data from the IRSA server for the matched sample. The result will be saved under
|
|
652
|
-
|
|
653
|
-
</path/to/timewise/data/dir>/output/<base_name>/lightcurves/binned_lightcurves_<service>.json
|
|
654
|
-
|
|
655
|
-
If service is 'tap' then the process exists on the first call to give the jobs running on the IRSA
|
|
656
|
-
servers some time. The job infos are cached and loaded on the next function call. `timewise` will
|
|
657
|
-
then wait on the jobs to finish. If the process is terminated via the keyboard during the waiting
|
|
658
|
-
the TAP connections will also be cached to be resumed at a later time.
|
|
659
|
-
|
|
660
|
-
:param remove_chunks: remove single chunk files after binning
|
|
661
|
-
:type remove_chunks: bools
|
|
662
|
-
:param overwrite: overwrite already existing lightcurves and metadata
|
|
663
|
-
:type overwrite: bool
|
|
664
|
-
:param tables: WISE tables to use for photometry query, defaults to AllWISE and NOEWISER photometry
|
|
665
|
-
:type tables: str or list-like
|
|
666
|
-
:param perc: percentage of sources to load photometry for, default 1
|
|
667
|
-
:type perc: float
|
|
668
|
-
:param nthreads: max number of threads to launch
|
|
669
|
-
:type nthreads: int
|
|
670
|
-
:param service: either of 'gator' or 'tap', selects base on elements per chunk by default
|
|
671
|
-
:type service: str
|
|
672
|
-
:param chunks: containing indices of chunks to download
|
|
673
|
-
:type chunks: list-like
|
|
674
|
-
:param query_type: 'positional': query photometry based on distance from object, 'by_allwise_id': select all photometry points within a radius of 50 arcsec with the corresponding AllWISE ID
|
|
675
|
-
:type query_type: str
|
|
676
|
-
:param skip_download: if `True` skip downloading and only do binning
|
|
677
|
-
:type skip_download: bool
|
|
678
|
-
:param mask_by_position: if `True` mask single exposures that are too far away from the bulk
|
|
679
|
-
:type mask_by_position: bool
|
|
680
|
-
:return: The status of the processing
|
|
681
|
-
:rtype: bool
|
|
682
|
-
"""
|
|
683
|
-
|
|
684
|
-
mag = True
|
|
685
|
-
flux = True
|
|
686
|
-
|
|
687
|
-
if tables is None:
|
|
688
|
-
tables = [
|
|
689
|
-
'AllWISE Multiepoch Photometry Table',
|
|
690
|
-
'NEOWISE-R Single Exposure (L1b) Source Table'
|
|
691
|
-
]
|
|
692
|
-
|
|
693
|
-
if query_type not in self.query_types:
|
|
694
|
-
raise ValueError(f"Unknown query type {query_type}! Choose one of {self.query_types}")
|
|
695
|
-
|
|
696
|
-
if chunks is None:
|
|
697
|
-
chunks = list(range(round(int(self.n_chunks * perc))))
|
|
698
|
-
else:
|
|
699
|
-
cm = [c not in self.chunk_map for c in chunks]
|
|
700
|
-
if np.any(cm):
|
|
701
|
-
raise ValueError(f"Chunks {np.array(chunks)[cm]} are not in chunk map. "
|
|
702
|
-
f"Probably they are larger than the set chunk number of {self._n_chunks}")
|
|
703
|
-
|
|
704
|
-
if service is None:
|
|
705
|
-
elements_per_chunk = len(self.parent_sample.df) / self.n_chunks
|
|
706
|
-
service = 'tap' if elements_per_chunk > 300 else 'gator'
|
|
707
|
-
|
|
708
|
-
if (query_type == 'by_allwise_id') and (service == 'gator'):
|
|
709
|
-
raise ValueError(f"Query type 'by_allwise_id' only implemented for service 'tap'!")
|
|
710
|
-
|
|
711
|
-
if not skip_download:
|
|
712
|
-
|
|
713
|
-
logger.debug(f"Getting {perc * 100:.2f}% of lightcurve chunks ({len(chunks)}) via {service} "
|
|
714
|
-
f"in {'magnitude' if mag else ''} {'flux' if flux else ''} "
|
|
715
|
-
f"from {tables}")
|
|
716
|
-
|
|
717
|
-
if service == 'tap':
|
|
718
|
-
done = self._query_for_photometry(tables, chunks, mag, flux, nthreads, query_type)
|
|
719
|
-
if not done:
|
|
720
|
-
logger.info("Some TAP jobs still running. Exit here and resume later.")
|
|
721
|
-
return False
|
|
722
|
-
|
|
723
|
-
elif service == 'gator':
|
|
724
|
-
self._query_for_photometry_gator(tables, chunks, mag, flux, nthreads)
|
|
725
|
-
else:
|
|
726
|
-
raise ValueError(f"Unknown service {service}! Choose one of 'tap' or 'gator'")
|
|
727
|
-
|
|
728
|
-
else:
|
|
729
|
-
logger.info("skipping download, assume data is already downloaded.")
|
|
730
|
-
|
|
731
|
-
logger.info("Download done, processing lightcurves")
|
|
732
|
-
self._select_individual_lightcurves_and_bin(service=service, chunks=chunks, mask_by_position=mask_by_position)
|
|
733
|
-
for c in chunks:
|
|
734
|
-
self.calculate_metadata(service=service, chunk_number=c, overwrite=True)
|
|
735
|
-
|
|
736
|
-
self._combine_data_products(service=service, remove=remove_chunks, overwrite=overwrite)
|
|
737
|
-
|
|
738
|
-
return True
|
|
739
|
-
|
|
740
|
-
def _data_product_filename(self, service, chunk_number=None, jobID=None):
|
|
741
|
-
|
|
742
|
-
n = "timewise_data_product_"
|
|
743
|
-
|
|
744
|
-
if (chunk_number is None) and (jobID is None):
|
|
745
|
-
return self.lightcurve_dir / f"{n}{service}.json"
|
|
746
|
-
else:
|
|
747
|
-
fn = f"{n}{service}{self._split_chunk_key}{chunk_number}"
|
|
748
|
-
if (chunk_number is not None) and (jobID is None):
|
|
749
|
-
return self._cache_photometry_dir / (fn + ".json")
|
|
750
|
-
else:
|
|
751
|
-
return self._cache_photometry_dir / (fn + f"_{jobID}.json")
|
|
752
|
-
|
|
753
|
-
@staticmethod
|
|
754
|
-
def _verify_contains_lightcurves(data_product):
|
|
755
|
-
mask = ["timewise_lightcurve" in data.keys() for data in data_product.values()]
|
|
756
|
-
if not any(mask):
|
|
757
|
-
raise KeyError(f"'timewise_lightcurves' in none of the results."
|
|
758
|
-
f"Cluster job probably did not finish.")
|
|
759
|
-
|
|
760
|
-
def load_data_product(
|
|
761
|
-
self,
|
|
762
|
-
service,
|
|
763
|
-
chunk_number=None,
|
|
764
|
-
jobID=None,
|
|
765
|
-
return_filename=False,
|
|
766
|
-
verify_contains_lightcurves=False
|
|
767
|
-
):
|
|
768
|
-
"""
|
|
769
|
-
Load data product from disk
|
|
770
|
-
|
|
771
|
-
:param service: service used to download data ('tap' or 'gator')
|
|
772
|
-
:type service: str
|
|
773
|
-
:param chunk_number: chunk number to load, if None load combined file for this service
|
|
774
|
-
:type chunk_number: int, optional
|
|
775
|
-
:param jobID: jobID to load, if None load the combined file for this chunk
|
|
776
|
-
:type jobID: int, optional
|
|
777
|
-
:param return_filename: return filename of data product, defaults to False
|
|
778
|
-
:type return_filename: bool, optional
|
|
779
|
-
:param verify_contains_lightcurves: verify that the data product contains lightcurves, defaults to False
|
|
780
|
-
:type verify_contains_lightcurves: bool, optional
|
|
781
|
-
"""
|
|
782
|
-
fn = self._data_product_filename(service, chunk_number, jobID)
|
|
783
|
-
logger.debug(f"loading {fn}")
|
|
784
|
-
try:
|
|
785
|
-
with open(fn, "r") as f:
|
|
786
|
-
lcs = json.load(f)
|
|
787
|
-
|
|
788
|
-
if verify_contains_lightcurves:
|
|
789
|
-
try:
|
|
790
|
-
self._verify_contains_lightcurves(lcs)
|
|
791
|
-
except KeyError as e:
|
|
792
|
-
raise KeyError(f"{fn}: {e}")
|
|
793
|
-
|
|
794
|
-
if return_filename:
|
|
795
|
-
return lcs, fn
|
|
796
|
-
return lcs
|
|
797
|
-
except FileNotFoundError:
|
|
798
|
-
logger.warning(f"No file {fn}")
|
|
799
|
-
|
|
800
|
-
def _save_data_product(self, data_product, service, chunk_number=None, jobID=None, overwrite=False):
|
|
801
|
-
fn = self._data_product_filename(service, chunk_number, jobID)
|
|
802
|
-
logger.debug(f"saving {len(data_product)} new lightcurves to {fn}")
|
|
803
|
-
|
|
804
|
-
if fn == self._data_product_filename(service):
|
|
805
|
-
self._cached_final_products['lightcurves'][service] = data_product
|
|
806
|
-
|
|
807
|
-
if not overwrite:
|
|
808
|
-
try:
|
|
809
|
-
old_data_product = self.load_data_product(service=service, chunk_number=chunk_number, jobID=jobID)
|
|
810
|
-
logger.debug(f"Found {len(old_data_product)}. Combining")
|
|
811
|
-
data_product = data_product.update(old_data_product)
|
|
812
|
-
except FileNotFoundError as e:
|
|
813
|
-
logger.info(f"FileNotFoundError: {e}. Making new binned lightcurves.")
|
|
814
|
-
|
|
815
|
-
with open(fn, "w") as f:
|
|
816
|
-
json.dump(data_product, f, indent=4)
|
|
817
|
-
|
|
818
|
-
def _combine_data_products(
|
|
819
|
-
self,
|
|
820
|
-
service=None,
|
|
821
|
-
chunk_number=None,
|
|
822
|
-
remove=False,
|
|
823
|
-
overwrite=False
|
|
824
|
-
):
|
|
825
|
-
if not service:
|
|
826
|
-
logger.info("Combining all lightcuves collected with all services")
|
|
827
|
-
itr = ['service', ['gator', 'tap']]
|
|
828
|
-
kwargs = {}
|
|
829
|
-
elif chunk_number is None:
|
|
830
|
-
logger.info(f"Combining all lightcurves collected with {service}")
|
|
831
|
-
itr = ['chunk_number', range(self.n_chunks)]
|
|
832
|
-
kwargs = {'service': service}
|
|
833
|
-
elif chunk_number is not None:
|
|
834
|
-
logger.info(f"Combining all lightcurves collected with {service} for chunk {chunk_number}")
|
|
835
|
-
itr = ['jobID',
|
|
836
|
-
list(self.clusterJob_chunk_map.index[self.clusterJob_chunk_map.chunk_number == chunk_number])]
|
|
837
|
-
kwargs = {'service': service, 'chunk_number': chunk_number}
|
|
838
|
-
else:
|
|
839
|
-
raise NotImplementedError
|
|
840
|
-
|
|
841
|
-
lcs = None
|
|
842
|
-
fns = list()
|
|
843
|
-
missing_files = False
|
|
844
|
-
erroneous_files = False
|
|
845
|
-
for i in itr[1]:
|
|
846
|
-
kw = dict(kwargs)
|
|
847
|
-
kw[itr[0]] = i
|
|
848
|
-
kw['return_filename'] = True
|
|
849
|
-
kw["verify_contains_lightcurves"] = True
|
|
850
|
-
|
|
851
|
-
try:
|
|
852
|
-
res = self.load_data_product(**kw)
|
|
853
|
-
if res is not None:
|
|
854
|
-
ilcs, ifn = res
|
|
855
|
-
fns.append(ifn)
|
|
856
|
-
if isinstance(lcs, type(None)):
|
|
857
|
-
lcs = dict(ilcs)
|
|
858
|
-
else:
|
|
859
|
-
lcs.update(ilcs)
|
|
860
|
-
|
|
861
|
-
else:
|
|
862
|
-
missing_files = True
|
|
863
|
-
|
|
864
|
-
except KeyError as e:
|
|
865
|
-
logger.error(e)
|
|
866
|
-
erroneous_files = True
|
|
867
|
-
|
|
868
|
-
if missing_files:
|
|
869
|
-
logger.warning(f"Missing files for {service}")
|
|
870
|
-
|
|
871
|
-
if erroneous_files:
|
|
872
|
-
logger.warning(f"Erroneous files for {service}")
|
|
873
|
-
|
|
874
|
-
if erroneous_files or missing_files:
|
|
875
|
-
_chunk_str = f" for chunk {chunk_number}" if chunk_number is not None else ""
|
|
876
|
-
logger.warning(f"Not saving combined data product{_chunk_str}")
|
|
877
|
-
break
|
|
878
|
-
|
|
879
|
-
if not (erroneous_files or missing_files):
|
|
880
|
-
self._save_data_product(lcs, service=service, chunk_number=chunk_number, overwrite=overwrite)
|
|
881
|
-
|
|
882
|
-
if remove:
|
|
883
|
-
for fn in tqdm.tqdm(fns, desc="removing files"):
|
|
884
|
-
os.remove(fn)
|
|
885
|
-
|
|
886
|
-
return True
|
|
887
|
-
|
|
888
|
-
else:
|
|
889
|
-
return False
|
|
890
|
-
|
|
891
|
-
# ----------------------------------------------------------------------------------- #
|
|
892
|
-
# START using GATOR to get photometry #
|
|
893
|
-
# ------------------------------------------ #
|
|
894
|
-
|
|
895
|
-
def _gator_chunk_photometry_cache_filename(self, table_nice_name, chunk_number,
|
|
896
|
-
additional_neowise_query=False, gator_input=False):
|
|
897
|
-
table_name = self.get_db_name(table_nice_name)
|
|
898
|
-
_additional_neowise_query = '_neowise_gator' if additional_neowise_query else ''
|
|
899
|
-
_gator_input = '_gator_input' if gator_input else ''
|
|
900
|
-
_ending = '.xml' if gator_input else'.tbl'
|
|
901
|
-
fn = f"{self._cached_raw_photometry_prefix}_{table_name}{_additional_neowise_query}{_gator_input}" \
|
|
902
|
-
f"{self._split_chunk_key}{chunk_number}{_ending}"
|
|
903
|
-
return self._cache_photometry_dir / fn
|
|
904
|
-
|
|
905
|
-
def _thread_query_photometry_gator(self, chunk_number, table_name, mag, flux):
|
|
906
|
-
_infile = self._gator_chunk_photometry_cache_filename(table_name, chunk_number, gator_input=True)
|
|
907
|
-
_outfile = self._gator_chunk_photometry_cache_filename(table_name, chunk_number)
|
|
908
|
-
_nice_name = self.get_db_name(table_name, nice=True)
|
|
909
|
-
_additional_keys_list = ['mjd']
|
|
910
|
-
if mag:
|
|
911
|
-
_additional_keys_list += list(self.photometry_table_keymap[_nice_name]['mag'].keys())
|
|
912
|
-
if flux:
|
|
913
|
-
_additional_keys_list += list(self.photometry_table_keymap[_nice_name]['flux'].keys())
|
|
914
|
-
|
|
915
|
-
_additional_keys = "," + ",".join(_additional_keys_list)
|
|
916
|
-
_deci_mask = self.chunk_map == chunk_number
|
|
917
|
-
_mask = _deci_mask #& (~self._no_allwise_source)
|
|
918
|
-
|
|
919
|
-
res = self._match_to_wise(
|
|
920
|
-
in_filename=_infile,
|
|
921
|
-
out_filename=_outfile,
|
|
922
|
-
mask=_mask,
|
|
923
|
-
table_name=table_name,
|
|
924
|
-
one_to_one=False,
|
|
925
|
-
additional_keys=_additional_keys,
|
|
926
|
-
minsep_arcsec=self.min_sep.to('arcsec').value,
|
|
927
|
-
silent=True,
|
|
928
|
-
constraints=self.constraints
|
|
929
|
-
)
|
|
930
|
-
|
|
931
|
-
os.remove(_infile)
|
|
932
|
-
return res
|
|
933
|
-
|
|
934
|
-
def _gator_photometry_worker_thread(self):
|
|
935
|
-
while True:
|
|
936
|
-
try:
|
|
937
|
-
args = self.queue.get(block=False)
|
|
938
|
-
except (AttributeError, queue.Empty):
|
|
939
|
-
logger.debug('No more tasks, exiting')
|
|
940
|
-
break
|
|
941
|
-
logger.debug(f"{args}")
|
|
942
|
-
self._thread_query_photometry_gator(*args)
|
|
943
|
-
self.queue.task_done()
|
|
944
|
-
logger.info(f"{self.queue.qsize()} tasks remaining")
|
|
945
|
-
|
|
946
|
-
def _query_for_photometry_gator(self, tables, chunks, mag, flux, nthreads):
|
|
947
|
-
nthreads = min(nthreads, len(chunks))
|
|
948
|
-
logger.debug(f'starting {nthreads} workers')
|
|
949
|
-
threads = [threading.Thread(target=self._gator_photometry_worker_thread) for _ in range(nthreads)]
|
|
950
|
-
|
|
951
|
-
logger.debug(f"using {len(chunks)} chunks")
|
|
952
|
-
self.queue = queue.Queue()
|
|
953
|
-
for t in np.atleast_1d(tables):
|
|
954
|
-
for i in chunks:
|
|
955
|
-
self.queue.put([i, t, mag, flux])
|
|
956
|
-
|
|
957
|
-
logger.info(f"added {self.queue.qsize()} tasks to queue")
|
|
958
|
-
for t in threads:
|
|
959
|
-
t.start()
|
|
960
|
-
self.queue.join()
|
|
961
|
-
self.queue = None
|
|
962
|
-
|
|
963
|
-
for t in threads:
|
|
964
|
-
t.join()
|
|
965
|
-
|
|
966
|
-
return True
|
|
967
|
-
|
|
968
|
-
def _get_unbinned_lightcurves_gator(self, chunk_number, clear=False):
|
|
969
|
-
# load only the files for this chunk
|
|
970
|
-
fns = [self._cache_photometry_dir / fn
|
|
971
|
-
for fn in os.listdir(self._cache_photometry_dir)
|
|
972
|
-
if (fn.startswith(self._cached_raw_photometry_prefix) and
|
|
973
|
-
fn.endswith(f"{self._split_chunk_key}{chunk_number}.tbl"))
|
|
974
|
-
]
|
|
975
|
-
|
|
976
|
-
logger.debug(f"chunk {chunk_number}: loading {len(fns)} files for chunk {chunk_number}")
|
|
977
|
-
|
|
978
|
-
_data = list()
|
|
979
|
-
for fn in fns:
|
|
980
|
-
data_table = Table.read(fn, format='ipac').to_pandas()
|
|
981
|
-
|
|
982
|
-
t = 'allwise_p3as_mep' if 'allwise' in str(fn) else 'neowiser_p1bs_psd'
|
|
983
|
-
nice_name = self.get_db_name(t, nice=True)
|
|
984
|
-
cols = {'index_01': self._tap_orig_id_key}
|
|
985
|
-
cols.update(self.photometry_table_keymap[nice_name]['mag'])
|
|
986
|
-
cols.update(self.photometry_table_keymap[nice_name]['flux'])
|
|
987
|
-
if 'allwise' in str(fn):
|
|
988
|
-
cols['cntr_mf'] = 'allwise_cntr'
|
|
989
|
-
|
|
990
|
-
data_table = data_table.rename(columns=cols)
|
|
991
|
-
_data.append(data_table)
|
|
992
|
-
|
|
993
|
-
if clear:
|
|
994
|
-
os.remove(fn)
|
|
995
|
-
|
|
996
|
-
lightcurves = pd.concat(_data)
|
|
997
|
-
return lightcurves
|
|
998
|
-
|
|
999
|
-
# ------------------------------------------ #
|
|
1000
|
-
# END using GATOR to get photometry #
|
|
1001
|
-
# ----------------------------------------------------------------------------------- #
|
|
1002
|
-
|
|
1003
|
-
# ----------------------------------------------------------------------------------- #
|
|
1004
|
-
# START using TAP to get photometry #
|
|
1005
|
-
# ---------------------------------------- #
|
|
1006
|
-
|
|
1007
|
-
@property
|
|
1008
|
-
def tap_cache_filenames(self):
|
|
1009
|
-
return (
|
|
1010
|
-
self.tap_jobs_cache_dir / f"tap_jobs.json",
|
|
1011
|
-
self.tap_jobs_cache_dir / f"queue.json"
|
|
1012
|
-
)
|
|
1013
|
-
|
|
1014
|
-
def dump_tap_cache(self):
|
|
1015
|
-
self.tap_jobs_cache_dir.mkdir(parents=True, exist_ok=True)
|
|
1016
|
-
|
|
1017
|
-
tap_jobs_fn, queue_fn = self.tap_cache_filenames
|
|
1018
|
-
logger.debug(f"saving TAP jobs to {tap_jobs_fn}")
|
|
1019
|
-
tap_jobs_fn.parent.mkdir(parents=True, exist_ok=True)
|
|
1020
|
-
with tap_jobs_fn.open("w") as f:
|
|
1021
|
-
json.dump(self.tap_jobs, f, indent=4)
|
|
1022
|
-
|
|
1023
|
-
queue_fn.parent.mkdir(parents=True, exist_ok=True)
|
|
1024
|
-
logger.debug(f"saving queue to {queue_fn}")
|
|
1025
|
-
with queue_fn.open("w") as f:
|
|
1026
|
-
json.dump(list(self.queue.queue), f, indent=4)
|
|
1027
|
-
|
|
1028
|
-
def load_tap_cache(self):
|
|
1029
|
-
tap_jobs_fn, queue_fn = self.tap_cache_filenames
|
|
1030
|
-
|
|
1031
|
-
logger.debug(f"loading TAP jobs from {tap_jobs_fn}")
|
|
1032
|
-
if tap_jobs_fn.is_file():
|
|
1033
|
-
with tap_jobs_fn.open("r") as f:
|
|
1034
|
-
tap_jobs_json = json.load(f)
|
|
1035
|
-
# JSON keys are always strings while we need the chunk numbers
|
|
1036
|
-
# to be integers in the dictionary
|
|
1037
|
-
self.tap_jobs = {
|
|
1038
|
-
t: {int(i): url for i, url in v.items()} for t, v in tap_jobs_json.items()
|
|
1039
|
-
}
|
|
1040
|
-
logger.debug(f"removing {tap_jobs_fn}")
|
|
1041
|
-
tap_jobs_fn.unlink()
|
|
1042
|
-
else:
|
|
1043
|
-
logger.warning(f"No file {tap_jobs_fn}")
|
|
1044
|
-
self.tap_jobs = None
|
|
1045
|
-
|
|
1046
|
-
logger.debug(f"loading queue from {queue_fn}")
|
|
1047
|
-
if queue_fn.is_file():
|
|
1048
|
-
with queue_fn.open("r") as f:
|
|
1049
|
-
ql = json.load(f)
|
|
1050
|
-
logger.debug(f"loaded {len(ql)} queue elements")
|
|
1051
|
-
self.queue = queue.Queue()
|
|
1052
|
-
for q in ql:
|
|
1053
|
-
self.queue.put(q)
|
|
1054
|
-
logger.debug(f"removing {queue_fn}")
|
|
1055
|
-
queue_fn.unlink()
|
|
1056
|
-
else:
|
|
1057
|
-
logger.warning(f"No file {queue_fn}")
|
|
1058
|
-
self.queue = None
|
|
1059
|
-
|
|
1060
|
-
cache_exists = (self.tap_jobs is not None) and (self.queue is not None)
|
|
1061
|
-
return cache_exists
|
|
1062
|
-
|
|
1063
|
-
def _get_photometry_query_string(self, table_name, mag, flux, query_type):
|
|
1064
|
-
"""
|
|
1065
|
-
Construct a query string to submit to IRSA
|
|
1066
|
-
:param table_name: str, table name
|
|
1067
|
-
:type table_name: str
|
|
1068
|
-
:return: str
|
|
1069
|
-
"""
|
|
1070
|
-
logger.debug(f"constructing query for {table_name}")
|
|
1071
|
-
db_name = self.get_db_name(table_name)
|
|
1072
|
-
nice_name = self.get_db_name(table_name, nice=True)
|
|
1073
|
-
id_key = 'cntr_mf' if 'allwise' in db_name else 'allwise_cntr'
|
|
1074
|
-
lum_keys = list()
|
|
1075
|
-
if mag:
|
|
1076
|
-
lum_keys += list(self.photometry_table_keymap[nice_name]['mag'].keys())
|
|
1077
|
-
if flux:
|
|
1078
|
-
lum_keys += list(self.photometry_table_keymap[nice_name]['flux'].keys())
|
|
1079
|
-
keys = ['ra', 'dec', 'mjd', id_key] + lum_keys
|
|
1080
|
-
_constraints = list(self.constraints)
|
|
1081
|
-
|
|
1082
|
-
q = 'SELECT \n\t'
|
|
1083
|
-
for k in keys:
|
|
1084
|
-
q += f'{db_name}.{k}, '
|
|
1085
|
-
q += f'\n\tmine.{self._tap_orig_id_key} \n'
|
|
1086
|
-
q += f'FROM\n\tTAP_UPLOAD.ids AS mine \n'
|
|
1087
|
-
|
|
1088
|
-
if query_type == 'positional':
|
|
1089
|
-
q += f'RIGHT JOIN\n\t{db_name} \n'
|
|
1090
|
-
radius = self.min_sep
|
|
1091
|
-
|
|
1092
|
-
if query_type == 'by_allwise_id':
|
|
1093
|
-
q += f'INNER JOIN\n\t{db_name} ON {db_name}.{id_key} = mine.{self._tap_wise_id_key} \n'
|
|
1094
|
-
radius = 15 * u.arcsec
|
|
1095
|
-
|
|
1096
|
-
q += 'WHERE \n'
|
|
1097
|
-
|
|
1098
|
-
if query_type == 'positional':
|
|
1099
|
-
q += f"\tCONTAINS(POINT('J2000',{db_name}.ra,{db_name}.dec)," \
|
|
1100
|
-
f"CIRCLE('J2000',mine.ra_in,mine.dec_in,{radius.to('deg').value}))=1 "
|
|
1101
|
-
|
|
1102
|
-
if len(_constraints) > 0:
|
|
1103
|
-
|
|
1104
|
-
if query_type == 'positional':
|
|
1105
|
-
q += ' AND (\n'
|
|
1106
|
-
|
|
1107
|
-
for c in _constraints:
|
|
1108
|
-
q += f'\t{db_name}.{c} AND \n'
|
|
1109
|
-
q = q.strip(" AND \n")
|
|
1110
|
-
|
|
1111
|
-
if query_type == 'positional':
|
|
1112
|
-
q += '\t)'
|
|
1113
|
-
|
|
1114
|
-
logger.debug(f"\n{q}")
|
|
1115
|
-
return q
|
|
1116
|
-
|
|
1117
|
-
def _submit_job_to_TAP(self, chunk_number, table_name, mag, flux, query_type):
|
|
1118
|
-
i = chunk_number
|
|
1119
|
-
t = table_name
|
|
1120
|
-
m = self.chunk_map == i
|
|
1121
|
-
|
|
1122
|
-
# if perc is smaller than one select only a subset of wise IDs
|
|
1123
|
-
sel = self.parent_sample.df[np.array(m)]
|
|
1124
|
-
|
|
1125
|
-
tab_d = dict()
|
|
1126
|
-
|
|
1127
|
-
tab_d[self._tap_orig_id_key] = np.array(sel.index).astype(int)
|
|
1128
|
-
tab_d['ra_in'] = np.array(sel[self.parent_sample.default_keymap['ra']]).astype(float)
|
|
1129
|
-
tab_d['dec_in'] = np.array(sel[self.parent_sample.default_keymap['dec']]).astype(float)
|
|
1130
|
-
|
|
1131
|
-
if query_type == 'by_allwise_id':
|
|
1132
|
-
tab_d[self._tap_wise_id_key] = np.array(sel[self.parent_wise_source_id_key]).astype(int)
|
|
1133
|
-
|
|
1134
|
-
del sel
|
|
1135
|
-
|
|
1136
|
-
logger.debug(f"{chunk_number}th query of {table_name}: uploading {len(list(tab_d.values())[0])} objects.")
|
|
1137
|
-
qstring = self._get_photometry_query_string(t, mag, flux, query_type)
|
|
1138
|
-
|
|
1139
|
-
N_tries = 5
|
|
1140
|
-
while True:
|
|
1141
|
-
if N_tries == 0:
|
|
1142
|
-
logger.warning("No more tries left!")
|
|
1143
|
-
raise vo.dal.exceptions.DALServiceError(f"Submission failed "
|
|
1144
|
-
f"for {i}th chunk "
|
|
1145
|
-
f"of {t} "
|
|
1146
|
-
f"after {N_tries} attempts")
|
|
1147
|
-
try:
|
|
1148
|
-
job = self.service.submit_job(qstring, uploads={'ids': Table(tab_d)})
|
|
1149
|
-
job.run()
|
|
1150
|
-
logger.debug(job.url)
|
|
1151
|
-
time.sleep(5) # wait a bit until checking phase
|
|
1152
|
-
|
|
1153
|
-
if isinstance(job.phase, type(None)):
|
|
1154
|
-
raise vo.dal.DALServiceError(
|
|
1155
|
-
f"Job submission failed. No phase!"
|
|
1156
|
-
f"response: {job.submit_response}"
|
|
1157
|
-
)
|
|
1158
|
-
|
|
1159
|
-
logger.info(f'submitted job for {t} for chunk {i}: ')
|
|
1160
|
-
logger.debug(f'Job: {job.url}; {job.phase}')
|
|
1161
|
-
self.tap_jobs[t][i] = job.url
|
|
1162
|
-
self.queue.put((t, i))
|
|
1163
|
-
break
|
|
1164
|
-
|
|
1165
|
-
except (
|
|
1166
|
-
requests.exceptions.ConnectionError,
|
|
1167
|
-
vo.dal.exceptions.DALServiceError,
|
|
1168
|
-
requests.HTTPError
|
|
1169
|
-
) as e:
|
|
1170
|
-
wait = 60
|
|
1171
|
-
N_tries -= 1
|
|
1172
|
-
logger.warning(f"{chunk_number}th query of {table_name}: Could not submit TAP job!\n"
|
|
1173
|
-
f"{e}. Waiting {wait}s and try again. {N_tries} tries left.")
|
|
1174
|
-
time.sleep(wait)
|
|
1175
|
-
|
|
1176
|
-
def _chunk_photometry_cache_filename(self, table_nice_name, chunk_number, additional_neowise_query=False):
|
|
1177
|
-
table_name = self.get_db_name(table_nice_name)
|
|
1178
|
-
_additional_neowise_query = '_neowise_gator' if additional_neowise_query else ''
|
|
1179
|
-
fn = f"{self._cached_raw_photometry_prefix}_{table_name}{_additional_neowise_query}" \
|
|
1180
|
-
f"{self._split_chunk_key}{chunk_number}.csv"
|
|
1181
|
-
return self._cache_photometry_dir / fn
|
|
1182
|
-
|
|
1183
|
-
@staticmethod
|
|
1184
|
-
def _give_up_tap(e):
|
|
1185
|
-
return ("Job is not active!" in str(e))
|
|
1186
|
-
|
|
1187
|
-
@backoff.on_exception(
|
|
1188
|
-
backoff.expo,
|
|
1189
|
-
vo.dal.exceptions.DALServiceError,
|
|
1190
|
-
giveup=_give_up_tap,
|
|
1191
|
-
max_tries=50,
|
|
1192
|
-
on_backoff=backoff_hndlr
|
|
1193
|
-
)
|
|
1194
|
-
def _thread_wait_and_get_results(self, t, i):
|
|
1195
|
-
logger.info(f"Waiting on {i}th query of {t} ........")
|
|
1196
|
-
|
|
1197
|
-
_job = StableAsyncTAPJob(url=self.tap_jobs[t][i])
|
|
1198
|
-
_job.wait()
|
|
1199
|
-
logger.info(f'{i}th query of {t}: Done!')
|
|
1200
|
-
|
|
1201
|
-
lightcurve = _job.fetch_result().to_table().to_pandas()
|
|
1202
|
-
fn = self._chunk_photometry_cache_filename(t, i)
|
|
1203
|
-
logger.debug(f"{i}th query of {t}: saving under {fn}")
|
|
1204
|
-
|
|
1205
|
-
table_nice_name = self.get_db_name(t, nice=True)
|
|
1206
|
-
cols = dict(self.photometry_table_keymap[table_nice_name]['mag'])
|
|
1207
|
-
cols.update(self.photometry_table_keymap[table_nice_name]['flux'])
|
|
1208
|
-
|
|
1209
|
-
if 'allwise' in t:
|
|
1210
|
-
cols['cntr_mf'] = 'allwise_cntr'
|
|
1211
|
-
|
|
1212
|
-
lightcurve.rename(columns=cols).to_csv(fn)
|
|
1213
|
-
return
|
|
1214
|
-
|
|
1215
|
-
    def _tap_photometry_worker_thread(self):
        while True:
            try:
                t, i = self.queue.get(block=False)
            except queue.Empty:
                logger.debug("No more tasks, exiting")
                break
            except AttributeError:
                logger.debug("Queue is gone, exiting")
                break

            job = StableAsyncTAPJob(url=self.tap_jobs[t][i])

            _ntries = 10
            while True:
                try:
                    job._update(timeout=600)
                    phase = job._job.phase
                    break
                except vo.dal.exceptions.DALServiceError as e:
                    msg = f"query {i} of {t}: DALServiceError: {e}; trying again in 6 min"
                    if _ntries < 10:
                        msg += f' ({_ntries} tries left)'

                    logger.warning(msg)
                    time.sleep(60 * 6)
                    if '404 Client Error: Not Found for url' in str(e):
                        _ntries -= 1

            if phase in self.running_tap_phases:
                self.queue.put((t, i))
                self.queue.task_done()

            elif phase in self.done_tap_phases:
                self._thread_wait_and_get_results(t, i)
                self.queue.task_done()
                logger.info(f'{self.queue.qsize()} tasks left')

            else:
                logger.warning(f'queue {i} of {t}: Job not active! Phase is {phase}')
                # mark the task as done anyway so that queue.join() can return
                self.queue.task_done()

            time.sleep(np.random.uniform(60))

        logger.debug("closing thread")

    def _run_tap_worker_threads(self, nthreads):
        threads = [threading.Thread(target=self._tap_photometry_worker_thread)
                   for _ in range(nthreads)]

        for t in threads:
            t.start()

        try:
            self.queue.join()
            logger.info('all tap_jobs done!')
        except KeyboardInterrupt:
            self.dump_tap_cache()
            return False
        finally:
            for i, t in enumerate(threads):
                logger.debug(f"thread {i} alive: {t.is_alive()}")
            for t in threads:
                t.join()
            self.tap_jobs = None
            del threads

        return True

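# --- editor's sketch: the queue/worker pattern used above, standalone ---
# Each worker pulls tasks until the queue is empty and marks them done so that
# `queue.join()` in the main thread unblocks once every task is processed.
# `handle` is a hypothetical task function.

import queue
import threading

def run_workers(tasks, handle, nthreads=4):
    q = queue.Queue()
    for task in tasks:
        q.put(task)

    def worker():
        while True:
            try:
                task = q.get(block=False)
            except queue.Empty:
                break
            try:
                handle(task)
            finally:
                q.task_done()  # required for q.join() to return

    threads = [threading.Thread(target=worker) for _ in range(nthreads)]
    for t in threads:
        t.start()
    q.join()       # blocks until task_done() was called for every put()
    for t in threads:
        t.join()
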
    def _query_for_photometry(self, tables, chunks, mag, flux, nthreads, query_type):
        # ----------------------------------------------------------------------
        # Load TAP cache if it exists
        # ----------------------------------------------------------------------
        cache_exists = self.load_tap_cache()

        # ----------------------------------------------------------------------
        # Do the query
        # ----------------------------------------------------------------------
        if not cache_exists:
            self.tap_jobs = dict()
            self.queue = queue.Queue() if self.queue is None else self.queue
            tables = np.atleast_1d(tables)

            for t in tables:
                self.tap_jobs[t] = dict()
                for i in chunks:
                    self._submit_job_to_TAP(i, t, mag, flux, query_type)
                    time.sleep(5)

            logger.info(f'added {self.queue.qsize()} tasks to queue')
            self.dump_tap_cache()
            logger.info("waiting before collecting results to give the TAP jobs time to run")
            return False

        logger.info(f'starting worker threads to retrieve results, {self.queue.qsize()} tasks in queue')
        nthreads = min(len(tables) * len(chunks), nthreads)
        success = self._run_tap_worker_threads(nthreads)
        self.queue = None
        return success

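# --- editor's note: _query_for_photometry is deliberately two-phase ---
# On the first call it only submits the TAP jobs, dumps their URLs to the cache
# and returns False; a later call finds the cache and starts the worker threads
# that download the results. A caller can therefore simply retry until it
# succeeds, as in this hypothetical driver loop (not part of the original file):
#
#     done = False
#     while not done:
#         done = wise_data._query_for_photometry(tables, chunks, mag, flux,
#                                                nthreads, query_type)
#         if not done:
#             time.sleep(600)  # give the remote jobs time to run
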
    # ----------------------------------------------------------------------
    # select individual lightcurves and bin
    # ----------------------------------------------------------------------

    def _select_individual_lightcurves_and_bin(self, ncpu=35, service='tap', chunks=None, mask_by_position=False):
        logger.info('selecting individual lightcurves and binning ...')
        ncpu = min(self.n_chunks, ncpu)
        logger.debug(f"using {ncpu} CPUs")
        chunk_list = list(range(self.n_chunks)) if not chunks else chunks
        service_list = [service] * len(chunk_list)
        jobID_list = [None] * len(chunk_list)
        pos_mask_list = [mask_by_position] * len(chunk_list)
        logger.debug(f"multiprocessing arguments: chunks: {chunk_list}, service: {service_list}")

        while True:
            try:
                logger.debug(f'trying with {ncpu} CPUs')
                p = mp.Pool(ncpu)
                break
            except OSError as e:
                logger.warning(e)
                if ncpu == 1:
                    break
                ncpu = int(round(ncpu - 1))

        if ncpu > 1:
            r = list(
                tqdm.tqdm(
                    p.starmap(
                        self._subprocess_select_and_bin,
                        zip(service_list, chunk_list, jobID_list, pos_mask_list)
                    ),
                    total=self.n_chunks,
                    desc='select and bin'
                )
            )
            p.close()
            p.join()
        else:
            r = list(map(self._subprocess_select_and_bin, service_list, chunk_list, jobID_list, pos_mask_list))

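# --- editor's sketch: shrinking the pool until creation succeeds ---
# `mp.Pool` can raise OSError when the system refuses to spawn that many
# processes; the method above retries with one worker fewer each time.
# Standalone form of that fallback:

import multiprocessing as mp

def make_pool(ncpu: int):
    while True:
        try:
            return mp.Pool(ncpu)
        except OSError:
            if ncpu == 1:
                raise  # nothing smaller to try
            ncpu -= 1  # retry with one worker fewer
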
    def get_unbinned_lightcurves(self, chunk_number, clear=False):
        """
        Get the unbinned lightcurves for a given chunk number.

        :param chunk_number: the number of the chunk to load
        :type chunk_number: int
        :param clear: remove files after loading, defaults to False
        :type clear: bool, optional
        :return: the unbinned lightcurves
        :rtype: pandas.DataFrame
        """
        # load only the files for this chunk
        fns = [self._cache_photometry_dir / fn
               for fn in os.listdir(self._cache_photometry_dir)
               if (fn.startswith(self._cached_raw_photometry_prefix) and fn.endswith(
                   f"{self._split_chunk_key}{chunk_number}.csv"
               ))]
        logger.debug(f"chunk {chunk_number}: loading {len(fns)} files")

        if len(fns) == 0:
            raise ValueError(f"No unbinned lightcurves found for chunk {chunk_number}!")

        lightcurves = pd.concat([pd.read_csv(fn) for fn in fns]).reset_index()

        if clear:
            for fn in fns:
                os.remove(fn)

        return lightcurves

    def _subprocess_select_and_bin(self, service, chunk_number=None, jobID=None, mask_by_position=False):
        # run through the IDs and bin the lightcurves
        if service == 'tap':
            lightcurves = self.get_unbinned_lightcurves(chunk_number, clear=self.clear_unbinned_photometry_when_binning)
        elif service == 'gator':
            lightcurves = self._get_unbinned_lightcurves_gator(
                chunk_number,
                clear=self.clear_unbinned_photometry_when_binning
            )
        else:
            raise ValueError(f"Service {service} not known!")

        if jobID:
            indices = np.where(self.cluster_jobID_map == jobID)[0]
        else:
            indices = lightcurves[self._tap_orig_id_key].unique()

        logger.debug(f"chunk {chunk_number}: going through {len(indices)} IDs")

        data_product = self.load_data_product(service=service, chunk_number=chunk_number, jobID=jobID)

        if data_product is None:
            logger.info(f"Starting data product for {len(indices)} indices.")
            data_product = self._start_data_product(parent_sample_indices=indices)

        if mask_by_position:
            bad_indices = self.get_position_mask(service, chunk_number)
        else:
            bad_indices = None

        for parent_sample_entry_id in tqdm.tqdm(indices, desc="binning"):
            m = lightcurves[self._tap_orig_id_key] == parent_sample_entry_id
            lightcurve = lightcurves[m]

            if (bad_indices is not None) and (str(parent_sample_entry_id) in bad_indices):
                pos_m = ~lightcurve.index.isin(bad_indices[str(parent_sample_entry_id)])
                lightcurve = lightcurve[pos_m]

            if len(lightcurve) < 1:
                logger.warning(f"No data for {parent_sample_entry_id}")
                continue

            binned_lc = self.bin_lightcurve(lightcurve)
            data_product[str(int(parent_sample_entry_id))]["timewise_lightcurve"] = binned_lc.to_dict()

        logger.debug(f"chunk {chunk_number}: saving {len(data_product.keys())} binned lcs")
        self._save_data_product(data_product, service=service, chunk_number=chunk_number, jobID=jobID, overwrite=True)

    # ---------------------------------------- #
    # END using TAP to get photometry #
    # ----------------------------------------------------------------------------------- #

    # ----------------------------------------------------------------------
    # bin lightcurves
    # ----------------------------------------------------------------------

    @abc.abstractmethod
    def bin_lightcurve(self, lightcurve):
        """
        Bins a lightcurve

        :param lightcurve: The unbinned lightcurve
        :type lightcurve: pandas.DataFrame
        :return: the binned lightcurve
        :rtype: pandas.DataFrame
        """
        raise NotImplementedError

    # ----------------------------------------------------------------------
    # bin lightcurves
    # ----------------------------------------------------------------------

    # ----------------------------------------------------------------------------------- #
    # START converting to flux densities #
    # ---------------------------------------------------- #

    def find_color_correction(self, w1_minus_w2):
        """
        Find the color correction based on the W1-W2 color.
        See `this <https://wise2.ipac.caltech.edu/docs/release/allsky/expsup/sec4_4h.html#conv2flux>`_

        :param w1_minus_w2: the W1-W2 color
        :type w1_minus_w2: float
        :return: the color correction factors, one row per input color
        :rtype: pandas.DataFrame
        """
        w1_minus_w2 = np.atleast_1d(w1_minus_w2)
        power_law_values = self.magnitude_zeropoints_corrections.loc[8:16]['[W1 - W2]']
        rows = []
        for w1mw2 in w1_minus_w2:
            dif = power_law_values - w1mw2
            # idxmin gives the label of the closest tabulated color;
            # DataFrame.append was removed in pandas 2.0, so collect rows and build the frame once
            i = abs(dif).idxmin()
            rows.append(self.magnitude_zeropoints_corrections.loc[i])
        c = pd.DataFrame(rows, columns=self.magnitude_zeropoints_corrections.columns)
        return c

    def vegamag_to_flux_density(self, vegamag, band, unit='mJy', color_correction=None):
        """
        This converts the detector level brightness m in Mag_vega to a flux density F

            F = (F_nu / f_c) * 10 ^ (-m / 2.5)

        where F_nu is the zeropoint flux for the corresponding band and f_c a color correction factor.
        See `this <https://wise2.ipac.caltech.edu/docs/release/allsky/expsup/sec4_4h.html#conv2flux>`_

        :param vegamag: the Vega magnitudes
        :type vegamag: float or numpy.ndarray
        :param band: the WISE band
        :type band: str
        :param unit: unit to convert the flux density to
        :type unit: str
        :param color_correction: the color correction factor, if dict the keys have to be 'f_c(<band>)'
        :type color_correction: float or numpy.ndarray or dict
        :return: the flux densities
        :rtype: numpy.ndarray
        """
        if color_correction is not None:
            key = f'f_c({band})'
            if key in color_correction:
                color_correction = color_correction[key]
                if len(color_correction) != len(vegamag):
                    raise ValueError(f"\nLength of color corrections: {len(color_correction)}:\n{color_correction}; "
                                     f"\nLength of mags: {len(vegamag)}: \n{vegamag}")
            else:
                raise NotImplementedError(color_correction)

        else:
            color_correction = 1

        color_correction = np.array(color_correction)
        vegamag = np.array(vegamag)
        fd = self.magnitude_zeropoints['F_nu'][band].to(unit).value / color_correction * 10 ** (-vegamag / 2.5)
        if len(fd) != len(vegamag):
            raise ValueError(f"\nLength of flux densities: {len(fd)}:\n{fd}; "
                             f"\nLength of mags: {len(vegamag)}: \n{vegamag}")

        return np.asarray(fd)

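# --- editor's sketch: the conversion above, worked through once ---
# F = (F_nu / f_c) * 10**(-m / 2.5). Assuming the W1 in-band zero point from the
# linked Explanatory Supplement table (F_nu = 309.540 Jy) and no color
# correction (f_c = 1), a W1 = 15.0 Vega-mag source gives:

import astropy.units as u

F_nu_W1 = 309.540 * u.Jy                            # assumed W1 zero-point flux
m_w1 = 15.0                                         # Vega magnitude
flux_density = F_nu_W1.to("mJy") * 10 ** (-m_w1 / 2.5)
print(flux_density)                                 # ~0.31 mJy
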
    def add_flux_density(self, lightcurve,
                         mag_key, emag_key, mag_ul_key,
                         f_key, ef_key, f_ul_key, do_color_correction=False):
        """Adds flux densities to a lightcurve

        :param lightcurve: the lightcurve
        :type lightcurve: pandas.DataFrame
        :param mag_key: the key in `lightcurve` that holds the magnitude
        :type mag_key: str
        :param emag_key: the key in `lightcurve` that holds the error of the magnitude
        :type emag_key: str
        :param mag_ul_key: the key in `lightcurve` that holds the upper limit for the magnitude
        :type mag_ul_key: str
        :param f_key: the key that will hold the flux density
        :type f_key: str
        :param ef_key: the key that will hold the flux density error
        :type ef_key: str
        :param f_ul_key: the key that will hold the flux density upper limit
        :type f_ul_key: str
        :param do_color_correction: apply the W1-W2 color correction
        :type do_color_correction: bool
        :return: the lightcurve with flux density
        :rtype: pandas.DataFrame
        """

        if isinstance(lightcurve, dict):
            lightcurve = pd.DataFrame.from_dict(lightcurve, orient='columns')

        if do_color_correction:
            w1_minus_w2 = lightcurve[f"W1{mag_key}"] - lightcurve[f"W2{mag_key}"]
            f_c = self.find_color_correction(w1_minus_w2)
        else:
            f_c = None

        for b in self.bands:
            mags = lightcurve[f'{b}{mag_key}']
            emags = lightcurve[f'{b}{emag_key}']

            # apply the color correction consistently to the central values and the error bounds
            flux_densities = self.vegamag_to_flux_density(mags, band=b, color_correction=f_c)
            upper_eflux_densities = self.vegamag_to_flux_density(mags - emags, band=b, color_correction=f_c)
            lower_eflux_densities = self.vegamag_to_flux_density(mags + emags, band=b, color_correction=f_c)
            eflux_densities = upper_eflux_densities - lower_eflux_densities

            lightcurve[f'{b}{f_key}'] = flux_densities
            lightcurve[f'{b}{ef_key}'] = eflux_densities
            if mag_ul_key:
                lightcurve[f'{b}{f_ul_key}'] = lightcurve[f'{b}{mag_ul_key}']

        return lightcurve

    def add_flux_densities_to_saved_lightcurves(self, service):
        """Adds flux densities to all downloaded lightcurves

        :param service: The service with which the lightcurves were downloaded
        :type service: str
        """
        data_product = self.load_data_product(service=service)
        for i, i_data_product in tqdm.tqdm(data_product.items(), desc='adding flux densities'):
            data_product[i]["timewise_lightcurve"] = self.add_flux_density(
                i_data_product["timewise_lightcurve"],
                mag_key=f'{self.mean_key}{self.mag_key_ext}',
                emag_key=f'{self.mag_key_ext}{self.rms_key}',
                mag_ul_key=f'{self.mag_key_ext}{self.upper_limit_key}',
                f_key=f'{self.mean_key}{self.flux_density_key_ext}',
                ef_key=f'{self.flux_density_key_ext}{self.rms_key}',
                f_ul_key=f'{self.flux_density_key_ext}{self.upper_limit_key}'
            ).to_dict()
        self._save_data_product(data_product, service=service, overwrite=True)

    # ---------------------------------------------------- #
    # END converting to flux densities #
    # ----------------------------------------------------------------------------------- #

    # ----------------------------------------------------------------------------------- #
    # START converting to luminosity #
    # ---------------------------------------------------- #

    def luminosity_from_flux_density(self, flux_density, band, distance=None, redshift=None,
                                     unit='erg s-1', flux_density_unit='mJy'):
        """
        Converts a flux density into a luminosity

        :param flux_density: the flux density
        :type flux_density: float or numpy.ndarray
        :param band: the WISE band
        :type band: str
        :param distance: distance to source, if not given will use luminosity distance from redshift
        :type distance: astropy.Quantity
        :param redshift: redshift to use when calculating luminosity distance
        :type redshift: float
        :param unit: unit in which to give the luminosity, default is erg s-1
        :type unit: str or astropy.unit
        :param flux_density_unit: unit in which the flux density is given, default is mJy
        :type flux_density_unit: str or astropy.unit
        :return: the resulting luminosities
        :rtype: float or numpy.ndarray
        """

        if distance is None:
            if redshift is None:
                raise ValueError('Either redshift or distance has to be given!')
            distance = Planck18.luminosity_distance(float(redshift))

        F_nu = np.array(flux_density) * u.Unit(flux_density_unit) * 4 * np.pi * distance ** 2
        nu = constants.c / self.band_wavelengths[band]
        luminosity = F_nu * nu
        return luminosity.to(unit).value

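# --- editor's sketch: flux density to nu*L_nu, worked through once ---
# L = 4 * pi * d_L^2 * F_nu * nu, as implemented above. Assuming a W1 effective
# wavelength of ~3.4 micron (the actual value lives in self.band_wavelengths)
# and a redshift of 0.1:

import numpy as np
import astropy.units as u
from astropy import constants
from astropy.cosmology import Planck18

flux_density = 0.31 * u.mJy
d_l = Planck18.luminosity_distance(0.1)                     # ~476 Mpc
nu = constants.c / (3.4 * u.micron)                         # ~8.8e13 Hz
luminosity = (4 * np.pi * d_l ** 2 * flux_density * nu).to("erg s-1")
print(luminosity)                                           # of order 1e41 erg/s
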
    def _add_luminosity(self, lightcurve, f_key, ef_key, f_ul_key, lum_key, elum_key, lum_ul_key, **lum_kwargs):
        for band in self.bands:
            fd = lightcurve[band + f_key]
            fd_e = lightcurve[band + ef_key]
            lum = self.luminosity_from_flux_density(fd, band, **lum_kwargs)
            elum = self.luminosity_from_flux_density(fd_e, band, **lum_kwargs)
            lightcurve[band + lum_key] = lum
            lightcurve[band + elum_key] = elum
            lightcurve[band + lum_ul_key] = lightcurve[band + f_ul_key]
        return lightcurve

    def add_luminosity_to_saved_lightcurves(self, service, redshift_key=None, distance_key=None):
        """Add luminosities to all lightcurves, calculated from flux densities and distance or redshift

        :param service: the service with which the lightcurves were downloaded
        :type service: str
        :param redshift_key: the key in the parent sample data frame that holds the redshift info
        :type redshift_key: str
        :param distance_key: the key in the parent sample data frame that holds the distance info
        :type distance_key: str
        """

        if (not redshift_key) and (not distance_key):
            raise ValueError('Either distance key or redshift key has to be given!')

        data_product = self.load_data_product(service=service)
        for i, i_data_product in tqdm.tqdm(data_product.items(), desc='adding luminosities'):
            parent_sample_idx = int(i.split('_')[0])
            info = self.parent_sample.df.loc[parent_sample_idx]

            if distance_key:
                distance = info[distance_key]
                redshift = None
            else:
                distance = None
                redshift = info[redshift_key]

            data_product[i]["timewise_lightcurve"] = self._add_luminosity(
                pd.DataFrame.from_dict(i_data_product["timewise_lightcurve"]),
                f_key=self.mean_key + self.flux_density_key_ext,
                ef_key=self.flux_density_key_ext + self.rms_key,
                f_ul_key=self.flux_density_key_ext + self.upper_limit_key,
                lum_key=self.mean_key + self.luminosity_key_ext,
                elum_key=self.luminosity_key_ext + self.rms_key,
                lum_ul_key=self.luminosity_key_ext + self.upper_limit_key,
                redshift=redshift,
                distance=distance
            ).to_dict()
        self._save_data_product(data_product, service=service, overwrite=True)

    # ---------------------------------------------------- #
    # END converting to luminosity #
    # ----------------------------------------------------------------------------------- #

    #################################
    # END GET PHOTOMETRY DATA #
    ###########################################################################################################

    ###########################################################################################################
    # START MAKE POSITIONAL MASK #
    #####################################

    @staticmethod
    def calculate_position_mask(lightcurve, ra, dec, whitelist_region, return_all=False):
        """
        Calculates a positional mask for a lightcurve: keeps all datapoints within the
        whitelist region around the source position and, if the clustering finds a cluster
        whose centroid lies within that region, all datapoints belonging to that cluster.
        The indices of the datapoints that are *not* selected are returned.

        :param lightcurve: unstacked lightcurve
        :type lightcurve: pandas.DataFrame
        :param ra: RA in degrees of the source
        :type ra: Sequence[float]
        :param dec: Dec in degrees of the source
        :type dec: Sequence[float]
        :param whitelist_region: region in which to keep all datapoints [arcsec]
        :type whitelist_region: float
        :param return_all: if True, return all info collected in the selection process
        :type return_all: bool, optional
        :return:
            positional mask (and result of the clustering algorithm and the mask for the closest allwise data
            if `return_all` is True)
        :rtype: list (`return_all` is False) or tuple (list, sklearn.cluster.HDBSCAN, list) (`return_all` is True)
        """
        lc_ra_rad = np.deg2rad(lightcurve.ra.values)
        lc_dec_rad = np.deg2rad(lightcurve.dec.values)
        source_ra_rad = np.deg2rad(ra)
        source_dec_rad = np.deg2rad(dec)

        # calculate separation and position angle
        _angular_separation = angular_separation(source_ra_rad, source_dec_rad, lc_ra_rad, lc_dec_rad)
        _position_angle = position_angle(source_ra_rad, source_dec_rad, lc_ra_rad, lc_dec_rad)

        # The AllWISE multiframe pipeline detects sources on the deep coadded atlas images and then measures the
        # sources on all available single-exposure images in all bands simultaneously, while the NEOWISE magnitudes
        # are obtained by PSF fits to individual exposures directly. Effect: all AllWISE datapoints that belong to
        # the same object have the same position. We take only the closest one and treat it as one datapoint in
        # the clustering.
        allwise_time_mask = lightcurve["mjd"] < 55594
        if any(allwise_time_mask):
            allwise_sep_min = np.min(_angular_separation[allwise_time_mask])
            closest_allwise_mask = (_angular_separation == allwise_sep_min) & allwise_time_mask
            closest_allwise_mask_first_entry = ~closest_allwise_mask.duplicated() & closest_allwise_mask

            # the data we want to use is then the selected AllWISE datapoint and the NEOWISE-R data
            data_mask = closest_allwise_mask_first_entry | ~allwise_time_mask
        else:
            closest_allwise_mask_first_entry = closest_allwise_mask = None
            data_mask = np.ones_like(_angular_separation, dtype=bool)

        # no matter which cluster they belong to, we want to keep all datapoints within the whitelist region
        whitelist_mask = _angular_separation < np.radians(whitelist_region / 3600)
        selected_indices = set(lightcurve.index[data_mask & whitelist_mask])

        # if there is more than one datapoint, we use a clustering algorithm to potentially find a cluster with
        # its center within the whitelist region
        cluster_res = None
        if data_mask.sum() > 1:
            # instead of the polar coordinates separation and position angle we use cartesian coordinates because
            # the clustering algorithm works better with them
            cartesian_full = np.array([
                _angular_separation * np.cos(_position_angle),
                _angular_separation * np.sin(_position_angle)
            ]).T
            cartesian = cartesian_full[data_mask]

            # we are now ready to do the clustering
            cluster_distance_arcsec = 0.5  # distance of clusters to be considered as one [arcsec]
            cluster_res = HDBSCAN(
                store_centers="centroid",
                min_cluster_size=max(min(20, len(cartesian)), 2),
                allow_single_cluster=True,
                cluster_selection_epsilon=np.radians(cluster_distance_arcsec / 3600)
            ).fit(cartesian)

            # we select the closest cluster within the whitelist region
            cluster_separations = np.sqrt(np.sum(cluster_res.centroids_ ** 2, axis=1))
            logger.debug(f"Found {len(cluster_separations)} clusters")

            # if there is no cluster or no cluster within the whitelist region,
            # only the datapoints within the whitelist region are selected as we did above
            if len(cluster_separations) == 0:
                logger.debug("No cluster found. Selecting all noise datapoints within the whitelist region.")
            elif min(cluster_separations) > np.radians(whitelist_region / 3600):
                logger.debug(f"Closest cluster is at {np.degrees(min(cluster_separations)) * 3600:.2f} arcsec")

            # if there is a cluster within the whitelist region, we select all datapoints belonging to that
            # cluster in addition to the datapoints within the whitelist region
            else:
                closest_label = cluster_separations.argmin()
                selected_cluster_mask = cluster_res.labels_ == closest_label

                # now we have to trace back the selected datapoints to the original lightcurve
                selected_indices |= set(lightcurve.index[data_mask][selected_cluster_mask])
                logger.debug(f"Selected {len(selected_indices)} datapoints")

        # if the closest AllWISE source is selected, we also select all other detections belonging to that
        # source in the AllWISE period
        if (
                closest_allwise_mask_first_entry is not None
                and lightcurve.index[closest_allwise_mask_first_entry][0] in selected_indices
        ):
            closest_allwise_mask_not_first = closest_allwise_mask & ~closest_allwise_mask_first_entry
            closest_allwise_indices_not_first = lightcurve.index[closest_allwise_mask_not_first]
            logger.debug(f"Adding remaining {len(closest_allwise_indices_not_first)} from AllWISE period")
            selected_indices |= set(closest_allwise_indices_not_first)

        # because in most cases we will have more good indices than bad indices, we store the bad indices instead
        bad_indices = lightcurve.index[~lightcurve.index.isin(selected_indices)]

        if return_all:
            return_closest_allwise_mask = list(closest_allwise_mask) if closest_allwise_mask is not None else None
            return list(bad_indices), cluster_res, data_mask, return_closest_allwise_mask
        else:
            return list(bad_indices)

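# --- editor's sketch: the HDBSCAN step above, on synthetic offsets ---
# Requires scikit-learn >= 1.3, where `sklearn.cluster.HDBSCAN` and
# `store_centers="centroid"` were introduced. The offsets are made up.

import numpy as np
from sklearn.cluster import HDBSCAN

rng = np.random.default_rng(42)
# 30 detections scattered ~0.3 arcsec around the source, in radians
offsets = np.radians(rng.normal(0, 0.3 / 3600, size=(30, 2)))

clusterer = HDBSCAN(
    store_centers="centroid",
    min_cluster_size=5,
    allow_single_cluster=True,
    cluster_selection_epsilon=np.radians(0.5 / 3600),
).fit(offsets)

separations = np.sqrt((clusterer.centroids_ ** 2).sum(axis=1))
print(np.degrees(separations) * 3600)   # centroid distances in arcsec
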
    def get_position_mask(self, service, chunk_number):
        """
        Get the position mask for a chunk

        :param service: The service that was used to download the data, either of `gator` or `tap`
        :type service: str
        :param chunk_number: chunk number
        :type chunk_number: int
        :returns: position masks
        :rtype: dict
        """

        logger.info(f"getting position masks for {service}, chunk {chunk_number}")
        fn = self.cache_dir / "position_masks" / f"{service}_chunk{chunk_number}.json"

        if not fn.is_file():
            logger.debug(f"No file {fn}. Calculating position masks.")

            if service == "tap":
                unbinned_lcs = self.get_unbinned_lightcurves(chunk_number)
            elif service == "gator":
                unbinned_lcs = self._get_unbinned_lightcurves_gator(chunk_number)
            else:
                raise ValueError(f"Service must be one of 'gator' or 'tap', not {service}!")

            position_masks = dict()

            for i in tqdm.tqdm(unbinned_lcs[self._tap_orig_id_key].unique(), desc="calculating position masks"):
                idt = self.parent_sample.df.index.dtype.type(i)
                ra = self.parent_sample.df.loc[idt, self.parent_sample.default_keymap["ra"]]
                dec = self.parent_sample.df.loc[idt, self.parent_sample.default_keymap["dec"]]
                src_id = self.parent_sample.df.loc[idt, self.parent_sample.default_keymap["id"]]
                lightcurve = unbinned_lcs[unbinned_lcs[self._tap_orig_id_key] == i]

                logger.debug(f"calculating position mask for {src_id} ({ra}, {dec})")
                bad_indices = self.calculate_position_mask(
                    lightcurve,
                    ra,
                    dec,
                    self.whitelist_region.to("arcsec").value
                )
                if len(bad_indices) > 0:
                    position_masks[str(i)] = bad_indices

            fn.parent.mkdir(exist_ok=True, parents=True)
            with open(fn, "w") as f:
                json.dump(position_masks, f)

        else:
            logger.debug(f"loading {fn}")
            with open(fn, "r") as f:
                position_masks = json.load(f)

        return position_masks

    #####################################
    # END MAKE POSITIONAL MASK #
    ###########################################################################################################

    ###########################################################################################################
    # START MAKE PLOTTING FUNCTIONS #
    #####################################

    def plot_lc(self, parent_sample_idx, service='tap', plot_unbinned=False, plot_binned=True,
                interactive=False, fn=None, ax=None, save=True, lum_key='flux_density', **kwargs):
        """Make a pretty plot of a lightcurve

        :param parent_sample_idx: The index in the parent sample of the lightcurve
        :type parent_sample_idx: int
        :param service: the service with which the lightcurves were downloaded
        :type service: str
        :param plot_unbinned: plot unbinned data
        :type plot_unbinned: bool
        :param plot_binned: plot binned lightcurve
        :type plot_binned: bool
        :param interactive: interactive mode
        :type interactive: bool
        :param fn: filename, defaults to </path/to/timewise/data/dir>/output/plots/<base_name>/<parent_sample_index>_<lum_key>.pdf
        :type fn: str
        :param ax: pre-existing matplotlib.Axis
        :param save: save the plot
        :type save: bool
        :param lum_key: the unit of luminosity to use in the plot, either of 'mag', 'flux_density' or 'luminosity'
        :param kwargs: any additional kwargs will be passed on to `matplotlib.pyplot.subplots()`
        :return: the matplotlib.Figure and matplotlib.Axes if `interactive=True`
        """

        logger.debug("loading binned lightcurves")
        data_product = self.load_data_product(service)
        _get_unbinned_lcs_fct = self.get_unbinned_lightcurves if service == 'tap' else self._get_unbinned_lightcurves_gator

        wise_id = self.parent_sample.df.loc[int(parent_sample_idx), self.parent_wise_source_id_key]
        if isinstance(wise_id, float) and not np.isnan(wise_id):
            wise_id = int(wise_id)
        logger.debug(f"{wise_id} for {parent_sample_idx}")

        lc = pd.DataFrame.from_dict(data_product[str(int(parent_sample_idx))]["timewise_lightcurve"])

        if plot_unbinned:
            _chunk_number = self._get_chunk_number(parent_sample_index=parent_sample_idx)
            unbinned_lcs = _get_unbinned_lcs_fct(_chunk_number)
            unbinned_lc = unbinned_lcs[unbinned_lcs[self._tap_orig_id_key] == int(parent_sample_idx)]
        else:
            unbinned_lc = None

        _lc = lc if plot_binned else None

        if not fn:
            fn = self.plots_dir / f"{parent_sample_idx}_{lum_key}.pdf"

        return self._plot_lc(lightcurve=_lc, unbinned_lc=unbinned_lc, interactive=interactive, fn=fn, ax=ax,
                             save=save, lum_key=lum_key, **kwargs)

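# --- editor's sketch: a typical call into the plotting interface above ---
# `wise_data` stands for any concrete subclass instance of this base class;
# the index must exist in its parent sample. All values are illustrative.

# fig, ax = wise_data.plot_lc(
#     parent_sample_idx=0,
#     service='tap',
#     plot_unbinned=True,
#     lum_key='flux_density',
#     interactive=True,   # returns (fig, ax) instead of closing the figure
# )
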
    def _plot_lc(self, lightcurve=None, unbinned_lc=None, interactive=False, fn=None, ax=None, save=True,
                 lum_key='flux_density', colors=None, **kwargs):

        if not colors:
            colors = self.band_plot_colors

        if not ax:
            fig, ax = plt.subplots(**kwargs)
        else:
            fig = plt.gcf()

        for b in self.bands:
            try:
                if lightcurve is not None:
                    ul_mask = np.array(lightcurve[f"{b}_{lum_key}{self.upper_limit_key}"]).astype(bool)
                    ax.errorbar(lightcurve.mean_mjd[~ul_mask], lightcurve[f"{b}{self.mean_key}_{lum_key}"][~ul_mask],
                                yerr=lightcurve[f"{b}_{lum_key}{self.rms_key}"][~ul_mask],
                                label=b, ls='', marker='s', c=colors[b], markersize=4,
                                markeredgecolor='k', ecolor='k', capsize=2)
                    ax.scatter(lightcurve.mean_mjd[ul_mask], lightcurve[f"{b}{self.mean_key}_{lum_key}"][ul_mask],
                               marker='v', c=colors[b], alpha=0.7, s=2)

                if unbinned_lc is not None:
                    m = ~unbinned_lc[f"{b}_{lum_key}"].isna()
                    ul_mask = unbinned_lc[f"{b}_{lum_key}{self.error_key_ext}"].isna()

                    tot_m = m & ~ul_mask
                    if np.any(tot_m):
                        ax.errorbar(unbinned_lc.mjd[tot_m], unbinned_lc[f"{b}_{lum_key}"][tot_m],
                                    yerr=unbinned_lc[f"{b}_{lum_key}{self.error_key_ext}"][tot_m],
                                    label=f"{b} unbinned", ls='', marker='o', c=colors[b], markersize=4,
                                    alpha=0.3)

                    single_ul_m = m & ul_mask
                    if np.any(single_ul_m):
                        label = f"{b} unbinned upper limits" if not np.any(tot_m) else ""
                        ax.scatter(unbinned_lc.mjd[single_ul_m], unbinned_lc[f"{b}_{lum_key}"][single_ul_m],
                                   marker="d", c=colors[b], alpha=0.3, s=1, label=label)

            except KeyError as e:
                raise KeyError(f"Could not find brightness key {e}!") from e

        if lum_key == 'mag':
            # magnitudes: invert the y-axis so that brighter is up
            ylim = ax.get_ylim()
            ax.set_ylim(max(ylim), min(ylim))

        ax.set_xlabel('MJD')
        ax.set_ylabel(lum_key)
        ax.legend()

        if save:
            logger.debug(f"saving under {fn}")
            fig.savefig(fn)

        if interactive:
            return fig, ax
        else:
            plt.close()

    #####################################
    # END MAKE PLOTTING FUNCTIONS #
    ###########################################################################################################

    ###########################################################################################################
    # START CALCULATE METADATA #
    #####################################

    def calculate_metadata(self, service, chunk_number=None, jobID=None, overwrite=True):
        """Calculates the metadata for all downloaded lightcurves.
        Results will be saved under

            </path/to/timewise/data/dir>/output/<base_name>/lightcurves/metadata_<service>.json

        :param service: the service with which the lightcurves were downloaded
        :type service: str
        :param chunk_number: the chunk number to use, default uses all chunks
        :type chunk_number: int
        :param jobID: the job ID to use, default uses all lightcurves
        :type jobID: int
        :param overwrite: overwrite existing metadata file
        :type overwrite: bool
        """
        data_product = self.load_data_product(service, chunk_number, jobID)
        for ID, i_data_product in tqdm.tqdm(data_product.items(), desc="calculating metadata"):
            if "timewise_lightcurve" in i_data_product:
                lc = pd.DataFrame.from_dict(i_data_product["timewise_lightcurve"])
                metadata = self.calculate_metadata_single(lc)
                data_product[ID]["timewise_metadata"] = metadata

        self._save_data_product(data_product, service, chunk_number, jobID, overwrite=overwrite)

    @abc.abstractmethod
    def calculate_metadata_single(self, lcs):
        """
        Calculates some properties of the lightcurve

        :param lcs: the lightcurve
        :type lcs: pandas.DataFrame
        :return: the calculated metadata
        """
        raise NotImplementedError

    #####################################
    # END CALCULATE METADATA #
    ###########################################################################################################