timewise 0.5.4__py3-none-any.whl → 1.0.0a2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (52)
  1. timewise/__init__.py +1 -5
  2. timewise/backend/__init__.py +6 -0
  3. timewise/backend/base.py +36 -0
  4. timewise/backend/filesystem.py +80 -0
  5. timewise/chunking.py +50 -0
  6. timewise/cli.py +117 -11
  7. timewise/config.py +34 -0
  8. timewise/io/__init__.py +1 -0
  9. timewise/io/config.py +64 -0
  10. timewise/io/download.py +302 -0
  11. timewise/io/stable_tap.py +121 -0
  12. timewise/plot/__init__.py +3 -0
  13. timewise/plot/diagnostic.py +242 -0
  14. timewise/plot/lightcurve.py +112 -0
  15. timewise/plot/panstarrs.py +260 -0
  16. timewise/plot/sdss.py +109 -0
  17. timewise/process/__init__.py +2 -0
  18. timewise/process/config.py +34 -0
  19. timewise/process/interface.py +143 -0
  20. timewise/process/keys.py +10 -0
  21. timewise/process/stacking.py +322 -0
  22. timewise/process/template.yml +49 -0
  23. timewise/query/__init__.py +6 -0
  24. timewise/query/base.py +45 -0
  25. timewise/query/positional.py +40 -0
  26. timewise/tables/__init__.py +10 -0
  27. timewise/tables/allwise_p3as_mep.py +22 -0
  28. timewise/tables/base.py +9 -0
  29. timewise/tables/neowiser_p1bs_psd.py +22 -0
  30. timewise/types.py +30 -0
  31. timewise/util/backoff.py +12 -0
  32. timewise/util/csv_utils.py +12 -0
  33. timewise/util/error_threading.py +70 -0
  34. timewise/util/visits.py +33 -0
  35. timewise-1.0.0a2.dist-info/METADATA +205 -0
  36. timewise-1.0.0a2.dist-info/RECORD +39 -0
  37. timewise-1.0.0a2.dist-info/entry_points.txt +3 -0
  38. timewise/big_parent_sample.py +0 -106
  39. timewise/config_loader.py +0 -157
  40. timewise/general.py +0 -52
  41. timewise/parent_sample_base.py +0 -89
  42. timewise/point_source_utils.py +0 -68
  43. timewise/utils.py +0 -558
  44. timewise/wise_bigdata_desy_cluster.py +0 -1407
  45. timewise/wise_data_base.py +0 -2027
  46. timewise/wise_data_by_visit.py +0 -672
  47. timewise/wise_flux_conversion_correction.dat +0 -19
  48. timewise-0.5.4.dist-info/METADATA +0 -56
  49. timewise-0.5.4.dist-info/RECORD +0 -17
  50. timewise-0.5.4.dist-info/entry_points.txt +0 -3
  51. {timewise-0.5.4.dist-info → timewise-1.0.0a2.dist-info}/WHEEL +0 -0
  52. {timewise-0.5.4.dist-info → timewise-1.0.0a2.dist-info}/licenses/LICENSE +0 -0
@@ -1,2027 +0,0 @@
1
- import abc
2
- import sys
3
-
4
- import backoff
5
- import copy
6
- import json
7
- import logging
8
- import multiprocessing as mp
9
- import os
10
- import queue
11
- import requests
12
- import subprocess
13
- import threading
14
- import time
15
- import tqdm
16
- from pathlib import Path
17
-
18
- import astropy.units as u
19
- import matplotlib.pyplot as plt
20
- import numpy as np
21
- import pandas as pd
22
- import pyvo as vo
23
- from collections.abc import Sequence
24
- from astropy import constants
25
- from astropy.cosmology import Planck18
26
- from astropy.io import ascii
27
- from astropy.table import Table
28
- from astropy.coordinates.angle_utilities import angular_separation, position_angle
29
- from sklearn.cluster import HDBSCAN
30
-
31
- from timewise.general import get_directories, logger_format, backoff_hndlr
32
- from timewise.utils import StableAsyncTAPJob, StableTAPService
33
-
34
- logger = logging.getLogger(__name__)
35
-
36
-
37
- class WISEDataBase(abc.ABC):
38
- """
39
- Base class for WISE Data
40
-
41
-
42
- :param parent_sample_class: class for parent sample
43
- :type parent_sample_class: `ParentSample` class
44
- :param base_name: unique name to determine storage directories
45
- :type base_name: str
46
- :param min_sep: query region around source for positional query
47
- :type min_sep: astropy.units.Quantity
48
- :param whitelist_region: region around source where all datapoints are accepted in positional query
49
- :type whitelist_region: astropy.units.Quantity
50
- :param n_chunks: number of chunks in declination
51
- :type n_chunks: int
52
- :param parent_wise_source_id_key: key for the WISE source ID in the parent sample
53
- :type parent_wise_source_id_key: str
54
- :param parent_sample_wise_skysep_key: key for the angular separation to the WISE source in the parent sample
55
- :type parent_sample_wise_skysep_key: str
56
- :param parent_sample_default_entries: default entries for the parent sample
57
- :type parent_sample_default_entries: dict
58
- :param cache_dir: directory for cached data
59
- :type cache_dir: Path
60
- :param cluster_dir: directory for cluster data
61
- :type cluster_dir: Path
- :param cluster_log_dir: directory for cluster logs
62
- :type cluster_log_dir: Path
63
- :param output_dir: directory for output data
64
- :type output_dir: Path
65
- :param lightcurve_dir: directory for lightcurve data
66
- :type lightcurve_dir: Path
67
- :param plots_dir: directory for plots
68
- :type plots_dir: Path
69
- :param submit_file: file for cluster submission
70
- :type submit_file: Path
71
- :param tap_jobs: TAP job URLs
72
- :type tap_jobs: list[str]
73
- :param queue: queue for cluster jobs
74
- :type queue: multiprocessing.Queue
75
- :param clear_unbinned_photometry_when_binning: whether to clear unbinned photometry when binning
76
- :type clear_unbinned_photometry_when_binning: bool
77
- :param chunk_map: map of chunks
78
- :type chunk_map: np.ndarray
79
- :param service_url: URL of the TAP service
80
- :type service_url: str
81
- :param service: custom TAP service, making sure that the TAP jobs are stable
82
- :type service: `timewise.utils.StableTAPService`
83
- :param active_tap_phases: phases of TAP jobs that are still active
84
- :type active_tap_phases: set
85
- :param running_tap_phases: phases of TAP jobs that are still running
86
- :type running_tap_phases: list
87
- :param done_tap_phases: phases of TAP jobs that are done
88
- :type done_tap_phases: set
89
- :param query_types: query types
90
- :type query_types: list
91
- :param table_names: mapping between nice and programmatic table names of the WISE data tables
92
- :type table_names: pd.DataFrame
93
- :param bands: WISE bands
94
- :type bands: list
95
- :param flux_key_ext: key extension for flux keys
96
- :type flux_key_ext: str
97
- :param flux_density_key_ext: key extension for flux density keys
98
- :type flux_density_key_ext: str
99
- :param mag_key_ext: key extension for magnitude keys
100
- :type mag_key_ext: str
101
- :param luminosity_key_ext: key extension for luminosity keys
102
- :type luminosity_key_ext: str
103
- :param error_key_ext: key extension for error keys
104
- :type error_key_ext: str
105
- :param band_plot_colors: plot colors for bands
106
- :type band_plot_colors: dict
107
- :param photometry_table_keymap:
108
- keymap for photometry tables, listing the column names for flux, mag etc for the different WISE data tables
109
- :type photometry_table_keymap: dict
110
- :param magnitude_zeropoints: magnitude zeropoints from `here <https://wise2.ipac.caltech.edu/docs/release/allsky/expsup/sec4_4h.html#conv2flux>`_
111
- :type magnitude_zeropoints: dict
112
- :param constraints: constraints for TAP queries selecting good datapoints as explained in the explanatory supplements
113
- :type constraints: list
114
- """
119
-
120
- service_url = 'https://irsa.ipac.caltech.edu/TAP'
121
- service = StableTAPService(service_url)
122
- active_tap_phases = {"QUEUED", "EXECUTING", "RUN", "COMPLETED", "ERROR", "UNKNOWN"}
123
- running_tap_phases = ["QUEUED", "EXECUTING", "RUN"]
124
- done_tap_phases = {"COMPLETED", "ABORTED", "ERROR"}
125
-
126
- query_types = ['positional', 'by_allwise_id']
127
-
128
- table_names = pd.DataFrame([
129
- ('AllWISE Multiepoch Photometry Table', 'allwise_p3as_mep'),
130
- ('AllWISE Source Catalog', 'allwise_p3as_psd'),
131
- ('WISE 3-Band Cryo Single Exposure (L1b) Source Table', 'allsky_3band_p1bs_psd'),
132
- ('NEOWISE-R Single Exposure (L1b) Source Table', 'neowiser_p1bs_psd'),
133
- ('WISE All-Sky Source Catalog', 'allsky_4band_p3as_psd')
134
- ], columns=['nice_table_name', 'table_name'])
135
-
136
- bands = ['W1', 'W2']
137
- flux_key_ext = "_flux"
138
- flux_density_key_ext = "_flux_density"
139
- mag_key_ext = "_mag"
140
- luminosity_key_ext = "_luminosity"
141
- error_key_ext = "_error"
142
- band_plot_colors = {'W1': 'r', 'W2': 'b'}
143
-
144
- photometry_table_keymap = {
145
- 'AllWISE Multiepoch Photometry Table': {
146
- 'flux': {
147
- 'w1flux_ep': f'W1{flux_key_ext}',
148
- 'w1sigflux_ep': f'W1{flux_key_ext}{error_key_ext}',
149
- 'w2flux_ep': f'W2{flux_key_ext}',
150
- 'w2sigflux_ep': f'W2{flux_key_ext}{error_key_ext}'
151
- },
152
- 'mag': {
153
- 'w1mpro_ep': f'W1{mag_key_ext}',
154
- 'w1sigmpro_ep': f'W1{mag_key_ext}{error_key_ext}',
155
- 'w2mpro_ep': f'W2{mag_key_ext}',
156
- 'w2sigmpro_ep': f'W2{mag_key_ext}{error_key_ext}'
157
- }
158
- },
159
- 'NEOWISE-R Single Exposure (L1b) Source Table': {
160
- 'flux': {
161
- 'w1flux': f'W1{flux_key_ext}',
162
- 'w1sigflux': f'W1{flux_key_ext}{error_key_ext}',
163
- 'w2flux': f'W2{flux_key_ext}',
164
- 'w2sigflux': f'W2{flux_key_ext}{error_key_ext}'
165
- },
166
- 'mag': {
167
- 'w1mpro': f'W1{mag_key_ext}',
168
- 'w1sigmpro': f'W1{mag_key_ext}{error_key_ext}',
169
- 'w2mpro': f'W2{mag_key_ext}',
170
- 'w2sigmpro': f'W2{mag_key_ext}{error_key_ext}'
171
- }
172
- }
173
- }
174
-
175
- # zero points come from https://wise2.ipac.caltech.edu/docs/release/allsky/expsup/sec4_4h.html#conv2flux
176
- # published in Jarrett et al. (2011): https://ui.adsabs.harvard.edu/abs/2011ApJ...735..112J/abstract
177
- magnitude_zeropoints = {
178
- 'F_nu': {
179
- 'W1': 309.54 * u.Jy,
180
- 'W2': 171.787 * u.Jy
181
- },
182
- 'Fstar_nu': {
183
- 'W1': 306.682 * u.Jy,
184
- 'W2': 170.663 * u.Jy
185
- },
186
- 'Mag': {
187
- 'W1': 20.752,
188
- 'W2': 19.596
189
- }
190
- }
191
-
192
- aperture_corrections = {
193
- 'W1': 0.222,
194
- 'W2': 0.280
195
- }
196
-
197
- _this_dir = Path(__file__).absolute().parent
198
- magnitude_zeropoints_corrections = ascii.read(
199
- _this_dir / 'wise_flux_conversion_correction.dat',
200
- delimiter='\t'
201
- ).to_pandas()
202
-
203
- band_wavelengths = {
204
- 'W1': 3.368 * 1e-6 * u.m,
205
- 'W2': 4.618 * 1e-6 * u.m
206
- }
207
-
208
- constraints = [
209
- "nb < 2",
210
- "na < 1",
211
- "cc_flags like '00%'",
212
- "qi_fact >= 1",
213
- "saa_sep >= 5",
214
- "moon_masked like '00%'"
215
- ]
216
-
217
- parent_wise_source_id_key = 'AllWISE_id'
218
- parent_sample_wise_skysep_key = 'sep_to_WISE_source'
219
-
220
- def __init__(self,
221
- base_name: str,
222
- parent_sample_class,
223
- min_sep_arcsec,
224
- n_chunks):
225
- """
226
- Base class for WISE Data
227
-
228
- :param base_name: unique name to determine storage directories
229
- :type base_name: str
230
- :param parent_sample_class: class for parent sample
231
- :type parent_sample_class: `ParentSample` class
232
- :param min_sep_arcsec: query region around source for positional query
233
- :type min_sep_arcsec: float
234
- :param n_chunks: number of chunks in declination
235
- :type n_chunks: int
236
- :param tap_url_cache_name: TAP job URLs are stored here to be able to resume them
237
- :type tap_url_cache_name: str
238
- """
239
-
240
- #######################################################################################
241
- # START SET-UP #
242
- #########################
243
-
244
- self.parent_sample_class = parent_sample_class
245
- self.base_name = base_name
246
- self.min_sep = min_sep_arcsec * u.arcsec
247
- self.whitelist_region = 1 * u.arcsec
248
- self._n_chunks = n_chunks
249
-
250
- # --------------------------- vvvv set up parent sample vvvv --------------------------- #
251
- self.parent_wise_source_id_key = WISEDataBase.parent_wise_source_id_key
252
- self.parent_sample_wise_skysep_key = WISEDataBase.parent_sample_wise_skysep_key
253
- self.parent_sample_default_entries = {
254
- self.parent_wise_source_id_key: "",
255
- self.parent_sample_wise_skysep_key: np.inf
256
- }
257
-
258
- self._parent_sample = None
259
- self._no_allwise_source = None
260
- # --------------------------- ^^^^ set up parent sample ^^^^ --------------------------- #
261
-
262
- # set up directories
263
- directories = get_directories() # type: dict[str, Path]
264
- self.cache_dir = directories['cache_dir'] / base_name
265
- self._cache_photometry_dir = self.cache_dir / "photometry"
266
- self.cluster_dir = self.cache_dir / 'cluster'
267
- self.cluster_log_dir = self.cluster_dir / 'logs'
268
- self.output_dir = directories["output_dir"] / base_name
269
- self.lightcurve_dir = self.output_dir / "lightcurves"
270
- self.plots_dir = directories["plots_dir"] / base_name
271
- self.tap_jobs_cache_dir = self.cache_dir / 'tap_cache'
272
-
273
- for d in [self.cache_dir, self._cache_photometry_dir, self.cluster_dir, self.cluster_log_dir,
274
- self.output_dir, self.lightcurve_dir, self.plots_dir]:
275
- d.mkdir(parents=True, exist_ok=True)
276
-
277
- file_handler = logging.FileHandler(filename=self.cache_dir / 'log.err', mode="a")
278
- file_handler.setLevel("WARNING")
279
- file_handler.setFormatter(logger_format)
280
- logger.addHandler(file_handler)
281
-
282
- self.submit_file = self.cluster_dir / 'submit.txt'
283
-
284
- # set up result attributes
285
- self._split_chunk_key = '__chunk'
286
- self._cached_raw_photometry_prefix = 'raw_photometry'
287
- self.tap_jobs = None
288
- self.queue = None
289
- self.clear_unbinned_photometry_when_binning = False
290
- self._cached_final_products = {
291
- 'lightcurves': dict(),
292
- 'metadata': dict()
293
- }
294
-
295
- self._tap_wise_id_key = 'wise_id'
296
- self._tap_orig_id_key = 'orig_id'
297
-
298
- # Any class that wants to implement cluster operation has to use this variable
299
- # It specifies which chunks will be processed by which jobs
300
- self.cluster_jobID_map = None
301
-
302
- #########################
303
- # END SET-UP #
304
- #######################################################################################
305
-
306
- #######################################################################################
307
- # START CHUNK MASK #
308
- #########################
309
-
310
- self._chunk_map = None
311
- self.n_chunks = self._n_chunks
312
-
313
- @property
314
- def parent_sample(self):
315
- if self.parent_sample_class is None:
316
- raise ValueError("Can not load ParentSample because no parent sample class was given!")
317
-
318
- if self._parent_sample is None:
319
- self._parent_sample = self.parent_sample_class()
320
- for k, default in self.parent_sample_default_entries.items():
321
- if k not in self._parent_sample.df.columns:
322
- self.parent_sample.df[k] = default
323
-
324
- self._no_allwise_source = self._parent_sample.df[self.parent_sample_wise_skysep_key] == np.inf
325
-
326
- return self._parent_sample
327
-
328
- @property
329
- def n_chunks(self):
330
- return self._n_chunks
331
-
332
- @n_chunks.setter
333
- def n_chunks(self, value):
334
- """Sets the private variable _n_chunks"""
335
- self._n_chunks = value
336
- # if a new value is set, set _chunk_map to None to trigger re-evaluation
337
- if self._n_chunks != value:
338
- self._chunk_map = None
339
-
340
- @property
341
- def chunk_map(self):
342
-
343
- if self.parent_sample_class is None:
344
- raise ValueError("No parent sample given! Can not calculate chunk map!")
345
-
346
- if self._chunk_map is None:
347
- self._chunk_map = np.zeros(len(self.parent_sample.df))
348
- n_in_chunk = int(round(len(self._chunk_map) / self._n_chunks))
349
- for i in range(self._n_chunks):
350
- start_ind = i * n_in_chunk
351
- end_ind = start_ind + n_in_chunk
352
- self._chunk_map[start_ind:end_ind] = int(i)
353
-
354
- return self._chunk_map
355
-
356
- def _get_chunk_number(self, wise_id=None, parent_sample_index=None):
357
- if isinstance(wise_id, type(None)) and isinstance(parent_sample_index, type(None)):
358
- raise ValueError("Either wise_id or parent_sample_index must be given!")
359
-
360
- if not isinstance(wise_id, type(None)):
361
- parent_sample_index = np.where(self.parent_sample.df[self.parent_wise_source_id_key] == int(wise_id))[0]
362
- logger.debug(f"wise ID {wise_id} at index {parent_sample_index}")
363
-
364
- loc = self.parent_sample.df.loc[int(parent_sample_index)].name
365
- iloc = self.parent_sample.df.index.get_loc(loc)
366
- _chunk_number = int(self.chunk_map[int(iloc)])
367
- logger.debug(f"chunk number is {_chunk_number} for {parent_sample_index}")
368
- return _chunk_number
369
-
370
- #########################
371
- # END CHUNK MASK #
372
- #######################################################################################
373
-
374
- def _start_data_product(self, parent_sample_indices):
375
-
376
- # get all rows in this chunk and columns, specified in the keymap
377
- parent_sample_sel = self.parent_sample.df.loc[
378
- parent_sample_indices,
379
- list(self.parent_sample.default_keymap.values())
380
- ]
381
-
382
- # invert the keymap to rename the columns
383
- inverse_keymap = {v: k for k, v in self.parent_sample.default_keymap.items()}
384
- parent_sample_sel.rename(columns=inverse_keymap, inplace=True)
385
- parent_sample_sel.set_index(parent_sample_sel.index.astype(str), inplace=True)
386
-
387
- # save to data_product
388
- data_product = parent_sample_sel.to_dict(orient="index")
389
-
390
- return data_product
391
-
392
- @staticmethod
393
- def get_db_name(table_name, nice=False):
394
- """
395
- Get the right table name
396
-
397
- :param table_name: str, table name
398
- :param nice: bool, whether to get the nice table name
399
- :return: str
400
- """
401
- source_column = 'nice_table_name' if not nice else 'table_name'
402
- target_column = 'table_name' if not nice else 'nice_table_name'
403
-
404
- m = WISEDataBase.table_names[source_column] == table_name
405
- if np.any(m):
406
- table_name = WISEDataBase.table_names[target_column][m].iloc[0]
407
- else:
408
- logger.debug(f"{table_name} not in Table. Assuming it is the right name already.")
409
- return table_name
410
-
411
- ###########################################################################################################
412
- # START MATCH PARENT SAMPLE TO WISE SOURCES #
413
- #####################################################
414
-
415
- def match_all_chunks(self,
416
- table_name="AllWISE Source Catalog",
417
- save_when_done=True,
418
- additional_columns=None):
419
- """
420
- Match the parent sample to a WISE catalogue and add the result to the parent sample.
421
-
422
- :param table_name: The name of the table you want to match against
423
- :type table_name: str
424
- :param save_when_done: save the parent sample dataframe with the matching info when done
425
- :type save_when_done: bool
426
- :param additional_columns: optional, additional columns to add to the matching table
427
- :type additional_columns: list
428
- :return:
429
- """
430
-
431
- logger.info(f'matching all chunks to {table_name}')
432
-
433
- if additional_columns is None:
434
- additional_columns = []
435
-
436
- for i in range(self.n_chunks):
437
- self._match_single_chunk(i, table_name, additional_columns)
438
-
439
- _dupe_mask = self._get_dubplicated_wise_id_mask()
440
-
441
- self._no_allwise_source = self.parent_sample.df[self.parent_sample_wise_skysep_key] == np.inf
442
- if np.any(self._no_allwise_source):
443
- logger.warning(f"{len(self.parent_sample.df[self._no_allwise_source])} of {len(self.parent_sample.df)} "
444
- f"entries without match!")
445
-
446
- if np.any(self._get_dubplicated_wise_id_mask()):
447
- logger.warning(self.parent_sample.df[self._get_dubplicated_wise_id_mask()])
448
-
449
- if save_when_done:
450
- self.parent_sample.save_local()
451
-
452
- def _run_gator_match(self, in_file, out_file, table_name,
453
- one_to_one=True, minsep_arcsec=None, additional_keys='', silent=False, constraints=None):
454
- _one_to_one = '-F one_to_one=1 ' if one_to_one else ''
455
- _minsep_arcsec = self.min_sep.to("arcsec").value if minsep_arcsec is None else minsep_arcsec
456
- _db_name = self.get_db_name(table_name)
457
- _silent = "-s " if silent else ""
458
- _constraints = '-F constraints="' + " and ".join(constraints).replace('%', '%%') + '" ' if constraints else ""
459
-
460
- if _db_name == "allwise_p3as_mep":
461
- _sigpos = _source_id = _des = ""
462
- _id_key = "cntr_mf,cntr"
463
- elif _db_name == "allsky_4band_p3as_psd":
464
- _sigpos = 'sigra,sigdec,'
465
- _source_id = "source_id,"
466
- _des = 'designation,'
467
- _id_key = 'cntr'
468
- else:
469
- _sigpos = 'sigra,sigdec,'
470
- _source_id = "source_id,"
471
- _des = 'designation,' if 'allwise' in _db_name else ''
472
- _id_key = 'cntr' if 'allwise' in _db_name else 'allwise_cntr,cntr'
473
-
474
- submit_cmd = f'curl ' \
475
- f'--connect-timeout 3600 ' \
476
- f'--max-time 3600 ' \
477
- f'{_silent}' \
478
- f'-o {out_file} ' \
479
- f'-F filename=@{in_file} ' \
480
- f'-F catalog={_db_name} ' \
481
- f'-F spatial=Upload ' \
482
- f'-F uradius={_minsep_arcsec} ' \
483
- f'-F outfmt=1 ' \
484
- f'{_one_to_one}' \
485
- f'{_constraints}' \
486
- f'-F selcols={_des}{_source_id}ra,dec,{_sigpos}{_id_key}{additional_keys} ' \
487
- f'"https://irsa.ipac.caltech.edu/cgi-bin/Gator/nph-query"'
488
-
489
- logger.debug(f'submit command: {submit_cmd}')
490
- N_tries = 10
491
- while True:
492
- try:
493
- process = subprocess.Popen(submit_cmd, stdout=subprocess.PIPE, shell=True)
494
- break
495
- except OSError as e:
496
- if N_tries < 1:
497
- raise OSError(e)
498
- logger.warning(f"{e}, retry")
499
- N_tries -= 1
500
-
501
- out_msg, err_msg = process.communicate()
502
- if out_msg:
503
- logger.info(out_msg.decode())
504
- if err_msg:
505
- logger.error(err_msg.decode())
506
- process.terminate()
507
- if Path(out_file).is_file():
508
- return 1
509
- else:
510
- return 0
511
-
512
- def _match_to_wise(
513
- self,
514
- in_filename,
515
- out_filename,
516
- mask,
517
- table_name,
518
- N_retries=10,
519
- **gator_kwargs
520
- ):
521
- ra_key = self.parent_sample.default_keymap["ra"]
522
- dec_key = self.parent_sample.default_keymap["dec"]
523
- selected_parent_sample = copy.copy(self.parent_sample.df.loc[mask, [ra_key, dec_key]])
524
- selected_parent_sample.rename(columns={dec_key: 'dec', ra_key: 'ra'}, inplace=True)
525
- logger.debug(f"{len(selected_parent_sample)} selected")
526
-
527
- # write to IPAC formatted table
528
- _selected_parent_sample_astrotab = Table.from_pandas(selected_parent_sample, index=True)
529
- logger.debug(f"writing {len(_selected_parent_sample_astrotab)} "
530
- f"objects to {in_filename}")
531
- _selected_parent_sample_astrotab.write(in_filename, format='ipac', overwrite=True)
532
- _done = False
533
-
534
- while True:
535
- if N_retries == 0:
536
- raise RuntimeError('Failed with retries')
537
-
538
- try:
539
- # use Gator to query IRSA
540
- success = self._run_gator_match(in_filename, out_filename, table_name, **gator_kwargs)
541
-
542
- if not success:
543
- # if not successful try again
544
- logger.warning("no success, try again")
545
- continue
546
-
547
- # load the result file
548
- gator_res = Table.read(out_filename, format='ipac')
549
- logger.debug(f"found {len(gator_res)} results")
550
- return gator_res
551
-
552
- except ValueError:
553
- # this will happen if the gator match returns an output containing the error message
554
- # read and display error message, then try again
555
- with open(out_filename, 'r') as f:
556
- err_msg = f.read()
557
- logger.warning(f"{err_msg}: try again")
558
-
559
- finally:
560
- N_retries -= 1
561
-
562
- def _match_single_chunk(self, chunk_number, table_name, additional_columns=None):
563
- """
564
- Match the parent sample to WISE
565
-
566
- :param chunk_number: number of the declination chunk
567
- :type chunk_number: int
568
- :param table_name: optional, WISE table to match to, default is AllWISE Source Catalog
569
- :type table_name: str,
570
- :param additional_columns: optional, additional columns to be added to the parent sample
571
- :type additional_columns: list
572
- """
573
-
574
- dec_intervall_mask = self.chunk_map == chunk_number
575
- logger.debug(f"Any selected: {np.any(dec_intervall_mask)}")
576
- _parent_sample_declination_band_file = self.cache_dir / f"parent_sample_chunk{chunk_number}.xml"
577
- _output_file = self.cache_dir / f"parent_sample_chunk{chunk_number}.tbl"
578
-
579
- additional_keys = (
580
- "," + ",".join(additional_columns)
581
- if (additional_columns is not None) and (len(additional_columns) > 0)
582
- else ""
583
- )
584
-
585
- gator_res = self._match_to_wise(
586
- in_filename=_parent_sample_declination_band_file,
587
- out_filename=_output_file,
588
- mask=dec_intervall_mask,
589
- table_name=table_name,
590
- additional_keys=additional_keys,
591
- )
592
-
593
- for fn in [_parent_sample_declination_band_file, _output_file]:
594
- try:
595
- logger.debug(f"removing {fn}")
596
- os.remove(fn)
597
- except FileNotFoundError:
598
- logger.warning(f"No File!!")
599
-
600
- # insert the corresponding separation to the WISE source into the parent sample
601
- self.parent_sample.df.loc[
602
- dec_intervall_mask,
603
- self.parent_sample_wise_skysep_key
604
- ] = list(gator_res["dist_x"])
605
-
606
- # insert the corresponding WISE IDs into the parent sample
607
- self.parent_sample.df.loc[
608
- dec_intervall_mask,
609
- self.parent_wise_source_id_key
610
- ] = list(gator_res["cntr"])
611
-
612
- if len(additional_columns) > 0:
613
- for col in additional_columns:
614
- logger.debug(f"inserting {col}")
615
-
616
- if col not in self.parent_sample.df.columns:
617
- self.parent_sample.df[col] = np.nan
618
-
619
- self.parent_sample.df.loc[
620
- dec_intervall_mask,
621
- col
622
- ] = list(gator_res[col])
623
-
624
- _no_match_mask = self.parent_sample.df[self.parent_sample_wise_skysep_key].isna() & dec_intervall_mask
625
- for k, default in self.parent_sample_default_entries.items():
626
- self.parent_sample.df.loc[_no_match_mask, k] = default
627
-
628
- def _get_dubplicated_wise_id_mask(self):
629
- idf_sorted_sep = self.parent_sample.df.sort_values(self.parent_sample_wise_skysep_key)
630
- idf_sorted_sep['duplicate'] = idf_sorted_sep[self.parent_wise_source_id_key].duplicated(keep='first')
631
- idf_sorted_sep.sort_index(inplace=True)
632
- _inf_mask = idf_sorted_sep[self.parent_sample_wise_skysep_key] < np.inf
633
- _dupe_mask = idf_sorted_sep['duplicate'] & (_inf_mask)
634
- if np.any(_dupe_mask):
635
- _N_dupe = len(self.parent_sample.df[_dupe_mask])
636
- logger.info(f"{_N_dupe} duplicated entries in parent sample")
637
- return _dupe_mask
638
-
639
- ###################################################
640
- # END MATCH PARENT SAMPLE TO WISE SOURCES #
641
- ###########################################################################################################
642
-
643
- ###########################################################################################################
644
- # START GET PHOTOMETRY DATA #
645
- ###################################
646
-
647
- def get_photometric_data(self, tables=None, perc=1, service=None, nthreads=100,
648
- chunks=None, overwrite=True, remove_chunks=False, query_type='positional',
649
- skip_download=False, mask_by_position=False):
650
- """
651
- Load photometric data from the IRSA server for the matched sample. The result will be saved under
652
-
653
- </path/to/timewise/data/dir>/output/<base_name>/lightcurves/timewise_data_product_<service>.json
654
-
655
- If service is 'tap' then the process exits on the first call to give the jobs running on the IRSA
656
- servers some time. The job infos are cached and loaded on the next function call. `timewise` will
657
- then wait on the jobs to finish. If the process is terminated via the keyboard during the waiting
658
- the TAP connections will also be cached to be resumed at a later time.
659
-
660
- :param remove_chunks: remove single chunk files after binning
661
- :type remove_chunks: bool
662
- :param overwrite: overwrite already existing lightcurves and metadata
663
- :type overwrite: bool
664
- :param tables: WISE tables to use for photometry query, defaults to AllWISE and NEOWISE-R photometry
665
- :type tables: str or list-like
666
- :param perc: percentage of sources to load photometry for, default 1
667
- :type perc: float
668
- :param nthreads: max number of threads to launch
669
- :type nthreads: int
670
- :param service: either of 'gator' or 'tap', selected based on the number of elements per chunk by default
671
- :type service: str
672
- :param chunks: containing indices of chunks to download
673
- :type chunks: list-like
674
- :param query_type: 'positional': query photometry based on distance from object, 'by_allwise_id': select all photometry points within a radius of 50 arcsec with the corresponding AllWISE ID
675
- :type query_type: str
676
- :param skip_download: if `True` skip downloading and only do binning
677
- :type skip_download: bool
678
- :param mask_by_position: if `True` mask single exposures that are too far away from the bulk
679
- :type mask_by_position: bool
680
- :return: The status of the processing
681
- :rtype: bool
682
- """
683
-
684
- mag = True
685
- flux = True
686
-
687
- if tables is None:
688
- tables = [
689
- 'AllWISE Multiepoch Photometry Table',
690
- 'NEOWISE-R Single Exposure (L1b) Source Table'
691
- ]
692
-
693
- if query_type not in self.query_types:
694
- raise ValueError(f"Unknown query type {query_type}! Choose one of {self.query_types}")
695
-
696
- if chunks is None:
697
- chunks = list(range(round(int(self.n_chunks * perc))))
698
- else:
699
- cm = [c not in self.chunk_map for c in chunks]
700
- if np.any(cm):
701
- raise ValueError(f"Chunks {np.array(chunks)[cm]} are not in chunk map. "
702
- f"Probably they are larger than the set chunk number of {self._n_chunks}")
703
-
704
- if service is None:
705
- elements_per_chunk = len(self.parent_sample.df) / self.n_chunks
706
- service = 'tap' if elements_per_chunk > 300 else 'gator'
707
-
708
- if (query_type == 'by_allwise_id') and (service == 'gator'):
709
- raise ValueError(f"Query type 'by_allwise_id' only implemented for service 'tap'!")
710
-
711
- if not skip_download:
712
-
713
- logger.debug(f"Getting {perc * 100:.2f}% of lightcurve chunks ({len(chunks)}) via {service} "
714
- f"in {'magnitude' if mag else ''} {'flux' if flux else ''} "
715
- f"from {tables}")
716
-
717
- if service == 'tap':
718
- done = self._query_for_photometry(tables, chunks, mag, flux, nthreads, query_type)
719
- if not done:
720
- logger.info("Some TAP jobs still running. Exit here and resume later.")
721
- return False
722
-
723
- elif service == 'gator':
724
- self._query_for_photometry_gator(tables, chunks, mag, flux, nthreads)
725
- else:
726
- raise ValueError(f"Unknown service {service}! Choose one of 'tap' or 'gator'")
727
-
728
- else:
729
- logger.info("skipping download, assume data is already downloaded.")
730
-
731
- logger.info("Download done, processing lightcurves")
732
- self._select_individual_lightcurves_and_bin(service=service, chunks=chunks, mask_by_position=mask_by_position)
733
- for c in chunks:
734
- self.calculate_metadata(service=service, chunk_number=c, overwrite=True)
735
-
736
- self._combine_data_products(service=service, remove=remove_chunks, overwrite=overwrite)
737
-
738
- return True
739
-
740
- def _data_product_filename(self, service, chunk_number=None, jobID=None):
741
-
742
- n = "timewise_data_product_"
743
-
744
- if (chunk_number is None) and (jobID is None):
745
- return self.lightcurve_dir / f"{n}{service}.json"
746
- else:
747
- fn = f"{n}{service}{self._split_chunk_key}{chunk_number}"
748
- if (chunk_number is not None) and (jobID is None):
749
- return self._cache_photometry_dir / (fn + ".json")
750
- else:
751
- return self._cache_photometry_dir / (fn + f"_{jobID}.json")
752
-
753
- @staticmethod
754
- def _verify_contains_lightcurves(data_product):
755
- mask = ["timewise_lightcurve" in data.keys() for data in data_product.values()]
756
- if not any(mask):
757
- raise KeyError(f"'timewise_lightcurves' in none of the results."
758
- f"Cluster job probably did not finish.")
759
-
760
- def load_data_product(
761
- self,
762
- service,
763
- chunk_number=None,
764
- jobID=None,
765
- return_filename=False,
766
- verify_contains_lightcurves=False
767
- ):
768
- """
769
- Load data product from disk
770
-
771
- :param service: service used to download data ('tap' or 'gator')
772
- :type service: str
773
- :param chunk_number: chunk number to load, if None load combined file for this service
774
- :type chunk_number: int, optional
775
- :param jobID: jobID to load, if None load the combined file for this chunk
776
- :type jobID: int, optional
777
- :param return_filename: return filename of data product, defaults to False
778
- :type return_filename: bool, optional
779
- :param verify_contains_lightcurves: verify that the data product contains lightcurves, defaults to False
780
- :type verify_contains_lightcurves: bool, optional
781
- """
782
- fn = self._data_product_filename(service, chunk_number, jobID)
783
- logger.debug(f"loading {fn}")
784
- try:
785
- with open(fn, "r") as f:
786
- lcs = json.load(f)
787
-
788
- if verify_contains_lightcurves:
789
- try:
790
- self._verify_contains_lightcurves(lcs)
791
- except KeyError as e:
792
- raise KeyError(f"{fn}: {e}")
793
-
794
- if return_filename:
795
- return lcs, fn
796
- return lcs
797
- except FileNotFoundError:
798
- logger.warning(f"No file {fn}")
799
-
800
- def _save_data_product(self, data_product, service, chunk_number=None, jobID=None, overwrite=False):
801
- fn = self._data_product_filename(service, chunk_number, jobID)
802
- logger.debug(f"saving {len(data_product)} new lightcurves to {fn}")
803
-
804
- if fn == self._data_product_filename(service):
805
- self._cached_final_products['lightcurves'][service] = data_product
806
-
807
- if not overwrite:
808
- try:
809
- old_data_product = self.load_data_product(service=service, chunk_number=chunk_number, jobID=jobID)
810
- logger.debug(f"Found {len(old_data_product)}. Combining")
811
- data_product.update(old_data_product)
812
- except FileNotFoundError as e:
813
- logger.info(f"FileNotFoundError: {e}. Making new binned lightcurves.")
814
-
815
- with open(fn, "w") as f:
816
- json.dump(data_product, f, indent=4)
817
-
818
- def _combine_data_products(
819
- self,
820
- service=None,
821
- chunk_number=None,
822
- remove=False,
823
- overwrite=False
824
- ):
825
- if not service:
826
- logger.info("Combining all lightcuves collected with all services")
827
- itr = ['service', ['gator', 'tap']]
828
- kwargs = {}
829
- elif chunk_number is None:
830
- logger.info(f"Combining all lightcurves collected with {service}")
831
- itr = ['chunk_number', range(self.n_chunks)]
832
- kwargs = {'service': service}
833
- elif chunk_number is not None:
834
- logger.info(f"Combining all lightcurves collected with {service} for chunk {chunk_number}")
835
- itr = ['jobID',
836
- list(self.clusterJob_chunk_map.index[self.clusterJob_chunk_map.chunk_number == chunk_number])]
837
- kwargs = {'service': service, 'chunk_number': chunk_number}
838
- else:
839
- raise NotImplementedError
840
-
841
- lcs = None
842
- fns = list()
843
- missing_files = False
844
- erroneous_files = False
845
- for i in itr[1]:
846
- kw = dict(kwargs)
847
- kw[itr[0]] = i
848
- kw['return_filename'] = True
849
- kw["verify_contains_lightcurves"] = True
850
-
851
- try:
852
- res = self.load_data_product(**kw)
853
- if res is not None:
854
- ilcs, ifn = res
855
- fns.append(ifn)
856
- if isinstance(lcs, type(None)):
857
- lcs = dict(ilcs)
858
- else:
859
- lcs.update(ilcs)
860
-
861
- else:
862
- missing_files = True
863
-
864
- except KeyError as e:
865
- logger.error(e)
866
- erroneous_files = True
867
-
868
- if missing_files:
869
- logger.warning(f"Missing files for {service}")
870
-
871
- if erroneous_files:
872
- logger.warning(f"Erroneous files for {service}")
873
-
874
- if erroneous_files or missing_files:
875
- _chunk_str = f" for chunk {chunk_number}" if chunk_number is not None else ""
876
- logger.warning(f"Not saving combined data product{_chunk_str}")
877
- break
878
-
879
- if not (erroneous_files or missing_files):
880
- self._save_data_product(lcs, service=service, chunk_number=chunk_number, overwrite=overwrite)
881
-
882
- if remove:
883
- for fn in tqdm.tqdm(fns, desc="removing files"):
884
- os.remove(fn)
885
-
886
- return True
887
-
888
- else:
889
- return False
890
-
891
- # ----------------------------------------------------------------------------------- #
892
- # START using GATOR to get photometry #
893
- # ------------------------------------------ #
894
-
895
- def _gator_chunk_photometry_cache_filename(self, table_nice_name, chunk_number,
896
- additional_neowise_query=False, gator_input=False):
897
- table_name = self.get_db_name(table_nice_name)
898
- _additional_neowise_query = '_neowise_gator' if additional_neowise_query else ''
899
- _gator_input = '_gator_input' if gator_input else ''
900
- _ending = '.xml' if gator_input else '.tbl'
901
- fn = f"{self._cached_raw_photometry_prefix}_{table_name}{_additional_neowise_query}{_gator_input}" \
902
- f"{self._split_chunk_key}{chunk_number}{_ending}"
903
- return self._cache_photometry_dir / fn
904
-
905
- def _thread_query_photometry_gator(self, chunk_number, table_name, mag, flux):
906
- _infile = self._gator_chunk_photometry_cache_filename(table_name, chunk_number, gator_input=True)
907
- _outfile = self._gator_chunk_photometry_cache_filename(table_name, chunk_number)
908
- _nice_name = self.get_db_name(table_name, nice=True)
909
- _additional_keys_list = ['mjd']
910
- if mag:
911
- _additional_keys_list += list(self.photometry_table_keymap[_nice_name]['mag'].keys())
912
- if flux:
913
- _additional_keys_list += list(self.photometry_table_keymap[_nice_name]['flux'].keys())
914
-
915
- _additional_keys = "," + ",".join(_additional_keys_list)
916
- _deci_mask = self.chunk_map == chunk_number
917
- _mask = _deci_mask #& (~self._no_allwise_source)
918
-
919
- res = self._match_to_wise(
920
- in_filename=_infile,
921
- out_filename=_outfile,
922
- mask=_mask,
923
- table_name=table_name,
924
- one_to_one=False,
925
- additional_keys=_additional_keys,
926
- minsep_arcsec=self.min_sep.to('arcsec').value,
927
- silent=True,
928
- constraints=self.constraints
929
- )
930
-
931
- os.remove(_infile)
932
- return res
933
-
934
- def _gator_photometry_worker_thread(self):
935
- while True:
936
- try:
937
- args = self.queue.get(block=False)
938
- except (AttributeError, queue.Empty):
939
- logger.debug('No more tasks, exiting')
940
- break
941
- logger.debug(f"{args}")
942
- self._thread_query_photometry_gator(*args)
943
- self.queue.task_done()
944
- logger.info(f"{self.queue.qsize()} tasks remaining")
945
-
946
- def _query_for_photometry_gator(self, tables, chunks, mag, flux, nthreads):
947
- nthreads = min(nthreads, len(chunks))
948
- logger.debug(f'starting {nthreads} workers')
949
- threads = [threading.Thread(target=self._gator_photometry_worker_thread) for _ in range(nthreads)]
950
-
951
- logger.debug(f"using {len(chunks)} chunks")
952
- self.queue = queue.Queue()
953
- for t in np.atleast_1d(tables):
954
- for i in chunks:
955
- self.queue.put([i, t, mag, flux])
956
-
957
- logger.info(f"added {self.queue.qsize()} tasks to queue")
958
- for t in threads:
959
- t.start()
960
- self.queue.join()
961
- self.queue = None
962
-
963
- for t in threads:
964
- t.join()
965
-
966
- return True
967
-
968
- def _get_unbinned_lightcurves_gator(self, chunk_number, clear=False):
969
- # load only the files for this chunk
970
- fns = [self._cache_photometry_dir / fn
971
- for fn in os.listdir(self._cache_photometry_dir)
972
- if (fn.startswith(self._cached_raw_photometry_prefix) and
973
- fn.endswith(f"{self._split_chunk_key}{chunk_number}.tbl"))
974
- ]
975
-
976
- logger.debug(f"chunk {chunk_number}: loading {len(fns)} files for chunk {chunk_number}")
977
-
978
- _data = list()
979
- for fn in fns:
980
- data_table = Table.read(fn, format='ipac').to_pandas()
981
-
982
- t = 'allwise_p3as_mep' if 'allwise' in str(fn) else 'neowiser_p1bs_psd'
983
- nice_name = self.get_db_name(t, nice=True)
984
- cols = {'index_01': self._tap_orig_id_key}
985
- cols.update(self.photometry_table_keymap[nice_name]['mag'])
986
- cols.update(self.photometry_table_keymap[nice_name]['flux'])
987
- if 'allwise' in str(fn):
988
- cols['cntr_mf'] = 'allwise_cntr'
989
-
990
- data_table = data_table.rename(columns=cols)
991
- _data.append(data_table)
992
-
993
- if clear:
994
- os.remove(fn)
995
-
996
- lightcurves = pd.concat(_data)
997
- return lightcurves
998
-
999
- # ------------------------------------------ #
1000
- # END using GATOR to get photometry #
1001
- # ----------------------------------------------------------------------------------- #
1002
-
1003
- # ----------------------------------------------------------------------------------- #
1004
- # START using TAP to get photometry #
1005
- # ---------------------------------------- #
1006
-
1007
- @property
1008
- def tap_cache_filenames(self):
1009
- return (
1010
- self.tap_jobs_cache_dir / f"tap_jobs.json",
1011
- self.tap_jobs_cache_dir / f"queue.json"
1012
- )
1013
-
1014
- def dump_tap_cache(self):
1015
- self.tap_jobs_cache_dir.mkdir(parents=True, exist_ok=True)
1016
-
1017
- tap_jobs_fn, queue_fn = self.tap_cache_filenames
1018
- logger.debug(f"saving TAP jobs to {tap_jobs_fn}")
1019
- tap_jobs_fn.parent.mkdir(parents=True, exist_ok=True)
1020
- with tap_jobs_fn.open("w") as f:
1021
- json.dump(self.tap_jobs, f, indent=4)
1022
-
1023
- queue_fn.parent.mkdir(parents=True, exist_ok=True)
1024
- logger.debug(f"saving queue to {queue_fn}")
1025
- with queue_fn.open("w") as f:
1026
- json.dump(list(self.queue.queue), f, indent=4)
1027
-
1028
- def load_tap_cache(self):
1029
- tap_jobs_fn, queue_fn = self.tap_cache_filenames
1030
-
1031
- logger.debug(f"loading TAP jobs from {tap_jobs_fn}")
1032
- if tap_jobs_fn.is_file():
1033
- with tap_jobs_fn.open("r") as f:
1034
- tap_jobs_json = json.load(f)
1035
- # JSON keys are always strings while we need the chunk numbers
1036
- # to be integers in the dictionary
1037
- self.tap_jobs = {
1038
- t: {int(i): url for i, url in v.items()} for t, v in tap_jobs_json.items()
1039
- }
1040
- logger.debug(f"removing {tap_jobs_fn}")
1041
- tap_jobs_fn.unlink()
1042
- else:
1043
- logger.warning(f"No file {tap_jobs_fn}")
1044
- self.tap_jobs = None
1045
-
1046
- logger.debug(f"loading queue from {queue_fn}")
1047
- if queue_fn.is_file():
1048
- with queue_fn.open("r") as f:
1049
- ql = json.load(f)
1050
- logger.debug(f"loaded {len(ql)} queue elements")
1051
- self.queue = queue.Queue()
1052
- for q in ql:
1053
- self.queue.put(q)
1054
- logger.debug(f"removing {queue_fn}")
1055
- queue_fn.unlink()
1056
- else:
1057
- logger.warning(f"No file {queue_fn}")
1058
- self.queue = None
1059
-
1060
- cache_exists = (self.tap_jobs is not None) and (self.queue is not None)
1061
- return cache_exists
1062
-
1063
- def _get_photometry_query_string(self, table_name, mag, flux, query_type):
1064
- """
1065
- Construct a query string to submit to IRSA
1066
- :param table_name: str, table name
1067
- :type table_name: str
1068
- :return: str
1069
- """
1070
- logger.debug(f"constructing query for {table_name}")
1071
- db_name = self.get_db_name(table_name)
1072
- nice_name = self.get_db_name(table_name, nice=True)
1073
- id_key = 'cntr_mf' if 'allwise' in db_name else 'allwise_cntr'
1074
- lum_keys = list()
1075
- if mag:
1076
- lum_keys += list(self.photometry_table_keymap[nice_name]['mag'].keys())
1077
- if flux:
1078
- lum_keys += list(self.photometry_table_keymap[nice_name]['flux'].keys())
1079
- keys = ['ra', 'dec', 'mjd', id_key] + lum_keys
1080
- _constraints = list(self.constraints)
1081
-
1082
- q = 'SELECT \n\t'
1083
- for k in keys:
1084
- q += f'{db_name}.{k}, '
1085
- q += f'\n\tmine.{self._tap_orig_id_key} \n'
1086
- q += f'FROM\n\tTAP_UPLOAD.ids AS mine \n'
1087
-
1088
- if query_type == 'positional':
1089
- q += f'RIGHT JOIN\n\t{db_name} \n'
1090
- radius = self.min_sep
1091
-
1092
- if query_type == 'by_allwise_id':
1093
- q += f'INNER JOIN\n\t{db_name} ON {db_name}.{id_key} = mine.{self._tap_wise_id_key} \n'
1094
- radius = 15 * u.arcsec
1095
-
1096
- q += 'WHERE \n'
1097
-
1098
- if query_type == 'positional':
1099
- q += f"\tCONTAINS(POINT('J2000',{db_name}.ra,{db_name}.dec)," \
1100
- f"CIRCLE('J2000',mine.ra_in,mine.dec_in,{radius.to('deg').value}))=1 "
1101
-
1102
- if len(_constraints) > 0:
1103
-
1104
- if query_type == 'positional':
1105
- q += ' AND (\n'
1106
-
1107
- for c in _constraints:
1108
- q += f'\t{db_name}.{c} AND \n'
1109
- q = q.strip(" AND \n")
1110
-
1111
- if query_type == 'positional':
1112
- q += '\t)'
1113
-
1114
- logger.debug(f"\n{q}")
1115
- return q
1116
-
1117
- def _submit_job_to_TAP(self, chunk_number, table_name, mag, flux, query_type):
1118
- i = chunk_number
1119
- t = table_name
1120
- m = self.chunk_map == i
1121
-
1122
- # if perc is smaller than one select only a subset of wise IDs
1123
- sel = self.parent_sample.df[np.array(m)]
1124
-
1125
- tab_d = dict()
1126
-
1127
- tab_d[self._tap_orig_id_key] = np.array(sel.index).astype(int)
1128
- tab_d['ra_in'] = np.array(sel[self.parent_sample.default_keymap['ra']]).astype(float)
1129
- tab_d['dec_in'] = np.array(sel[self.parent_sample.default_keymap['dec']]).astype(float)
1130
-
1131
- if query_type == 'by_allwise_id':
1132
- tab_d[self._tap_wise_id_key] = np.array(sel[self.parent_wise_source_id_key]).astype(int)
1133
-
1134
- del sel
1135
-
1136
- logger.debug(f"{chunk_number}th query of {table_name}: uploading {len(list(tab_d.values())[0])} objects.")
1137
- qstring = self._get_photometry_query_string(t, mag, flux, query_type)
1138
-
1139
- N_tries = 5
1140
- while True:
1141
- if N_tries == 0:
1142
- logger.warning("No more tries left!")
1143
- raise vo.dal.exceptions.DALServiceError(f"Submission failed "
1144
- f"for {i}th chunk "
1145
- f"of {t} "
1146
- f"after {N_tries} attempts")
1147
- try:
1148
- job = self.service.submit_job(qstring, uploads={'ids': Table(tab_d)})
1149
- job.run()
1150
- logger.debug(job.url)
1151
- time.sleep(5) # wait a bit until checking phase
1152
-
1153
- if isinstance(job.phase, type(None)):
1154
- raise vo.dal.DALServiceError(
1155
- f"Job submission failed. No phase!"
1156
- f"response: {job.submit_response}"
1157
- )
1158
-
1159
- logger.info(f'submitted job for {t} for chunk {i}: ')
1160
- logger.debug(f'Job: {job.url}; {job.phase}')
1161
- self.tap_jobs[t][i] = job.url
1162
- self.queue.put((t, i))
1163
- break
1164
-
1165
- except (
1166
- requests.exceptions.ConnectionError,
1167
- vo.dal.exceptions.DALServiceError,
1168
- requests.HTTPError
1169
- ) as e:
1170
- wait = 60
1171
- N_tries -= 1
1172
- logger.warning(f"{chunk_number}th query of {table_name}: Could not submit TAP job!\n"
1173
- f"{e}. Waiting {wait}s and try again. {N_tries} tries left.")
1174
- time.sleep(wait)
1175
-
1176
- def _chunk_photometry_cache_filename(self, table_nice_name, chunk_number, additional_neowise_query=False):
1177
- table_name = self.get_db_name(table_nice_name)
1178
- _additional_neowise_query = '_neowise_gator' if additional_neowise_query else ''
1179
- fn = f"{self._cached_raw_photometry_prefix}_{table_name}{_additional_neowise_query}" \
1180
- f"{self._split_chunk_key}{chunk_number}.csv"
1181
- return self._cache_photometry_dir / fn
1182
-
1183
- @staticmethod
1184
- def _give_up_tap(e):
1185
- return ("Job is not active!" in str(e))
1186
-
1187
- @backoff.on_exception(
1188
- backoff.expo,
1189
- vo.dal.exceptions.DALServiceError,
1190
- giveup=_give_up_tap,
1191
- max_tries=50,
1192
- on_backoff=backoff_hndlr
1193
- )
1194
- def _thread_wait_and_get_results(self, t, i):
1195
- logger.info(f"Waiting on {i}th query of {t} ........")
1196
-
1197
- _job = StableAsyncTAPJob(url=self.tap_jobs[t][i])
1198
- _job.wait()
1199
- logger.info(f'{i}th query of {t}: Done!')
1200
-
1201
- lightcurve = _job.fetch_result().to_table().to_pandas()
1202
- fn = self._chunk_photometry_cache_filename(t, i)
1203
- logger.debug(f"{i}th query of {t}: saving under {fn}")
1204
-
1205
- table_nice_name = self.get_db_name(t, nice=True)
1206
- cols = dict(self.photometry_table_keymap[table_nice_name]['mag'])
1207
- cols.update(self.photometry_table_keymap[table_nice_name]['flux'])
1208
-
1209
- if 'allwise' in t:
1210
- cols['cntr_mf'] = 'allwise_cntr'
1211
-
1212
- lightcurve.rename(columns=cols).to_csv(fn)
1213
- return
1214
-
1215
- def _tap_photometry_worker_thread(self):
1216
- while True:
1217
- try:
1218
- t, i = self.queue.get(block=False)
1219
- except queue.Empty:
1220
- logger.debug("No more tasks, exiting")
1221
- break
1222
- except AttributeError:
1223
- logger.debug(f"No more queue. exiting")
1224
- break
1225
-
1226
- job = StableAsyncTAPJob(url=self.tap_jobs[t][i])
1227
-
1228
- _ntries = 10
1229
- while True:
1230
- try:
1231
- job._update(timeout=600)
1232
- phase = job._job.phase
1233
- break
1234
- except vo.dal.exceptions.DALServiceError as e:
1235
- msg = f"{i}th query of {t}: DALServiceError: {e}; trying again in 6 min"
1236
- if _ntries < 10:
1237
- msg += f' ({_ntries} tries left)'
1238
-
1239
- logger.warning(msg)
1240
- time.sleep(60 * 6)
1241
- if '404 Client Error: Not Found for url' in str(e):
1242
- _ntries -= 1
1243
-
1244
- if phase in self.running_tap_phases:
1245
- self.queue.put((t, i))
1246
- self.queue.task_done()
1247
-
1248
- elif phase in self.done_tap_phases:
1249
- self._thread_wait_and_get_results(t, i)
1250
- self.queue.task_done()
1251
- logger.info(f'{self.queue.qsize()} tasks left')
1252
-
1253
- else:
1254
- logger.warning(f'queue {i} of {t}: Job not active! Phase is {phase}')
1255
-
1256
- time.sleep(np.random.uniform(60))
1257
-
1258
- logger.debug("closing thread")
1259
-
1260
- def _run_tap_worker_threads(self, nthreads):
1261
- threads = [threading.Thread(target=self._tap_photometry_worker_thread)
1262
- for _ in range(nthreads)]
1263
-
1264
- for t in threads:
1265
- t.start()
1266
-
1267
- try:
1268
- self.queue.join()
1269
- logger.info('all tap_jobs done!')
1270
- except KeyboardInterrupt:
1271
- self.dump_tap_cache()
1272
- return False
1273
- finally:
1274
- for i, t in enumerate(threads):
1275
- logger.debug(f"{i}th thread alive: {t.is_alive()}")
1276
- for t in threads:
1277
- t.join()
1278
- self.tap_jobs = None
1279
- del threads
1280
-
1281
- return True
1282
-
1283
- def _query_for_photometry(self, tables, chunks, mag, flux, nthreads, query_type):
1284
- # ----------------------------------------------------------------------
1285
- # Load TAP cache if it exists
1286
- # ----------------------------------------------------------------------
1287
- cache_exists = self.load_tap_cache()
1288
-
1289
- # ----------------------------------------------------------------------
1290
- # Do the query
1291
- # ----------------------------------------------------------------------
1292
- if not cache_exists:
1293
- self.tap_jobs = dict()
1294
- self.queue = queue.Queue() if self.queue is None else self.queue
1295
- tables = np.atleast_1d(tables)
1296
-
1297
- for t in tables:
1298
- self.tap_jobs[t] = dict()
1299
- for i in chunks:
1300
- self._submit_job_to_TAP(i, t, mag, flux, query_type)
1301
- time.sleep(5)
1302
-
1303
- logger.info(f'added {self.queue.qsize()} tasks to queue')
1304
- self.dump_tap_cache()
1305
- logger.info(f"wait some time to give tap_jobs some time")
1306
- return False
1307
-
1308
- logger.info(f'starting worker threads to retrieve results, {self.queue.qsize()} tasks in queue')
1309
- nthreads = min(len(tables) * len(chunks), nthreads)
1310
- success = self._run_tap_worker_threads(nthreads)
1311
- self.queue = None
1312
- return success
1313
-
1314
- # ----------------------------------------------------------------------
1315
- # select individual lightcurves and bin
1316
- # ----------------------------------------------------------------------
1317
-
1318
- def _select_individual_lightcurves_and_bin(self, ncpu=35, service='tap', chunks=None, mask_by_position=False):
1319
- logger.info('selecting individual lightcurves and bin ...')
1320
- ncpu = min(self.n_chunks, ncpu)
1321
- logger.debug(f"using {ncpu} CPUs")
1322
- chunk_list = list(range(self.n_chunks)) if not chunks else chunks
1323
- service_list = [service] * len(chunk_list)
1324
- jobID_list = [None] * len(chunk_list)
1325
- pos_mask_list = [mask_by_position] * len(chunk_list)
1326
- logger.debug(f"multiprocessing arguments: chunks: {chunk_list}, service: {service_list}")
1327
-
1328
- while True:
1329
- try:
1330
- logger.debug(f'trying with {ncpu}')
1331
- p = mp.Pool(ncpu)
1332
- break
1333
- except OSError as e:
1334
- logger.warning(e)
1335
- if ncpu == 1:
1336
- break
1337
- ncpu = int(round(ncpu - 1))
1338
-
1339
- if ncpu > 1:
1340
- r = list(
1341
- tqdm.tqdm(
1342
- p.starmap(
1343
- self._subprocess_select_and_bin,
1344
- zip(service_list, chunk_list, jobID_list, pos_mask_list)
1345
- ),
1346
- total=self.n_chunks,
1347
- desc='select and bin'
1348
- )
1349
- )
1350
- p.close()
1351
- p.join()
1352
- else:
1353
- r = list(map(self._subprocess_select_and_bin, service_list, chunk_list, jobID_list, pos_mask_list))
1354
-
1355
- def get_unbinned_lightcurves(self, chunk_number, clear=False):
1356
- """
1357
- Get the unbinned lightcurves for a given chunk number.
1358
-
1359
- :param chunk_number: int
1360
- :type chunk_number: int
1361
- :param clear: remove files after loading, defaults to False
1362
- :type clear: bool, optional
1363
- """
1364
- # load only the files for this chunk
1365
- fns = [self._cache_photometry_dir / fn
1366
- for fn in os.listdir(self._cache_photometry_dir)
1367
- if (fn.startswith(self._cached_raw_photometry_prefix) and fn.endswith(
1368
- f"{self._split_chunk_key}{chunk_number}.csv"
1369
- ))]
1370
- logger.debug(f"chunk {chunk_number}: loading {len(fns)} files for chunk {chunk_number}")
1371
-
1372
- if len(fns) == 0:
1373
- raise ValueError(f"No unbinned lightcurves found for chunk {chunk_number}!")
1374
-
1375
- lightcurves = pd.concat([pd.read_csv(fn) for fn in fns]).reset_index()
1376
-
1377
- if clear:
1378
- for fn in fns:
1379
- os.remove(fn)
1380
-
1381
- return lightcurves
1382
-
1383
- def _subprocess_select_and_bin(self, service, chunk_number=None, jobID=None, mask_by_position=False):
1384
- # run through the ids and bin the lightcurves
1385
- if service == 'tap':
1386
- lightcurves = self.get_unbinned_lightcurves(chunk_number, clear=self.clear_unbinned_photometry_when_binning)
1387
- elif service == 'gator':
1388
- lightcurves = self._get_unbinned_lightcurves_gator(
1389
- chunk_number,
1390
- clear=self.clear_unbinned_photometry_when_binning
1391
- )
1392
- else:
1393
- raise ValueError(f"Service {service} not known!")
1394
-
1395
- if jobID:
1396
- indices = np.where(self.cluster_jobID_map == jobID)[0]
1397
- else:
1398
- indices = lightcurves[self._tap_orig_id_key].unique()
1399
-
1400
- logger.debug(f"chunk {chunk_number}: going through {len(indices)} IDs")
1401
-
1402
- data_product = self.load_data_product(service=service, chunk_number=chunk_number, jobID=jobID)
1403
-
1404
- if data_product is None:
1405
- logger.info(f"Starting data product for {len(indices)} indices.")
1406
- data_product = self._start_data_product(parent_sample_indices=indices)
1407
-
1408
- if mask_by_position:
1409
- bad_indices = self.get_position_mask(service, chunk_number)
1410
- else:
1411
- bad_indices = None
1412
-
1413
- for parent_sample_entry_id in tqdm.tqdm(indices, desc="binning"):
1414
- m = lightcurves[self._tap_orig_id_key] == parent_sample_entry_id
1415
- lightcurve = lightcurves[m]
1416
-
1417
- if (bad_indices is not None) and (str(parent_sample_entry_id) in bad_indices):
1418
- pos_m = ~lightcurve.index.isin(bad_indices[str(parent_sample_entry_id)])
1419
- lightcurve = lightcurve[pos_m]
1420
-
1421
- if len(lightcurve) < 1:
1422
- logger.warning(f"No data for {parent_sample_entry_id}")
1423
- continue
1424
-
1425
- binned_lc = self.bin_lightcurve(lightcurve)
1426
- data_product[str(int(parent_sample_entry_id))]["timewise_lightcurve"] = binned_lc.to_dict()
1427
-
1428
- logger.debug(f"chunk {chunk_number}: saving {len(data_product.keys())} binned lcs")
1429
- self._save_data_product(data_product, service=service, chunk_number=chunk_number, jobID=jobID, overwrite=True)
1430
-
1431
- # ---------------------------------------- #
1432
- # END using TAP to get photometry #
1433
- # ----------------------------------------------------------------------------------- #
1434
-
1435
- # ----------------------------------------------------------------------
1436
- # bin lightcurves
1437
- # ----------------------------------------------------------------------
1438
-
1439
- @abc.abstractmethod
1440
- def bin_lightcurve(self, lightcurve):
1441
- """
1442
- Bins a lightcurve
1443
-
1444
- :param lightcurve: The unbinned lightcurve
1445
- :type lightcurve: pandas.DataFrame
1446
- :return: the binned lightcurve
1447
- :rtype: pd.DataFrame
1448
- """
1449
- raise NotImplementedError
1450
-
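`bin_lightcurve` is left to concrete subclasses. Purely as an illustration of the expected DataFrame-in/DataFrame-out contract (not the package's actual visit-stacking algorithm), a toy implementation that groups detections separated by more than a given time gap could look like this:

    import pandas as pd

    def toy_bin_lightcurve(lightcurve: pd.DataFrame, gap_days: float = 100.0) -> pd.DataFrame:
        # sort by time and start a new "visit" whenever the gap to the previous epoch exceeds gap_days
        lc = lightcurve.sort_values("mjd")
        visit = (lc["mjd"].diff() > gap_days).cumsum()
        # aggregate per visit; a real implementation would also average the per-band magnitudes/fluxes
        binned = lc.groupby(visit).agg(mean_mjd=("mjd", "mean"), n_points=("mjd", "size"))
        return binned.reset_index(drop=True)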
1451
- # ----------------------------------------------------------------------
1452
- # bin lightcurves
1453
- # ----------------------------------------------------------------------
1454
-
1455
- # ----------------------------------------------------------------------------------- #
1456
- # START converting to flux densities #
1457
- # ---------------------------------------------------- #
1458
-
1459
- def find_color_correction(self, w1_minus_w2):
1460
- """
1461
- Find the color correction based on the W1-W2 color.
1462
- See `this <https://wise2.ipac.caltech.edu/docs/release/allsky/expsup/sec4_4h.html#conv2flux>`_
1463
-
1464
- :param w1_minus_w2:
1465
- :type w1_minus_w2: float
1466
- :return: the color correction factor
1467
- :rtype: float
1468
- """
1469
- w1_minus_w2 = np.atleast_1d(w1_minus_w2)
1470
- c = pd.DataFrame(columns=self.magnitude_zeropoints_corrections.columns)
1471
- power_law_values = self.magnitude_zeropoints_corrections.loc[8:16]['[W1 - W2]']
1472
- for w1mw2 in w1_minus_w2:
1473
- dif = power_law_values - w1mw2
1474
- i = abs(dif).argmin()
1475
- c = c.append(self.magnitude_zeropoints_corrections.loc[i])
1476
- return c
1477
-
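Note that `DataFrame.append`, used above, was removed in pandas 2.0, so this helper only works with older pandas. A hedged sketch of the same nearest-row lookup written for current pandas (assuming the same corrections table with its '[W1 - W2]' column):

    import numpy as np
    import pandas as pd

    def find_color_correction(corrections: pd.DataFrame, w1_minus_w2) -> pd.DataFrame:
        # tabulated power-law colours, as in the rows 8-16 used above
        tabulated = corrections.loc[8:16, "[W1 - W2]"]
        rows = []
        for w1mw2 in np.atleast_1d(w1_minus_w2):
            # index label of the row whose tabulated colour is closest to the measured one
            rows.append(corrections.loc[(tabulated - w1mw2).abs().idxmin()])
        return pd.DataFrame(rows)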
1478
- def vegamag_to_flux_density(self, vegamag, band, unit='mJy', color_correction=None):
1479
- """
1480
- Converts the detector-level brightness m, given in Vega magnitudes, to a flux density F
1481
-
1482
- F = (F_nu / f_c) * 10 ^ (-m / 2.5)
1483
-
1484
- where F_nu is the zeropoint flux for the corresponding band and f_c a color correction factor.
1485
- See `this <https://wise2.ipac.caltech.edu/docs/release/allsky/expsup/sec4_4h.html#conv2flux>`_
1486
-
1487
- :param vegamag:
1488
- :type vegamag: float or numpy.ndarray
1489
- :param band:
1490
- :type band: str
1491
- :param unit: unit to convert the flux density to
1492
- :type unit: str
1493
- :param color_correction: the color correction factor; if a dict, the keys have to be 'f_c("band")'
1494
- :type color_correction: float or numpy.ndarray or dict
1495
- :return: the flux densities
1496
- :rtype: ndarray
1497
- """
1498
- if not isinstance(color_correction, type(None)):
1499
- key = f'f_c({band})'
1500
- if key in color_correction:
1501
- color_correction = color_correction[key]
1502
- if len(color_correction) != len(vegamag):
1503
- raise ValueError(f"\nLength of color corrections: {len(color_correction)}:\n{color_correction}; "
1504
- f"\nLentgh of mags: {len(vegamag)}: \n{vegamag}")
1505
- else:
1506
- raise NotImplementedError(color_correction)
1507
-
1508
- else:
1509
- color_correction = 1
1510
-
1511
- color_correction = np.array(color_correction)
1512
- vegamag = np.array(vegamag)
1513
- fd = self.magnitude_zeropoints['F_nu'][band].to(unit).value / color_correction * 10 ** (-vegamag / 2.5)
1514
- if len(fd) != len(vegamag):
1515
- raise ValueError(f"\nLength of flux densities: {len(fd)}:\n{fd}; "
1516
- f"\nLentgh of mags: {len(vegamag)}: \n{vegamag}")
1517
-
1518
- return np.array(list(fd))
1519
-
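A quick numeric check of the docstring formula F = (F_nu / f_c) * 10^(-m / 2.5), using the approximate AllWISE W1 zero point of 309.54 Jy (the instance takes the exact zero points from `self.magnitude_zeropoints`):

    F_nu_w1_mjy = 309.54e3          # approximate W1 zero-point flux density, in mJy
    f_c = 1.0                       # no colour correction
    m = 15.0                        # Vega magnitude
    flux_density = (F_nu_w1_mjy / f_c) * 10 ** (-m / 2.5)
    print(f"{flux_density:.3f} mJy")  # ~0.310 mJy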
1520
- def add_flux_density(self, lightcurve,
1521
- mag_key, emag_key, mag_ul_key,
1522
- f_key, ef_key, f_ul_key, do_color_correction=False):
1523
- """Adds flux densities to a lightcurves
1524
-
1525
- :param lightcurve:
1526
- :type lightcurve: pandas.DataFrame
1527
- :param mag_key: the key in `lightcurve` that holds the magnitude
1528
- :type mag_key: str
1529
- :param emag_key: the key in `lightcurve` that holds the error of the magnitude
1530
- :type emag_key: str
1531
- :param mag_ul_key: the key in `lightcurve` that holds the upper limit for the magnitude
1532
- :type mag_ul_key: str
1533
- :param f_key: the key that will hold the flux density
1534
- :type f_key: str
1535
- :param ef_key: the key that will hold the flux density error
1536
- :type ef_key: str
1537
- :param f_ul_key: the key that will hold the flux density upper limit
1538
- :type f_ul_key: str
1539
- :param do_color_correction:
1540
- :type do_color_correction: bool
1541
- :return: the lightcurve with flux density
1542
- :rtype: pandas.DataFrame
1543
- """
1544
-
1545
- if isinstance(lightcurve, dict):
1546
- lightcurve = pd.DataFrame.from_dict(lightcurve, orient='columns')
1547
-
1548
- if do_color_correction:
1549
- w1_minus_w2 = lightcurve[f"W1{mag_key}"] - lightcurve[f"W2{mag_key}"]
1550
- f_c = self.find_color_correction(w1_minus_w2)
1551
- else:
1552
- f_c = None
1553
-
1554
- for b in self.bands:
1555
- mags = lightcurve[f'{b}{mag_key}']
1556
- emags = lightcurve[f'{b}{emag_key}']
1557
-
1558
- flux_densities = self.vegamag_to_flux_density(mags, band=b)
1559
- upper_eflux_densities = self.vegamag_to_flux_density(mags - emags, band=b, color_correction=f_c)
1560
- lower_eflux_densities = self.vegamag_to_flux_density(mags + emags, band=b, color_correction=f_c)
1561
- eflux_densities = upper_eflux_densities - lower_eflux_densities
1562
-
1563
- lightcurve[f'{b}{f_key}'] = flux_densities
1564
- lightcurve[f'{b}{ef_key}'] = eflux_densities
1565
- if mag_ul_key:
1566
- lightcurve[f'{b}{f_ul_key}'] = lightcurve[f'{b}{mag_ul_key}']
1567
-
1568
- return lightcurve
1569
-
1570
- def add_flux_densities_to_saved_lightcurves(self, service):
1571
- """Adds flux densities to all downloaded lightcurves
1572
-
1573
- :param service: The service with which the lightcurves were downloaded
1574
- :type service: str
1575
- """
1576
- data_product = self.load_data_product(service=service)
1577
- for i, i_data_product in tqdm.tqdm(data_product.items(), desc='adding flux densities'):
1578
- data_product[i]["timewise_lightcurve"] = self.add_flux_density(
1579
- i_data_product["timewise_lightcurve"],
1580
- mag_key=f'{self.mean_key}{self.mag_key_ext}',
1581
- emag_key=f'{self.mag_key_ext}{self.rms_key}',
1582
- mag_ul_key=f'{self.mag_key_ext}{self.upper_limit_key}',
1583
- f_key=f'{self.mean_key}{self.flux_density_key_ext}',
1584
- ef_key=f'{self.flux_density_key_ext}{self.rms_key}',
1585
- f_ul_key=f'{self.flux_density_key_ext}{self.upper_limit_key}'
1586
- ).to_dict()
1587
- self._save_data_product(data_product, service=service, overwrite=True)
1588
-
1589
- # ---------------------------------------------------- #
1590
- # END converting to flux densities #
1591
- # ----------------------------------------------------------------------------------- #
1592
-
1593
- # ----------------------------------------------------------------------------------- #
1594
- # START converting to luminosity #
1595
- # ---------------------------------------------------- #
1596
-
1597
- def luminosity_from_flux_density(self, flux_density, band, distance=None, redshift=None,
1598
- unit='erg s-1', flux_density_unit='mJy'):
1599
- """
1600
- Converts a flux density into a luminosity
1601
-
1602
- :param flux_density:
1603
- :type flux_density: float or numpy.ndarray
1604
- :param band:
1605
- :type band: str
1606
- :param distance: distance to the source; if not given, the luminosity distance computed from the redshift is used
1607
- :type distance: astropy.Quantity
1608
- :param redshift: redshift to use when calculating luminosity distance
1609
- :type redshift: float
1610
- :param unit: unit in which to give the luminosity, default is erg s-1
1611
- :type unit: str or astropy.unit
1612
- :param flux_density_unit: unit in which the flux density is given, default is mJy
1613
- :type flux_density_unit: str or astropy.unit
1614
- :return: the resulting luminosities
1615
- :rtype: float or ndarray
1616
- """
1617
-
1618
- if not distance:
1619
- if not redshift:
1620
- raise ValueError('Either redshift or distance has to be given!')
1621
- else:
1622
- distance = Planck18.luminosity_distance(float(redshift))
1623
-
1624
- F_nu = np.array(flux_density) * u.Unit(flux_density_unit) * 4 * np.pi * distance ** 2
1625
- nu = constants.c / self.band_wavelengths[band]
1626
- luminosity = F_nu * nu
1627
- return luminosity.to(unit).value
1628
-
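A short, self-contained example of the nu*L_nu estimate performed here, using astropy units and the Planck18 luminosity distance; the 3.4 micron W1 effective wavelength stands in for the instance's `band_wavelengths` entry:

    import numpy as np
    import astropy.units as u
    from astropy import constants
    from astropy.cosmology import Planck18

    flux_density = 0.3 * u.mJy                     # F_nu
    d_l = Planck18.luminosity_distance(0.1)        # luminosity distance for z = 0.1
    nu = constants.c / (3.4 * u.micron)            # approximate W1 effective frequency
    luminosity = (flux_density * 4 * np.pi * d_l ** 2 * nu).to("erg s-1")
    print(luminosity)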
1629
- def _add_luminosity(self, lightcurve, f_key, ef_key, f_ul_key, lum_key, elum_key, lum_ul_key, **lum_kwargs):
1630
- for band in self.bands:
1631
- fd = lightcurve[band + f_key]
1632
- fd_e = lightcurve[band + ef_key]
1633
- l = self.luminosity_from_flux_density(fd, band, **lum_kwargs)
1634
- el = self.luminosity_from_flux_density(fd_e, band, **lum_kwargs)
1635
- lightcurve[band + lum_key] = l
1636
- lightcurve[band + elum_key] = el
1637
- lightcurve[band + lum_ul_key] = lightcurve[band + f_ul_key]
1638
- return lightcurve
1639
-
1640
- def add_luminosity_to_saved_lightcurves(self, service, redshift_key=None, distance_key=None):
1641
- """Add luminosities to all lightcurves, calculated from flux densities and distance or redshift
1642
-
1643
- :param service: the service with which the lightcurves were downloaded
1644
- :type service: str
1645
- :param redshift_key: the key in the parent sample data frame that holds the redshift info
1646
- :type redshift_key: str
1647
- :param distance_key: the key in the parent sample data frame that holds the distance info
1648
- :type distance_key: str
1649
- """
1650
-
1651
- if (not redshift_key) and (not distance_key):
1652
- raise ValueError('Either distance key or redshift key has to be given!')
1653
-
1654
- data_product = self.load_data_product(service=service)
1655
- for i, i_data_product in tqdm.tqdm(data_product.items(), desc='adding luminosities'):
1656
- parent_sample_idx = int(i.split('_')[0])
1657
- info = self.parent_sample.df.loc[parent_sample_idx]
1658
-
1659
- if distance_key:
1660
- distance = info[distance_key]
1661
- redshift = None
1662
- else:
1663
- distance = None
1664
- redshift = info[redshift_key]
1665
-
1666
- data_product[i]["timewise_lightcurve"] = self._add_luminosity(
1667
- pd.DataFrame.from_dict(i_data_product["timewise_lightcurve"]),
1668
- f_key = self.mean_key + self.flux_density_key_ext,
1669
- ef_key = self.flux_density_key_ext + self.rms_key,
1670
- f_ul_key = self.flux_density_key_ext + self.upper_limit_key,
1671
- lum_key = self.mean_key + self.luminosity_key_ext,
1672
- elum_key = self.luminosity_key_ext + self.rms_key,
1673
- lum_ul_key= self.luminosity_key_ext + self.upper_limit_key,
1674
- redshift = redshift,
1675
- distance = distance
1676
- ).to_dict()
1677
- self._save_data_product(data_product, service=service, overwrite=True)
1678
-
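A typical call pattern for the two post-processing steps above (hypothetical instance `wd` and redshift column name "z"; exactly one of `redshift_key` and `distance_key` is needed):

    wd.add_flux_densities_to_saved_lightcurves(service="tap")
    wd.add_luminosity_to_saved_lightcurves(service="tap", redshift_key="z")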
1679
- # ---------------------------------------------------- #
1680
- # END converting to luminosity #
1681
- # ----------------------------------------------------------------------------------- #
1682
-
1683
- #################################
1684
- # END GET PHOTOMETRY DATA #
1685
- ###########################################################################################################
1686
-
1687
- ###########################################################################################################
1688
- # START MAKE POSITIONAL MASK #
1689
- #####################################
1690
-
1691
- @staticmethod
1692
- def calculate_position_mask(lightcurve, ra, dec, whitelist_region, return_all=False):
1693
- """
1694
- Selects the datapoints that are consistent with the position of the given source.
1695
- All datapoints within the whitelist region are kept. The remaining detections are clustered with HDBSCAN
1696
- and, if a cluster lies within the whitelist region, its members are kept as well. The indices of the rejected datapoints are returned.
1697
-
1698
- :param lightcurve: unstacked lightcurve
1699
- :type lightcurve: pd.DataFrame
1700
- :param ra: RA in degrees of the source
1701
- :type ra: Sequence[float]
1702
- :param dec: Dec in degrees of the source
1703
- :type dec: Sequence[float]
1704
- :param whitelist_region: region in which to keep all datapoints [arcsec]
1705
- :type whitelist_region: float
1706
- :param return_all: if True, return all info collected in the selection process
1707
- :type return_all: bool, optional
1708
- :return:
1709
- positional mask (plus the clustering result, the data mask and the mask for the closest allwise data
1710
- if `return_all` is True)
1711
- :rtype: list (`return_all` is False) or tuple (list, sklearn.cluster.HDBSCAN, array-like, list) (`return_all` is True)
1712
- """
1713
- lc_ra_rad = np.deg2rad(lightcurve.ra.values)
1714
- lc_dec_rad = np.deg2rad(lightcurve.dec.values)
1715
- source_ra_rad = np.deg2rad(ra)
1716
- source_dec_rad = np.deg2rad(dec)
1717
-
1718
- # calculate separation and position angle
1719
- _angular_separation = angular_separation(source_ra_rad, source_dec_rad, lc_ra_rad, lc_dec_rad)
1720
- _position_angle = position_angle(source_ra_rad, source_dec_rad, lc_ra_rad, lc_dec_rad)
1721
-
1722
- # The AllWISE multiframe pipeline detects sources on the deep coadded atlas images and then measures the sources
1723
- # for all available single-exposure images in all bands simultaneously, while the NEOWISE magnitudes are
1724
- # obtained by PSF fit to individual exposures directly. Effect: all allwise data points that belong to the same
1725
- # object have the same position. We take only the closest one and treat it as one datapoint in the clustering.
1726
- allwise_time_mask = lightcurve["mjd"] < 55594
1727
- if any(allwise_time_mask):
1728
- allwise_sep_min = np.min(_angular_separation[allwise_time_mask])
1729
- closest_allwise_mask = (_angular_separation == allwise_sep_min) & allwise_time_mask
1730
- closest_allwise_mask_first_entry = ~closest_allwise_mask.duplicated() & closest_allwise_mask
1731
-
1732
- # the data we want to use is then the selected AllWISE datapoint and the NEOWISE-R data
1733
- data_mask = closest_allwise_mask_first_entry | ~allwise_time_mask
1734
- else:
1735
- closest_allwise_mask_first_entry = closest_allwise_mask = None
1736
- data_mask = np.ones_like(_angular_separation, dtype=bool)
1737
-
1738
- # no matter which cluster they belong to, we want to keep all datapoints within 1 arcsec
1739
- one_arcsec_mask = _angular_separation < np.radians(whitelist_region / 3600)
1740
- selected_indices = set(lightcurve.index[data_mask & one_arcsec_mask])
1741
-
1742
- # if there is more than one datapoint, we use a clustering algorithm to potentially find a cluster with
1743
- # its center within 1 arcsec
1744
- cluster_res = None
1745
- if data_mask.sum() > 1:
1746
- # instead of the polar coordinates separation and position angle we use cartesian coordinates because the
1747
- # clustering algorithm works better with them
1748
- cartesian_full = np.array([
1749
- _angular_separation * np.cos(_position_angle),
1750
- _angular_separation * np.sin(_position_angle)
1751
- ]).T
1752
- cartesian = cartesian_full[data_mask]
1753
-
1754
- # we are now ready to do the clustering
1755
- cluster_distance_arcsec = 0.5 # distance of clusters to be considered as one [arcsec]
1756
- cluster_res = HDBSCAN(
1757
- store_centers="centroid",
1758
- min_cluster_size=max(min(20, len(cartesian)), 2),
1759
- allow_single_cluster=True,
1760
- cluster_selection_epsilon=np.radians(cluster_distance_arcsec / 3600)
1761
- ).fit(cartesian)
1762
-
1763
- # we select the closest cluster within 1 arcsec
1764
- cluster_separations = np.sqrt(np.sum(cluster_res.centroids_ ** 2, axis=1))
1765
- logger.debug(f"Found {len(cluster_separations)} clusters")
1766
-
1767
- # if there is no cluster or no cluster within 1 arcsec,
1768
- # only the datapoints within 1 arcsec are selected as we did above
1769
- if len(cluster_separations) == 0:
1770
- logger.debug("No cluster found. Selecting all noise datapoints within 1 arcsec.")
1771
- elif min(cluster_separations) > np.radians(whitelist_region / 3600):
1772
- logger.debug(f"Closest cluster is at {cluster_separations} arcsec")
1773
-
1774
- # if there is a cluster within 1 arcsec, we select all datapoints belonging to that cluster
1775
- # in addition to the datapoints within 1 arcsec
1776
- else:
1777
- closest_label = cluster_separations.argmin()
1778
- selected_cluster_mask = cluster_res.labels_ == closest_label
1779
-
1780
- # now we have to trace back the selected datapoints to the original lightcurve
1781
- selected_indices |= set(lightcurve.index[data_mask][selected_cluster_mask])
1782
- logger.debug(f"Selected {len(selected_indices)} datapoints")
1783
-
1784
- # if the closest allwise source is selected, we also select all other detections belonging to that
1785
- # source in the allwise period
1786
- if (
1787
- closest_allwise_mask_first_entry is not None
1788
- and lightcurve.index[closest_allwise_mask_first_entry][0] in selected_indices
1789
- ):
1790
- closest_allwise_mask_not_first = closest_allwise_mask & ~closest_allwise_mask_first_entry
1791
- closest_allwise_indices_not_first = lightcurve.index[closest_allwise_mask_not_first]
1792
- logger.debug(f"Adding remaining {len(closest_allwise_indices_not_first)} from AllWISE period")
1793
- selected_indices |= set(closest_allwise_indices_not_first)
1794
-
1795
- # because in most cases we will have more good indices than bad indices, we store the bad indices instead
1796
- bad_indices = lightcurve.index[~lightcurve.index.isin(selected_indices)]
1797
-
1798
- if return_all:
1799
- return_closest_allwise_mask = list(closest_allwise_mask) if closest_allwise_mask is not None else None
1800
- return list(bad_indices), cluster_res, data_mask, return_closest_allwise_mask
1801
- else:
1802
- return list(bad_indices)
1803
-
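The clustering above works on local tangent-plane offsets built from the angular separation and position angle. A compact, hedged sketch of just that projection step, using the top-level astropy.coordinates imports available in recent astropy (the real method additionally whitelists everything inside `whitelist_region` and special-cases the co-added AllWISE positions):

    import numpy as np
    import astropy.units as u
    from astropy.coordinates import angular_separation, position_angle
    from sklearn.cluster import HDBSCAN

    def cluster_detections(lc_ra_deg, lc_dec_deg, src_ra_deg, src_dec_deg):
        # work in radians; position_angle returns an Angle, so convert it back to plain radians
        args = (np.deg2rad(src_ra_deg), np.deg2rad(src_dec_deg),
                np.deg2rad(lc_ra_deg), np.deg2rad(lc_dec_deg))
        sep = angular_separation(*args)
        pa = position_angle(*args).to_value(u.rad)
        # project onto local cartesian offsets, which the clustering handles better than polar coordinates
        xy = np.column_stack([sep * np.cos(pa), sep * np.sin(pa)])
        return HDBSCAN(min_cluster_size=2, allow_single_cluster=True, store_centers="centroid").fit(xy)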
1804
- def get_position_mask(self, service, chunk_number):
1805
- """
1806
- Get the position mask for a chunk
1807
-
1808
- :param service: The service that was used to download the data, either of `gator` or `tap`
1809
- :type service: str
1810
- :param chunk_number: chunk number
1811
- :type chunk_number: int
1812
- :returns: position masks
1813
- :rtype: dict
1814
- """
1815
-
1816
- logger.info(f"getting position masks for {service}, chunk {chunk_number}")
1817
- fn = self.cache_dir / "position_masks" / f"{service}_chunk{chunk_number}.json"
1818
-
1819
- if not fn.is_file():
1820
- logger.debug(f"No file {fn}. Calculating position masks.")
1821
-
1822
- if service == "tap":
1823
- unbinned_lcs = self.get_unbinned_lightcurves(chunk_number)
1824
- elif service == "gator":
1825
- unbinned_lcs = self._get_unbinned_lightcurves_gator(chunk_number)
1826
- else:
1827
- raise ValueError(f"Service must be one of 'gator' or 'tap', not {service}!")
1828
-
1829
- position_masks = dict()
1830
-
1831
- for i in tqdm.tqdm(unbinned_lcs[self._tap_orig_id_key].unique(), "calculating position masks"):
1832
- idt = self.parent_sample.df.index.dtype.type(i)
1833
- ra = self.parent_sample.df.loc[idt, self.parent_sample.default_keymap["ra"]]
1834
- dec = self.parent_sample.df.loc[idt, self.parent_sample.default_keymap["dec"]]
1835
- id = self.parent_sample.df.loc[idt, self.parent_sample.default_keymap["id"]]
1836
- lightcurve = unbinned_lcs[unbinned_lcs[self._tap_orig_id_key] == i]
1837
-
1838
- logger.debug(f"calculating position mask for {id} ({ra}, {dec})")
1839
- bad_indices = self.calculate_position_mask(
1840
- lightcurve,
1841
- ra,
1842
- dec,
1843
- self.whitelist_region.to("arcsec").value
1844
- )
1845
- if len(bad_indices) > 0:
1846
- position_masks[str(i)] = bad_indices
1847
-
1848
- fn.parent.mkdir(exist_ok=True, parents=True)
1849
- with open(fn, "w") as f:
1850
- json.dump(position_masks, f)
1851
-
1852
- else:
1853
- logger.debug(f"loading {fn}")
1854
- with open(fn, "r") as f:
1855
- position_masks = json.load(f)
1856
-
1857
- return position_masks
1858
-
1859
- #####################################
1860
- # END MAKE POSITIONAL MASK #
1861
- ###########################################################################################################
1862
-
1863
- ###########################################################################################################
1864
- # START MAKE PLOTTING FUNCTIONS #
1865
- #####################################
1866
-
1867
- def plot_lc(self, parent_sample_idx, service='tap', plot_unbinned=False, plot_binned=True,
1868
- interactive=False, fn=None, ax=None, save=True, lum_key='flux_density', **kwargs):
1869
- """Make a pretty plot of a lightcurve
1870
-
1871
- :param parent_sample_idx: The index in the parent sample of the lightcurve
1872
- :type parent_sample_idx: int
1873
- :param service: the service with which the lightcurves were downloaded
1874
- :type service: str
1875
- :param plot_unbinned: plot unbinned data
1876
- :type plot_unbinned: bool
1877
- :param plot_binned: plot binned lightcurve
1878
- :type plot_binned: bool
1879
- :param interactive: interactive mode
1880
- :type interactive: bool
1881
- :param fn: filename, defaults to </path/to/timewise/data/dir>/output/plots/<base_name>/<parent_sample_index>_<lum_key>.pdf
1882
- :type fn: str
1883
- :param ax: pre-existing matplotlib.Axis
1884
- :param save: save the plot
1885
- :type save: bool
1886
- :param lum_key: the unit of luminosity to use in the plot, one of 'mag', 'flux_density' or 'luminosity'
1887
- :param kwargs: any additional kwargs will be passed on to `matplotlib.pyplot.subplots()`
1888
- :return: the matplotlib.Figure and matplotlib.Axes if `interactive=True`
1889
- """
1890
-
1891
- logger.debug(f"loading binned lightcurves")
1892
- data_product = self.load_data_product(service)
1893
- _get_unbinned_lcs_fct = self.get_unbinned_lightcurves if service == 'tap' else self._get_unbinned_lightcurves_gator
1894
-
1895
- wise_id = self.parent_sample.df.loc[int(parent_sample_idx), self.parent_wise_source_id_key]
1896
- if isinstance(wise_id, float) and not np.isnan(wise_id):
1897
- wise_id = int(wise_id)
1898
- logger.debug(f"{wise_id} for {parent_sample_idx}")
1899
-
1900
- lc = pd.DataFrame.from_dict(data_product[str(int(parent_sample_idx))]["timewise_lightcurve"])
1901
-
1902
- if plot_unbinned:
1903
- _chunk_number = self._get_chunk_number(parent_sample_index=parent_sample_idx)
1904
-
1905
- if service == 'tap':
1906
- unbinned_lcs = self.get_unbinned_lightcurves(_chunk_number)
1907
-
1908
- else:
1909
- unbinned_lcs = self._get_unbinned_lightcurves_gator(_chunk_number)
1910
-
1911
- unbinned_lc = unbinned_lcs[unbinned_lcs[self._tap_orig_id_key] == int(parent_sample_idx)]
1912
-
1913
- else:
1914
- unbinned_lc = None
1915
-
1916
- _lc = lc if plot_binned else None
1917
-
1918
- if not fn:
1919
- fn = self.plots_dir / f"{parent_sample_idx}_{lum_key}.pdf"
1920
-
1921
- return self._plot_lc(lightcurve=_lc, unbinned_lc=unbinned_lc, interactive=interactive, fn=fn, ax=ax,
1922
- save=save, lum_key=lum_key, **kwargs)
1923
-
1924
- def _plot_lc(self, lightcurve=None, unbinned_lc=None, interactive=False, fn=None, ax=None, save=True,
1925
- lum_key='flux_density', colors=None, **kwargs):
1926
-
1927
- if not colors:
1928
- colors = self.band_plot_colors
1929
-
1930
- if not ax:
1931
- fig, ax = plt.subplots(**kwargs)
1932
- else:
1933
- fig = plt.gcf()
1934
-
1935
- for b in self.bands:
1936
- try:
1937
- if not isinstance(lightcurve, type(None)):
1938
- ul_mask = np.array(lightcurve[f"{b}_{lum_key}{self.upper_limit_key}"]).astype(bool)
1939
- ax.errorbar(lightcurve.mean_mjd[~ul_mask], lightcurve[f"{b}{self.mean_key}_{lum_key}"][~ul_mask],
1940
- yerr=lightcurve[f"{b}_{lum_key}{self.rms_key}"][~ul_mask],
1941
- label=b, ls='', marker='s', c=colors[b], markersize=4,
1942
- markeredgecolor='k', ecolor='k', capsize=2)
1943
- ax.scatter(lightcurve.mean_mjd[ul_mask], lightcurve[f"{b}{self.mean_key}_{lum_key}"][ul_mask],
1944
- marker='v', c=colors[b], alpha=0.7, s=2)
1945
-
1946
- if not isinstance(unbinned_lc, type(None)):
1947
- m = ~unbinned_lc[f"{b}_{lum_key}"].isna()
1948
- ul_mask = unbinned_lc[f"{b}_{lum_key}{self.error_key_ext}"].isna()
1949
-
1950
- tot_m = m & ~ul_mask
1951
- if np.any(tot_m):
1952
- ax.errorbar(unbinned_lc.mjd[tot_m], unbinned_lc[f"{b}_{lum_key}"][tot_m],
1953
- yerr=unbinned_lc[f"{b}_{lum_key}{self.error_key_ext}"][tot_m],
1954
- label=f"{b} unbinned", ls='', marker='o', c=colors[b], markersize=4,
1955
- alpha=0.3)
1956
-
1957
- single_ul_m = m & ul_mask
1958
- if np.any(single_ul_m):
1959
- label = f"{b} unbinned upper limits" if not np.any(tot_m) else ""
1960
- ax.scatter(unbinned_lc.mjd[single_ul_m], unbinned_lc[f"{b}_{lum_key}"][single_ul_m],
1961
- marker="d", c=colors[b], alpha=0.3, s=1, label=label)
1962
-
1963
- except KeyError as e:
1964
- raise KeyError(f"Could not find brightness key {e}!")
1965
-
1966
- if lum_key == 'mag':
1967
- ylim = ax.get_ylim()
1968
- ax.set_ylim(max(ylim), min(ylim))
1969
-
1970
- ax.set_xlabel('MJD')
1971
- ax.set_ylabel(lum_key)
1972
- ax.legend()
1973
-
1974
- if save:
1975
- logger.debug(f"saving under {fn}")
1976
- fig.savefig(fn)
1977
-
1978
- if interactive:
1979
- return fig, ax
1980
- else:
1981
- plt.close()
1982
-
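A minimal usage example of the plotting entry point above (hypothetical instance `wd` and parent-sample index; with `interactive=True` the figure and axes are returned for further styling):

    fig, ax = wd.plot_lc(parent_sample_idx=42, service="tap",
                         plot_unbinned=True, lum_key="mag",
                         interactive=True, save=False)
    ax.set_title("parent sample index 42")
    fig.savefig("42_mag.pdf")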
1983
- #####################################
1984
- # END MAKE PLOTTING FUNCTIONS #
1985
- ###########################################################################################################
1986
-
1987
- ###########################################################################################################
1988
- # START CALCULATE METADATA #
1989
- #####################################
1990
-
1991
- def calculate_metadata(self, service, chunk_number=None, jobID=None, overwrite=True):
1992
- """Calculates the metadata for all downloaded lightcurves.
1993
- Results will be saved under
1994
-
1995
- </path/to/timewise/data/dir>/output/<base_name>/lightcurves/metadata_<service>.json
1996
-
1997
- :param service: the service with which the lightcurves were downloaded
1998
- :type service: str
1999
- :param chunk_number: the chunk number to use, default uses all chunks
2000
- :type chunk_number: int
2001
- :param jobID: the job ID to use, default uses all lightcurves
2002
- :type jobID: int
2003
- :param overwrite: overwrite existing metadata file
2004
- :type overwrite: bool
2005
- """
2006
- data_product = self.load_data_product(service, chunk_number, jobID)
2007
- for ID, i_data_product in tqdm.tqdm(data_product.items(), desc="calculating metadata"):
2008
- if "timewise_lightcurve" in i_data_product:
2009
- lc = pd.DataFrame.from_dict(i_data_product["timewise_lightcurve"])
2010
- metadata = self.calculate_metadata_single(lc)
2011
- data_product[ID]["timewise_metadata"] = metadata
2012
-
2013
- self._save_data_product(data_product, service, chunk_number, jobID, overwrite=overwrite)
2014
-
2015
- @abc.abstractmethod
2016
- def calculate_metadata_single(self, lcs):
2017
- """
2018
- Calculates some properties of the lightcurves
2019
-
2020
- :param lcs: the lightcurve
2021
- :type lcs: pandas.DataFrame
2022
- """
2023
- raise NotImplementedError
2024
-
2025
- #####################################
2026
- # END CALCULATE METADATA #
2027
- ###########################################################################################################