timewise 0.4.12__py3-none-any.whl → 0.5.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
timewise/__init__.py CHANGED
@@ -2,4 +2,4 @@ from timewise.wise_data_by_visit import WiseDataByVisit
  from timewise.wise_bigdata_desy_cluster import WISEDataDESYCluster
  from timewise.parent_sample_base import ParentSampleBase
 
- __version__ = "0.4.12"
+ __version__ = "0.5.3"
timewise/big_parent_sample.py CHANGED
@@ -1,5 +1,4 @@
  import gc
- import os
  import pickle
  import threading
  import time
@@ -30,7 +29,7 @@ class BigParentSampleBase(ParentSampleBase):
  self._keep_df_in_memory = keep_file_in_memory
  self._time_when_df_was_used_last = time.time()
  self._df = None
- self._cache_file = os.path.join(self.cache_dir, "cache.pkl")
+ self._cache_file = self.cache_dir / "cache.pkl"
  self._lock_cache_file = False
 
  self._clean_thread = threading.Thread(target=self._periodically_drop_df_to_disk, daemon=True, name='ParentSampleCleanThread').start()
@@ -50,7 +49,7 @@ class BigParentSampleBase(ParentSampleBase):
 
  if isinstance(self._df, type(None)):
 
- if os.path.isfile(self._cache_file):
+ if self._cache_file.is_file():
  logger.debug(f'loading from {self._cache_file}')
  self._wait_for_unlock_cache_file()
  self._lock_cache_file = True
@@ -97,9 +96,9 @@ class BigParentSampleBase(ParentSampleBase):
  logger.debug('stopped clean thread')
 
  def __del__(self):
- if hasattr(self, "_cache_file") and os.path.isfile(self._cache_file):
+ if hasattr(self, "_cache_file") and self._cache_file.is_file():
  logger.debug(f'removing {self._cache_file}')
- os.remove(self._cache_file)
+ self._cache_file.unlink()
 
  if hasattr(self, "clean_thread"):
  logger.debug(f'stopping clean thread')
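The hunks above are typical of this release's blanket migration from `os.path` calls to `pathlib.Path` objects. A minimal standard-library sketch of the equivalences used throughout the diff (illustrative only, not timewise code):

```python
import os.path
import tempfile
from pathlib import Path

cache_dir = Path(tempfile.mkdtemp())

cache_file = cache_dir / "cache.pkl"        # was: os.path.join(cache_dir, "cache.pkl")
assert str(cache_file) == os.path.join(str(cache_dir), "cache.pkl")

cache_file.write_bytes(b"")                 # create an empty file to test against
assert cache_file.is_file()                 # was: os.path.isfile(cache_file)

cache_file.unlink()                         # was: os.remove(cache_file)
assert not cache_file.exists()
```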
timewise/config_loader.py CHANGED
@@ -1,11 +1,11 @@
  import logging
  import yaml
  import json
- import os
  import inspect
  from pydantic import BaseModel, validator
  import pandas as pd
  import importlib
+ from pathlib import Path
 
  from timewise.parent_sample_base import ParentSampleBase
  from timewise.wise_data_base import WISEDataBase
@@ -80,7 +80,7 @@ class TimewiseConfigLoader(BaseModel):
  @validator("filename")
  def validate_file(cls, v: str):
  if v is not None:
- if not os.path.isfile(v):
+ if not Path(v).is_file():
  raise ValueError(f"No file {v}!")
  return v
 
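The validator's logic is unchanged apart from the `pathlib` swap. For context, a self-contained sketch of this pydantic v1 validator pattern (matching the `pydantic (>=1.9.0,<2.0.0)` pin in the METADATA further down; the model below is a stripped-down stand-in, not the real `TimewiseConfigLoader`):

```python
from pathlib import Path
from typing import Optional

from pydantic import BaseModel, validator  # pydantic v1 API, per the <2.0.0 pin

class ConfigSketch(BaseModel):
    filename: Optional[str] = None

    @validator("filename")
    def validate_file(cls, v):
        # reject a configured file that does not exist on disk
        if v is not None and not Path(v).is_file():
            raise ValueError(f"No file {v}!")
        return v

ConfigSketch(filename=None)              # passes: no file configured
# ConfigSketch(filename="missing.yml")   # would raise a ValidationError
```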
timewise/general.py CHANGED
@@ -1,4 +1,6 @@
- import logging, os
+ import logging
+ import os
+ from pathlib import Path
 
 
  # Setting up the Logger
@@ -13,29 +15,35 @@ main_logger.propagate = False # do not propagate to root logger
 
  logger = logging.getLogger(__name__)
 
- # Setting up data directory
- DATA_DIR_KEY = 'TIMEWISE_DATA'
- if DATA_DIR_KEY in os.environ:
- data_dir = os.path.expanduser(os.environ[DATA_DIR_KEY])
- else:
- logger.warning(f'{DATA_DIR_KEY} not set! Using home directory.')
- data_dir = os.path.expanduser('~/')
-
- BIGDATA_DIR_KEY = 'TIMEWISE_BIGDATA'
- if BIGDATA_DIR_KEY in os.environ:
- bigdata_dir = os.path.expanduser(os.environ[BIGDATA_DIR_KEY])
- logger.info(f"Using bigdata directory {bigdata_dir}")
- else:
- bigdata_dir = None
- logger.info(f"No bigdata directory set.")
-
- output_dir = os.path.join(data_dir, 'output')
- plots_dir = os.path.join(output_dir, 'plots')
- cache_dir = os.path.join(data_dir, 'cache')
-
- for d in [data_dir, output_dir, plots_dir, cache_dir]:
- if not os.path.isdir(d):
- os.mkdir(os.path.abspath(d))
+
+ def get_directories() -> dict[str, Path | None]:
+ # Setting up data directory
+ DATA_DIR_KEY = 'TIMEWISE_DATA'
+ if DATA_DIR_KEY in os.environ:
+ data_dir = Path(os.environ[DATA_DIR_KEY]).expanduser()
+ else:
+ logger.warning(f'{DATA_DIR_KEY} not set! Using home directory.')
+ data_dir = Path('~/').expanduser()
+
+ BIGDATA_DIR_KEY = 'TIMEWISE_BIGDATA'
+ if BIGDATA_DIR_KEY in os.environ:
+ bigdata_dir = Path(os.environ[BIGDATA_DIR_KEY]).expanduser()
+ logger.info(f"Using bigdata directory {bigdata_dir}")
+ else:
+ bigdata_dir = None
+ logger.info(f"No bigdata directory set.")
+
+ output_dir = data_dir / 'output'
+ plots_dir = output_dir / 'plots'
+ cache_dir = data_dir / 'cache'
+
+ return {
+ 'data_dir': data_dir,
+ 'bigdata_dir': bigdata_dir,
+ 'output_dir': output_dir,
+ 'plots_dir': plots_dir,
+ 'cache_dir': cache_dir
+ }
 
 
  def backoff_hndlr(details):
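The key change in `general.py`: directories are no longer resolved once at import time. `get_directories()` reads `TIMEWISE_DATA`/`TIMEWISE_BIGDATA` on every call and returns `Path` objects; unlike the old module-level code it also no longer creates the directories, so callers throughout this diff now call `mkdir(parents=True, exist_ok=True)` themselves. A hedged usage sketch:

```python
# Sketch: the environment is read when get_directories() is called,
# so changing TIMEWISE_DATA takes effect without re-importing timewise.
import os
os.environ["TIMEWISE_DATA"] = "/tmp/timewise_data"

from timewise.general import get_directories

dirs = get_directories()
print(dirs["cache_dir"])     # /tmp/timewise_data/cache (a pathlib.Path)
print(dirs["bigdata_dir"])   # None unless TIMEWISE_BIGDATA is set

# get_directories() only computes paths; creating them is the caller's job:
dirs["cache_dir"].mkdir(parents=True, exist_ok=True)
```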
timewise/parent_sample_base.py CHANGED
@@ -3,7 +3,7 @@ import pandas as pd
  import numpy as np
  import logging
 
- from timewise.general import cache_dir, plots_dir
+ from timewise.general import get_directories
  from timewise.utils import plot_sdss_cutout, plot_panstarrs_cutout
 
 
@@ -26,14 +26,14 @@ class ParentSampleBase(abc.ABC):
 
  def __init__(self, base_name):
  # set up directories
- self.cache_dir = os.path.join(cache_dir, base_name)
- self.plots_dir = os.path.join(plots_dir, base_name)
+ d = get_directories()
+ self.cache_dir = d["cache_dir"] / base_name
+ self.plots_dir = d["plots_dir"] / base_name
 
  for d in [self.cache_dir, self.plots_dir]:
- if not os.path.isdir(d):
- os.makedirs(d)
+ d.parent.mkdir(parents=True, exist_ok=True)
 
- self.local_sample_copy = os.path.join(self.cache_dir, 'sample.csv')
+ self.local_sample_copy = self.cache_dir / 'sample.csv'
 
  def plot_cutout(self, ind, arcsec=20, interactive=False, **kwargs):
  """
@@ -54,9 +54,10 @@ class ParentSampleBase(abc.ABC):
 
  fn = kwargs.pop(
  "fn",
- [os.path.join(self.plots_dir, f"{i}_{r[self.default_keymap['id']]}.pdf")
+ [self.plots_dir / f"{i}_{r[self.default_keymap['id']]}.pdf"
  for i, r in sel.iterrows()]
  )
+ self.plots_dir.mkdir(parents=True, exist_ok=True)
 
  logger.debug(f"\nRA: {ra}\nDEC: {dec}\nTITLE: {title}\nFN: {fn}")
  ou = list()
@@ -84,4 +85,5 @@ class ParentSampleBase(abc.ABC):
 
  def save_local(self):
  logger.debug(f"saving under {self.local_sample_copy}")
+ self.local_sample_copy.parent.mkdir(parents=True, exist_ok=True)
  self.df.to_csv(self.local_sample_copy)
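Together with the `get_directories()` change, directory creation moves from import time to just before each write, as in `save_local()` above. A minimal standard-library sketch of that create-on-write pattern:

```python
import tempfile
from pathlib import Path

local_sample_copy = Path(tempfile.mkdtemp()) / "my_sample" / "sample.csv"

# Create the containing directory only when a write is imminent:
# parents=True builds intermediate directories, exist_ok=True is idempotent.
local_sample_copy.parent.mkdir(parents=True, exist_ok=True)
local_sample_copy.write_text("ra,dec\n10.0,-5.0\n")
```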
timewise/utils.py CHANGED
@@ -13,14 +13,19 @@ from astropy.table import Table
  from PIL import Image
  from io import BytesIO
  import hashlib
+ from threading import Thread
+ from queue import Queue
+ import sys
 
-
- from timewise.general import cache_dir, backoff_hndlr
+ from timewise.general import backoff_hndlr, get_directories
 
 
  logger = logging.getLogger(__name__)
  mirong_url = 'http://staff.ustc.edu.cn/~jnac/data_public/wisevar.txt'
- local_copy = os.path.join(cache_dir, 'mirong_sample.csv')
+
+
+ def get_mirong_path():
+ return get_directories()['cache_dir'] / 'mirong_sample.csv'
 
 
  @cache
@@ -30,7 +35,8 @@ def get_2d_gaussian_correction(cl):
 
  def get_mirong_sample():
 
- if not os.path.isfile(local_copy):
+ mirong_path = get_mirong_path()
+ if not mirong_path.is_file():
 
  logger.info(f'getting MIRONG sample from {mirong_url}')
  r = requests.get(mirong_url)
@@ -45,16 +51,16 @@ def get_mirong_sample():
  mirong_sample = pd.DataFrame(lll[1:-1], columns=lll[0])
  mirong_sample['ra'] = mirong_sample['RA']
  mirong_sample['dec'] = mirong_sample['DEC']
- logger.debug(f'saving to {local_copy}')
-
- mirong_sample.to_csv(local_copy, index=False)
  logger.info(f'found {len(mirong_sample)} objects in MIRONG Sample')
+
  mirong_sample.drop(columns=['ra', 'dec'], inplace=True)
- mirong_sample.to_csv(local_copy, index=False)
+ logger.debug(f'saving to {mirong_path}')
+ mirong_path.parent.mkdir(parents=True, exist_ok=True)
+ mirong_sample.to_csv(mirong_path, index=False)
 
  else:
- logger.debug(f'loading {local_copy}')
- mirong_sample = pd.read_csv(local_copy)
+ logger.debug(f'loading {mirong_path}')
+ mirong_sample = pd.read_csv(mirong_path)
 
  return mirong_sample
 
@@ -154,9 +160,9 @@ class PanSTARRSQueryError(Exception):
  def load_cache_or_download(url):
  logger.debug(f"loading or downloading {url}")
  h = hashlib.md5(url.encode()).hexdigest()
- cache_file = os.path.join(cache_dir, h + ".cache")
+ cache_dir = get_directories()['cache_dir']
+ cache_file = cache_dir / (h + ".cache")
  logger.debug(f"cache file is {cache_file}")
- if not os.path.isfile(cache_file):
+ if not cache_file.is_file():
  logger.debug(f"downloading {url}")
  r = requests.get(url)
  with open(cache_file, 'wb') as f:
@@ -424,9 +431,48 @@ def get_excess_variance(y, y_err, mu):
 
  class StableAsyncTAPJob(vo.dal.AsyncTAPJob):
  """
- Implements backoff for call of phase which otherwise breaks the code if there are connection issues
+ Implements backoff for call of phase which otherwise breaks the code if there are connection issues.
+ Also stores the response of TapQuery.submit() under self.submit_response for debugging
  """
 
+ def __init__(self, url, *, session=None, delete=True):
+ super(StableAsyncTAPJob, self).__init__(url, session=session, delete=delete)
+ self.submit_response = None
+
+ @classmethod
+ def create(
+ cls, baseurl, query, *, language="ADQL", maxrec=None, uploads=None,
+ session=None, **keywords):
+ """
+ creates a async tap job on the server under ``baseurl``
+ Raises requests.HTTPError if TAPQuery.submit() failes.
+
+ Parameters
+ ----------
+ baseurl : str
+ the TAP baseurl
+ query : str
+ the query string
+ language : str
+ specifies the query language, default ADQL.
+ useful for services which allow to use the backend query language.
+ maxrec : int
+ the maximum records to return. defaults to the service default
+ uploads : dict
+ a mapping from table names to objects containing a votable
+ session : object
+ optional session to use for network requests
+ """
+ tapquery = vo.dal.TAPQuery(
+ baseurl, query, mode="async", language=language, maxrec=maxrec,
+ uploads=uploads, session=session, **keywords)
+ response = tapquery.submit()
+ response.raise_for_status()
+ job = cls(response.url, session=session)
+ job._client_set_maxrec = maxrec
+ job.submit_response = response
+ return job
+
  @property
  @backoff.on_exception(
  backoff.expo,
@@ -446,6 +492,7 @@ class StableTAPService(vo.dal.TAPService):
  def submit_job(
  self,
  query,
+ *,
  language="ADQL",
  maxrec=None,
  uploads=None,
@@ -454,10 +501,10 @@
  return StableAsyncTAPJob.create(
  self.baseurl,
  query,
- language,
- maxrec,
- uploads,
- self._session,
+ language=language,
+ maxrec=maxrec,
+ uploads=uploads,
+ session=self._session,
  **keywords
  )
 
@@ -465,3 +512,47 @@
  #######################################################
  # END CUSTOM TAP Service #
  ###########################################################################################################
+
+
+ ###########################################################################################################
+ # START CUSTOM TAP Service #
+ #######################################################
+
+
+ class ErrorQueue(Queue):
+ """Queue subclass whose join() re-raises exceptions from worker threads."""
+
+ def __init__(self, *args, **kwargs):
+ super().__init__(*args, **kwargs)
+ self.error_queue = Queue()
+
+ def report_error(self, exc_info):
+ """Called by workers to push an exception into the error queue."""
+ self.error_queue.put(exc_info)
+ # Also decrement unfinished_tasks, so join() won't block forever
+ with self.all_tasks_done:
+ self.unfinished_tasks = max(0, self.unfinished_tasks - 1)
+ self.all_tasks_done.notify_all()
+
+ def join(self):
+ """Wait until all tasks are done, or raise if a worker failed."""
+ with self.all_tasks_done:
+ while self.unfinished_tasks:
+ if not self.error_queue.empty():
+ exc_info = self.error_queue.get()
+ raise exc_info[1].with_traceback(exc_info[2])
+ self.all_tasks_done.wait()
+
+
+ class ExceptionSafeThread(Thread):
+ """Thread subclass that reports uncaught exceptions to the ErrorQueue."""
+
+ def __init__(self, error_queue: ErrorQueue, *args, **kwargs):
+ super().__init__(*args, **kwargs)
+ self.error_queue = error_queue
+
+ def run(self):
+ try:
+ super().run()
+ except Exception:
+ self.error_queue.report_error(sys.exc_info())
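The new `ErrorQueue`/`ExceptionSafeThread` pair makes worker-thread failures visible to the coordinating thread. A minimal usage sketch; a second task is left pending so that `join()` is still waiting when the error is reported (by construction, `join()` only raises while tasks remain unfinished):

```python
from timewise.utils import ErrorQueue, ExceptionSafeThread

q = ErrorQueue()
q.put("task-1")
q.put("task-2")                            # stays pending, keeps join() waiting

def worker():
    q.get()                                # take task-1 ...
    raise ValueError("worker failed")      # ... and fail before task_done()

t = ExceptionSafeThread(error_queue=q, target=worker, daemon=True)
t.start()

try:
    q.join()                               # re-raises the worker's ValueError
except ValueError as e:
    print("caught from worker:", e)
```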
timewise/wise_bigdata_desy_cluster.py CHANGED
@@ -1,5 +1,4 @@
  import getpass
- import glob
  import os
  import json
  import subprocess
@@ -15,6 +14,7 @@ import shutil
  import gc
  import tqdm
  import sys
+ from pathlib import Path
 
  from functools import cache
  from scipy.stats import chi2, f
@@ -28,8 +28,9 @@ import logging
 
  from typing import List
 
- from timewise.general import data_dir, bigdata_dir, backoff_hndlr
+ from timewise.general import get_directories, backoff_hndlr
  from timewise.wise_data_by_visit import WiseDataByVisit
+ from timewise.utils import StableAsyncTAPJob, ErrorQueue, ExceptionSafeThread
 
 
  logger = logging.getLogger(__name__)
@@ -41,9 +42,9 @@ class WISEDataDESYCluster(WiseDataByVisit):
  In addition to the attributes of `WiseDataByVisit` this class has the following attributes:
 
  :param executable_filename: the filename of the executable that will be submitted to the cluster
- :type executable_filename: str
+ :type executable_filename: Path
  :param submit_file_filename: the filename of the submit file that will be submitted to the cluster
- :type submit_file_filename: str
+ :type submit_file_filename: Path
  :param job_id: the job id of the submitted job
  :type job_id: str
  :param cluster_jobID_map: a dictionary mapping the chunk number to the cluster job id
@@ -51,13 +52,16 @@ class WISEDataDESYCluster(WiseDataByVisit):
  :param clusterJob_chunk_map: a dictionary mapping the cluster job id to the chunk number
  :type clusterJob_chunk_map: dict
  :param cluster_info_file: the filename of the file that stores the cluster info, loaded by the cluster jobs
- :type cluster_info_file: str
+ :type cluster_info_file: Path
  :param start_time: the time when the download started
  :type start_time: float
  """
  status_cmd = f'qstat -u {getpass.getuser()}'
  # finding the file that contains the setup function
- BASHFILE = os.getenv('TIMEWISE_DESY_CLUSTER_BASHFILE', os.path.expanduser('~/.bashrc'))
+ if (env_file := os.getenv('TIMEWISE_DESY_CLUSTER_BASHFILE')) is not None:
+ BASHFILE = Path(env_file)
+ else:
+ BASHFILE = Path("~/.bashrc").expanduser()
 
  def __init__(
  self,
@@ -89,13 +93,14 @@
 
  # set up cluster stuff
  self._status_output = None
- self.executable_filename = os.path.join(self.cluster_dir, "run_timewise.sh")
- self.submit_file_filename = os.path.join(self.cluster_dir, "submit_file.submit")
+ directories = get_directories()
+ self.executable_filename = self.cluster_dir / "run_timewise.sh"
+ self.submit_file_filename = self.cluster_dir / "submit_file.submit"
  self.job_id = None
 
  self.cluster_jobID_map = None
  self.clusterJob_chunk_map = None
- self.cluster_info_file = os.path.join(self.cluster_dir, 'cluster_info.pkl')
+ self.cluster_info_file = self.cluster_dir / 'cluster_info.pkl'
  self._overwrite = True
 
  # these attributes will be set later and are used to pass them to the threads
@@ -121,9 +126,10 @@
  fn = super(WISEDataDESYCluster, self)._data_product_filename(service, chunk_number=chunk_number, jobID=jobID)
 
  if use_bigdata_dir:
- fn = fn.replace(data_dir, bigdata_dir)
+ d = get_directories()
+ fn = str(fn).replace(str(d["data_dir"]), str(d["bigdata_dir"]))
 
- return fn + ".gz"
+ return Path(str(fn) + ".gz")
 
  def load_data_product(
  self,
@@ -199,7 +205,7 @@
  def get_sample_photometric_data(self, max_nTAPjobs=8, perc=1, tables=None, chunks=None,
  cluster_jobs_per_chunk=100, wait=5, remove_chunks=False,
  query_type='positional', overwrite=True,
- storage_directory=bigdata_dir,
+ storage_directory=None,
  node_memory='8G',
  skip_download=False,
  skip_input=False,
@@ -225,8 +231,8 @@
  :type query_type: str
  :param overwrite: overwrite already existing lightcurves and metadata
  :type overwrite: bool
- :param storage_directory: move binned files and raw data here after work is done
- :type storage_directory: str
+ :param storage_directory: move binned files and raw data here after work is done, defaults to TIMEWISE_BIGDATA_DIR
+ :type storage_directory: str | Path
  :param node_memory: memory per node on the cluster, default is 8G
  :type node_memory: str
  :param skip_download: if True, assume data is already downloaded, only do binning in that case
@@ -281,25 +287,29 @@
  self.clear_cluster_log_dir()
  self._save_cluster_info()
  self._overwrite = overwrite
- self._storage_dir = storage_directory
+ self._storage_dir = get_directories()['bigdata_dir'] if storage_directory is None else Path(storage_directory)
 
  # --------------------------- set up queues --------------------------- #
 
  self.queue = queue.Queue()
- self._tap_queue = queue.Queue()
- self._cluster_queue = queue.Queue()
+ self._tap_queue = ErrorQueue()
+ self._cluster_queue = ErrorQueue()
  self._io_queue = queue.PriorityQueue()
  self._io_queue_done = queue.Queue()
- self._combining_queue = queue.Queue()
+ self._combining_queue = ErrorQueue()
 
  # --------------------------- starting threads --------------------------- #
 
- tap_threads = [threading.Thread(target=self._tap_thread, daemon=True, name=f"TAPThread{_}")
- for _ in range(max_nTAPjobs)]
- cluster_threads = [threading.Thread(target=self._cluster_thread, daemon=True, name=f"ClusterThread{_}")
- for _ in range(max_nTAPjobs)]
+ tap_threads = [
+ ExceptionSafeThread(error_queue=self._tap_queue, target=self._tap_thread, daemon=True, name=f"TAPThread{_}")
+ for _ in range(max_nTAPjobs)
+ ]
+ cluster_threads = [
+ ExceptionSafeThread(error_queue=self._cluster_queue, target=self._cluster_thread, daemon=True, name=f"ClusterThread{_}")
+ for _ in range(max_nTAPjobs)
+ ]
  io_thread = threading.Thread(target=self._io_thread, daemon=True, name="IOThread")
- combining_thread = threading.Thread(target=self._combining_thread, daemon=True, name="CombiningThread")
+ combining_thread = ExceptionSafeThread(error_queue=self._combining_queue, target=self._combining_thread, daemon=True, name="CombiningThread")
  status_thread = threading.Thread(target=self._status_thread, daemon=True, name='StatusThread')
 
  for t in tap_threads + cluster_threads + [io_thread, combining_thread]:
@@ -349,13 +359,13 @@
  )
  def _wait_for_job(self, t, i):
  logger.info(f"Waiting on {i}th query of {t} ........")
- _job = self.tap_jobs[t][i]
+ _job = StableAsyncTAPJob(url=self.tap_jobs[t][i])
  _job.wait()
  logger.info(f'{i}th query of {t}: Done!')
 
  def _get_results_from_job(self, t, i):
  logger.debug(f"getting results for {i}th query of {t} .........")
- _job = self.tap_jobs[t][i]
+ _job = StableAsyncTAPJob(url=self.tap_jobs[t][i])
  lightcurve = _job.fetch_result().to_table().to_pandas()
  fn = self._chunk_photometry_cache_filename(t, i)
  table_nice_name = self.get_db_name(t, nice=True)
@@ -431,8 +441,8 @@
  # -------------- get results of TAP job for chunk i-1 ------------- #
  if i > 0:
  t_before = tables[i - 1]
-
- if self.tap_jobs[t_before][chunk].phase == "COMPLETED":
+ phase = StableAsyncTAPJob(url=self.tap_jobs[t_before][chunk]).phase
+ if phase == "COMPLETED":
  result_method = "_get_results_from_job"
  result_args = [t_before, chunk]
  self._io_queue.put((2, result_method, result_args))
@@ -441,7 +451,7 @@
  else:
  logger.warning(
  f"No completion for {chunk}th query of {t_before}! "
- f"Phase is {self.tap_jobs[t_before][chunk].phase}!"
+ f"Phase is {phase}!"
  )
  submit_to_cluster = False
 
@@ -466,19 +476,16 @@
  gc.collect()
 
  def _move_file_to_storage(self, filename):
- dst_fn = filename.replace(data_dir, self._storage_dir)
-
- dst_dir = os.path.dirname(dst_fn)
- if not os.path.isdir(dst_dir):
- logger.debug(f"making directory {dst_dir}")
- os.makedirs(dst_dir)
+ data_dir = str(get_directories()['data_dir'])
+ dst_fn = Path(str(filename).replace(str(data_dir), str(self._storage_dir)))
+ dst_fn.parent.mkdir(parents=True, exist_ok=True)
 
  logger.debug(f"copy {filename} to {dst_fn}")
 
  try:
  shutil.copy2(filename, dst_fn)
 
- if os.path.getsize(filename) == os.path.getsize(dst_fn):
+ if Path(filename).stat().st_size == dst_fn.stat().st_size:
  logger.debug(f"copy successful, removing {filename}")
  os.remove(filename)
  else:
@@ -509,8 +516,8 @@
  self.wait_for_job(job_id)
  logger.debug(f'cluster done for chunk {chunk} (Cluster job {job_id}).')
 
- log_files = glob.glob(f"./{job_id}_*")
- log_files_abs = [os.path.abspath(p) for p in log_files]
+ log_files = Path("./").glob(f"{job_id}_*")
+ log_files_abs = [p.absolute() for p in log_files]
  logger.debug(f"moving {len(log_files_abs)} log files to {self.cluster_log_dir}")
  for f in log_files_abs:
  shutil.move(f, self.cluster_log_dir)
@@ -708,9 +715,9 @@
  """
  Clears the directory where cluster logs are stored
  """
- fns = os.listdir(self.cluster_log_dir)
+ fns = self.cluster_log_dir.glob("*")
  for fn in fns:
- os.remove(os.path.join(self.cluster_log_dir, fn))
+ (self.cluster_log_dir / fn).unlink()
 
  def make_executable_file(self):
  """
@@ -729,8 +736,8 @@
  f'--mask_by_position $2'
  )
 
- logger.debug("writing executable to " + self.executable_filename)
- with open(self.executable_filename, "w") as f:
+ logger.debug("writing executable to " + str(self.executable_filename))
+ with self.executable_filename.open("w") as f:
  f.write(txt)
 
  def get_submit_file_filename(self, ids):
@@ -744,7 +751,7 @@
  """
  ids = np.atleast_1d(ids)
  ids_string = f"{min(ids)}-{max(ids)}"
- return os.path.join(self.cluster_dir, f"ids{ids_string}.submit")
+ return self.cluster_dir / f"ids{ids_string}.submit"
 
  def make_submit_file(
  self,
@@ -764,6 +771,9 @@
  """
 
  q = "1 job_id in " + ", ".join(np.atleast_1d(job_ids).astype(str))
+ d = get_directories()
+ data_dir = str(d['data_dir'])
+ bigdata_dir = str(d['bigdata_dir'])
 
  text = (
  f"executable = {self.executable_filename} \n"
@@ -939,7 +949,7 @@
  _lc = lc if plot_binned else None
 
  if not fn:
- fn = os.path.join(self.plots_dir, f"{parent_sample_idx}_{lum_key}.pdf")
+ fn = self.plots_dir / f"{parent_sample_idx}_{lum_key}.pdf"
 
  return self._plot_lc(lightcurve=_lc, unbinned_lc=unbinned_lc, interactive=interactive, fn=fn, ax=ax,
  save=save, lum_key=lum_key, **kwargs)
@@ -1173,10 +1183,8 @@
  chunk_str = "chunks_" + "_".join([str(c) for c in chunks]) \
  if len(chunks) != self.n_chunks \
  else "all_chunks"
- fn = os.path.join(self.plots_dir, f"chi2_plots", lum_key, f"{n}_datapoints_{kind}_{chunk_str}.pdf")
- d = os.path.dirname(fn)
- if not os.path.isdir(d):
- os.makedirs(d)
+ fn = self.plots_dir / f"chi2_plots" / lum_key / f"{n}_datapoints_{kind}_{chunk_str}.pdf"
+ fn.parent.mkdir(parents=True, exist_ok=True)
  logger.debug(f"saving under {fn}")
  fig.savefig(fn)
 
@@ -1337,10 +1345,8 @@
  chunk_str = "chunks_" + "_".join([str(c) for c in chunks]) \
  if len(chunks) != self.n_chunks \
  else "all_chunks"
- fn = os.path.join(self.plots_dir, f"coverage_plots", lum_key, f"{chunk_str}.pdf")
- d = os.path.dirname(fn)
- if not os.path.isdir(d):
- os.makedirs(d)
+ fn = self.plots_dir / f"coverage_plots" / lum_key / f"{chunk_str}.pdf"
+ fn.parent.mkdir(parents=True, exist_ok=True)
  logger.debug(f"saving under {fn}")
  fig.savefig(fn)
 
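Besides the pathlib and `ErrorQueue` wiring, note the `storage_directory=bigdata_dir` → `storage_directory=None` change above: a module-level value used as a default argument is evaluated once at import, freezing whatever `TIMEWISE_BIGDATA` happened to be at that moment. A generic sketch of the call-time resolution that replaces it (standalone and illustrative, not the real method):

```python
import os
from pathlib import Path

def resolve_storage_dir(storage_directory=None):
    # None means "decide now", reading the environment at call time
    if storage_directory is None:
        env = os.environ.get("TIMEWISE_BIGDATA")
        return Path(env).expanduser() if env else None
    return Path(storage_directory)

os.environ["TIMEWISE_BIGDATA"] = "/data/big"
print(resolve_storage_dir())           # /data/big, read at call time
print(resolve_storage_dir("/other"))   # an explicit argument always wins
```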
timewise/wise_data_base.py CHANGED
@@ -1,4 +1,6 @@
  import abc
+ import sys
+
  import backoff
  import copy
  import json
@@ -11,6 +13,7 @@ import subprocess
  import threading
  import time
  import tqdm
+ from pathlib import Path
 
  import astropy.units as u
  import matplotlib.pyplot as plt
@@ -25,8 +28,8 @@ from astropy.table import Table
  from astropy.coordinates.angle_utilities import angular_separation, position_angle
  from sklearn.cluster import HDBSCAN
 
- from timewise.general import cache_dir, plots_dir, output_dir, logger_format, backoff_hndlr
- from timewise.utils import StableTAPService
+ from timewise.general import get_directories, logger_format, backoff_hndlr
+ from timewise.utils import StableAsyncTAPJob, StableTAPService
 
  logger = logging.getLogger(__name__)
 
@@ -53,20 +56,20 @@ class WISEDataBase(abc.ABC):
  :param parent_sample_default_entries: default entries for the parent sample
  :type parent_sample_default_entries: dict
  :param cache_dir: directory for cached data
- :type cache_dir: str
+ :type cache_dir: Path
  :param cluster_dir: directory for cluster data
  :param cluster_log_dir: directory for cluster logs
- :type cluster_dir: str
+ :type cluster_dir: Path
  :param output_dir: directory for output data
- :type output_dir: str
+ :type output_dir: Path
  :param lightcurve_dir: directory for lightcurve data
- :type lightcurve_dir: str
+ :type lightcurve_dir: Path
  :param plots_dir: directory for plots
- :type plots_dir: str
+ :type plots_dir: Path
  :param submit_file: file for cluster submission
- :type submit_file: str
- :param tap_jobs: TAP jobs
- :type tap_jobs: list[pyvo.dal.tap.TAPJob]
+ :type submit_file: Path
+ :param tap_jobs: TAP job URLs
+ :type tap_jobs: list[str]
  :param queue: queue for cluster jobs
  :type queue: multiprocessing.Queue
  :param clear_unbinned_photometry_when_binning: whether to clear unbinned photometry when binning
@@ -191,9 +194,11 @@
  'W2': 0.280
  }
 
- _this_dir = os.path.abspath(os.path.dirname(__file__))
- magnitude_zeropoints_corrections = ascii.read(f'{_this_dir}/wise_flux_conversion_correction.dat',
- delimiter='\t').to_pandas()
+ _this_dir = Path(__file__).absolute().parent
+ magnitude_zeropoints_corrections = ascii.read(
+ _this_dir / 'wise_flux_conversion_correction.dat',
+ delimiter='\t'
+ ).to_pandas()
 
  band_wavelengths = {
  'W1': 3.368 * 1e-6 * u.m,
@@ -213,7 +218,7 @@
  parent_sample_wise_skysep_key = 'sep_to_WISE_source'
 
  def __init__(self,
- base_name,
+ base_name: str,
  parent_sample_class,
  min_sep_arcsec,
  n_chunks):
@@ -228,6 +233,8 @@
  :type min_sep_arcsec: float
  :param n_chunks: number of chunks in declination
  :type n_chunks: int
+ :param tap_url_cache_name: TAP job URLs are stored here to be able to resume them
+ :type tap_url_cache_name: str
  """
 
  #######################################################################################
@@ -253,26 +260,26 @@
  # --------------------------- ^^^^ set up parent sample ^^^^ --------------------------- #
 
  # set up directories
- self.cache_dir = os.path.join(cache_dir, base_name)
- self._cache_photometry_dir = os.path.join(self.cache_dir, "photometry")
- self.cluster_dir = os.path.join(self.cache_dir, 'cluster')
- self.cluster_log_dir = os.path.join(self.cluster_dir, 'logs')
- self.output_dir = os.path.join(output_dir, base_name)
- self.lightcurve_dir = os.path.join(self.output_dir, "lightcurves")
- self.plots_dir = os.path.join(plots_dir, base_name)
+ directories = get_directories()  # type: dict[str, Path]
+ self.cache_dir = directories['cache_dir'] / base_name
+ self._cache_photometry_dir = self.cache_dir / "photometry"
+ self.cluster_dir = self.cache_dir / 'cluster'
+ self.cluster_log_dir = self.cluster_dir / 'logs'
+ self.output_dir = directories["output_dir"] / base_name
+ self.lightcurve_dir = self.output_dir / "lightcurves"
+ self.plots_dir = directories["plots_dir"] / base_name
+ self.tap_jobs_cache_dir = self.cache_dir / 'tap_cache'
 
  for d in [self.cache_dir, self._cache_photometry_dir, self.cluster_dir, self.cluster_log_dir,
  self.output_dir, self.lightcurve_dir, self.plots_dir]:
- if not os.path.isdir(d):
- logger.debug(f"making directory {d}")
- os.makedirs(d)
+ d.mkdir(parents=True, exist_ok=True)
 
- file_handler = logging.FileHandler(filename=self.cache_dir + '/log.err', mode="a")
+ file_handler = logging.FileHandler(filename=self.cache_dir / 'log.err', mode="a")
  file_handler.setLevel("WARNING")
  file_handler.setFormatter(logger_format)
  logger.addHandler(file_handler)
 
- self.submit_file = os.path.join(self.cluster_dir, 'submit.txt')
+ self.submit_file = self.cluster_dir / 'submit.txt'
 
  # set up result attributes
  self._split_chunk_key = '__chunk'
@@ -497,7 +504,7 @@
  if err_msg:
  logger.error(err_msg.decode())
  process.terminate()
- if os.path.isfile(out_file):
+ if Path(out_file).is_file():
  return 1
  else:
  return 0
@@ -566,8 +573,8 @@
 
  dec_intervall_mask = self.chunk_map == chunk_number
  logger.debug(f"Any selected: {np.any(dec_intervall_mask)}")
- _parent_sample_declination_band_file = os.path.join(self.cache_dir, f"parent_sample_chunk{chunk_number}.xml")
- _output_file = os.path.join(self.cache_dir, f"parent_sample_chunk{chunk_number}.tbl")
+ _parent_sample_declination_band_file = self.cache_dir / f"parent_sample_chunk{chunk_number}.xml"
+ _output_file = self.cache_dir / f"parent_sample_chunk{chunk_number}.tbl"
 
  additional_keys = (
  "," + ",".join(additional_columns)
@@ -637,7 +644,7 @@
  # START GET PHOTOMETRY DATA #
  ###################################
 
- def get_photometric_data(self, tables=None, perc=1, wait=0, service=None, nthreads=100,
+ def get_photometric_data(self, tables=None, perc=1, service=None, nthreads=100,
  chunks=None, overwrite=True, remove_chunks=False, query_type='positional',
  skip_download=False, mask_by_position=False):
  """
@@ -645,6 +652,11 @@
 
  </path/to/timewise/data/dir>/output/<base_name>/lightcurves/binned_lightcurves_<service>.json
 
+ If service is 'tap' then the process exists on the first call to give the jobs running on the IRSA
+ servers some time. The job infos are cached and loaded on the next function call. `timewise` will
+ then wait on the jobs to finish. If the process is terminated via the keyboard during the waiting
+ the TAP connections will also be cached to be resumed at a later time.
+
  :param remove_chunks: remove single chunk files after binning
  :type remove_chunks: bools
  :param overwrite: overwrite already existing lightcurves and metadata
@@ -657,8 +669,6 @@
  :type nthreads: int
  :param service: either of 'gator' or 'tap', selects base on elements per chunk by default
  :type service: str
- :param wait: time in hours to wait after submitting TAP jobs
- :type wait: float
  :param chunks: containing indices of chunks to download
  :type chunks: list-like
  :param query_type: 'positional': query photometry based on distance from object, 'by_allwise_id': select all photometry points within a radius of 50 arcsec with the corresponding AllWISE ID
@@ -667,6 +677,8 @@
  :type skip_download: bool
  :param mask_by_position: if `True` mask single exposures that are too far away from the bulk
  :type mask_by_position: bool
+ :return: The status of the processing
+ :rtype: bool
  """
 
  mag = True
@@ -703,32 +715,40 @@
  f"from {tables}")
 
  if service == 'tap':
- self._query_for_photometry(tables, chunks, wait, mag, flux, nthreads, query_type)
+ done = self._query_for_photometry(tables, chunks, mag, flux, nthreads, query_type)
+ if not done:
+ logger.info("Some TAP jobs still running. Exit here and resume later.")
+ return False
 
  elif service == 'gator':
  self._query_for_photometry_gator(tables, chunks, mag, flux, nthreads)
+ else:
+ raise ValueError(f"Unknown service {service}! Choose one of 'tap' or 'gator'")
 
  else:
  logger.info("skipping download, assume data is already downloaded.")
 
+ logger.info("Download done, processing lightcurves")
  self._select_individual_lightcurves_and_bin(service=service, chunks=chunks, mask_by_position=mask_by_position)
  for c in chunks:
  self.calculate_metadata(service=service, chunk_number=c, overwrite=True)
 
  self._combine_data_products(service=service, remove=remove_chunks, overwrite=overwrite)
 
+ return True
+
  def _data_product_filename(self, service, chunk_number=None, jobID=None):
 
  n = "timewise_data_product_"
 
  if (chunk_number is None) and (jobID is None):
- return os.path.join(self.lightcurve_dir, f"{n}{service}.json")
+ return self.lightcurve_dir / f"{n}{service}.json"
  else:
  fn = f"{n}{service}{self._split_chunk_key}{chunk_number}"
  if (chunk_number is not None) and (jobID is None):
- return os.path.join(self._cache_photometry_dir, fn + ".json")
+ return self._cache_photometry_dir / (fn + ".json")
  else:
- return os.path.join(self._cache_photometry_dir, fn + f"_{jobID}.json")
+ return self._cache_photometry_dir / (fn + f"_{jobID}.json")
 
  @staticmethod
  def _verify_contains_lightcurves(data_product):
@@ -880,7 +900,7 @@
  _ending = '.xml' if gator_input else'.tbl'
  fn = f"{self._cached_raw_photometry_prefix}_{table_name}{_additional_neowise_query}{_gator_input}" \
  f"{self._split_chunk_key}{chunk_number}{_ending}"
- return os.path.join(self._cache_photometry_dir, fn)
+ return self._cache_photometry_dir / fn
 
  def _thread_query_photometry_gator(self, chunk_number, table_name, mag, flux):
  _infile = self._gator_chunk_photometry_cache_filename(table_name, chunk_number, gator_input=True)
@@ -943,9 +963,11 @@
  for t in threads:
  t.join()
 
+ return True
+
  def _get_unbinned_lightcurves_gator(self, chunk_number, clear=False):
  # load only the files for this chunk
- fns = [os.path.join(self._cache_photometry_dir, fn)
+ fns = [self._cache_photometry_dir / fn
  for fn in os.listdir(self._cache_photometry_dir)
  if (fn.startswith(self._cached_raw_photometry_prefix) and
  fn.endswith(f"{self._split_chunk_key}{chunk_number}.tbl"))
@@ -957,12 +979,12 @@
  for fn in fns:
  data_table = Table.read(fn, format='ipac').to_pandas()
 
- t = 'allwise_p3as_mep' if 'allwise' in fn else 'neowiser_p1bs_psd'
+ t = 'allwise_p3as_mep' if 'allwise' in str(fn) else 'neowiser_p1bs_psd'
  nice_name = self.get_db_name(t, nice=True)
  cols = {'index_01': self._tap_orig_id_key}
  cols.update(self.photometry_table_keymap[nice_name]['mag'])
  cols.update(self.photometry_table_keymap[nice_name]['flux'])
- if 'allwise' in fn:
+ if 'allwise' in str(fn):
  cols['cntr_mf'] = 'allwise_cntr'
 
  data_table = data_table.rename(columns=cols)
@@ -982,6 +1004,62 @@
  # START using TAP to get photometry #
  # ---------------------------------------- #
 
+ @property
+ def tap_cache_filenames(self):
+ return (
+ self.tap_jobs_cache_dir / f"tap_jobs.json",
+ self.tap_jobs_cache_dir / f"queue.json"
+ )
+
+ def dump_tap_cache(self):
+ self.tap_jobs_cache_dir.mkdir(parents=True, exist_ok=True)
+
+ tap_jobs_fn, queue_fn = self.tap_cache_filenames
+ logger.debug(f"saving TAP jobs to {tap_jobs_fn}")
+ tap_jobs_fn.parent.mkdir(parents=True, exist_ok=True)
+ with tap_jobs_fn.open("w") as f:
+ json.dump(self.tap_jobs, f, indent=4)
+
+ queue_fn.parent.mkdir(parents=True, exist_ok=True)
+ logger.debug(f"saving queue to {queue_fn}")
+ with queue_fn.open("w") as f:
+ json.dump(list(self.queue.queue), f, indent=4)
+
+ def load_tap_cache(self):
+ tap_jobs_fn, queue_fn = self.tap_cache_filenames
+
+ logger.debug(f"loading TAP jobs from {tap_jobs_fn}")
+ if tap_jobs_fn.is_file():
+ with tap_jobs_fn.open("r") as f:
+ tap_jobs_json = json.load(f)
+ # JSON keys are always strings while we need the chunk numbers
+ # to be integers in the dictionary
+ self.tap_jobs = {
+ t: {int(i): url for i, url in v.items()} for t, v in tap_jobs_json.items()
+ }
+ logger.debug(f"removing {tap_jobs_fn}")
+ tap_jobs_fn.unlink()
+ else:
+ logger.warning(f"No file {tap_jobs_fn}")
+ self.tap_jobs = None
+
+ logger.debug(f"loading queue from {queue_fn}")
+ if queue_fn.is_file():
+ with queue_fn.open("r") as f:
+ ql = json.load(f)
+ logger.debug(f"loaded {len(ql)} queue elements")
+ self.queue = queue.Queue()
+ for q in ql:
+ self.queue.put(q)
+ logger.debug(f"removing {queue_fn}")
+ queue_fn.unlink()
+ else:
+ logger.warning(f"No file {queue_fn}")
+ self.queue = None
+
+ cache_exists = (self.tap_jobs is not None) and (self.queue is not None)
+ return cache_exists
+
  def _get_photometry_query_string(self, table_name, mag, flux, query_type):
  """
  Construct a query string to submit to IRSA
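The cache methods above work because `tap_jobs` now maps table names to `{chunk_number: job_url}` rather than to live pyvo job objects: plain URLs survive a JSON round trip, and only the chunk keys need converting back to `int` (JSON object keys are always strings). A standalone sketch of that round trip (the URLs are placeholders):

```python
import json

# tap_jobs maps table name -> {chunk number: job URL}
tap_jobs = {
    "neowiser_p1bs_psd": {0: "https://irsa.example/TAP/async/1",
                          1: "https://irsa.example/TAP/async/2"},
}

dumped = json.dumps(tap_jobs, indent=4)

# JSON keys are always strings, so restore the integer chunk numbers on load
loaded = {t: {int(i): url for i, url in v.items()}
          for t, v in json.loads(dumped).items()}

assert loaded == tap_jobs
```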
@@ -1069,17 +1147,26 @@
  try:
  job = self.service.submit_job(qstring, uploads={'ids': Table(tab_d)})
  job.run()
+ logger.debug(job.url)
+ time.sleep(5)  # wait a bit until checking phase
 
  if isinstance(job.phase, type(None)):
- raise vo.dal.DALServiceError(f"Job submission failed. No phase!")
+ raise vo.dal.DALServiceError(
+ f"Job submission failed. No phase!"
+ f"response: {job.submit_response}"
+ )
 
  logger.info(f'submitted job for {t} for chunk {i}: ')
  logger.debug(f'Job: {job.url}; {job.phase}')
- self.tap_jobs[t][i] = job
+ self.tap_jobs[t][i] = job.url
  self.queue.put((t, i))
  break
 
- except (requests.exceptions.ConnectionError, vo.dal.exceptions.DALServiceError) as e:
+ except (
+ requests.exceptions.ConnectionError,
+ vo.dal.exceptions.DALServiceError,
+ requests.HTTPError
+ ) as e:
  wait = 60
  N_tries -= 1
  logger.warning(f"{chunk_number}th query of {table_name}: Could not submit TAP job!\n"
@@ -1091,7 +1178,7 @@
  _additional_neowise_query = '_neowise_gator' if additional_neowise_query else ''
  fn = f"{self._cached_raw_photometry_prefix}_{table_name}{_additional_neowise_query}" \
  f"{self._split_chunk_key}{chunk_number}.csv"
- return os.path.join(self._cache_photometry_dir, fn)
+ return self._cache_photometry_dir / fn
 
  @staticmethod
  def _give_up_tap(e):
@@ -1107,7 +1194,7 @@
  def _thread_wait_and_get_results(self, t, i):
  logger.info(f"Waiting on {i}th query of {t} ........")
 
- _job = self.tap_jobs[t][i]
+ _job = StableAsyncTAPJob(url=self.tap_jobs[t][i])
  _job.wait()
  logger.info(f'{i}th query of {t}: Done!')
 
@@ -1136,7 +1223,7 @@
  logger.debug(f"No more queue. exiting")
  break
 
- job = self.tap_jobs[t][i]
+ job = StableAsyncTAPJob(url=self.tap_jobs[t][i])
 
  _ntries = 10
  while True:
@@ -1179,38 +1266,50 @@
 
  try:
  self.queue.join()
+ logger.info('all tap_jobs done!')
  except KeyboardInterrupt:
- pass
+ self.dump_tap_cache()
+ return False
+ finally:
+ for i, t in enumerate(threads):
+ logger.debug(f"{i}th thread alive: {t.is_alive()}")
+ for t in threads:
+ t.join()
+ self.tap_jobs = None
+ del threads
 
- logger.info('all tap_jobs done!')
- for i, t in enumerate(threads):
- logger.debug(f"{i}th thread alive: {t.is_alive()}")
+ return True
 
- for t in threads:
- t.join()
- self.tap_jobs = None
- del threads
+ def _query_for_photometry(self, tables, chunks, mag, flux, nthreads, query_type):
+ # ----------------------------------------------------------------------
+ # Load TAP cache if it exists
+ # ----------------------------------------------------------------------
+ cache_exists = self.load_tap_cache()
 
- def _query_for_photometry(self, tables, chunks, wait, mag, flux, nthreads, query_type):
  # ----------------------------------------------------------------------
  # Do the query
  # ----------------------------------------------------------------------
- self.tap_jobs = dict()
- self.queue = queue.Queue()
- tables = np.atleast_1d(tables)
-
- for t in tables:
- self.tap_jobs[t] = dict()
- for i in chunks:
- self._submit_job_to_TAP(i, t, mag, flux, query_type)
- time.sleep(5)
+ if not cache_exists:
+ self.tap_jobs = dict()
+ self.queue = queue.Queue() if self.queue is None else self.queue
+ tables = np.atleast_1d(tables)
+
+ for t in tables:
+ self.tap_jobs[t] = dict()
+ for i in chunks:
+ self._submit_job_to_TAP(i, t, mag, flux, query_type)
+ time.sleep(5)
+
+ logger.info(f'added {self.queue.qsize()} tasks to queue')
+ self.dump_tap_cache()
+ logger.info(f"wait some time to give tap_jobs some time")
+ return False
 
- logger.info(f'added {self.queue.qsize()} tasks to queue')
- logger.info(f"wait for {wait} hours to give tap_jobs some time")
- time.sleep(wait * 3600)
+ logger.info(f'starting worker threads to retrieve results, {self.queue.qsize()} tasks in queue')
  nthreads = min(len(tables) * len(chunks), nthreads)
- self._run_tap_worker_threads(nthreads)
+ success = self._run_tap_worker_threads(nthreads)
  self.queue = None
+ return success
 
  # ----------------------------------------------------------------------
  # select individual lightcurves and bin
@@ -1263,7 +1362,7 @@
  :type clear: bool, optional
  """
  # load only the files for this chunk
- fns = [os.path.join(self._cache_photometry_dir, fn)
+ fns = [self._cache_photometry_dir / fn
  for fn in os.listdir(self._cache_photometry_dir)
  if (fn.startswith(self._cached_raw_photometry_prefix) and fn.endswith(
  f"{self._split_chunk_key}{chunk_number}.csv"
@@ -1715,9 +1814,9 @@
  """
 
  logger.info(f"getting position masks for {service}, chunk {chunk_number}")
- fn = os.path.join(self.cache_dir, "position_masks", f"{service}_chunk{chunk_number}.json")
+ fn = self.cache_dir / "position_masks" / f"{service}_chunk{chunk_number}.json"
 
- if not os.path.isfile(fn):
+ if not fn.is_file():
  logger.debug(f"No file {fn}. Calculating position masks.")
 
  if service == "tap":
@@ -1746,10 +1845,7 @@
  if len(bad_indices) > 0:
  position_masks[str(i)] = bad_indices
 
- d = os.path.dirname(fn)
- if not os.path.isdir(d):
- os.makedirs(d, exist_ok=True)
-
+ fn.parent.mkdir(exist_ok=True, parents=True)
  with open(fn, "w") as f:
  json.dump(position_masks, f)
 
@@ -1820,7 +1916,7 @@
  _lc = lc if plot_binned else None
 
  if not fn:
- fn = os.path.join(self.plots_dir, f"{parent_sample_idx}_{lum_key}.pdf")
+ fn = self.plots_dir / f"{parent_sample_idx}_{lum_key}.pdf"
 
  return self._plot_lc(lightcurve=_lc, unbinned_lc=unbinned_lc, interactive=interactive, fn=fn, ax=ax,
  save=save, lum_key=lum_key, **kwargs)
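Taken together, `get_photometric_data(service='tap')` is now resumable: the first call submits the jobs, dumps their URLs to `<cache_dir>/tap_cache/`, and returns `False`; a later call reloads the cache, waits on the jobs, and returns `True` (a `KeyboardInterrupt` while waiting re-dumps the cache). A hedged driver sketch built only on that documented return value (`wise_data` stands for any concrete `WISEDataBase` instance; the polling interval is arbitrary):

```python
import time

def download_until_done(wise_data, **kwargs):
    # First call: submits TAP jobs, caches their URLs, returns False.
    # Subsequent calls: reload the cache, wait on the jobs, and return True
    # once everything is downloaded, binned, and combined.
    while not wise_data.get_photometric_data(service="tap", **kwargs):
        time.sleep(600)  # give the IRSA servers some time before resuming
```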
timewise/wise_data_by_visit.py CHANGED
@@ -662,7 +662,7 @@ class WiseDataByVisit(WISEDataBase):
 
  if save:
  if fn is None:
- fn = os.path.join(self.plots_dir, f"{ind}_binning_diag_{which}cutout.pdf")
+ fn = self.plots_dir / f"{ind}_binning_diag_{which}cutout.pdf"
  logger.debug(f"saving under {fn}")
  fig.savefig(fn)
 
timewise-0.5.3.dist-info/METADATA CHANGED
@@ -1,37 +1,41 @@
  Metadata-Version: 2.3
  Name: timewise
- Version: 0.4.12
+ Version: 0.5.3
  Summary: A small package to download infrared data from the WISE satellite
- Home-page: https://github.com/JannisNe/timewise
  License: MIT
  Author: Jannis Necker
  Author-email: jannis.necker@gmail.com
- Requires-Python: >=3.8,<3.12
+ Requires-Python: >=3.9,<3.12
  Classifier: License :: OSI Approved :: MIT License
  Classifier: Programming Language :: Python :: 3
- Classifier: Programming Language :: Python :: 3.8
  Classifier: Programming Language :: Python :: 3.9
  Classifier: Programming Language :: Python :: 3.10
  Classifier: Programming Language :: Python :: 3.11
- Requires-Dist: astropy (>=5.1,<6.0)
+ Provides-Extra: dev
+ Provides-Extra: docs
+ Requires-Dist: astropy (>=5.1,<6.0.0)
  Requires-Dist: backoff (>=2.1.2,<3.0.0)
+ Requires-Dist: coveralls (>=3.3.1,<4.0.0) ; extra == "dev"
  Requires-Dist: jupyter[jupyter] (>=1.0.0,<2.0.0)
  Requires-Dist: jupyterlab[jupyter] (>=4.0.6,<5.0.0)
  Requires-Dist: matplotlib (>=3.5.3,<4.0.0)
+ Requires-Dist: myst-parser (>=1,<3) ; extra == "docs"
  Requires-Dist: numpy (>=1.23.2,<2.0.0)
  Requires-Dist: pandas (>=1.4.3,<3.0.0)
  Requires-Dist: pydantic (>=1.9.0,<2.0.0)
- Requires-Dist: pyvo (>=1.4.1,<2.0.0)
+ Requires-Dist: pytest (>=7.2.2,<8.0.0) ; extra == "dev"
+ Requires-Dist: pyvo (>=1.7.0,<2.0.0)
  Requires-Dist: requests (>=2.28.1,<3.0.0)
- Requires-Dist: scikit-image (>=0.19.3,<0.21.0)
+ Requires-Dist: scikit-image (>=0.19.3,<0.22.0)
  Requires-Dist: scikit-learn (>=1.3.0,<2.0.0)
- Requires-Dist: seaborn (>=0.11.2,<0.13.0)
+ Requires-Dist: seaborn (>=0.11.2,<0.14.0)
+ Requires-Dist: sphinx-rtd-theme (>=1.3.0,<2.0.0) ; extra == "docs"
  Requires-Dist: tqdm (>=4.64.0,<5.0.0)
  Requires-Dist: urllib3 (==1.26.15)
  Requires-Dist: virtualenv (>=20.16.3,<21.0.0)
  Project-URL: Bug Tracker, https://github.com/JannisNe/timewise/issues
  Project-URL: Documentation, https://timewise.readthedocs.io/en/latest
- Project-URL: Repository, https://github.com/JannisNe/timewise
+ Project-URL: Homepage, https://github.com/JannisNe/timewise
  Description-Content-Type: text/markdown
 
  [![CI](https://github.com/JannisNe/timewise/actions/workflows/continous_integration.yml/badge.svg)](https://github.com/JannisNe/timewise/actions/workflows/continous_integration.yml)
timewise-0.5.3.dist-info/RECORD ADDED
@@ -0,0 +1,17 @@
+ timewise/__init__.py,sha256=mm7QFyMaZmalMXJJy28ljsyL7FJkxWyi5WXncAfmQ2U,203
+ timewise/big_parent_sample.py,sha256=fB3JR7lGa2cKiJRYxafvu4_SaURB7cvu28ZWxyXBSVs,3443
+ timewise/cli.py,sha256=LRBR3cOd_qBEpGkyP7tbJBCKvO0XxRQA8BgoMZfw08k,533
+ timewise/config_loader.py,sha256=wvOV4zFXNQBeWU9Qlf1dazhPtz12uEUhNY5ztUeNe6U,5690
+ timewise/general.py,sha256=rGpECBpSjD8aha1xFVAtrsU1JGO_1joibOfvZF-aAbk,1640
+ timewise/parent_sample_base.py,sha256=AbM4WQnQuqxgovqagNJVmVoIxS3HCEMKpc87J04DNsc,3400
+ timewise/point_source_utils.py,sha256=4dmxfujrrNxDLkh2rVziSR-NNaHzrKFa8xgx_Lj-ZNE,2171
+ timewise/utils.py,sha256=DkhsWbwbkHI7DmQQk1jGJY1i0G3Zhl1Qfmiagb8Tfik,18509
+ timewise/wise_bigdata_desy_cluster.py,sha256=fuTaXvYkoNqdh8OctNcVCqmzhfzJAajCfRiJh3RSX8w,56623
+ timewise/wise_data_base.py,sha256=Zq9NzxZKbMzL6QelG2rzkcr-7cEHLnK7n14TY_3Cs-o,86476
+ timewise/wise_data_by_visit.py,sha256=hNV3kZd9Lh062ohOT2vNXy9nKYSBIrlbbi3dwDDTaKA,29116
+ timewise/wise_flux_conversion_correction.dat,sha256=XLnYqk0g1NVthVSNGsKlqinzKI7QUwJidO-qg4tHXKU,1095
+ timewise-0.5.3.dist-info/LICENSE,sha256=sVoNJWiTlH-NarJx0wdsob468Pg3JE6vIIgll4lCa3E,1070
+ timewise-0.5.3.dist-info/METADATA,sha256=Mb5kuRHh8D3dgjlWRHxmgz40zgeUnhv9tnSk1Vnvgdg,2674
+ timewise-0.5.3.dist-info/WHEEL,sha256=b4K_helf-jlQoXBBETfwnf4B04YC67LOev0jo4fX5m8,88
+ timewise-0.5.3.dist-info/entry_points.txt,sha256=yIWgzM0CZCJBrSR9-zbQW9d8MpFae0KlRVnZXF8rZow,54
+ timewise-0.5.3.dist-info/RECORD,,
timewise-0.5.3.dist-info/WHEEL CHANGED
@@ -1,4 +1,4 @@
  Wheel-Version: 1.0
- Generator: poetry-core 2.0.0
+ Generator: poetry-core 2.1.3
  Root-Is-Purelib: true
  Tag: py3-none-any
timewise-0.4.12.dist-info/RECORD REMOVED
@@ -1,17 +0,0 @@
- timewise/__init__.py,sha256=gnpyn_jjCr_5brbB3Xi6G1eDotPNrlP6PINA-UAHGnM,204
- timewise/big_parent_sample.py,sha256=B7w7TMJx2mohsVfY0RiBHQU9N6mn_MDqrB7Ica0aafU,3480
- timewise/cli.py,sha256=LRBR3cOd_qBEpGkyP7tbJBCKvO0XxRQA8BgoMZfw08k,533
- timewise/config_loader.py,sha256=gqu881fSoBIzK_-KSVSCDV1pLPuD_g0AHbWovKFtWEA,5675
- timewise/general.py,sha256=cadgPIACEWpFsa4yncFb-nboGgXmJ1TicjPQalB_SAU,1472
- timewise/parent_sample_base.py,sha256=fHRBS791w51JCEwkfrCMyzXSyyc94NNSu-J9AfRFf2E,3298
- timewise/point_source_utils.py,sha256=4dmxfujrrNxDLkh2rVziSR-NNaHzrKFa8xgx_Lj-ZNE,2171
- timewise/utils.py,sha256=-tS5jrRnMYa7tosWvs_Jitjv8qPfi9VZE-o0-DMHKEQ,15144
- timewise/wise_bigdata_desy_cluster.py,sha256=8Zd1jCgVpR8nefGrmHxFtyBasP2oaOZ04NXUuEikyhI,56173
- timewise/wise_data_base.py,sha256=PrOhI7RtgVgdAaZKzGcmiFAIPhwLjg2aT_y9opG8DhQ,82744
- timewise/wise_data_by_visit.py,sha256=5fR5qaDz_liWJaBwnDhsEx--yoyh3oxPKKpXGpEsXmk,29129
- timewise/wise_flux_conversion_correction.dat,sha256=XLnYqk0g1NVthVSNGsKlqinzKI7QUwJidO-qg4tHXKU,1095
- timewise-0.4.12.dist-info/LICENSE,sha256=sVoNJWiTlH-NarJx0wdsob468Pg3JE6vIIgll4lCa3E,1070
- timewise-0.4.12.dist-info/METADATA,sha256=s4qqipfvBVCdeFyTy5ko5d7dsjXlXYPFWOcpHFEdFoc,2496
- timewise-0.4.12.dist-info/WHEEL,sha256=RaoafKOydTQ7I_I3JTrPCg6kUmTgtm4BornzOqyEfJ8,88
- timewise-0.4.12.dist-info/entry_points.txt,sha256=yIWgzM0CZCJBrSR9-zbQW9d8MpFae0KlRVnZXF8rZow,54
- timewise-0.4.12.dist-info/RECORD,,