timewise 1.0.0a7__tar.gz → 1.0.0a9__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (55)
  1. {timewise-1.0.0a7 → timewise-1.0.0a9}/PKG-INFO +5 -4
  2. {timewise-1.0.0a7 → timewise-1.0.0a9}/README.md +3 -3
  3. {timewise-1.0.0a7 → timewise-1.0.0a9}/ampel/timewise/alert/TimewiseAlertSupplier.py +2 -2
  4. {timewise-1.0.0a7 → timewise-1.0.0a9}/ampel/timewise/alert/load/TimewiseFileLoader.py +11 -1
  5. {timewise-1.0.0a7 → timewise-1.0.0a9}/ampel/timewise/ingest/TiMongoMuxer.py +130 -16
  6. {timewise-1.0.0a7 → timewise-1.0.0a9}/pyproject.toml +2 -1
  7. timewise-1.0.0a9/timewise/__init__.py +1 -0
  8. {timewise-1.0.0a7 → timewise-1.0.0a9}/timewise/backend/base.py +2 -0
  9. {timewise-1.0.0a7 → timewise-1.0.0a9}/timewise/backend/filesystem.py +3 -0
  10. {timewise-1.0.0a7 → timewise-1.0.0a9}/timewise/chunking.py +18 -1
  11. {timewise-1.0.0a7 → timewise-1.0.0a9}/timewise/cli.py +9 -1
  12. {timewise-1.0.0a7 → timewise-1.0.0a9}/timewise/io/config.py +14 -10
  13. {timewise-1.0.0a7 → timewise-1.0.0a9}/timewise/io/download.py +37 -31
  14. {timewise-1.0.0a7 → timewise-1.0.0a9}/timewise/io/stable_tap.py +24 -4
  15. {timewise-1.0.0a7 → timewise-1.0.0a9}/timewise/plot/sdss.py +1 -3
  16. timewise-1.0.0a9/timewise/query/__init__.py +11 -0
  17. timewise-1.0.0a9/timewise/query/by_allwise_cntr_and_position.py +49 -0
  18. {timewise-1.0.0a7 → timewise-1.0.0a9}/timewise/query/positional.py +0 -1
  19. timewise-1.0.0a9/timewise/tables/__init__.py +11 -0
  20. {timewise-1.0.0a7 → timewise-1.0.0a9}/timewise/tables/allwise_p3as_mep.py +3 -1
  21. timewise-1.0.0a9/timewise/tables/allwise_p3as_psd.py +24 -0
  22. {timewise-1.0.0a7 → timewise-1.0.0a9}/timewise/tables/neowiser_p1bs_psd.py +3 -1
  23. timewise-1.0.0a7/timewise/__init__.py +0 -1
  24. timewise-1.0.0a7/timewise/query/__init__.py +0 -6
  25. timewise-1.0.0a7/timewise/tables/__init__.py +0 -10
  26. timewise-1.0.0a7/timewise/util/backoff.py +0 -12
  27. {timewise-1.0.0a7 → timewise-1.0.0a9}/LICENSE +0 -0
  28. {timewise-1.0.0a7 → timewise-1.0.0a9}/ampel/timewise/ingest/TiCompilerOptions.py +0 -0
  29. {timewise-1.0.0a7 → timewise-1.0.0a9}/ampel/timewise/ingest/TiDataPointShaper.py +0 -0
  30. {timewise-1.0.0a7 → timewise-1.0.0a9}/ampel/timewise/ingest/tags.py +0 -0
  31. {timewise-1.0.0a7 → timewise-1.0.0a9}/ampel/timewise/t1/T1HDBSCAN.py +0 -0
  32. {timewise-1.0.0a7 → timewise-1.0.0a9}/ampel/timewise/t1/TimewiseFilter.py +0 -0
  33. {timewise-1.0.0a7 → timewise-1.0.0a9}/ampel/timewise/t2/T2StackVisits.py +0 -0
  34. {timewise-1.0.0a7 → timewise-1.0.0a9}/ampel/timewise/util/AuxDiagnosticPlotter.py +0 -0
  35. {timewise-1.0.0a7 → timewise-1.0.0a9}/ampel/timewise/util/pdutil.py +0 -0
  36. {timewise-1.0.0a7 → timewise-1.0.0a9}/timewise/backend/__init__.py +0 -0
  37. {timewise-1.0.0a7 → timewise-1.0.0a9}/timewise/config.py +0 -0
  38. {timewise-1.0.0a7 → timewise-1.0.0a9}/timewise/io/__init__.py +0 -0
  39. {timewise-1.0.0a7 → timewise-1.0.0a9}/timewise/plot/__init__.py +0 -0
  40. {timewise-1.0.0a7 → timewise-1.0.0a9}/timewise/plot/diagnostic.py +0 -0
  41. {timewise-1.0.0a7 → timewise-1.0.0a9}/timewise/plot/lightcurve.py +0 -0
  42. {timewise-1.0.0a7 → timewise-1.0.0a9}/timewise/plot/panstarrs.py +0 -0
  43. {timewise-1.0.0a7 → timewise-1.0.0a9}/timewise/process/__init__.py +0 -0
  44. {timewise-1.0.0a7 → timewise-1.0.0a9}/timewise/process/config.py +0 -0
  45. {timewise-1.0.0a7 → timewise-1.0.0a9}/timewise/process/interface.py +0 -0
  46. {timewise-1.0.0a7 → timewise-1.0.0a9}/timewise/process/keys.py +0 -0
  47. {timewise-1.0.0a7 → timewise-1.0.0a9}/timewise/process/stacking.py +0 -0
  48. {timewise-1.0.0a7 → timewise-1.0.0a9}/timewise/process/template.yml +0 -0
  49. {timewise-1.0.0a7 → timewise-1.0.0a9}/timewise/query/base.py +0 -0
  50. {timewise-1.0.0a7 → timewise-1.0.0a9}/timewise/tables/base.py +0 -0
  51. {timewise-1.0.0a7 → timewise-1.0.0a9}/timewise/types.py +0 -0
  52. {timewise-1.0.0a7 → timewise-1.0.0a9}/timewise/util/csv_utils.py +0 -0
  53. {timewise-1.0.0a7 → timewise-1.0.0a9}/timewise/util/error_threading.py +0 -0
  54. {timewise-1.0.0a7 → timewise-1.0.0a9}/timewise/util/path.py +0 -0
  55. {timewise-1.0.0a7 → timewise-1.0.0a9}/timewise/util/visits.py +0 -0
{timewise-1.0.0a7 → timewise-1.0.0a9}/PKG-INFO

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: timewise
-Version: 1.0.0a7
+Version: 1.0.0a9
 Summary: Download WISE infrared data for many objects and process them with AMPEL
 License: MIT
 License-File: LICENSE
@@ -27,6 +27,7 @@ Requires-Dist: coveralls (>=3.3.1,<4.0.0) ; extra == "dev"
 Requires-Dist: jupyter[jupyter] (>=1.0.0,<2.0.0)
 Requires-Dist: jupyterlab[jupyter] (>=4.0.6,<5.0.0)
 Requires-Dist: matplotlib (>=3.5.3,<4.0.0)
+Requires-Dist: mongomock (>=4.3.0,<5.0.0) ; extra == "dev"
 Requires-Dist: mypy (>=1.18.2,<2.0.0) ; extra == "dev"
 Requires-Dist: myst-parser (>=1,<3) ; extra == "docs"
 Requires-Dist: numpy (>=1.23.2,<2.0.0)
@@ -63,7 +64,7 @@ Description-Content-Type: text/markdown
 This package downloads WISE data for positions on the sky and stacks single-exposure photometry per visit
 
 ## Prerequisites
-Python version 3.11.
+Python version 3.11, 3.12 or 3.13.
 
 If you want to not only download individual exposure photometry but also stack detections per visit (see below),
 you must have access to a running [MongoDB](https://www.mongodb.com/)*.
@@ -77,12 +78,12 @@ to get the MongoDB community edition. </sub>
 ### If you use timewise only for downloading
 The package can be installed via `pip` (but make sure to install the v1 pre-release):
 ```bash
-pip install --pre timewise==1.0.0a6
+pip install --pre timewise==1.0.0a9
 ```
 ### If you use timewise also for stacking individual exposures
 You must install with the `ampel` extra:
 ```bash
-pip install --pre 'timewise[ampel]==1.0.0a6'
+pip install --pre 'timewise[ampel]==1.0.0a9'
 ```
 To tell AMPEL which modules, aka units, to use, build the corresponding configuration file:
 ```bash
{timewise-1.0.0a7 → timewise-1.0.0a9}/README.md

@@ -10,7 +10,7 @@
 This package downloads WISE data for positions on the sky and stacks single-exposure photometry per visit
 
 ## Prerequisites
-Python version 3.11.
+Python version 3.11, 3.12 or 3.13.
 
 If you want to not only download individual exposure photometry but also stack detections per visit (see below),
 you must have access to a running [MongoDB](https://www.mongodb.com/)*.
@@ -24,12 +24,12 @@ to get the MongoDB community edition. </sub>
 ### If you use timewise only for downloading
 The package can be installed via `pip` (but make sure to install the v1 pre-release):
 ```bash
-pip install --pre timewise==1.0.0a6
+pip install --pre timewise==1.0.0a9
 ```
 ### If you use timewise also for stacking individual exposures
 You must install with the `ampel` extra:
 ```bash
-pip install --pre 'timewise[ampel]==1.0.0a6'
+pip install --pre 'timewise[ampel]==1.0.0a9'
 ```
 To tell AMPEL which modules, aka units, to use, build the corresponding configuration file:
 ```bash
{timewise-1.0.0a7 → timewise-1.0.0a9}/ampel/timewise/alert/TimewiseAlertSupplier.py

@@ -71,8 +71,8 @@ class TimewiseAlertSupplier(BaseAlertSupplier, AmpelABC):
 
         move = {
             c: c.replace("_ep", "")
-            for c in columns_to_rename
-            if c.replace("_ep", "") in table.columns
+            for c in table.columns
+            if (c.replace("_ep", "") in table.columns) and (c.endswith("_ep"))
         }
         if move:
             # In this case, the columns already exists because the neowise data is present
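The rewritten comprehension iterates over `table.columns` directly and renames a `*_ep` column only when its stripped counterpart already exists, instead of relying on a precomputed `columns_to_rename` list. A standalone sketch of the new rule, using a hypothetical column list in place of the astropy Table the supplier actually works with:

```python
# Hypothetical column names; in the supplier they come from an astropy Table.
columns = ["mjd", "ra", "ra_ep", "dec", "dec_ep", "w1flux_ep"]

# Rename "<name>_ep" -> "<name>" only if "<name>" is already present.
move = {
    c: c.replace("_ep", "")
    for c in columns
    if (c.replace("_ep", "") in columns) and c.endswith("_ep")
}
print(move)  # {'ra_ep': 'ra', 'dec_ep': 'dec'}  (w1flux_ep stays untouched)
```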
{timewise-1.0.0a7 → timewise-1.0.0a9}/ampel/timewise/alert/load/TimewiseFileLoader.py

@@ -32,6 +32,9 @@ class TimewiseFileLoader(AbsAlertLoader[Dict], AmpelABC):
 
     chunks: list[int] | None = None
 
+    # optionally skip files that are missing
+    skip_missing_files: bool = False
+
     def __init__(self, **kwargs) -> None:
         super().__init__(**kwargs)
 
@@ -81,7 +84,14 @@ class TimewiseFileLoader(AbsAlertLoader[Dict], AmpelABC):
         data = []
         for task in tasks:
             self.logger.debug(f"reading {task}")
-            idata = backend.load_data(task)
+            try:
+                idata = backend.load_data(task)
+            except FileNotFoundError as e:
+                if self.skip_missing_files:
+                    self.logger.warn(f"file for task {task} not found, skipping...")
+                    continue
+                else:
+                    raise e
 
             # add table name
             idata["table_name"] = (
{timewise-1.0.0a7 → timewise-1.0.0a9}/ampel/timewise/ingest/TiMongoMuxer.py

@@ -8,13 +8,20 @@
 
 from bisect import bisect_right
 from contextlib import suppress
-from typing import Any
-
+from typing import Any, Sequence
 
 from ampel.abstract.AbsT0Muxer import AbsT0Muxer
 from ampel.content.DataPoint import DataPoint
-from ampel.types import DataPointId, StockId
+from ampel.model.operator.AllOf import AllOf
+from ampel.model.operator.AnyOf import AnyOf
+from ampel.types import ChannelId, DataPointId, StockId
 from ampel.util.mappings import unflatten_dict
+from astropy.table import Table
+from pydantic import TypeAdapter
+from timewise.io.stable_tap import StableTAPService
+from timewise.query import QueryType
+from timewise.tables.allwise_p3as_mep import allwise_p3as_mep
+from timewise.types import TYPE_MAP
 
 
 class ConcurrentUpdateError(Exception):
@@ -32,7 +39,7 @@ class TiMongoMuxer(AbsT0Muxer):
     """
 
     # Standard projection used when checking DB for existing PPS/ULS
-    projection = {
+    projection: dict[str, int] = {
         "_id": 0,
         "id": 1,
         "tag": 1,
@@ -51,6 +58,13 @@ class TiMongoMuxer(AbsT0Muxer):
         "body.dec": 1,
     }
 
+    channel: None | ChannelId | AnyOf[ChannelId] | AllOf[ChannelId] = None
+
+    unique_key: list[str] = ["mjd", "ra", "dec"]
+
+    # URL of tap service for query of AllWISE Source Table
+    tap_service_url: str = "https://irsa.ipac.caltech.edu/TAP"
+
     def __init__(self, **kwargs) -> None:
         super().__init__(**kwargs)
 
@@ -58,6 +72,11 @@ class TiMongoMuxer(AbsT0Muxer):
         self._photo_col = self.context.db.get_collection("t0")
         self._projection_spec = unflatten_dict(self.projection)
 
+        self._tap_service = StableTAPService(self.tap_service_url)
+
+        self._allwise_source_cntr: list[str] = []
+        self._not_allwise_source_cntr: list[str] = []
+
     def process(
         self, dps: list[DataPoint], stock_id: None | StockId = None
     ) -> tuple[None | list[DataPoint], None | list[DataPoint]]:
@@ -79,7 +98,76 @@ class TiMongoMuxer(AbsT0Muxer):
 
     # NB: this 1-liner is a separate method to provide a patch point for race condition testing
     def _get_dps(self, stock_id: None | StockId) -> list[DataPoint]:
-        return list(self._photo_col.find({"stock": stock_id}, self.projection))
+        if self.channel is not None:
+            if isinstance(self.channel, ChannelId):
+                channel_query: (
+                    ChannelId | dict[str, Sequence[ChannelId | AllOf[ChannelId]]]
+                ) = self.channel
+            elif isinstance(self.channel, AnyOf):
+                channel_query = {"$in": self.channel.any_of}
+            elif isinstance(self.channel, AllOf):
+                channel_query = {"$all": self.channel.all_of}
+            else:
+                # should not happen
+                raise TypeError()
+            _channel = {"channel": channel_query}
+        else:
+            _channel = {}
+        query = {"stock": stock_id, **_channel}
+        return list(self._photo_col.find(query, self.projection))
+
+    def _check_cntrs(self, dps: Sequence[DataPoint]) -> None:
+        # assemble query
+        query_config = {
+            "type": "by_allwise_cntr_and_position",
+            "radius_arcsec": 10,
+            "columns": ["cntr"],
+            "constraints": [],
+            "table": {"name": "allwise_p3as_psd"},
+        }
+        query: QueryType = TypeAdapter(QueryType).validate_python(query_config)
+
+        # load datapoints into astropy table
+        upload = Table([dp["body"] for dp in dps])
+        upload["allwise_cntr"] = upload[allwise_p3as_mep.allwise_cntr_column]
+        upload[query.original_id_key] = [dp["id"] for dp in dps]
+        for key, dtype in query.input_columns.items():
+            upload[key] = upload[key].astype(TYPE_MAP[dtype])
+        for key in upload.colnames:
+            if key not in query.input_columns:
+                upload.remove_column(key)
+
+        # run query
+        self.logger.info("Querying AllWISE Source Table for MEP CNTRs ...")
+        res = self._tap_service.run_sync(
+            query.adql, uploads={query.upload_name: upload}
+        )
+
+        # update internal state
+        res_cntr = res.to_table()["cntr"].astype(str)
+        self._allwise_source_cntr.extend(list(res_cntr))
+        self._not_allwise_source_cntr.extend(
+            list(set(upload["allwise_cntr"].astype(str)) - set(res_cntr))
+        )
+
+    def _check_mep_allwise_sources(self, dps: Sequence[DataPoint]) -> list[DataPointId]:
+        dps_with_unchecked_cntr = [
+            dp
+            for dp in dps
+            if str(dp["body"][allwise_p3as_mep.allwise_cntr_column])
+            not in self._allwise_source_cntr + self._not_allwise_source_cntr
+        ]
+        if len(dps_with_unchecked_cntr) > 0:
+            self._check_cntrs(dps_with_unchecked_cntr)
+
+        # compile list of invalid datapoint ids
+        invalid_dp_ids = []
+        for dp in dps:
+            cntr = str(dp["body"][allwise_p3as_mep.allwise_cntr_column])
+            if cntr in self._not_allwise_source_cntr:
+                invalid_dp_ids.append(dp["id"])
+
+        return invalid_dp_ids
 
     def _process(
         self, dps: list[DataPoint], stock_id: None | StockId = None
@@ -116,11 +204,7 @@ class TiMongoMuxer(AbsT0Muxer):
             # jd alone is not enough for matching pps because each time is associated with
             # two filters! Also, if there can be multiple sources within the same frame which
             # leads to duplicate MJD and FID. Check position in addition.
-            key = (
-                dp["body"]["mjd"],
-                dp["body"]["ra"],
-                dp["body"]["dec"],
-            )
+            key = tuple(dp["body"][k] for k in self.unique_key)
 
             if target := unique_dps_ids.get(key):
                 # insert id in order
@@ -130,7 +214,10 @@ class TiMongoMuxer(AbsT0Muxer):
             else:
                 unique_dps_ids[key] = [dp["id"]]
 
-        # make sure no duplicate datapoints exist
+        # Part 2: Check that there are no duplicates and handle redundant AllWISE MEP data
+        ##################################################################################
+
+        invalid_dp_ids = []
         for key, simultaneous_dps in unique_dps_ids.items():
             dps_db_wrong = [dp for dp in dps_db if dp["id"] in simultaneous_dps]
             dps_wrong = [dp for dp in dps if dp["id"] in simultaneous_dps]
@@ -138,20 +225,47 @@ class TiMongoMuxer(AbsT0Muxer):
                 f"stockID {str(stock_id)}: Duplicate photopoints at {key}!\nDPS from DB:"
                 f"\n{dps_db_wrong}\nNew DPS:\n{dps_wrong}"
             )
-            assert len(simultaneous_dps) == 1, msg
 
-        # Part 2: Update new data points that are already superseded
-        ############################################################
+            all_wrong_dps = dps_db_wrong + dps_wrong
+            if len(simultaneous_dps) > 1:
+                # if these datapoints come from the AllWISE MEP database, downloaded by timewise
+                # there can be duplicates. Only the AllWISE CNTR can tell us which datapoints
+                # should be used: the CNTR that appears in the AllWISE source catalog.
+                if all(
+                    [
+                        ("TIMEWISE" in dp["tag"]) and ("allwise_p3as_mep" in dp["tag"])
+                        for dp in all_wrong_dps
+                    ]
+                ):
+                    self.logger.info(
+                        f"{len(all_wrong_dps)} duplicate MEP datapoints found. Checking ..."
+                    )
+                    i_invalid_dp_ids = self._check_mep_allwise_sources(
+                        dps_db_wrong + dps_wrong
+                    )
+                    self.logger.info(
+                        f"Found {len(i_invalid_dp_ids)} invalid MEP datapoints."
+                    )
+                    invalid_dp_ids.extend(i_invalid_dp_ids)
+
+                else:
+                    raise RuntimeError(msg)
+
+        # Part 3: Compile final lists of datapoints to insert and combine
+        #################################################################
 
         # Difference between candids from the alert and candids present in DB
-        ids_dps_to_insert = ids_dps_alert - ids_dps_db
+        ids_dps_to_insert = ids_dps_alert - ids_dps_db - set(invalid_dp_ids)
         dps_to_insert = [dp for dp in dps if dp["id"] in ids_dps_to_insert]
         dps_to_combine = [
-            dp for dp in dps + dps_db if dp["id"] in ids_dps_alert | ids_dps_db
+            dp
+            for dp in dps + dps_db
+            if dp["id"] in ((ids_dps_alert | ids_dps_db) - set(invalid_dp_ids))
        ]
         self.logger.debug(
             f"Got {len(ids_dps_alert)} datapoints from alerts, "
             f"found {len(dps_db)} in DB, "
+            f"{len(invalid_dp_ids)} invalid datapoints, "
             f"inserting {len(dps_to_insert)} datapoints, "
             f"combining {len(dps_to_combine)} datapoints"
         )
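The new `channel` option maps AMPEL's `AnyOf`/`AllOf` operators onto MongoDB's `$in`/`$all` operators when querying the t0 collection. A standalone sketch of that translation, with plain dicts standing in for the AMPEL operator models:

```python
def channel_filter(channel) -> dict:
    """Build the Mongo filter fragment the muxer merges into {"stock": stock_id}."""
    if channel is None:
        return {}
    if isinstance(channel, (str, int)):  # a single ChannelId
        return {"channel": channel}
    if "any_of" in channel:  # AnyOf: match documents carrying any listed channel
        return {"channel": {"$in": channel["any_of"]}}
    if "all_of" in channel:  # AllOf: require every listed channel on a document
        return {"channel": {"$all": channel["all_of"]}}
    raise TypeError(f"unsupported channel spec: {channel!r}")

print(channel_filter({"any_of": ["WISE_A", "WISE_B"]}))
# {'channel': {'$in': ['WISE_A', 'WISE_B']}}
```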
{timewise-1.0.0a7 → timewise-1.0.0a9}/pyproject.toml

@@ -4,7 +4,7 @@ build-backend = "poetry.core.masonry.api"
 
 [project]
 name = "timewise"
-version = "1.0.0a7"
+version = "1.0.0a9"
 description = "Download WISE infrared data for many objects and process them with AMPEL"
 authors = [
     { name = "Jannis Necker", email = "jannis.necker@gmail.com" },
@@ -54,6 +54,7 @@ dev = [
     "scipy-stubs (>=1.16.2.0,<2.0.0.0)",
     "types-pyyaml (>=6.0.12.20250915,<7.0.0.0)",
     "types-requests (>=2.32.4.20250913,<3.0.0.0)",
+    "mongomock (>=4.3.0,<5.0.0)",
 ]
 docs = [
     "myst-parser>=1,<3",
timewise-1.0.0a9/timewise/__init__.py (new file)

@@ -0,0 +1 @@
+__version__ = "1.0.0a9"
{timewise-1.0.0a7 → timewise-1.0.0a9}/timewise/backend/base.py

@@ -20,6 +20,8 @@ class Backend(abc.ABC, BaseModel):
     def save_meta(self, task: TaskID, meta: dict[str, Any]) -> None: ...
     @abc.abstractmethod
     def load_meta(self, task: TaskID) -> dict[str, Any] | None: ...
+    @abc.abstractmethod
+    def drop_meta(self, task: TaskID) -> None: ...
 
     # --- Markers ---
     @abc.abstractmethod
{timewise-1.0.0a7 → timewise-1.0.0a9}/timewise/backend/filesystem.py

@@ -52,6 +52,9 @@ class FileSystemBackend(Backend):
     def meta_exists(self, task: TaskID) -> bool:
         return self._meta_path(task).exists()
 
+    def drop_meta(self, task: TaskID) -> None:
+        self._meta_path(task).unlink()
+
     # ----------------------------
     # Markers
     # ----------------------------
{timewise-1.0.0a7 → timewise-1.0.0a9}/timewise/chunking.py

@@ -1,7 +1,7 @@
 import logging
 from functools import cached_property
 from pathlib import Path
-from typing import Iterator
+from typing import Iterator, Sequence, cast
 
 import numpy as np
 import pandas as pd
@@ -24,6 +24,23 @@ class Chunk:
         stop = max(self.row_numbers) + 1
         return pd.read_csv(self.input_csv, skiprows=start, nrows=stop - start).index
 
+    @property
+    def data(self) -> pd.DataFrame:
+        start = (
+            min(cast(Sequence[int], self.row_numbers)) + 1
+        )  # plus one to always skip header line
+        nrows = (
+            max(cast(Sequence[int], self.row_numbers)) - start + 2
+        )  # plus one: skip header, plus one:
+
+        columns = list(pd.read_csv(self.input_csv, nrows=0).columns)
+        return pd.read_csv(
+            filepath_or_buffer=self.input_csv,
+            skiprows=start,
+            nrows=nrows,
+            names=columns,
+        )
+
 
 class Chunker:
     def __init__(self, input_csv: Path, chunk_size: int):
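`Chunk.data` replaces `Downloader.get_chunk_data` (removed in the io/download.py diff below), so any holder of a `Chunk` can read its CSV rows without going through the downloader. A usage sketch, assuming a CSV with a header row and the iteration behaviour implied by `product(self.chunker, self.queries)` in the downloader:

```python
from pathlib import Path

from timewise.chunking import Chunker

# Hypothetical input file; any CSV with a header row works.
chunker = Chunker(input_csv=Path("parent_sample.csv"), chunk_size=1000)
for chunk in chunker:
    df = chunk.data  # pandas DataFrame holding only this chunk's rows
    print(chunk.chunk_id, len(df))
```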
{timewise-1.0.0a7 → timewise-1.0.0a9}/timewise/cli.py

@@ -53,8 +53,16 @@
 @app.command(help="Download WISE photometry from IRSA")
 def download(
     config_path: config_path_type,
+    resubmit_failed: Annotated[
+        bool,
+        typer.Option(
+            help="Re-submit jobs when failed due to connection issues",
+        ),
+    ] = False,
 ):
-    TimewiseConfig.from_yaml(config_path).download.build_downloader().run()
+    TimewiseConfig.from_yaml(config_path).download.build_downloader(
+        resubmit_failed=resubmit_failed
+    ).run()
 
 
 # the following commands will only be added if ampel is installed
{timewise-1.0.0a7 → timewise-1.0.0a9}/timewise/io/config.py

@@ -17,6 +17,7 @@ class DownloadConfig(BaseModel):
     poll_interval: float = 10.0
     queries: List[QueryType] = Field(..., description="One or more queries per chunk")
     backend: BackendType = Field(..., discriminator="type")
+    resubmit_failed: bool = False
 
     service_url: str = "https://irsa.ipac.caltech.edu/TAP"
 
@@ -57,13 +58,16 @@
 
         return self
 
-    def build_downloader(self) -> Downloader:
-        return Downloader(
-            service_url=self.service_url,
-            input_csv=self.expanded_input_csv,
-            chunk_size=self.chunk_size,
-            backend=self.backend,
-            queries=self.queries,
-            max_concurrent_jobs=self.max_concurrent_jobs,
-            poll_interval=self.poll_interval,
-        )
+    def build_downloader(self, **overwrite) -> Downloader:
+        default = {
+            "service_url": self.service_url,
+            "input_csv": self.expanded_input_csv,
+            "chunk_size": self.chunk_size,
+            "backend": self.backend,
+            "queries": self.queries,
+            "max_concurrent_jobs": self.max_concurrent_jobs,
+            "poll_interval": self.poll_interval,
+            "resubmit_failed": self.resubmit_failed,
+        }
+        default.update(overwrite)
+        return Downloader(**default)  # type: ignore
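`build_downloader` now assembles its defaults in a dict and applies keyword overrides, which is how the CLI's `--resubmit-failed` flag reaches the `Downloader` without editing the YAML config. A sketch of both call paths (the config file name is hypothetical; the import path is inferred from the `timewise/config.py` entry in the file list above):

```python
from timewise.config import TimewiseConfig

config = TimewiseConfig.from_yaml("timewise_config.yml")

# Use resubmit_failed as set in the config (defaults to False)
downloader = config.download.build_downloader()

# Override at call time, as `timewise download --resubmit-failed` does
downloader = config.download.build_downloader(resubmit_failed=True)
downloader.run()
```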
{timewise-1.0.0a7 → timewise-1.0.0a9}/timewise/io/download.py

@@ -1,25 +1,22 @@
-import time
-import threading
 import logging
-from queue import Empty
-from typing import Dict, Iterator, cast, Sequence
+import threading
+import time
+from datetime import datetime, timedelta
 from itertools import product
 from pathlib import Path
-from datetime import datetime, timedelta
+from queue import Empty
+from typing import Dict, Iterator
 
-import pandas as pd
 import numpy as np
 from astropy.table import Table
 from pyvo.utils.http import create_session
 
-from .stable_tap import StableTAPService
 from ..backend import BackendType
-from ..types import TAPJobMeta, TaskID, TYPE_MAP
+from ..chunking import Chunk, Chunker
 from ..query import QueryType
-from ..query.base import Query
+from ..types import TYPE_MAP, TAPJobMeta, TaskID
 from ..util.error_threading import ErrorQueue, ExceptionSafeThread
-from ..chunking import Chunker, Chunk
-
+from .stable_tap import StableTAPService
 
 logger = logging.getLogger(__name__)
 
@@ -34,6 +31,7 @@ class Downloader:
         queries: list[QueryType],
         max_concurrent_jobs: int,
         poll_interval: float,
+        resubmit_failed: bool,
     ):
         self.backend = backend
         self.queries = queries
@@ -67,6 +65,7 @@ class Downloader:
         self.service: StableTAPService = StableTAPService(
             service_url, session=self.session
         )
+        self.resubmit_failed = resubmit_failed
 
         self.chunker = Chunker(input_csv=input_csv, chunk_size=chunk_size)
 
@@ -74,7 +73,7 @@
     # helpers
     # ----------------------------
     @staticmethod
-    def get_task_id(chunk: Chunk, query: Query) -> TaskID:
+    def get_task_id(chunk: Chunk, query: QueryType) -> TaskID:
         return TaskID(
             namespace="download", key=f"chunk{chunk.chunk_id:04d}_{query.hash}"
         )
@@ -106,25 +105,10 @@
     # ----------------------------
    # TAP submission and download
     # ----------------------------
-    def get_chunk_data(self, chunk: Chunk) -> pd.DataFrame:
-        start = (
-            min(cast(Sequence[int], chunk.row_numbers)) + 1
-        )  # plus one to always skip header line
-        nrows = (
-            max(cast(Sequence[int], chunk.row_numbers)) - start + 2
-        )  # plus one: skip header, plus one:
-
-        columns = list(pd.read_csv(self.input_csv, nrows=0).columns)
-        return pd.read_csv(
-            filepath_or_buffer=self.input_csv,
-            skiprows=start,
-            nrows=nrows,
-            names=columns,
-        )
 
-    def submit_tap_job(self, query: Query, chunk: Chunk) -> TAPJobMeta:
+    def submit_tap_job(self, query: QueryType, chunk: Chunk) -> TAPJobMeta:
         adql = query.adql
-        chunk_df = self.get_chunk_data(chunk)
+        chunk_df = chunk.data
 
         assert all(chunk_df.index.isin(chunk.indices)), (
             "Some inputs loaded from wrong chunk!"
@@ -148,7 +132,6 @@
         logger.debug(f"uploading {len(upload)} objects.")
         job = self.service.submit_job(adql, uploads={query.upload_name: upload})
         job.run()
-        logger.debug(job.url)
 
         return TAPJobMeta(
             url=job.url,
@@ -178,7 +161,7 @@
     def _submission_worker(self):
         while not self.stop_event.is_set():
             try:
-                chunk, query = self.submit_queue.get(timeout=1.0)  # type: Chunk, Query
+                chunk, query = self.submit_queue.get(timeout=1.0)  # type: Chunk, QueryType
             except Empty:
                 if self.all_chunks_queued:
                     self.all_chunks_submitted = True
@@ -209,6 +192,26 @@
     # ----------------------------
     # Polling thread
     # ----------------------------
+
+    def resubmit(self, resubmit_task: TaskID):
+        logger.info(f"resubmitting {resubmit_task}")
+        submit = None
+        for chunk, q in product(self.chunker, self.queries):
+            task = self.get_task_id(chunk, q)
+            if task == resubmit_task:
+                submit = chunk, q
+                break
+        if submit is None:
+            raise RuntimeError(f"resubmit task {resubmit_task} not found!")
+
+        # remove current info, so the job won't be re-submitted over and over again
+        self.backend.drop_meta(resubmit_task)
+        with self.job_lock:
+            self.jobs.pop(resubmit_task)
+
+        # put task back in resubmit queue
+        self.submit_queue.put(submit)
+
     def _polling_worker(self):
         logger.debug("starting polling worker")
         backend = self.backend
@@ -240,6 +243,9 @@
                     f"No job found under {meta['url']} for {task}! "
                     f"Probably took too long before downloading results."
                 )
+                if self.resubmit_failed:
+                    self.resubmit(task)
+                    continue
 
             meta["status"] = status
             with self.job_lock:
{timewise-1.0.0a7 → timewise-1.0.0a9}/timewise/io/stable_tap.py

@@ -3,8 +3,7 @@ import backoff
 import pyvo as vo
 from xml.etree import ElementTree
 
-from timewise.util.backoff import backoff_hndlr
-
+import requests
 
 logger = logging.getLogger(__name__)
 
@@ -20,6 +19,11 @@ class StableAsyncTAPJob(vo.dal.AsyncTAPJob):
         self.submit_response = None
 
     @classmethod
+    @backoff.on_exception(
+        backoff.expo,
+        requests.exceptions.HTTPError,
+        max_tries=5,
+    )
     def create(
         cls,
         baseurl,
@@ -84,11 +88,18 @@ class StableAsyncTAPJob(vo.dal.AsyncTAPJob):
         backoff.expo,
         (vo.dal.DALServiceError, AttributeError),
         max_tries=50,
-        on_backoff=backoff_hndlr,
     )
     def phase(self):
         return super(StableAsyncTAPJob, self).phase
 
+    @backoff.on_exception(
+        backoff.expo,
+        vo.dal.DALServiceError,
+        max_tries=50,
+    )
+    def _update(self, *args, **kwargs):
+        return super(StableAsyncTAPJob, self)._update(*args, **kwargs)
+
 
 class StableTAPService(vo.dal.TAPService):
     """
@@ -99,7 +110,6 @@ class StableTAPService(vo.dal.TAPService):
         backoff.expo,
         (vo.dal.DALServiceError, AttributeError, AssertionError),
         max_tries=5,
-        on_backoff=backoff_hndlr,
     )
     def submit_job(
         self, query, *, language="ADQL", maxrec=None, uploads=None, **keywords
@@ -119,3 +129,13 @@ class StableTAPService(vo.dal.TAPService):
 
     def get_job_from_url(self, url):
         return StableAsyncTAPJob(url, session=self._session)
+
+    @backoff.on_exception(
+        backoff.expo,
+        (vo.dal.DALServiceError, vo.dal.DALFormatError),
+        max_tries=5,
+    )
+    def run_sync(
+        self, query, *, language="ADQL", maxrec=None, uploads=None,
+        **keywords):
+        return super().run_sync(query, language=language, maxrec=maxrec, uploads=uploads, **keywords)
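With `timewise/util/backoff.py` removed, retry behaviour now comes entirely from the `backoff` package's decorators, as added to `create`, `_update`, and `run_sync` above. The general pattern, shown on a self-contained function (the exception type and retry budget here are illustrative):

```python
import backoff
import requests

@backoff.on_exception(
    backoff.expo,                    # exponential wait between attempts
    requests.exceptions.HTTPError,   # retry only on this exception type
    max_tries=5,                     # give up after the fifth attempt
)
def fetch(url: str) -> bytes:
    resp = requests.get(url, timeout=30)
    resp.raise_for_status()  # raises HTTPError on 4xx/5xx, triggering a retry
    return resp.content
```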
{timewise-1.0.0a7 → timewise-1.0.0a9}/timewise/plot/sdss.py

@@ -5,8 +5,6 @@ import logging
 import matplotlib.pyplot as plt
 import backoff
 
-from ..util.backoff import backoff_hndlr
-
 
 logger = logging.getLogger(__name__)
 
@@ -34,7 +32,7 @@ def login_to_sciserver():
 
 
 @backoff.on_exception(
-    backoff.expo, requests.RequestException, max_tries=50, on_backoff=backoff_hndlr
+    backoff.expo, requests.RequestException, max_tries=50
 )
 def get_cutout(*args, **kwargs):
     login_to_sciserver()
timewise-1.0.0a9/timewise/query/__init__.py (new file)

@@ -0,0 +1,11 @@
+from typing import Annotated, TypeAlias, Union
+
+from pydantic import Field
+
+from .by_allwise_cntr_and_position import AllWISECntrQuery
+from .positional import PositionalQuery
+
+# Discriminated union of all query types
+QueryType: TypeAlias = Annotated[
+    Union[PositionalQuery, AllWISECntrQuery], Field(discriminator="type")
+]
timewise-1.0.0a9/timewise/query/by_allwise_cntr_and_position.py (new file)

@@ -0,0 +1,49 @@
+import logging
+from typing import Dict, Literal
+
+from .base import Query
+
+logger = logging.getLogger(__name__)
+
+
+class AllWISECntrQuery(Query):
+    type: Literal["by_allwise_cntr_and_position"] = "by_allwise_cntr_and_position"
+    radius_arcsec: float
+
+    @property
+    def input_columns(self) -> Dict[str, str]:
+        return {
+            "allwise_cntr": "int",
+            "ra": "float",
+            "dec": "float",
+            self.original_id_key: "int",
+        }
+
+    def build(self) -> str:
+        logger.debug(f"constructing query by AllWISE cntr for {self.table.name}")
+
+        q = "SELECT \n\t"
+        for k in self.columns:
+            q += f"{self.table.name}.{k}, "
+        q += f"\n\tmine.{self.original_id_key} \n"
+        q += f"FROM\n\tTAP_UPLOAD.{self.upload_name} AS mine \n"
+        q += f"RIGHT JOIN\n\t{self.table.name} \n"
+        q += "WHERE \n"
+        q += (
+            f"\tCONTAINS(POINT('J2000',{self.table.name}.{self.table.ra_column},{self.table.name}.{self.table.dec_column}),"
+            f"CIRCLE('J2000',mine.ra,mine.dec,{self.radius_arcsec / 3600:.18f}))=1 "
+        )
+
+        constraints = self.constraints + [
+            f"{self.table.allwise_cntr_column} = {self.upload_name}.allwise_cntr"
+        ]
+
+        if len(constraints) > 0:
+            q += " AND (\n"
+            for c in constraints:
+                q += f"\t{self.table.name}.{c} AND \n"
+            q = q.strip(" AND \n")
+            q += "\t)"
+
+        logger.debug(f"\n{q}")
+        return q
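Because `AllWISECntrQuery` joins the discriminated union via its `type` literal, a plain dict validates straight into the right query class; this is exactly how `TiMongoMuxer._check_cntrs` builds its query in the diff above. A sketch (field values copied from that diff; `adql` is assumed to expose the string built by `build()`, as the downloader's `query.adql` usage suggests):

```python
from pydantic import TypeAdapter

from timewise.query import QueryType

query = TypeAdapter(QueryType).validate_python({
    "type": "by_allwise_cntr_and_position",
    "radius_arcsec": 10,
    "columns": ["cntr"],
    "constraints": [],
    "table": {"name": "allwise_p3as_psd"},
})
print(type(query).__name__)  # AllWISECntrQuery
print(query.adql)            # the ADQL string assembled by build()
```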
{timewise-1.0.0a7 → timewise-1.0.0a9}/timewise/query/positional.py

@@ -36,5 +36,4 @@ class PositionalQuery(Query):
             q = q.strip(" AND \n")
             q += "\t)"
 
-        logger.debug(f"\n{q}")
         return q
timewise-1.0.0a9/timewise/tables/__init__.py (new file)

@@ -0,0 +1,11 @@
+from typing import Annotated, Union
+
+from pydantic import Field
+
+from .allwise_p3as_mep import allwise_p3as_mep
+from .allwise_p3as_psd import allwise_p3as_psd
+from .neowiser_p1bs_psd import neowiser_p1bs_psd
+
+TableType = Annotated[
+    Union[allwise_p3as_mep, neowiser_p1bs_psd, allwise_p3as_psd], Field(discriminator="name")
+]
{timewise-1.0.0a7 → timewise-1.0.0a9}/timewise/tables/allwise_p3as_mep.py

@@ -1,4 +1,5 @@
-from typing import Literal, ClassVar, Type, Dict
+from typing import ClassVar, Dict, Literal, Type
+
 from .base import TableConfig
 
 
@@ -20,3 +21,4 @@ class allwise_p3as_mep(TableConfig):
     }
     ra_column: ClassVar[str] = "ra"
     dec_column: ClassVar[str] = "dec"
+    allwise_cntr_column: ClassVar[str] = "cntr_mf"
timewise-1.0.0a9/timewise/tables/allwise_p3as_psd.py (new file)

@@ -0,0 +1,24 @@
+from typing import ClassVar, Dict, Literal, Type
+
+from .base import TableConfig
+
+
+class allwise_p3as_psd(TableConfig):
+    name: Literal["allwise_p3as_psd"] = "allwise_p3as_psd"
+    columns_dtypes: ClassVar[Dict[str, Type]] = {
+        "ra": float,
+        "dec": float,
+        "mjd": float,
+        "cntr": str,
+        "w1mpro": float,
+        "w1sigmpro": float,
+        "w2mpro": float,
+        "w2sigmpro": float,
+        "w1flux": float,
+        "w1sigflux": float,
+        "w2flux": float,
+        "w2sigflux": float,
+    }
+    ra_column: ClassVar[str] = "ra"
+    dec_column: ClassVar[str] = "dec"
+    allwise_cntr_column: ClassVar[str] = "cntr"
{timewise-1.0.0a7 → timewise-1.0.0a9}/timewise/tables/neowiser_p1bs_psd.py

@@ -1,4 +1,5 @@
-from typing import Literal, ClassVar, Dict, Type
+from typing import ClassVar, Dict, Literal, Type
+
 from .base import TableConfig
 
 
@@ -20,3 +21,4 @@ class neowiser_p1bs_psd(TableConfig):
     }
     ra_column: ClassVar[str] = "ra"
     dec_column: ClassVar[str] = "dec"
+    allwise_cntr_column: ClassVar[str] = "allwise_cntr"
timewise-1.0.0a7/timewise/__init__.py (removed)

@@ -1 +0,0 @@
-__version__ = "1.0.0a7"
timewise-1.0.0a7/timewise/query/__init__.py (removed)

@@ -1,6 +0,0 @@
-from pydantic import Field
-from typing import Union, Annotated, TypeAlias
-from .positional import PositionalQuery
-
-# Discriminated union of all query types
-QueryType: TypeAlias = Annotated[Union[PositionalQuery], Field(discriminator="type")]
timewise-1.0.0a7/timewise/tables/__init__.py (removed)

@@ -1,10 +0,0 @@
-from pydantic import Field
-from typing import Union, Annotated
-
-from .allwise_p3as_mep import allwise_p3as_mep
-from .neowiser_p1bs_psd import neowiser_p1bs_psd
-
-
-TableType = Annotated[
-    Union[allwise_p3as_mep, neowiser_p1bs_psd], Field(discriminator="name")
-]
timewise-1.0.0a7/timewise/util/backoff.py (removed)

@@ -1,12 +0,0 @@
-import logging
-
-
-logger = logging.getLogger(__name__)
-
-
-def backoff_hndlr(details):
-    logger.info(
-        "Backing off {wait:0.1f} seconds after {tries} tries "
-        "calling function {target} with args {args} and kwargs "
-        "{kwargs}".format(**details)
-    )