timewise 0.5.4__py3-none-any.whl → 1.0.0a1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- timewise/__init__.py +1 -5
- timewise/backend/__init__.py +6 -0
- timewise/backend/base.py +36 -0
- timewise/backend/filesystem.py +80 -0
- timewise/chunking.py +50 -0
- timewise/cli.py +117 -11
- timewise/config.py +34 -0
- timewise/io/__init__.py +1 -0
- timewise/io/config.py +64 -0
- timewise/io/download.py +302 -0
- timewise/io/stable_tap.py +121 -0
- timewise/plot/__init__.py +3 -0
- timewise/plot/diagnostic.py +242 -0
- timewise/plot/lightcurve.py +112 -0
- timewise/plot/panstarrs.py +260 -0
- timewise/plot/sdss.py +109 -0
- timewise/process/__init__.py +2 -0
- timewise/process/config.py +30 -0
- timewise/process/interface.py +143 -0
- timewise/process/keys.py +10 -0
- timewise/process/stacking.py +310 -0
- timewise/process/template.yml +49 -0
- timewise/query/__init__.py +6 -0
- timewise/query/base.py +45 -0
- timewise/query/positional.py +40 -0
- timewise/tables/__init__.py +10 -0
- timewise/tables/allwise_p3as_mep.py +22 -0
- timewise/tables/base.py +9 -0
- timewise/tables/neowiser_p1bs_psd.py +22 -0
- timewise/types.py +30 -0
- timewise/util/backoff.py +12 -0
- timewise/util/csv_utils.py +12 -0
- timewise/util/error_threading.py +70 -0
- timewise/util/visits.py +33 -0
- timewise-1.0.0a1.dist-info/METADATA +205 -0
- timewise-1.0.0a1.dist-info/RECORD +39 -0
- timewise-1.0.0a1.dist-info/entry_points.txt +3 -0
- timewise/big_parent_sample.py +0 -106
- timewise/config_loader.py +0 -157
- timewise/general.py +0 -52
- timewise/parent_sample_base.py +0 -89
- timewise/point_source_utils.py +0 -68
- timewise/utils.py +0 -558
- timewise/wise_bigdata_desy_cluster.py +0 -1407
- timewise/wise_data_base.py +0 -2027
- timewise/wise_data_by_visit.py +0 -672
- timewise/wise_flux_conversion_correction.dat +0 -19
- timewise-0.5.4.dist-info/METADATA +0 -56
- timewise-0.5.4.dist-info/RECORD +0 -17
- timewise-0.5.4.dist-info/entry_points.txt +0 -3
- {timewise-0.5.4.dist-info → timewise-1.0.0a1.dist-info}/WHEEL +0 -0
- {timewise-0.5.4.dist-info → timewise-1.0.0a1.dist-info}/licenses/LICENSE +0 -0
timewise/__init__.py
CHANGED
timewise/backend/base.py
ADDED
@@ -0,0 +1,36 @@
+import abc
+from typing import Any
+from pydantic import BaseModel
+from astropy.table import Table
+from ..types import TaskID
+
+
+class Backend(abc.ABC, BaseModel):
+    type: str
+    base_path: Any
+    """
+    Abstract persistence backend for jobs, results, and markers.
+    Works with generic TaskIDs so it can be reused across Downloader/Processor.
+    """
+
+    # --- metadata ---
+    @abc.abstractmethod
+    def meta_exists(self, task: TaskID) -> bool: ...
+    @abc.abstractmethod
+    def save_meta(self, task: TaskID, meta: dict[str, Any]) -> None: ...
+    @abc.abstractmethod
+    def load_meta(self, task: TaskID) -> dict[str, Any] | None: ...
+
+    # --- Markers ---
+    @abc.abstractmethod
+    def mark_done(self, task: TaskID) -> None: ...
+    @abc.abstractmethod
+    def is_done(self, task: TaskID) -> bool: ...
+
+    # --- Data ---
+    @abc.abstractmethod
+    def save_data(self, task: TaskID, content: Table) -> None: ...
+    @abc.abstractmethod
+    def load_data(self, task: TaskID) -> Table: ...
+    @abc.abstractmethod
+    def data_exists(self, task: TaskID) -> bool: ...
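Any concrete backend only needs to implement the eight abstract methods above; FileSystemBackend below is the shipped implementation. For unit tests, an in-memory variant is enough. The following is a minimal sketch, not part of the package, and it assumes TaskID values are hashable (the base class only passes them around, and the filesystem backend merely formats them into file names):

# Hypothetical in-memory backend for tests; illustrates the Backend contract only.
from typing import Any

from astropy.table import Table
from pydantic import Field

from timewise.backend.base import Backend
from timewise.types import TaskID


class InMemoryBackend(Backend):
    type: str = "memory"
    base_path: Any = None
    meta_store: dict = Field(default_factory=dict)  # TaskID -> metadata dict
    data_store: dict = Field(default_factory=dict)  # TaskID -> astropy Table
    done_tasks: set = Field(default_factory=set)    # TaskIDs marked as done

    def meta_exists(self, task: TaskID) -> bool:
        return task in self.meta_store

    def save_meta(self, task: TaskID, meta: dict[str, Any]) -> None:
        self.meta_store[task] = meta

    def load_meta(self, task: TaskID) -> dict[str, Any] | None:
        return self.meta_store.get(task)

    def mark_done(self, task: TaskID) -> None:
        self.done_tasks.add(task)

    def is_done(self, task: TaskID) -> bool:
        return task in self.done_tasks

    def save_data(self, task: TaskID, content: Table) -> None:
        self.data_store[task] = content

    def load_data(self, task: TaskID) -> Table:
        return self.data_store[task]

    def data_exists(self, task: TaskID) -> bool:
        return task in self.data_store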
timewise/backend/filesystem.py
ADDED
@@ -0,0 +1,80 @@
+import json
+import logging
+from pathlib import Path
+from typing import Any, Literal
+from astropy.table import Table
+
+from .base import Backend
+from ..types import TaskID
+
+
+logger = logging.getLogger(__name__)
+
+
+class FileSystemBackend(Backend):
+    type: Literal["filesystem"] = "filesystem"
+    base_path: Path
+
+    # ----------------------------
+    # Helpers for paths
+    # ----------------------------
+    def _meta_path(self, task: TaskID) -> Path:
+        return self.base_path / f"{task}.meta.json"
+
+    def _marker_path(self, task: TaskID) -> Path:
+        return self.base_path / f"{task}.ok"
+
+    def _data_path(self, task: TaskID) -> Path:
+        return self.base_path / f"{task}.fits"
+
+    # ----------------------------
+    # Metadata
+    # ----------------------------
+    def save_meta(self, task: TaskID, meta: dict[str, Any]) -> None:
+        path = self._meta_path(task)
+        tmp = path.with_suffix(".tmp")
+        tmp.parent.mkdir(parents=True, exist_ok=True)
+        logger.debug(f"writing {path}")
+        tmp.write_text(json.dumps(meta, indent=2))
+        tmp.replace(path)
+
+    def load_meta(self, task: TaskID) -> dict[str, Any] | None:
+        path = self._meta_path(task)
+        if not path.exists():
+            return None
+        return json.loads(path.read_text())
+
+    def meta_exists(self, task: TaskID) -> bool:
+        return self._meta_path(task).exists()
+
+    # ----------------------------
+    # Markers
+    # ----------------------------
+    def mark_done(self, task: TaskID) -> None:
+        mp = self._marker_path(task)
+        mp.parent.mkdir(parents=True, exist_ok=True)
+        logger.debug(f"writing {mp}")
+        mp.write_text("done")
+
+    def is_done(self, task: TaskID) -> bool:
+        return self._marker_path(task).exists()
+
+    # ----------------------------
+    # Data
+    # ----------------------------
+    def save_data(self, task: TaskID, content: Table) -> None:
+        path = self._data_path(task)
+        tmp = path.with_suffix(".tmp")
+        tmp.parent.mkdir(parents=True, exist_ok=True)
+        logger.debug(f"writing {path}")
+        content.write(tmp, format="fits")
+        tmp.replace(path)
+
+    def load_data(self, task: TaskID) -> Table:
+        path = self._data_path(task)
+        if not path.exists():
+            raise FileNotFoundError(path)
+        return Table.read(path, format="fits")
+
+    def data_exists(self, task: TaskID) -> bool:
+        return self._data_path(task).exists()
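FileSystemBackend writes to a temporary file and renames it into place, so an interrupted write never leaves a corrupt product behind, and the separate `.ok` marker lets a rerun skip completed tasks. A short usage sketch follows; the directory, task identifier, and table contents are made up, and TaskID is assumed to be string-like since it is only formatted into file names:

# Usage sketch only; paths and the task identifier are illustrative.
from pathlib import Path

from astropy.table import Table

from timewise.backend.filesystem import FileSystemBackend

backend = FileSystemBackend(base_path=Path("./timewise_data"))
task = "chunk0000_query0"  # assumed string-like TaskID

# save_data writes <task>.tmp first and then renames it to <task>.fits
lightcurve = Table({"mjd": [58000.1, 58000.2], "w1_flux": [1.2, 1.3]})
backend.save_data(task, lightcurve)
backend.mark_done(task)

# a later run can skip finished tasks and read the stored table back
if backend.is_done(task) and backend.data_exists(task):
    stored = backend.load_data(task)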
timewise/chunking.py
ADDED
@@ -0,0 +1,50 @@
+from typing import Iterator
+from pathlib import Path
+import numpy as np
+from numpy import typing as npt
+import pandas as pd
+import logging
+
+logger = logging.getLogger(__name__)
+
+
+class Chunk:
+    def __init__(
+        self, chunk_id: int, indices: npt.ArrayLike, row_indices: npt.ArrayLike
+    ):
+        self.chunk_id = chunk_id
+        self.indices = indices
+        self.row_numbers = row_indices
+
+
+class Chunker:
+    def __init__(self, input_csv: Path, chunk_size: int):
+        self.input_csv = input_csv
+        self.chunk_size = chunk_size
+        self._n_rows = self._count_rows()
+        logger.debug(f"found {self._n_rows} rows in {self.input_csv}")
+
+    def _count_rows(self) -> int:
+        chunk = 1024 * 1024  # Process 1 MB at a time.
+        f = np.memmap(self.input_csv)
+        num_newlines = sum(
+            np.sum(f[i : i + chunk] == ord("\n")) for i in range(0, len(f), chunk)
+        )
+        del f
+        return num_newlines - 1  # one header row
+
+    def __len__(self) -> int:
+        return int(np.ceil(self._n_rows / self.chunk_size))
+
+    def __iter__(self) -> Iterator[Chunk]:
+        for chunk_id in range(len(self)):
+            yield self.get_chunk(chunk_id)
+
+    def get_chunk(self, chunk_id: int) -> Chunk:
+        if chunk_id >= len(self):
+            raise IndexError(f"Invalid chunk_id {chunk_id}")
+        start = chunk_id * self.chunk_size
+        stop = min(start + self.chunk_size, self._n_rows)
+        indices = pd.read_csv(self.input_csv, skiprows=start, nrows=stop - start).index
+        logger.debug(f"chunk {chunk_id}: from {start} to {stop}")
+        return Chunk(chunk_id, indices, np.arange(start=start, stop=stop))
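Chunker counts rows by scanning the CSV for newlines through a memory map, so even very large parent samples are split without loading the whole file. A usage sketch, with an illustrative file name and chunk size:

# Usage sketch; the parent-sample CSV and chunk size are illustrative.
from pathlib import Path

from timewise.chunking import Chunker

chunker = Chunker(input_csv=Path("parent_sample.csv"), chunk_size=500_000)
print(f"{len(chunker)} chunks of at most {chunker.chunk_size} rows")

for chunk in chunker:
    # chunk.row_numbers are the 0-based row positions covered by this chunk,
    # chunk.indices is the pandas index of the rows read for it
    print(chunk.chunk_id, chunk.row_numbers[0], chunk.row_numbers[-1])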
timewise/cli.py
CHANGED
@@ -1,18 +1,124 @@
 import logging
-from
+from typing import Annotated, Literal, List
+from pathlib import Path
 
-
-from timewise.config_loader import TimewiseConfigLoader
+import typer
 
+from .config import TimewiseConfig
+from .plot.diagnostic import make_plot
 
-
+from rich.logging import RichHandler
 
 
-
-parser = ArgumentParser()
-parser.add_argument("config", type=str, help="Path to timewise config file")
-parser.add_argument("-l", "--logging-level", default="INFO", type=str)
-cfg = vars(parser.parse_args())
+app = typer.Typer(help="Timewsie CLI")
 
-
-
+config_path_type = Annotated[
+    Path, typer.Argument(help="Pipeline config file (YAML/JSON)")
+]
+ampel_config_path_type = Annotated[Path, typer.Argument(help="AMPEL config YAML")]
+
+
+# --- Global callback (runs before every command) ---
+@app.callback()
+def main(
+    ctx: typer.Context,
+    log_level: str = typer.Option(
+        "INFO",
+        "--log-level",
+        "-l",
+        help="Logging level (DEBUG, INFO, WARNING, ERROR, CRITICAL)",
+        case_sensitive=False,
+    ),
+):
+    """Global options for all Timewise commands."""
+    # Normalize log level
+    level = getattr(logging, log_level.upper(), None)
+    if not isinstance(level, int):
+        raise typer.BadParameter(f"Invalid log level: {log_level}")
+
+    # Rich logging
+    logging.basicConfig(
+        handlers=[RichHandler(rich_tracebacks=True, markup=True)],
+    )
+    logging.getLogger("timewise").setLevel(level)
+
+    # Store log level in context for subcommands
+    ctx.obj = {"log_level": level}
+
+
+@app.command(help="Download WISE photometry from IRSA")
+def download(
+    config_path: config_path_type,
+):
+    TimewiseConfig.from_yaml(config_path).download.build_downloader().run()
+
+
+@app.command(help="Prepares the AMPEL job file so AMPEL can be run manually")
+def prepare_ampel(
+    config_path: config_path_type,
+):
+    cfg = TimewiseConfig.from_yaml(config_path)
+    ampel_interface = cfg.build_ampel_interface()
+    p = ampel_interface.prepare(config_path)
+    typer.echo(f"AMPEL job file: {p}")
+
+
+@app.command(help="Processes the lightcurves using AMPEL")
+def process(
+    config_path: config_path_type,
+    ampel_config_path: ampel_config_path_type,
+):
+    cfg = TimewiseConfig.from_yaml(config_path)
+    ampel_interface = cfg.build_ampel_interface()
+    ampel_interface.run(config_path, ampel_config_path)
+
+
+@app.command(help="Write stacked lightcurves to disk")
+def export(
+    config_path: config_path_type,
+    output_directory: Annotated[Path, typer.Argument(help="output directory")],
+    indices: Annotated[
+        list[int] | None,
+        typer.Option(
+            "-i", "--indices", help="Indices to export, defaults to all indices"
+        ),
+    ] = None,
+):
+    TimewiseConfig.from_yaml(config_path).build_ampel_interface().export_many(
+        output_directory, indices
+    )
+
+
+@app.command(help="Run download, process and export")
+def run_chain(
+    config_path: config_path_type,
+    ampel_config_path: ampel_config_path_type,
+    output_directory: Annotated[Path, typer.Argument(help="output directory")],
+    indices: Annotated[
+        list[int] | None,
+        typer.Option(
+            "-i", "--indices", help="Indices to export, defaults to all indices"
+        ),
+    ] = None,
+):
+    download(config_path)
+    process(config_path, ampel_config_path)
+    export(config_path, output_directory, indices)
+
+
+@app.command(help="Make diagnostic plots")
+def plot(
+    config_path: config_path_type,
+    indices: Annotated[
+        List[int],
+        typer.Argument(help="Identifiers of the objects for which to create plots"),
+    ],
+    output_directory: Annotated[Path, typer.Argument(help="Output directory")],
+    cutout: Annotated[
+        Literal["sdss", "panstarrs"],
+        typer.Option("-c", "--cutout", help="Which survey to use for cutouts"),
+    ] = "panstarrs",
+):
+    make_plot(
+        config_path, indices=indices, cutout=cutout, output_directory=output_directory
+    )
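The new Typer app can be exercised without the installed console script, for example through Typer's test runner; the global --log-level option belongs to the callback and therefore goes before the command name. A sketch, where the config file name is made up and the actual console-script name comes from entry_points.txt (not shown here):

# Sketch: driving the CLI programmatically; "my_config.yml" is hypothetical.
from typer.testing import CliRunner

from timewise.cli import app

runner = CliRunner()
result = runner.invoke(app, ["--log-level", "DEBUG", "download", "my_config.yml"])
print(result.exit_code)
print(result.output)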
timewise/config.py
ADDED
@@ -0,0 +1,34 @@
+from pathlib import Path
+import yaml
+
+import numpy as np
+from pydantic import BaseModel, model_validator
+
+from .io import DownloadConfig
+from .process import AmpelConfig, AmpelInterface
+
+
+class TimewiseConfig(BaseModel):
+    download: DownloadConfig
+    ampel: AmpelConfig
+
+    @classmethod
+    def from_yaml(cls, path: str | Path):
+        path = Path(path)
+        assert path.exists(), f"{path} not found!"
+        with path.open("r") as f:
+            config_dict = yaml.safe_load(f)
+        return cls.model_validate(config_dict)
+
+    @model_validator(mode="after")
+    def validate_query_original_id_key(self) -> "TimewiseConfig":
+        unique_keys = np.unique([q.original_id_key for q in self.download.queries])
+        assert len(unique_keys) == 1, (
+            "Can not use different 'original_id_key' in queries!"
+        )
+        return self
+
+    def build_ampel_interface(self) -> AmpelInterface:
+        return self.ampel.build_interface(
+            self.download.queries[0].original_id_key, self.download.input_csv
+        )
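TimewiseConfig ties the download and AMPEL sections together, which is what the CLI commands do under the hood. The programmatic equivalent of `download` followed by `process` is sketched below; both file names are hypothetical, and the YAML must contain `download:` and `ampel:` sections that validate against DownloadConfig and AmpelConfig (the latter is defined in timewise/process/config.py, not shown here):

# Sketch of the programmatic equivalent of the `download` and `process` commands.
from pathlib import Path

from timewise.config import TimewiseConfig

config_path = Path("timewise.yml")          # hypothetical pipeline config
ampel_config_path = Path("ampel_conf.yml")  # hypothetical AMPEL config

cfg = TimewiseConfig.from_yaml(config_path)

# fetch the WISE photometry for the parent sample
cfg.download.build_downloader().run()

# hand the downloaded lightcurves over to AMPEL
interface = cfg.build_ampel_interface()
interface.run(config_path, ampel_config_path)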
timewise/io/__init__.py
ADDED
@@ -0,0 +1 @@
+from .config import DownloadConfig
timewise/io/config.py
ADDED
@@ -0,0 +1,64 @@
+from pathlib import Path
+from typing import List
+import pandas as pd
+from pydantic import BaseModel, Field, model_validator
+
+from .download import Downloader
+from ..query import QueryType
+from ..backend import BackendType
+from ..types import TYPE_MAP
+
+
+class DownloadConfig(BaseModel):
+    input_csv: Path
+    chunk_size: int = 500_000
+    max_concurrent_jobs: int = 4
+    poll_interval: float = 10.0
+    queries: List[QueryType] = Field(..., description="One or more queries per chunk")
+    backend: BackendType = Field(..., discriminator="type")
+
+    service_url: str = "https://irsa.ipac.caltech.edu/TAP"
+
+    @model_validator(mode="after")
+    def validate_input_csv_columns(self) -> "DownloadConfig":
+        """Ensure that the input CSV contains all columns required by queries."""
+        # only validate if the CSV actually exists
+        if not self.input_csv.exists():
+            raise ValueError(f"CSV file does not exist: {self.input_csv}")
+
+        # read just the header and first 10 lines
+        input_table = pd.read_csv(self.input_csv, nrows=10)
+
+        missing_columns = set()
+        wrong_dtype = set()
+        for qc in self.queries:
+            for col, dtype in qc.input_columns.items():
+                if col not in input_table.columns:
+                    missing_columns.add(col)
+                else:
+                    try:
+                        input_table[col].astype(TYPE_MAP[dtype])
+                    except Exception:
+                        wrong_dtype.add(col)
+
+        msg = f"CSV file {self.input_csv}: "
+        if missing_columns:
+            raise KeyError(msg + f"Missing required columns: {sorted(missing_columns)}")
+        if wrong_dtype:
+            raise TypeError(
+                msg
+                + f"Columns not convertable to right data type: {sorted(wrong_dtype)}"
+            )
+
+        return self
+
+    def build_downloader(self) -> Downloader:
+        return Downloader(
+            service_url=self.service_url,
+            input_csv=self.input_csv,
+            chunk_size=self.chunk_size,
+            backend=self.backend,
+            queries=self.queries,
+            max_concurrent_jobs=self.max_concurrent_jobs,
+            poll_interval=self.poll_interval,
+        )