timewise 0.5.4__py3-none-any.whl → 1.0.0a1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (52)
  1. timewise/__init__.py +1 -5
  2. timewise/backend/__init__.py +6 -0
  3. timewise/backend/base.py +36 -0
  4. timewise/backend/filesystem.py +80 -0
  5. timewise/chunking.py +50 -0
  6. timewise/cli.py +117 -11
  7. timewise/config.py +34 -0
  8. timewise/io/__init__.py +1 -0
  9. timewise/io/config.py +64 -0
  10. timewise/io/download.py +302 -0
  11. timewise/io/stable_tap.py +121 -0
  12. timewise/plot/__init__.py +3 -0
  13. timewise/plot/diagnostic.py +242 -0
  14. timewise/plot/lightcurve.py +112 -0
  15. timewise/plot/panstarrs.py +260 -0
  16. timewise/plot/sdss.py +109 -0
  17. timewise/process/__init__.py +2 -0
  18. timewise/process/config.py +30 -0
  19. timewise/process/interface.py +143 -0
  20. timewise/process/keys.py +10 -0
  21. timewise/process/stacking.py +310 -0
  22. timewise/process/template.yml +49 -0
  23. timewise/query/__init__.py +6 -0
  24. timewise/query/base.py +45 -0
  25. timewise/query/positional.py +40 -0
  26. timewise/tables/__init__.py +10 -0
  27. timewise/tables/allwise_p3as_mep.py +22 -0
  28. timewise/tables/base.py +9 -0
  29. timewise/tables/neowiser_p1bs_psd.py +22 -0
  30. timewise/types.py +30 -0
  31. timewise/util/backoff.py +12 -0
  32. timewise/util/csv_utils.py +12 -0
  33. timewise/util/error_threading.py +70 -0
  34. timewise/util/visits.py +33 -0
  35. timewise-1.0.0a1.dist-info/METADATA +205 -0
  36. timewise-1.0.0a1.dist-info/RECORD +39 -0
  37. timewise-1.0.0a1.dist-info/entry_points.txt +3 -0
  38. timewise/big_parent_sample.py +0 -106
  39. timewise/config_loader.py +0 -157
  40. timewise/general.py +0 -52
  41. timewise/parent_sample_base.py +0 -89
  42. timewise/point_source_utils.py +0 -68
  43. timewise/utils.py +0 -558
  44. timewise/wise_bigdata_desy_cluster.py +0 -1407
  45. timewise/wise_data_base.py +0 -2027
  46. timewise/wise_data_by_visit.py +0 -672
  47. timewise/wise_flux_conversion_correction.dat +0 -19
  48. timewise-0.5.4.dist-info/METADATA +0 -56
  49. timewise-0.5.4.dist-info/RECORD +0 -17
  50. timewise-0.5.4.dist-info/entry_points.txt +0 -3
  51. {timewise-0.5.4.dist-info → timewise-1.0.0a1.dist-info}/WHEEL +0 -0
  52. {timewise-0.5.4.dist-info → timewise-1.0.0a1.dist-info}/licenses/LICENSE +0 -0
timewise/__init__.py CHANGED
@@ -1,5 +1 @@
- from timewise.wise_data_by_visit import WiseDataByVisit
- from timewise.wise_bigdata_desy_cluster import WISEDataDESYCluster
- from timewise.parent_sample_base import ParentSampleBase
-
- __version__ = "0.5.4"
+ __version__ = "1.0.0a1"
timewise/backend/__init__.py ADDED
@@ -0,0 +1,6 @@
+ from typing import Union
+
+ from .base import Backend
+ from .filesystem import FileSystemBackend
+
+ BackendType = Union[FileSystemBackend]
timewise/backend/base.py ADDED
@@ -0,0 +1,36 @@
+ import abc
+ from typing import Any
+ from pydantic import BaseModel
+ from astropy.table import Table
+ from ..types import TaskID
+
+
+ class Backend(abc.ABC, BaseModel):
+     type: str
+     base_path: Any
+     """
+     Abstract persistence backend for jobs, results, and markers.
+     Works with generic TaskIDs so it can be reused across Downloader/Processor.
+     """
+
+     # --- metadata ---
+     @abc.abstractmethod
+     def meta_exists(self, task: TaskID) -> bool: ...
+     @abc.abstractmethod
+     def save_meta(self, task: TaskID, meta: dict[str, Any]) -> None: ...
+     @abc.abstractmethod
+     def load_meta(self, task: TaskID) -> dict[str, Any] | None: ...
+
+     # --- Markers ---
+     @abc.abstractmethod
+     def mark_done(self, task: TaskID) -> None: ...
+     @abc.abstractmethod
+     def is_done(self, task: TaskID) -> bool: ...
+
+     # --- Data ---
+     @abc.abstractmethod
+     def save_data(self, task: TaskID, content: Table) -> None: ...
+     @abc.abstractmethod
+     def load_data(self, task: TaskID) -> Table: ...
+     @abc.abstractmethod
+     def data_exists(self, task: TaskID) -> bool: ...
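For orientation, a minimal sketch of what a concrete implementation of this interface looks like, using a hypothetical in-memory backend that is not part of the package; the shipped implementation is FileSystemBackend in the next file, and the discriminator value "memory" is purely illustrative.

# Hypothetical in-memory Backend subclass, for illustration only.
from typing import Any
from astropy.table import Table
from pydantic import PrivateAttr
from timewise.backend import Backend
from timewise.types import TaskID

class InMemoryBackend(Backend):
    type: str = "memory"            # illustrative discriminator value
    base_path: Any = None           # unused by an in-memory store
    _store: dict = PrivateAttr(default_factory=dict)

    def meta_exists(self, task: TaskID) -> bool:
        return ("meta", task) in self._store

    def save_meta(self, task: TaskID, meta: dict[str, Any]) -> None:
        self._store[("meta", task)] = meta

    def load_meta(self, task: TaskID) -> dict[str, Any] | None:
        return self._store.get(("meta", task))

    def mark_done(self, task: TaskID) -> None:
        self._store[("done", task)] = True

    def is_done(self, task: TaskID) -> bool:
        return ("done", task) in self._store

    def save_data(self, task: TaskID, content: Table) -> None:
        self._store[("data", task)] = content

    def load_data(self, task: TaskID) -> Table:
        return self._store[("data", task)]

    def data_exists(self, task: TaskID) -> bool:
        return ("data", task) in self._store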
timewise/backend/filesystem.py ADDED
@@ -0,0 +1,80 @@
+ import json
+ import logging
+ from pathlib import Path
+ from typing import Any, Literal
+ from astropy.table import Table
+
+ from .base import Backend
+ from ..types import TaskID
+
+
+ logger = logging.getLogger(__name__)
+
+
+ class FileSystemBackend(Backend):
+     type: Literal["filesystem"] = "filesystem"
+     base_path: Path
+
+     # ----------------------------
+     # Helpers for paths
+     # ----------------------------
+     def _meta_path(self, task: TaskID) -> Path:
+         return self.base_path / f"{task}.meta.json"
+
+     def _marker_path(self, task: TaskID) -> Path:
+         return self.base_path / f"{task}.ok"
+
+     def _data_path(self, task: TaskID) -> Path:
+         return self.base_path / f"{task}.fits"
+
+     # ----------------------------
+     # Metadata
+     # ----------------------------
+     def save_meta(self, task: TaskID, meta: dict[str, Any]) -> None:
+         path = self._meta_path(task)
+         tmp = path.with_suffix(".tmp")
+         tmp.parent.mkdir(parents=True, exist_ok=True)
+         logger.debug(f"writing {path}")
+         tmp.write_text(json.dumps(meta, indent=2))
+         tmp.replace(path)
+
+     def load_meta(self, task: TaskID) -> dict[str, Any] | None:
+         path = self._meta_path(task)
+         if not path.exists():
+             return None
+         return json.loads(path.read_text())
+
+     def meta_exists(self, task: TaskID) -> bool:
+         return self._meta_path(task).exists()
+
+     # ----------------------------
+     # Markers
+     # ----------------------------
+     def mark_done(self, task: TaskID) -> None:
+         mp = self._marker_path(task)
+         mp.parent.mkdir(parents=True, exist_ok=True)
+         logger.debug(f"writing {mp}")
+         mp.write_text("done")
+
+     def is_done(self, task: TaskID) -> bool:
+         return self._marker_path(task).exists()
+
+     # ----------------------------
+     # Data
+     # ----------------------------
+     def save_data(self, task: TaskID, content: Table) -> None:
+         path = self._data_path(task)
+         tmp = path.with_suffix(".tmp")
+         tmp.parent.mkdir(parents=True, exist_ok=True)
+         logger.debug(f"writing {path}")
+         content.write(tmp, format="fits")
+         tmp.replace(path)
+
+     def load_data(self, task: TaskID) -> Table:
+         path = self._data_path(task)
+         if not path.exists():
+             raise FileNotFoundError(path)
+         return Table.read(path, format="fits")
+
+     def data_exists(self, task: TaskID) -> bool:
+         return self._data_path(task).exists()
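A minimal usage sketch of the new FileSystemBackend, assuming a plain string is an acceptable task identifier (TaskID is defined in timewise/types.py, not shown here); the task name, base path, and table columns are illustrative only.

# Sketch: persisting and reloading a result table through the backend.
from pathlib import Path
from astropy.table import Table
from timewise.backend import FileSystemBackend

backend = FileSystemBackend(base_path=Path("/tmp/timewise_data"))  # hypothetical path
task = "chunk0_query0"                                             # hypothetical task ID

if not backend.is_done(task):
    lightcurves = Table({"mjd": [58000.0, 58001.0], "w1_flux": [1.2, 1.3]})
    backend.save_data(task, lightcurves)           # written via a .tmp file, then renamed
    backend.save_meta(task, {"n_rows": len(lightcurves)})
    backend.mark_done(task)                        # creates the <task>.ok marker

table = backend.load_data(task)                    # reads <base_path>/chunk0_query0.fits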
timewise/chunking.py ADDED
@@ -0,0 +1,50 @@
+ from typing import Iterator
+ from pathlib import Path
+ import numpy as np
+ from numpy import typing as npt
+ import pandas as pd
+ import logging
+
+ logger = logging.getLogger(__name__)
+
+
+ class Chunk:
+     def __init__(
+         self, chunk_id: int, indices: npt.ArrayLike, row_indices: npt.ArrayLike
+     ):
+         self.chunk_id = chunk_id
+         self.indices = indices
+         self.row_numbers = row_indices
+
+
+ class Chunker:
+     def __init__(self, input_csv: Path, chunk_size: int):
+         self.input_csv = input_csv
+         self.chunk_size = chunk_size
+         self._n_rows = self._count_rows()
+         logger.debug(f"found {self._n_rows} rows in {self.input_csv}")
+
+     def _count_rows(self) -> int:
+         chunk = 1024 * 1024  # Process 1 MB at a time.
+         f = np.memmap(self.input_csv)
+         num_newlines = sum(
+             np.sum(f[i : i + chunk] == ord("\n")) for i in range(0, len(f), chunk)
+         )
+         del f
+         return num_newlines - 1  # one header row
+
+     def __len__(self) -> int:
+         return int(np.ceil(self._n_rows / self.chunk_size))
+
+     def __iter__(self) -> Iterator[Chunk]:
+         for chunk_id in range(len(self)):
+             yield self.get_chunk(chunk_id)
+
+     def get_chunk(self, chunk_id: int) -> Chunk:
+         if chunk_id >= len(self):
+             raise IndexError(f"Invalid chunk_id {chunk_id}")
+         start = chunk_id * self.chunk_size
+         stop = min(start + self.chunk_size, self._n_rows)
+         indices = pd.read_csv(self.input_csv, skiprows=start, nrows=stop - start).index
+         logger.debug(f"chunk {chunk_id}: from {start} to {stop}")
+         return Chunk(chunk_id, indices, np.arange(start=start, stop=stop))
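A short sketch of how the new Chunker splits an input catalogue, assuming a CSV "parent_sample.csv" with one header row (the file name is illustrative; 500_000 matches the DownloadConfig default chunk size further down).

# Sketch: iterating over the input catalogue in chunks.
from pathlib import Path
from timewise.chunking import Chunker

chunker = Chunker(input_csv=Path("parent_sample.csv"), chunk_size=500_000)
print(f"{len(chunker)} chunks")

for chunk in chunker:
    # chunk.indices holds the pandas index of the rows read for this chunk,
    # chunk.row_numbers the absolute row numbers within the CSV.
    print(chunk.chunk_id, len(chunk.row_numbers))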
timewise/cli.py CHANGED
@@ -1,18 +1,124 @@
  import logging
- from argparse import ArgumentParser
+ from typing import Annotated, Literal, List
+ from pathlib import Path

- from timewise.general import main_logger
- from timewise.config_loader import TimewiseConfigLoader
+ import typer

+ from .config import TimewiseConfig
+ from .plot.diagnostic import make_plot

- logger = logging.getLogger(__name__)
+ from rich.logging import RichHandler


- def timewise_cli():
-     parser = ArgumentParser()
-     parser.add_argument("config", type=str, help="Path to timewise config file")
-     parser.add_argument("-l", "--logging-level", default="INFO", type=str)
-     cfg = vars(parser.parse_args())
+ app = typer.Typer(help="Timewsie CLI")

-     main_logger.setLevel(cfg.pop("logging_level"))
-     TimewiseConfigLoader.run_yaml(cfg["config"])
+ config_path_type = Annotated[
+     Path, typer.Argument(help="Pipeline config file (YAML/JSON)")
+ ]
+ ampel_config_path_type = Annotated[Path, typer.Argument(help="AMPEL config YAML")]
+
+
+ # --- Global callback (runs before every command) ---
+ @app.callback()
+ def main(
+     ctx: typer.Context,
+     log_level: str = typer.Option(
+         "INFO",
+         "--log-level",
+         "-l",
+         help="Logging level (DEBUG, INFO, WARNING, ERROR, CRITICAL)",
+         case_sensitive=False,
+     ),
+ ):
+     """Global options for all Timewise commands."""
+     # Normalize log level
+     level = getattr(logging, log_level.upper(), None)
+     if not isinstance(level, int):
+         raise typer.BadParameter(f"Invalid log level: {log_level}")
+
+     # Rich logging
+     logging.basicConfig(
+         handlers=[RichHandler(rich_tracebacks=True, markup=True)],
+     )
+     logging.getLogger("timewise").setLevel(level)
+
+     # Store log level in context for subcommands
+     ctx.obj = {"log_level": level}
+
+
+ @app.command(help="Download WISE photometry from IRSA")
+ def download(
+     config_path: config_path_type,
+ ):
+     TimewiseConfig.from_yaml(config_path).download.build_downloader().run()
+
+
+ @app.command(help="Prepares the AMPEL job file so AMPEL can be run manually")
+ def prepare_ampel(
+     config_path: config_path_type,
+ ):
+     cfg = TimewiseConfig.from_yaml(config_path)
+     ampel_interface = cfg.build_ampel_interface()
+     p = ampel_interface.prepare(config_path)
+     typer.echo(f"AMPEL job file: {p}")
+
+
+ @app.command(help="Processes the lightcurves using AMPEL")
+ def process(
+     config_path: config_path_type,
+     ampel_config_path: ampel_config_path_type,
+ ):
+     cfg = TimewiseConfig.from_yaml(config_path)
+     ampel_interface = cfg.build_ampel_interface()
+     ampel_interface.run(config_path, ampel_config_path)
+
+
+ @app.command(help="Write stacked lightcurves to disk")
+ def export(
+     config_path: config_path_type,
+     output_directory: Annotated[Path, typer.Argument(help="output directory")],
+     indices: Annotated[
+         list[int] | None,
+         typer.Option(
+             "-i", "--indices", help="Indices to export, defaults to all indices"
+         ),
+     ] = None,
+ ):
+     TimewiseConfig.from_yaml(config_path).build_ampel_interface().export_many(
+         output_directory, indices
+     )
+
+
+ @app.command(help="Run download, process and export")
+ def run_chain(
+     config_path: config_path_type,
+     ampel_config_path: ampel_config_path_type,
+     output_directory: Annotated[Path, typer.Argument(help="output directory")],
+     indices: Annotated[
+         list[int] | None,
+         typer.Option(
+             "-i", "--indices", help="Indices to export, defaults to all indices"
+         ),
+     ] = None,
+ ):
+     download(config_path)
+     process(config_path, ampel_config_path)
+     export(config_path, output_directory, indices)
+
+
+ @app.command(help="Make diagnostic plots")
+ def plot(
+     config_path: config_path_type,
+     indices: Annotated[
+         List[int],
+         typer.Argument(help="Identifiers of the objects for which to create plots"),
+     ],
+     output_directory: Annotated[Path, typer.Argument(help="Output directory")],
+     cutout: Annotated[
+         Literal["sdss", "panstarrs"],
+         typer.Option("-c", "--cutout", help="Which survey to use for cutouts"),
+     ] = "panstarrs",
+ ):
+     make_plot(
+         config_path, indices=indices, cutout=cutout, output_directory=output_directory
+     )
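A sketch of invoking the new Typer app programmatically (for example in a test), assuming a pipeline config file "timewise.yml" exists; the file name is an assumption, and the global --log-level option precedes the subcommand because it lives on the app callback.

# Sketch: driving the CLI through typer's test runner.
from typer.testing import CliRunner
from timewise.cli import app

runner = CliRunner()
result = runner.invoke(app, ["--log-level", "DEBUG", "download", "timewise.yml"])
print(result.exit_code, result.output)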
timewise/config.py ADDED
@@ -0,0 +1,34 @@
+ from pathlib import Path
+ import yaml
+
+ import numpy as np
+ from pydantic import BaseModel, model_validator
+
+ from .io import DownloadConfig
+ from .process import AmpelConfig, AmpelInterface
+
+
+ class TimewiseConfig(BaseModel):
+     download: DownloadConfig
+     ampel: AmpelConfig
+
+     @classmethod
+     def from_yaml(cls, path: str | Path):
+         path = Path(path)
+         assert path.exists(), f"{path} not found!"
+         with path.open("r") as f:
+             config_dict = yaml.safe_load(f)
+         return cls.model_validate(config_dict)
+
+     @model_validator(mode="after")
+     def validate_query_original_id_key(self) -> "TimewiseConfig":
+         unique_keys = np.unique([q.original_id_key for q in self.download.queries])
+         assert len(unique_keys) == 1, (
+             "Can not use different 'original_id_key' in queries!"
+         )
+         return self
+
+     def build_ampel_interface(self) -> AmpelInterface:
+         return self.ampel.build_interface(
+             self.download.queries[0].original_id_key, self.download.input_csv
+         )
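A sketch of the new configuration entry point, mirroring what the CLI commands above do; "timewise.yml" is an assumed file whose top-level keys match the TimewiseConfig fields (a `download:` section and an `ampel:` section).

# Sketch: loading the pipeline config and building the worker objects from it.
from timewise.config import TimewiseConfig

cfg = TimewiseConfig.from_yaml("timewise.yml")
downloader = cfg.download.build_downloader()   # DownloadConfig -> Downloader
ampel = cfg.build_ampel_interface()            # uses queries[0].original_id_key
downloader.run()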
timewise/io/__init__.py ADDED
@@ -0,0 +1 @@
+ from .config import DownloadConfig
timewise/io/config.py ADDED
@@ -0,0 +1,64 @@
+ from pathlib import Path
+ from typing import List
+ import pandas as pd
+ from pydantic import BaseModel, Field, model_validator
+
+ from .download import Downloader
+ from ..query import QueryType
+ from ..backend import BackendType
+ from ..types import TYPE_MAP
+
+
+ class DownloadConfig(BaseModel):
+     input_csv: Path
+     chunk_size: int = 500_000
+     max_concurrent_jobs: int = 4
+     poll_interval: float = 10.0
+     queries: List[QueryType] = Field(..., description="One or more queries per chunk")
+     backend: BackendType = Field(..., discriminator="type")
+
+     service_url: str = "https://irsa.ipac.caltech.edu/TAP"
+
+     @model_validator(mode="after")
+     def validate_input_csv_columns(self) -> "DownloadConfig":
+         """Ensure that the input CSV contains all columns required by queries."""
+         # only validate if the CSV actually exists
+         if not self.input_csv.exists():
+             raise ValueError(f"CSV file does not exist: {self.input_csv}")
+
+         # read just the header and first 10 lines
+         input_table = pd.read_csv(self.input_csv, nrows=10)
+
+         missing_columns = set()
+         wrong_dtype = set()
+         for qc in self.queries:
+             for col, dtype in qc.input_columns.items():
+                 if col not in input_table.columns:
+                     missing_columns.add(col)
+                 else:
+                     try:
+                         input_table[col].astype(TYPE_MAP[dtype])
+                     except Exception:
+                         wrong_dtype.add(col)
+
+         msg = f"CSV file {self.input_csv}: "
+         if missing_columns:
+             raise KeyError(msg + f"Missing required columns: {sorted(missing_columns)}")
+         if wrong_dtype:
+             raise TypeError(
+                 msg
+                 + f"Columns not convertable to right data type: {sorted(wrong_dtype)}"
+             )
+
+         return self
+
+     def build_downloader(self) -> Downloader:
+         return Downloader(
+             service_url=self.service_url,
+             input_csv=self.input_csv,
+             chunk_size=self.chunk_size,
+             backend=self.backend,
+             queries=self.queries,
+             max_concurrent_jobs=self.max_concurrent_jobs,
+             poll_interval=self.poll_interval,
+         )
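A standalone illustration of the column check that validate_input_csv_columns performs on the input catalogue; the "ra"/"dec" requirements and the file name below are made up for the example, since the real requirements come from each query's `input_columns` mapping and TYPE_MAP.

# Sketch: the header-and-dtype check applied to the first rows of the CSV.
import pandas as pd

required = {"ra": float, "dec": float}           # illustrative only
head = pd.read_csv("parent_sample.csv", nrows=10)

missing = [c for c in required if c not in head.columns]
if missing:
    raise KeyError(f"Missing required columns: {sorted(missing)}")
for col, dtype in required.items():
    head[col].astype(dtype)                      # raises if values cannot be cast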