timewise 0.5.3__py3-none-any.whl → 1.0.0a1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- timewise/__init__.py +1 -5
- timewise/backend/__init__.py +6 -0
- timewise/backend/base.py +36 -0
- timewise/backend/filesystem.py +80 -0
- timewise/chunking.py +50 -0
- timewise/cli.py +117 -11
- timewise/config.py +34 -0
- timewise/io/__init__.py +1 -0
- timewise/io/config.py +64 -0
- timewise/io/download.py +302 -0
- timewise/io/stable_tap.py +121 -0
- timewise/plot/__init__.py +3 -0
- timewise/plot/diagnostic.py +242 -0
- timewise/plot/lightcurve.py +112 -0
- timewise/plot/panstarrs.py +260 -0
- timewise/plot/sdss.py +109 -0
- timewise/process/__init__.py +2 -0
- timewise/process/config.py +30 -0
- timewise/process/interface.py +143 -0
- timewise/process/keys.py +10 -0
- timewise/process/stacking.py +310 -0
- timewise/process/template.yml +49 -0
- timewise/query/__init__.py +6 -0
- timewise/query/base.py +45 -0
- timewise/query/positional.py +40 -0
- timewise/tables/__init__.py +10 -0
- timewise/tables/allwise_p3as_mep.py +22 -0
- timewise/tables/base.py +9 -0
- timewise/tables/neowiser_p1bs_psd.py +22 -0
- timewise/types.py +30 -0
- timewise/util/backoff.py +12 -0
- timewise/util/csv_utils.py +12 -0
- timewise/util/error_threading.py +70 -0
- timewise/util/visits.py +33 -0
- timewise-1.0.0a1.dist-info/METADATA +205 -0
- timewise-1.0.0a1.dist-info/RECORD +39 -0
- {timewise-0.5.3.dist-info → timewise-1.0.0a1.dist-info}/WHEEL +1 -1
- timewise-1.0.0a1.dist-info/entry_points.txt +3 -0
- timewise/big_parent_sample.py +0 -106
- timewise/config_loader.py +0 -157
- timewise/general.py +0 -52
- timewise/parent_sample_base.py +0 -89
- timewise/point_source_utils.py +0 -68
- timewise/utils.py +0 -558
- timewise/wise_bigdata_desy_cluster.py +0 -1407
- timewise/wise_data_base.py +0 -2027
- timewise/wise_data_by_visit.py +0 -672
- timewise/wise_flux_conversion_correction.dat +0 -19
- timewise-0.5.3.dist-info/METADATA +0 -55
- timewise-0.5.3.dist-info/RECORD +0 -17
- timewise-0.5.3.dist-info/entry_points.txt +0 -3
- {timewise-0.5.3.dist-info → timewise-1.0.0a1.dist-info/licenses}/LICENSE +0 -0
timewise/io/download.py
ADDED
|
@@ -0,0 +1,302 @@
|
|
|
1
|
+
import time
|
|
2
|
+
import threading
|
|
3
|
+
import logging
|
|
4
|
+
from queue import Empty
|
|
5
|
+
from typing import Dict, Iterator, cast, Sequence
|
|
6
|
+
from itertools import product
|
|
7
|
+
from pathlib import Path
|
|
8
|
+
from datetime import datetime, timedelta
|
|
9
|
+
|
|
10
|
+
import pandas as pd
|
|
11
|
+
import numpy as np
|
|
12
|
+
from astropy.table import Table
|
|
13
|
+
from pyvo.utils.http import create_session
|
|
14
|
+
|
|
15
|
+
from .stable_tap import StableTAPService
|
|
16
|
+
from ..backend import BackendType
|
|
17
|
+
from ..types import TAPJobMeta, TaskID, TYPE_MAP
|
|
18
|
+
from ..query import QueryType
|
|
19
|
+
from ..query.base import Query
|
|
20
|
+
from ..util.error_threading import ErrorQueue, ExceptionSafeThread
|
|
21
|
+
from ..chunking import Chunker, Chunk
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
logger = logging.getLogger(__name__)
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
class Downloader:
|
|
28
|
+
    def __init__(
        self,
        service_url: str,
        input_csv: Path,
        chunk_size: int,
        backend: BackendType,
        queries: list[QueryType],
        max_concurrent_jobs: int,
        poll_interval: float,
    ):
        """
        Orchestrates asynchronous TAP downloads: splits ``input_csv`` into
        chunks, submits one TAP job per (chunk, query) pair from a submission
        thread, and polls/downloads results from a polling thread.

        :param service_url: base URL of the TAP service to query
        :param input_csv: CSV file with the input sources
        :param chunk_size: number of input rows per TAP job
        :param backend: storage backend for job metadata and result tables
        :param queries: queries to run against each chunk
        :param max_concurrent_jobs: cap on simultaneously running TAP jobs
        :param poll_interval: seconds between polling passes
        """
        self.backend = backend
        self.queries = queries
        self.input_csv = input_csv
        self.max_concurrent_jobs = max_concurrent_jobs
        self.poll_interval = poll_interval

        # ----------------------------
        # concurrency setup
        # ----------------------------
        # Shared state: ``jobs`` is read/written from both worker threads,
        # always under ``job_lock``.
        self.job_lock = threading.Lock()
        # (chunk_id, query_hash) -> job meta
        self.jobs: Dict[TaskID, TAPJobMeta] = {}

        # ``stop_event`` is created before the queue/threads that reference it
        self.stop_event = threading.Event()
        self.submit_queue: ErrorQueue = ErrorQueue(stop_event=self.stop_event)
        # both threads report failures through the same ErrorQueue so
        # ``run`` can re-raise them at the end
        self.submit_thread = ExceptionSafeThread(
            error_queue=self.submit_queue, target=self._submission_worker, daemon=True
        )
        self.poll_thread = ExceptionSafeThread(
            error_queue=self.submit_queue, target=self._polling_worker, daemon=True
        )
        # flags handed from run() -> submission thread -> polling thread to
        # signal orderly shutdown
        self.all_chunks_queued = False
        self.all_chunks_submitted = False

        # ----------------------------
        # TAP setup
        # ----------------------------
        self.session = create_session()
        self.service: StableTAPService = StableTAPService(
            service_url, session=self.session
        )

        self.chunker = Chunker(input_csv=input_csv, chunk_size=chunk_size)
|
|
72
|
+
|
|
73
|
+
# ----------------------------
|
|
74
|
+
# helpers
|
|
75
|
+
# ----------------------------
|
|
76
|
+
@staticmethod
|
|
77
|
+
def get_task_id(chunk: Chunk, query: Query) -> TaskID:
|
|
78
|
+
return TaskID(
|
|
79
|
+
namespace="download", key=f"chunk{chunk.chunk_id:04d}_{query.hash}"
|
|
80
|
+
)
|
|
81
|
+
|
|
82
|
+
def iter_tasks(self) -> Iterator[TaskID]:
|
|
83
|
+
for chunk in self.chunker:
|
|
84
|
+
for q in self.queries:
|
|
85
|
+
yield self.get_task_id(chunk, q)
|
|
86
|
+
|
|
87
|
+
def iter_tasks_per_chunk(self) -> Iterator[list[TaskID]]:
|
|
88
|
+
for chunk in self.chunker:
|
|
89
|
+
yield [self.get_task_id(chunk, q) for q in self.queries]
|
|
90
|
+
|
|
91
|
+
def load_job_meta(self):
|
|
92
|
+
backend = self.backend
|
|
93
|
+
for task in self.iter_tasks():
|
|
94
|
+
if backend.meta_exists(task):
|
|
95
|
+
logger.debug(f"found job metadata {task}")
|
|
96
|
+
if task not in self.jobs:
|
|
97
|
+
try:
|
|
98
|
+
jm = TAPJobMeta(**backend.load_meta(task))
|
|
99
|
+
logger.debug(f"loaded {jm}")
|
|
100
|
+
logger.debug(f"setting {task}")
|
|
101
|
+
with self.job_lock:
|
|
102
|
+
self.jobs[task] = jm
|
|
103
|
+
except Exception:
|
|
104
|
+
continue
|
|
105
|
+
|
|
106
|
+
# ----------------------------
|
|
107
|
+
# TAP submission and download
|
|
108
|
+
# ----------------------------
|
|
109
|
+
def get_chunk_data(self, chunk: Chunk) -> pd.DataFrame:
|
|
110
|
+
start = (
|
|
111
|
+
min(cast(Sequence[int], chunk.row_numbers)) + 1
|
|
112
|
+
) # plus one to always skip header line
|
|
113
|
+
nrows = (
|
|
114
|
+
max(cast(Sequence[int], chunk.row_numbers)) - start + 2
|
|
115
|
+
) # plus one: skip header, plus one:
|
|
116
|
+
|
|
117
|
+
columns = list(pd.read_csv(self.input_csv, nrows=0).columns)
|
|
118
|
+
return pd.read_csv(
|
|
119
|
+
filepath_or_buffer=self.input_csv,
|
|
120
|
+
skiprows=start,
|
|
121
|
+
nrows=nrows,
|
|
122
|
+
names=columns,
|
|
123
|
+
)
|
|
124
|
+
|
|
125
|
+
def submit_tap_job(self, query: Query, chunk: Chunk) -> TAPJobMeta:
|
|
126
|
+
adql = query.adql
|
|
127
|
+
chunk_df = self.get_chunk_data(chunk)
|
|
128
|
+
|
|
129
|
+
assert all(chunk_df.index.isin(chunk.indices)), (
|
|
130
|
+
"Some inputs loaded from wrong chunk!"
|
|
131
|
+
)
|
|
132
|
+
assert all(np.isin(chunk.indices, chunk_df.index)), (
|
|
133
|
+
f"Some indices are missing in chunk {chunk.chunk_id}!"
|
|
134
|
+
)
|
|
135
|
+
logger.debug(f"loaded {len(chunk_df)} objects")
|
|
136
|
+
|
|
137
|
+
try:
|
|
138
|
+
upload = Table(
|
|
139
|
+
{
|
|
140
|
+
key: np.array(chunk_df[key]).astype(TYPE_MAP[dtype])
|
|
141
|
+
for key, dtype in query.input_columns.items()
|
|
142
|
+
}
|
|
143
|
+
)
|
|
144
|
+
except KeyError as e:
|
|
145
|
+
print(chunk_df)
|
|
146
|
+
raise KeyError(e)
|
|
147
|
+
|
|
148
|
+
logger.debug(f"uploading {len(upload)} objects.")
|
|
149
|
+
job = self.service.submit_job(adql, uploads={query.upload_name: upload})
|
|
150
|
+
job.run()
|
|
151
|
+
logger.debug(job.url)
|
|
152
|
+
|
|
153
|
+
return TAPJobMeta(
|
|
154
|
+
url=job.url,
|
|
155
|
+
query=adql,
|
|
156
|
+
query_config=query.model_dump(),
|
|
157
|
+
input_length=len(chunk_df),
|
|
158
|
+
submitted=str(datetime.now()),
|
|
159
|
+
last_checked=str(datetime.now()),
|
|
160
|
+
status=job.phase,
|
|
161
|
+
completed_at="",
|
|
162
|
+
)
|
|
163
|
+
|
|
164
|
+
def check_job_status(self, job_meta: TAPJobMeta) -> str:
|
|
165
|
+
status = self.service.get_job_from_url(url=job_meta["url"]).phase
|
|
166
|
+
job_meta["last_checked"] = str(datetime.now())
|
|
167
|
+
return status
|
|
168
|
+
|
|
169
|
+
def download_job_result(self, job_meta: TAPJobMeta) -> Table:
|
|
170
|
+
logger.info(f"downloading {job_meta['url']}")
|
|
171
|
+
job = self.service.get_job_from_url(url=job_meta["url"])
|
|
172
|
+
job.wait()
|
|
173
|
+
return job.fetch_result().to_table()
|
|
174
|
+
|
|
175
|
+
# ----------------------------
|
|
176
|
+
# Submission thread
|
|
177
|
+
# ----------------------------
|
|
178
|
+
    def _submission_worker(self):
        """
        Thread target: take (chunk, query) pairs off ``submit_queue`` and
        start one TAP job for each, never exceeding ``max_concurrent_jobs``.

        Exits when the queue is drained after ``all_chunks_queued`` was set
        by ``run()``, or when ``stop_event`` is set.
        """
        while not self.stop_event.is_set():
            try:
                chunk, query = self.submit_queue.get(timeout=1.0)  # type: Chunk, Query
            except Empty:
                # queue is empty: if the producer (run()) already queued
                # everything, hand off to the polling thread and exit
                if self.all_chunks_queued:
                    self.all_chunks_submitted = True
                    break
                continue

            # Wait until we have capacity
            while not self.stop_event.is_set():
                with self.job_lock:
                    running = sum(
                        1
                        for j in self.jobs.values()
                        if j.get("status") in ("QUEUED", "EXECUTING", "RUNNING")
                    )
                if running < self.max_concurrent_jobs:
                    break
                time.sleep(1.0)

            # NOTE(review): if ``stop_event`` fires during the capacity wait,
            # the dequeued job is still submitted once below — confirm this
            # is the intended shutdown behavior.
            task = self.get_task_id(chunk, query)
            logger.info(f"submitting {task}")
            job_meta = self.submit_tap_job(query, chunk)
            # persist first, then publish into shared state under the lock
            self.backend.save_meta(task, job_meta)
            with self.job_lock:
                self.jobs[task] = job_meta

            self.submit_queue.task_done()
|
|
208
|
+
|
|
209
|
+
# ----------------------------
|
|
210
|
+
# Polling thread
|
|
211
|
+
# ----------------------------
|
|
212
|
+
    def _polling_worker(self):
        """
        Thread target: periodically poll all known jobs, download finished
        results, and persist status transitions to the backend.

        Exits once every job is terminal ("COMPLETED"/"ERROR"/"ABORTED") and
        the submission thread reported ``all_chunks_submitted``.
        """
        logger.debug("starting polling worker")
        backend = self.backend
        while not self.stop_event.is_set():
            # reload job infos (picks up metadata persisted by other runs
            # or by the submission thread)
            self.load_job_meta()

            # snapshot the dict under the lock, then iterate without it so
            # slow network calls below don't block the submission thread
            with self.job_lock:
                items = list(self.jobs.items())

            for task, meta in items:  # type: TaskID, TAPJobMeta
                # terminal states need no further polling
                if meta.get("status") in ("COMPLETED", "ERROR", "ABORTED"):
                    logger.debug(f"{task} was already {meta['status']}")
                    continue

                status = self.check_job_status(meta)
                if status == "COMPLETED":
                    # fetch the result, persist it, then mark the task done
                    logger.info(f"completed {task}")
                    payload_table = self.download_job_result(meta)
                    logger.debug(payload_table.columns)
                    backend.save_data(task, payload_table)
                    meta["status"] = "COMPLETED"
                    meta["completed_at"] = str(datetime.now())
                    backend.save_meta(task, meta)
                    backend.mark_done(task)
                    with self.job_lock:
                        self.jobs[task] = meta
                elif status in ("ERROR", "ABORTED"):
                    logger.warning(f"failed {task}: {status}")
                    meta["status"] = status
                    with self.job_lock:
                        self.jobs[task] = meta
                    backend.save_meta(task, meta)
                else:
                    # still pending/running: record the new phase; take the
                    # snapshot under the lock, persist outside it
                    with self.job_lock:
                        self.jobs[task]["status"] = status
                        snapshot = self.jobs[task]
                    backend.save_meta(task, snapshot)

            if self.all_chunks_submitted:
                # only exit when at least one job was tracked and all of
                # them reached a terminal state
                with self.job_lock:
                    all_done = (
                        all(
                            j.get("status") in ("COMPLETED", "ERROR", "ABORTED")
                            for j in self.jobs.values()
                        )
                        if len(self.jobs) > 0
                        else False
                    )
                if all_done:
                    logger.info("All tasks done! Exiting polling thread")
                    break

            logger.info(
                f"Next poll at {datetime.now() + timedelta(seconds=self.poll_interval)}s"
            )
            time.sleep(self.poll_interval)
|
|
269
|
+
|
|
270
|
+
# ----------------------------
|
|
271
|
+
# Main run loop
|
|
272
|
+
# ----------------------------
|
|
273
|
+
    def run(self):
        """
        Main entry point: resume persisted jobs, start the worker threads,
        enqueue every outstanding (chunk, query) pair, and block until all
        results are downloaded. Re-raises any worker-thread error at the end.
        """
        # load existing job metadata so finished/running jobs are resumed
        self.load_job_meta()

        # start threads
        self.submit_thread.start()
        self.poll_thread.start()

        # enqueue all chunks & queries
        backend = self.backend
        for chunk, q in product(self.chunker, self.queries):
            task = self.get_task_id(chunk, q)

            # skip if the download is done, or the job is queued
            if backend.is_done(task) or (task in self.jobs):
                continue

            self.submit_queue.put((chunk, q))
        self.all_chunks_queued = True
        # wait until all jobs are submitted
        self.submit_queue.join()
        # wait for the submit thread
        self.submit_thread.join()
        # the polling thread will exit once all results are downloaded
        self.poll_thread.join()
        # signal shutdown; both worker threads have normally exited by now,
        # so this only matters for stragglers
        self.stop_event.set()
        # if any thread exited with an error report it
        self.submit_queue.raise_errors()
        logger.info("Done running downloader!")
|
|
@@ -0,0 +1,121 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
import backoff
|
|
3
|
+
import pyvo as vo
|
|
4
|
+
from xml.etree import ElementTree
|
|
5
|
+
|
|
6
|
+
from timewise.util.backoff import backoff_hndlr
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
logger = logging.getLogger(__name__)
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
class StableAsyncTAPJob(vo.dal.AsyncTAPJob):
    """
    Implements backoff for call of phase which otherwise breaks the code if
    there are connection issues.
    Also stores the response of TAPQuery.submit() under self.submit_response
    for debugging.
    """

    def __init__(self, url, *, session=None, delete=True):
        super().__init__(url, session=session, delete=delete)
        # raw HTTP response of the submitting TAPQuery; set by ``create``
        self.submit_response = None

    @classmethod
    def create(
        cls,
        baseurl,
        query,
        *,
        language="ADQL",
        maxrec=None,
        uploads=None,
        session=None,
        **keywords,
    ):
        """
        creates a async tap job on the server under ``baseurl``
        Raises requests.HTTPError if TAPQuery.submit() fails.

        Parameters
        ----------
        baseurl : str
            the TAP baseurl
        query : str
            the query string
        language : str
            specifies the query language, default ADQL.
            useful for services which allow to use the backend query language.
        maxrec : int
            the maximum records to return. defaults to the service default
        uploads : dict
            a mapping from table names to objects containing a votable
        session : object
            optional session to use for network requests
        """
        tapquery = vo.dal.TAPQuery(
            baseurl,
            query,
            mode="async",
            language=language,
            maxrec=maxrec,
            uploads=uploads,
            session=session,
            **keywords,
        )
        response = tapquery.submit()

        # check if the response is valid
        response.raise_for_status()

        # check if the response contains an error from the ADQL engine
        root = ElementTree.fromstring(response.content)
        info = root.find(".//v:INFO", {"v": "http://www.ivoa.net/xml/VOTable/v1.3"})
        # BUGFIX: compare against None. An ElementTree Element with no child
        # elements is falsy (and truth-testing Elements is deprecated), so
        # ``if info`` would silently skip a text-only ERROR INFO element.
        if info is not None and (info.attrib.get("value") == "ERROR"):
            # ``info.text`` can legitimately be None; guard before strip()
            raise vo.dal.DALQueryError((info.text or "").strip())

        # create the job instance
        job = cls(response.url, session=session)
        job._client_set_maxrec = maxrec
        job.submit_response = response

        return job

    @property
    @backoff.on_exception(
        backoff.expo,
        (vo.dal.DALServiceError, AttributeError),
        max_tries=50,
        on_backoff=backoff_hndlr,
    )
    def phase(self):
        # retried with exponential backoff on flaky connections
        return super(StableAsyncTAPJob, self).phase
|
|
91
|
+
|
|
92
|
+
|
|
93
|
+
class StableTAPService(vo.dal.TAPService):
    """
    TAP service that creates jobs via :class:`StableAsyncTAPJob` and retries
    transient submission failures with exponential backoff.
    """

    @backoff.on_exception(
        backoff.expo,
        (vo.dal.DALServiceError, AttributeError, AssertionError),
        max_tries=5,
        on_backoff=backoff_hndlr,
    )
    def submit_job(
        self, query, *, language="ADQL", maxrec=None, uploads=None, **keywords
    ):
        """Create an async job for ``query`` and verify it exists server-side."""
        new_job = StableAsyncTAPJob.create(
            self.baseurl,
            query,
            language=language,
            maxrec=maxrec,
            uploads=uploads,
            session=self._session,
            **keywords,
        )
        logger.debug(new_job.url)
        # a retrievable phase proves the job was actually created; the
        # AssertionError is caught by the backoff decorator above
        assert new_job.phase
        return new_job

    def get_job_from_url(self, url):
        """Re-attach to an existing async job given its URL."""
        return StableAsyncTAPJob(url, session=self._session)
|
|
@@ -0,0 +1,242 @@
|
|
|
1
|
+
from typing import Literal, Dict, Any, Sequence, List, cast
|
|
2
|
+
from functools import partial
|
|
3
|
+
from pathlib import Path
|
|
4
|
+
import logging
|
|
5
|
+
|
|
6
|
+
import pandas as pd
|
|
7
|
+
import numpy as np
|
|
8
|
+
from numpy import typing as npt
|
|
9
|
+
from pydantic import BaseModel
|
|
10
|
+
import matplotlib.pyplot as plt
|
|
11
|
+
from matplotlib.lines import Line2D
|
|
12
|
+
from matplotlib.markers import MarkerStyle
|
|
13
|
+
from matplotlib.transforms import Affine2D
|
|
14
|
+
|
|
15
|
+
from timewise.plot import plot_lightcurve, plot_panstarrs_cutout, plot_sdss_cutout
|
|
16
|
+
from timewise.plot.lightcurve import BAND_PLOT_COLORS
|
|
17
|
+
from timewise.process import keys
|
|
18
|
+
from timewise.util.visits import get_visit_map
|
|
19
|
+
from timewise.config import TimewiseConfig
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
logger = logging.getLogger(__name__)
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
class DiagnosticPlotter(BaseModel):
    """
    Produces diagnostic figures combining a survey image cutout with the
    raw and stacked WISE lightcurves of a source.
    """

    # which survey provides the image cutout behind the datapoints
    cutout: Literal["sdss", "panstarrs"] = "panstarrs"
    # mapping band name -> matplotlib color; defaults to the shared palette
    band_colors: Dict[str, str] = BAND_PLOT_COLORS
    # column key of the luminosity quantity to plot
    lum_key: str = keys.FLUX_EXT
|
|
29
|
+
|
|
30
|
+
def plot_lightcurve(
|
|
31
|
+
self,
|
|
32
|
+
stacked_lightcurve: pd.DataFrame | None = None,
|
|
33
|
+
raw_lightcurve: pd.DataFrame | None = None,
|
|
34
|
+
ax: plt.Axes | None = None,
|
|
35
|
+
**kwargs,
|
|
36
|
+
):
|
|
37
|
+
return plot_lightcurve(
|
|
38
|
+
lum_key=self.lum_key,
|
|
39
|
+
stacked_lightcurve=stacked_lightcurve,
|
|
40
|
+
raw_lightcurve=raw_lightcurve,
|
|
41
|
+
ax=ax,
|
|
42
|
+
colors=self.band_colors,
|
|
43
|
+
**kwargs,
|
|
44
|
+
)
|
|
45
|
+
|
|
46
|
+
def plot_cutout(self, ra: float, dec: float, radius_arcsec: float, ax: plt.Axes):
|
|
47
|
+
if self.cutout == "sdss":
|
|
48
|
+
plot_cutout = plot_sdss_cutout
|
|
49
|
+
elif self.cutout == "panstarrs":
|
|
50
|
+
plot_cutout = partial(plot_panstarrs_cutout, plot_color_image=True)
|
|
51
|
+
else:
|
|
52
|
+
raise NotImplementedError # should never happen
|
|
53
|
+
return plot_cutout(ra=ra, dec=dec, arcsec=radius_arcsec, ax=ax)
|
|
54
|
+
|
|
55
|
+
    def make_plot(
        self,
        stacked_lightcurve: pd.DataFrame | None,
        raw_lightcurve: pd.DataFrame,
        labels: npt.ArrayLike,
        source_ra: float,
        source_dec: float,
        selected_indices: list[Any],
        highlight_radius: float | None = None,
    ) -> tuple[plt.Figure, Sequence[plt.Axes]]:
        """
        Build the two-panel diagnostic figure: survey cutout with the per-visit
        datapoint positions on top, lightcurve(s) below.

        :param stacked_lightcurve: stacked (per-visit) lightcurve, optional
        :param raw_lightcurve: single-exposure datapoints; must have ``ra``/
            ``dec`` columns in degrees
        :param labels: per-datapoint cluster label; -1 is drawn grey
            (NOTE(review): if a plain python list is passed, ``labels ==
            i_label`` below is a scalar comparison and masks out every point —
            callers should pass a numpy array; confirm)
        :param source_ra: cutout centre RA in degrees
        :param source_dec: cutout centre Dec in degrees
        :param selected_indices: index values of datapoints kept by selection
        :param highlight_radius: optional radius [arcsec] of a green circle
            drawn around the centre
        :returns: the figure and its two axes
        """
        fig, axs = plt.subplots(
            nrows=2, gridspec_kw={"height_ratios": [3, 2]}, figsize=(5, 8)
        )

        self.plot_cutout(ra=source_ra, dec=source_dec, ax=axs[0], radius_arcsec=20)

        # split datapoints into selected vs ignored and plot the ignored
        # ones greyed out underneath
        selected_mask = raw_lightcurve.index.isin(selected_indices)
        plot_lightcurve(
            raw_lightcurve=raw_lightcurve[~selected_mask],
            lum_key=self.lum_key,
            ax=axs[-1],
            save=False,
            colors={"w1": "gray", "w2": "lightgray"},
            add_to_label=" ignored",
        )
        self.plot_lightcurve(
            stacked_lightcurve=stacked_lightcurve,
            raw_lightcurve=raw_lightcurve[selected_mask],
            ax=axs[-1],
            save=False,
        )

        # set markers for clusters: filled markers, then digit glyphs, then
        # the same set rotated 180 degrees to double the pool
        markers_strings = list(Line2D.filled_markers) + [
            "$1$",
            "$2$",
            "$3$",
            "$4$",
            "$5$",
            "$6$",
            "$7$",
            "$8$",
            "$9$",
        ]
        markers_straight = [MarkerStyle(im) for im in markers_strings]
        rot = Affine2D().rotate_deg(180)
        markers_rotated = [MarkerStyle(im, transform=rot) for im in markers_strings]
        markers = markers_straight + markers_rotated

        # calculate ra and dec relative to center of cutout, in arcsec
        ra = (raw_lightcurve.ra - source_ra) * 3600
        dec = (raw_lightcurve.dec - source_dec) * 3600

        # get visit map
        visit_map = get_visit_map(raw_lightcurve)

        # for each visit plot the datapoints on the cutout
        # NOTE(review): ``markers[visit]`` indexes by visit number — raises
        # IndexError if there are more visits than markers; confirm bound.
        for visit in np.unique(visit_map):
            m = visit_map == visit
            label = str(visit)
            # empty plot only to register a black legend entry per visit
            axs[0].plot(
                [],
                [],
                marker=markers[visit],
                label=label,
                mec="k",
                mew=1,
                mfc="none",
                ls="",
            )

            # selected points get a black edge and sit on top (zorder 1)
            for im, mec, zorder in zip(
                [selected_mask, ~selected_mask], ["k", "none"], [1, 0]
            ):
                mask = m & im

                for i_label in np.unique(labels):
                    label_mask = labels == i_label
                    final_mask = mask & label_mask
                    datapoints_label = raw_lightcurve[final_mask]
                    color = f"C{i_label}" if i_label != -1 else "grey"

                    if ("sigra" in datapoints_label.columns) and (
                        "sigdec" in datapoints_label.columns
                    ):
                        has_sig = (
                            ~datapoints_label.sigra.isna()
                            & ~datapoints_label.sigdec.isna()
                        )
                        _ra = ra[final_mask]
                        _dec = dec[final_mask]

                        # NOTE(review): positions are in arcsec but the
                        # errors are divided by 3600 — this assumes
                        # sigra/sigdec are in degrees; if they are arcsec
                        # (as in WISE catalogs) the bars are 3600x too
                        # small. TODO confirm units.
                        axs[0].errorbar(
                            _ra[has_sig],
                            _dec[has_sig],
                            xerr=datapoints_label.sigra[has_sig] / 3600,
                            yerr=datapoints_label.sigdec[has_sig] / 3600,
                            marker=markers[visit],
                            ls="",
                            color=color,
                            zorder=zorder,
                            ms=10,
                            mec=mec,
                            mew=0.1,
                        )
                        axs[0].scatter(
                            _ra[~has_sig],
                            _dec[~has_sig],
                            marker=markers[visit],
                            color=color,
                            zorder=zorder,
                            edgecolors=mec,
                            linewidths=0.1,
                        )
                    else:
                        axs[0].scatter(
                            ra[final_mask],
                            dec[final_mask],
                            marker=markers[visit],
                            color=color,
                            zorder=zorder,
                            edgecolors=mec,
                            linewidths=0.1,
                        )

        if highlight_radius:
            circle = plt.Circle(
                (0, 0),
                highlight_radius,
                color="g",
                fill=False,
                ls="-",
                lw=3,
                zorder=0,
            )
            axs[0].add_artist(circle)

        # formatting: move the cutout's title into the legend header
        title = axs[0].get_title()
        axs[-1].set_ylabel("Apparent Vega Magnitude")
        axs[-1].grid(ls=":", alpha=0.5)
        axs[0].set_title("")
        axs[0].legend(
            ncol=5,
            bbox_to_anchor=(0, 1, 1, 0),
            loc="lower left",
            mode="expand",
            title=title,
        )
        axs[0].set_aspect(1, adjustable="box")

        return fig, axs
|
|
208
|
+
|
|
209
|
+
|
|
210
|
+
def make_plot(
    config_path: Path,
    cutout: Literal["sdss", "panstarrs"],
    indices: List[int],
    output_directory: Path,
):
    """
    Create and save one diagnostic plot per requested source index.

    :param config_path: path to the timewise YAML configuration
    :param cutout: which survey to use for the image cutout
    :param indices: source IDs (values of the original-ID column) to plot
    :param output_directory: directory the per-source PDFs are written to
    """
    cfg = TimewiseConfig.from_yaml(config_path)
    ampel_interface = cfg.build_ampel_interface()
    input_data = pd.read_csv(cfg.download.input_csv).set_index(
        ampel_interface.orig_id_key
    )
    plotter = DiagnosticPlotter(cutout=cutout)
    for index in indices:
        stacked_lightcurve = ampel_interface.extract_stacked_lightcurve(stock_id=index)
        raw_lightcurve = ampel_interface.extract_datapoints(stock_id=index)
        selected_dp_ids = ampel_interface.extract_selected_datapoint_ids(stock_id=index)
        # BUGFIX: use an ndarray, not ``[0] * len(...)`` — the plotter
        # evaluates ``labels == i_label`` elementwise, and a plain python
        # list reduces that to a scalar False, masking out every datapoint
        labels = np.zeros(len(raw_lightcurve), dtype=int)
        source = input_data.loc[index]
        ra: float = cast(float, source.ra)
        dec: float = cast(float, source.dec)

        fig, axs = plotter.make_plot(
            stacked_lightcurve=stacked_lightcurve,
            raw_lightcurve=raw_lightcurve,
            labels=labels,
            source_ra=ra,
            source_dec=dec,
            selected_indices=selected_dp_ids,
        )
        fn = output_directory / f"{index}.pdf"
        logger.info(f"Saving plot to {fn}")
        fig.savefig(fn)
        # close this specific figure so batch runs don't accumulate open
        # figures (bare plt.close() only closes the *current* figure)
        plt.close(fig)
|