PyPI - tesorotools-python - Versions diffs - 0.0.28__tar.gz → 0.0.30__tar.gz - Mend

tesorotools-python 0.0.28 (tar.gz) → 0.0.30 (tar.gz)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (65) hide show

{tesorotools_python-0.0.28 → tesorotools_python-0.0.30}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: tesorotools-python
-Version: 0.0.28
+Version: 0.0.30
 Requires-Python: >=3.13
 Requires-Dist: babel>=2.17
 Requires-Dist: eikon>=1.1
@@ -16,3 +16,5 @@ Requires-Dist: pyyaml>=6.0
 Requires-Dist: sqlalchemy>=2.0
 Provides-Extra: bde
 Requires-Dist: requests>=2.31; extra == 'bde'
+Provides-Extra: ecb
+Requires-Dist: requests>=2.31; extra == 'ecb'

{tesorotools_python-0.0.28 → tesorotools_python-0.0.30}/pyproject.toml RENAMED Viewed

@@ -1,7 +1,7 @@
 [project]
 name = "tesorotools-python"
 requires-python = ">=3.13"
-version = "0.0.28"
+version = "0.0.30"
 dependencies = [
     # database and ORM
     "psycopg[binary]>=3.1",
@@ -28,6 +28,7 @@ dependencies = [
 [project.optional-dependencies]
 bde = ["requests>=2.31"]
+ecb = ["requests>=2.31"]
 [dependency-groups]
 dev = [

{tesorotools_python-0.0.28 → tesorotools_python-0.0.30}/src/tesorotools/artists/line_plot.py RENAMED Viewed

@@ -96,6 +96,100 @@ def auto_ncol(ax: Axes, labels: list[str]) -> int:
     return min(ncol, len(labels))
+def annotate_last_values(
+    ax: Axes,
+    plot_data: pd.DataFrame,
+    *,
+    decimals: int,
+    units: str,
+    series_styles: dict[str, dict[str, Any]] | None = None,
+    annotate_color: str | None = None,
+) -> None:
+    """Label the last non-NaN value of each column on the right.
+    Colour priority (highest first): ``annotate_color``
+    (global override), ``series_styles[col]['color']``,
+    the Matplotlib line colour.
+    Labels are packed vertically in display space so that
+    series ending at nearly the same value do not overlap.
+    The x-axis limit is extended by the widest label so
+    the text is not clipped by the axes frame.
+    """
+    fig = ax.get_figure()
+    if fig is None:
+        return
+    styles = series_styles or {}
+    lines_by_label = {str(line.get_label()): line for line in ax.lines}
+    entries: list[tuple[Any, float, str, Any]] = []
+    for col in plot_data.columns:
+        col_series: pd.Series[float] = plot_data[col].dropna()
+        if col_series.empty:
+            continue
+        last_date = col_series.index[-1]
+        last_val = float(col_series.iloc[-1])
+        if annotate_color is not None:
+            color: Any = annotate_color
+        else:
+            override = styles.get(col, {}).get("color")
+            if override is not None:
+                color = override
+            else:
+                line = lines_by_label.get(col)
+                color = line.get_color() if line is not None else "black"
+        entries.append((last_date, last_val, col, color))
+    if not entries:
+        return
+    fig.canvas.draw()  # type: ignore[reportUnknownMemberType]
+    renderer = fig.canvas.get_renderer()  # type: ignore[reportUnknownMemberType]
+    trans = ax.transData
+    sample = ax.text(  # type: ignore[reportUnknownMemberType]
+        0, 0, "0"
+    )
+    text_height = (
+        sample.get_window_extent(renderer).height  # type: ignore[reportUnknownArgumentType]
+        * 1.15
+    )
+    sample.remove()
+    entries.sort(key=lambda e: e[1])
+    placements: list[float] = []
+    prev_y = -float("inf")
+    for _, val, _, _ in entries:
+        y_display: float = trans.transform((0, val))[1]  # type: ignore[reportUnknownArgumentType]
+        if y_display - prev_y < text_height:
+            y_display = prev_y + text_height
+        placements.append(y_display)
+        prev_y = y_display
+    max_label_width = 0.0
+    for (date, val, _, color), packed_y in zip(entries, placements):
+        orig_y: float = trans.transform((0, val))[1]  # type: ignore[reportUnknownArgumentType]
+        dy_pts = (packed_y - orig_y) * 72.0 / fig.dpi
+        text = format_annotation(val, decimals, units)
+        t = ax.annotate(  # type: ignore[reportUnknownMemberType]
+            text,
+            xy=(date, val),
+            xytext=(5, dy_pts),
+            textcoords="offset points",
+            color=color,
+            va="center",
+            ha="left",
+        )
+        width = t.get_window_extent(renderer).width  # type: ignore[reportUnknownArgumentType]
+        max_label_width = max(max_label_width, width)
+    inv = trans.inverted()
+    x0: float = inv.transform((0, 0))[0]  # type: ignore[reportUnknownArgumentType]
+    x1: float = inv.transform((max_label_width + 10, 0))[0]  # type: ignore[reportUnknownArgumentType]
+    xmin, xmax = ax.get_xlim()
+    ax.set_xlim(xmin, xmax + (x1 - x0))
 def style_spines(
     ax: Axes,
     decimals: int,
@@ -290,6 +384,7 @@ class LinePlot:
         end_date: datetime.datetime | None = None,
         base_100: bool = False,
         annotate: bool = False,
+        annotate_color: str | None = None,
         baseline: bool = False,
         format: Format | None = None,
         legend: Legend | None = None,
@@ -320,7 +415,8 @@ class LinePlot:
             raise ValueError("series is required")
         self.base_100 = base_100
-        self.annotate = annotate  # unused for the moment
+        self.annotate = annotate
+        self.annotate_color = annotate_color
         self.format = format
         self.start_date = start_date
         self.end_date = end_date
@@ -379,10 +475,17 @@ class LinePlot:
         else:
             plot_data.plot(ax=ax)
-        if self.annotate:  # not implemented yet
-            pass
         assert self.format is not None
+        if self.annotate:
+            annotate_last_values(
+                ax,
+                plot_data,
+                decimals=self.format.decimals,
+                units=self.format.units,
+                series_styles=self.series_styles,
+                annotate_color=self.annotate_color,
+            )
         style_spines(  # maybe make this function accept a Format object
             ax,
             decimals=self.format.decimals,

{tesorotools_python-0.0.28 → tesorotools_python-0.0.30}/src/tesorotools/pipeline/rules.py RENAMED Viewed

@@ -179,6 +179,152 @@ def cumsum_rule(output: str, source: str) -> TransformationRule:
     )
+def resample_rule(
+    output: str,
+    source: str,
+    freq: str,
+    agg: str = "mean",
+) -> TransformationRule:
+    """Resample a series to *freq* with aggregator *agg*.
+    NaN-aware: drops NaN before ``resample`` so interior
+    gaps in the input do not poison the aggregator.
+    """
+    def _compute(
+        df: pd.DataFrame,
+        s: str = source,
+        f: str = freq,
+        a: str = agg,
+    ) -> pd.Series[float]:
+        clean: pd.Series[float] = df[s].dropna()
+        return clean.resample(f).agg(a)  # type: ignore[reportUnknownMemberType]
+    return TransformationRule(
+        output_name=output,
+        dependencies=[source],
+        compute=_compute,
+    )
+def weighted_average_rule(
+    output: str,
+    values: list[str],
+    weights: list[str],
+) -> TransformationRule:
+    """Weighted average: ``sum(v_i * w_i) / sum(w_i)``.
+    NaN components contribute 0 to both numerator and
+    denominator (so they do not propagate NaN across the
+    whole row).  A denominator of 0 yields NaN (avoids
+    ``inf``).
+    """
+    if len(values) != len(weights):
+        raise ValueError(
+            f"weighted_average requires len(values) == len(weights),"
+            f" got {len(values)} and {len(weights)}"
+        )
+    deps = list(values) + list(weights)
+    def _compute(
+        df: pd.DataFrame,
+        vs: list[str] = list(values),
+        ws: list[str] = list(weights),
+    ) -> pd.Series[float]:
+        zero = pd.Series(0.0, index=df.index)
+        num: pd.Series[float] = sum(
+            (df[v].fillna(0) * df[w].fillna(0) for v, w in zip(vs, ws)),
+            start=zero,
+        )
+        den: pd.Series[float] = sum(
+            (df[w].fillna(0) for w in ws),
+            start=zero,
+        )
+        return num / den.replace(0, float("nan"))
+    return TransformationRule(
+        output_name=output,
+        dependencies=deps,
+        compute=_compute,
+    )
+def index_rule(
+    output: str,
+    source: str,
+    reference_date: str | pd.Timestamp,
+    base: float = 100.0,
+) -> TransformationRule:
+    """Rebase to *base* at *reference_date*.
+    Returns an empty Series if the reference date is not
+    present in the source's index after dropping NaN, or
+    if the reference value is zero.  Keeps the pipeline
+    from blowing up on partial data.
+    """
+    ref = pd.Timestamp(reference_date)
+    def _compute(
+        df: pd.DataFrame,
+        s: str = source,
+        r: pd.Timestamp = ref,
+        b: float = base,
+    ) -> pd.Series[float]:
+        clean: pd.Series[float] = df[s].dropna()
+        if r not in clean.index:
+            return pd.Series(dtype="float64")
+        ref_val = float(clean.loc[r])
+        if ref_val == 0:
+            return pd.Series(dtype="float64")
+        return clean / ref_val * b
+    return TransformationRule(
+        output_name=output,
+        dependencies=[source],
+        compute=_compute,
+    )
+def forward_fill_rule(
+    output: str,
+    source: str,
+    freq: str = "MS",
+    limit: int | None = None,
+    extend: int = 0,
+) -> TransformationRule:
+    """Reindex to a regular *freq* grid and forward-fill.
+    Decouples ``limit`` from the host DataFrame's own
+    frequency — useful when a quarterly series has to be
+    propagated to the months of each quarter inside a
+    daily/monthly DataFrame.  ``extend`` adds that many
+    extra *freq* steps after the last real observation.
+    """
+    def _compute(
+        df: pd.DataFrame,
+        s: str = source,
+        f: str = freq,
+        lim: int | None = limit,
+        ext: int = extend,
+    ) -> pd.Series[float]:
+        clean: pd.Series[float] = df[s].dropna()
+        if clean.empty:
+            return pd.Series(dtype="float64")
+        start = clean.index.min()
+        end = clean.index.max()
+        if ext > 0:
+            end = end + pd.tseries.frequencies.to_offset(f) * ext  # type: ignore[reportOperatorIssue]
+        grid = pd.date_range(start=start, end=end, freq=f)
+        return clean.reindex(grid).ffill(limit=lim)
+    return TransformationRule(
+        output_name=output,
+        dependencies=[source],
+        compute=_compute,
+    )
 #: Registry of factory functions, keyed by YAML function name.
 #: Projects can add custom factories at runtime.
 FACTORIES: dict[
@@ -194,4 +340,8 @@ FACTORIES: dict[
     "rolling_sum": rolling_sum_rule,
     "delta": delta_rule,
     "cumsum": cumsum_rule,
+    "resample": resample_rule,
+    "weighted_average": weighted_average_rule,
+    "index": index_rule,
+    "forward_fill": forward_fill_rule,
 }

tesorotools_python-0.0.30/src/tesorotools/providers/ecb.py ADDED Viewed

@@ -0,0 +1,215 @@
+"""ECB SDMX REST data provider.
+Downloads time series from the ECB public statistical data
+warehouse via its SDMX 2.1 REST API. No authentication
+required.
+Install with the ``ecb`` optional extra::
+    uv pip install "tesorotools-python[ecb]"
+API reference
+-------------
+Endpoint:
+    https://data-api.ecb.europa.eu/service/data/{dataflow}/{key}
+Where ``{dataflow}`` is the dataset identifier (e.g. ``"MIR"``,
+``"BSI"``, ``"FM"``) and ``{key}`` is the dot-separated series
+key (e.g. ``"M.ES.B.L22.A.R.A.2250.EUR.N"``).
+Query parameters used here:
+    ``format``       : ``"csvdata"`` (flat CSV with one row per obs)
+    ``startPeriod``  : optional, ISO date or year-month
+Code convention
+---------------
+This provider accepts codes in the ECB full-key form
+``"{dataflow}.{key}"``, e.g.
+``"MIR.M.ES.B.L22.A.R.A.2250.EUR.N"``.  The first dot
+separates the dataflow from the key.  This matches how R
+packages (``ecb::get_data``) address series and keeps
+catalog entries unambiguous.
+"""
+from __future__ import annotations
+import csv
+import io
+import logging
+from typing import cast
+import pandas as pd
+import requests
+from tesorotools.providers.base import DataProvider
+logger = logging.getLogger(__name__)
+_BASE_URL = "https://data-api.ecb.europa.eu/service/data"
+_PING_URL = "https://data-api.ecb.europa.eu/service/dataflow/ECB"
+DEFAULT_TIMEOUT = 60
+def _split_code(code: str) -> tuple[str, str]:
+    """Split an ECB code into ``(dataflow, key)``.
+    ``"MIR.M.ES.B.L22.A.R.A.2250.EUR.N"`` becomes
+    ``("MIR", "M.ES.B.L22.A.R.A.2250.EUR.N")``.
+    """
+    dataflow, _, key = code.partition(".")
+    if not key:
+        raise ValueError(
+            f"ECB code must include a dataflow and a key: {code!r}"
+        )
+    return dataflow, key
+def _parse_period(period: str) -> pd.Timestamp:
+    """Parse an ECB TIME_PERIOD string into a timestamp.
+    Handles the most common formats:
+    - ``"2025"``        annual   -> Jan 1
+    - ``"2025-Q1"``     quarter  -> Jan 1 (start of quarter)
+    - ``"2025-01"``     monthly  -> Jan 1
+    - ``"2025-01-15"``  daily    -> that day
+    - ``"2025-W01"``    weekly   -> Monday of that ISO week
+    """
+    if len(period) == 4 and period.isdigit():
+        return pd.Timestamp(f"{period}-01-01")
+    if "Q" in period:
+        year_str, q_str = period.split("-Q")
+        month = (int(q_str) - 1) * 3 + 1
+        return pd.Timestamp(f"{year_str}-{month:02d}-01")
+    if "W" in period:
+        year_str, w_str = period.split("-W")
+        return pd.Timestamp.fromisocalendar(int(year_str), int(w_str), 1)
+    if len(period) == 7:  # YYYY-MM
+        return pd.Timestamp(f"{period}-01")
+    return pd.Timestamp(period)
+class EcbProvider(DataProvider):
+    """Provider that downloads series from the ECB SDMX API.
+    One HTTP request per series (ECB allows multiple keys per
+    request using ``+``-separated alternatives within a
+    position, but bundling arbitrary keys is not supported).
+    Parameters
+    ----------
+    timeout
+        Max seconds per HTTP request.
+    session
+        Optional pre-built ``requests.Session``.  Useful for
+        tests or for custom retry policies.
+    """
+    def __init__(
+        self,
+        *,
+        timeout: int = DEFAULT_TIMEOUT,
+        session: requests.Session | None = None,
+    ) -> None:
+        self._timeout = timeout
+        self._session = session or requests.Session()
+    def is_available(self) -> bool:
+        """Check whether the ECB API responds.
+        Hits a lightweight dataflow endpoint.
+        """
+        try:
+            r = self._session.get(_PING_URL, timeout=self._timeout)
+            return r.ok
+        except requests.RequestException:
+            return False
+    def fetch(
+        self,
+        codes: list[str],
+        start: str | None = None,
+        end: str | None = None,
+    ) -> pd.DataFrame:
+        """Download series for ``codes`` and return a DataFrame.
+        Parameters
+        ----------
+        codes
+            List of ECB full-key codes (``"{dataflow}.{key}"``).
+        start
+            Start period (ISO format, e.g. ``"2022-01"``).
+            If ``None`` the full history is returned.
+        end
+            End period.  If ``None`` up to latest.
+        Returns
+        -------
+        pd.DataFrame
+            Wide DataFrame. Index = dates (tz-naive), columns =
+            the requested codes. Missing observations are NaN.
+        """
+        if not codes:
+            return pd.DataFrame()
+        frames: list[pd.Series[float]] = []
+        for code in codes:
+            series = self._fetch_one(code, start=start, end=end)
+            frames.append(series)
+        df = pd.concat(frames, axis=1)
+        df.index.name = "date"
+        df.sort_index(inplace=True)
+        return df
+    def _fetch_one(
+        self,
+        code: str,
+        start: str | None,
+        end: str | None,
+    ) -> "pd.Series[float]":
+        """Download a single series and return it as a Series."""
+        dataflow, key = _split_code(code)
+        url = f"{_BASE_URL}/{dataflow}/{key}"
+        params: dict[str, str] = {"format": "csvdata"}
+        if start is not None:
+            params["startPeriod"] = start
+        if end is not None:
+            params["endPeriod"] = end
+        logger.debug("GET %s params=%s", url, params)
+        r = self._session.get(url, params=params, timeout=self._timeout)
+        r.raise_for_status()
+        values = _parse_csv(r.text)
+        ts = pd.Series(values, name=code, dtype="float64")
+        ts.sort_index(inplace=True)
+        return ts
+def _parse_csv(text: str) -> dict[pd.Timestamp, float]:
+    """Parse an ECB csvdata response.
+    Returns a mapping ``timestamp -> observation``.  Empty or
+    non-numeric values are skipped.
+    The ECB csvdata format has one row per observation with
+    (at least) ``TIME_PERIOD`` and ``OBS_VALUE`` columns.
+    Other metadata columns are ignored.
+    """
+    reader = csv.DictReader(io.StringIO(text))
+    out: dict[pd.Timestamp, float] = {}
+    for row_any in reader:
+        row = cast(dict[str, str], row_any)
+        period = row.get("TIME_PERIOD") or ""
+        raw = row.get("OBS_VALUE") or ""
+        if not period or not raw:
+            continue
+        try:
+            value = float(raw)
+        except ValueError:
+            continue
+        out[_parse_period(period)] = value
+    return out