PyPI - tesorotools-python - Versions diffs - 0.0.29__tar.gz → 0.0.31__tar.gz - Mend

tesorotools-python: 0.0.29 (tar.gz) → 0.0.31 (tar.gz)

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (65)

{tesorotools_python-0.0.29 → tesorotools_python-0.0.31}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: tesorotools-python
-Version: 0.0.29
+Version: 0.0.31
 Requires-Python: >=3.13
 Requires-Dist: babel>=2.17
 Requires-Dist: eikon>=1.1
@@ -16,3 +16,5 @@ Requires-Dist: pyyaml>=6.0
 Requires-Dist: sqlalchemy>=2.0
 Provides-Extra: bde
 Requires-Dist: requests>=2.31; extra == 'bde'
+Provides-Extra: ecb
+Requires-Dist: requests>=2.31; extra == 'ecb'

{tesorotools_python-0.0.29 → tesorotools_python-0.0.31}/pyproject.toml RENAMED Viewed

@@ -1,7 +1,7 @@
 [project]
 name = "tesorotools-python"
 requires-python = ">=3.13"
-version = "0.0.29"
+version = "0.0.31"
 dependencies = [
     # database and ORM
     "psycopg[binary]>=3.1",
@@ -28,6 +28,7 @@ dependencies = [
 [project.optional-dependencies]
 bde = ["requests>=2.31"]
+ecb = ["requests>=2.31"]
 [dependency-groups]
 dev = [

{tesorotools_python-0.0.29 → tesorotools_python-0.0.31}/src/tesorotools/artists/line_plot.py RENAMED Viewed

@@ -102,11 +102,17 @@ def annotate_last_values(
     *,
     decimals: int,
     units: str,
+    labels: dict[str, str] | None = None,
     series_styles: dict[str, dict[str, Any]] | None = None,
     annotate_color: str | None = None,
 ) -> None:
     """Label the last non-NaN value of each column on the right.
+    ``plot_data.columns`` and ``series_styles`` keys must be
+    canonical series IDs.  ``labels`` maps each ID to the
+    Matplotlib line label used when the axes were drawn; if
+    omitted, the ID itself is used as the line label.
     Colour priority (highest first): ``annotate_color``
     (global override), ``series_styles[col]['color']``,
     the Matplotlib line colour.
@@ -120,6 +126,7 @@ def annotate_last_values(
     if fig is None:
         return
     styles = series_styles or {}
+    label_map = labels or {}
     lines_by_label = {str(line.get_label()): line for line in ax.lines}
     entries: list[tuple[Any, float, str, Any]] = []
@@ -136,7 +143,7 @@ def annotate_last_values(
             if override is not None:
                 color = override
             else:
-                line = lines_by_label.get(col)
+                line = lines_by_label.get(label_map.get(col, col))
                 color = line.get_color() if line is not None else "black"
         entries.append((last_date, last_val, col, color))
@@ -454,7 +461,6 @@ class LinePlot:
         plot_data: pd.DataFrame = self.data.loc[
             slice(start_date, end_date), self.series.keys()
         ]
-        plot_data = plot_data.rename(columns=self.series)
         plot_data = plot_data * self.scale
@@ -468,12 +474,10 @@ class LinePlot:
             **fig_kw
         )
         ax = fig.add_subplot()
-        if self.series_styles:
-            for col in plot_data.columns:
-                style = self.series_styles.get(col, {})
-                plot_data[col].plot(ax=ax, label=col, **style)
-        else:
-            plot_data.plot(ax=ax)
+        styles = self.series_styles
+        for col in plot_data.columns:
+            style = styles.get(col, {}) if styles else {}
+            plot_data[col].plot(ax=ax, label=self.series[col], **style)
         assert self.format is not None
         if self.annotate:
@@ -482,6 +486,7 @@ class LinePlot:
                 plot_data,
                 decimals=self.format.decimals,
                 units=self.format.units,
+                labels=self.series,
                 series_styles=self.series_styles,
                 annotate_color=self.annotate_color,
             )
@@ -497,7 +502,7 @@ class LinePlot:
             style_baseline(ax, reference, **AX_CONFIG["baseline"])
         if self.legend is not None:
-            labels = list(plot_data.columns)
+            labels = [self.series[c] for c in plot_data.columns]
             ncol = (
                 self.legend.ncol
                 if self.legend.ncol is not None

{tesorotools_python-0.0.29 → tesorotools_python-0.0.31}/src/tesorotools/artists/stacked.py RENAMED Viewed

@@ -84,7 +84,8 @@ class StackedAreaPlot:
             else self.data.index.max()
         )
-        plot_data = self.data.loc[start:end, list(self.series.keys())].dropna()
+        plot_data = self.data.loc[start:end, list(self.series.keys())]
+        plot_data = plot_data.dropna(how="all").fillna(0)
         plot_data = plot_data * self.scale
         fig_kw = dict(FIG_CONFIG)
@@ -211,7 +212,8 @@ class StackedBarPlot:
             else self.data.index.max()
         )
         all_cols = list(self.series.keys()) + list(self.overlay_series.keys())
-        plot_data = self.data.loc[start:end, all_cols].dropna()
+        plot_data = self.data.loc[start:end, all_cols]
+        plot_data = plot_data.dropna(how="all").fillna(0)
         return plot_data * self.scale
     def _format_xticks(

{tesorotools_python-0.0.29 → tesorotools_python-0.0.31}/src/tesorotools/pipeline/rules.py RENAMED Viewed

@@ -45,6 +45,21 @@ def sum_rule(output: str, sources: list[str]) -> TransformationRule:
     )
+def mean_rule(output: str, sources: list[str]) -> TransformationRule:
+    """Row-wise arithmetic mean of multiple columns.
+    Equivalent to ``=AVERAGE(...)`` in Excel: NaN components
+    are skipped (not treated as zero), so a row with any
+    non-NaN value yields the mean of whatever is present.
+    A row of all NaN yields NaN.
+    """
+    return TransformationRule(
+        output_name=output,
+        dependencies=list(sources),
+        compute=lambda df, cols=list(sources): df[cols].mean(axis=1),
+    )
 def ratio_rule(
     output: str, numerator: str, denominator: str
 ) -> TransformationRule:
@@ -179,6 +194,152 @@ def cumsum_rule(output: str, source: str) -> TransformationRule:
     )
+def resample_rule(
+    output: str,
+    source: str,
+    freq: str,
+    agg: str = "mean",
+) -> TransformationRule:
+    """Resample a series to *freq* with aggregator *agg*.
+    NaN-aware: drops NaN before ``resample`` so interior
+    gaps in the input do not poison the aggregator.
+    """
+    def _compute(
+        df: pd.DataFrame,
+        s: str = source,
+        f: str = freq,
+        a: str = agg,
+    ) -> pd.Series[float]:
+        clean: pd.Series[float] = df[s].dropna()
+        return clean.resample(f).agg(a)  # type: ignore[reportUnknownMemberType]
+    return TransformationRule(
+        output_name=output,
+        dependencies=[source],
+        compute=_compute,
+    )
+def weighted_average_rule(
+    output: str,
+    values: list[str],
+    weights: list[str],
+) -> TransformationRule:
+    """Weighted average: ``sum(v_i * w_i) / sum(w_i)``.
+    NaN components contribute 0 to both numerator and
+    denominator (so they do not propagate NaN across the
+    whole row).  A denominator of 0 yields NaN (avoids
+    ``inf``).
+    """
+    if len(values) != len(weights):
+        raise ValueError(
+            f"weighted_average requires len(values) == len(weights),"
+            f" got {len(values)} and {len(weights)}"
+        )
+    deps = list(values) + list(weights)
+    def _compute(
+        df: pd.DataFrame,
+        vs: list[str] = list(values),
+        ws: list[str] = list(weights),
+    ) -> pd.Series[float]:
+        zero = pd.Series(0.0, index=df.index)
+        num: pd.Series[float] = sum(
+            (df[v].fillna(0) * df[w].fillna(0) for v, w in zip(vs, ws)),
+            start=zero,
+        )
+        den: pd.Series[float] = sum(
+            (df[w].fillna(0) for w in ws),
+            start=zero,
+        )
+        return num / den.replace(0, float("nan"))
+    return TransformationRule(
+        output_name=output,
+        dependencies=deps,
+        compute=_compute,
+    )
+def index_rule(
+    output: str,
+    source: str,
+    reference_date: str | pd.Timestamp,
+    base: float = 100.0,
+) -> TransformationRule:
+    """Rebase to *base* at *reference_date*.
+    Returns an empty Series if the reference date is not
+    present in the source's index after dropping NaN, or
+    if the reference value is zero.  Keeps the pipeline
+    from blowing up on partial data.
+    """
+    ref = pd.Timestamp(reference_date)
+    def _compute(
+        df: pd.DataFrame,
+        s: str = source,
+        r: pd.Timestamp = ref,
+        b: float = base,
+    ) -> pd.Series[float]:
+        clean: pd.Series[float] = df[s].dropna()
+        if r not in clean.index:
+            return pd.Series(dtype="float64")
+        ref_val = float(clean.loc[r])
+        if ref_val == 0:
+            return pd.Series(dtype="float64")
+        return clean / ref_val * b
+    return TransformationRule(
+        output_name=output,
+        dependencies=[source],
+        compute=_compute,
+    )
+def forward_fill_rule(
+    output: str,
+    source: str,
+    freq: str = "MS",
+    limit: int | None = None,
+    extend: int = 0,
+) -> TransformationRule:
+    """Reindex to a regular *freq* grid and forward-fill.
+    Decouples ``limit`` from the host DataFrame's own
+    frequency — useful when a quarterly series has to be
+    propagated to the months of each quarter inside a
+    daily/monthly DataFrame.  ``extend`` adds that many
+    extra *freq* steps after the last real observation.
+    """
+    def _compute(
+        df: pd.DataFrame,
+        s: str = source,
+        f: str = freq,
+        lim: int | None = limit,
+        ext: int = extend,
+    ) -> pd.Series[float]:
+        clean: pd.Series[float] = df[s].dropna()
+        if clean.empty:
+            return pd.Series(dtype="float64")
+        start = clean.index.min()
+        end = clean.index.max()
+        if ext > 0:
+            end = end + pd.tseries.frequencies.to_offset(f) * ext  # type: ignore[reportOperatorIssue]
+        grid = pd.date_range(start=start, end=end, freq=f)
+        return clean.reindex(grid).ffill(limit=lim)
+    return TransformationRule(
+        output_name=output,
+        dependencies=[source],
+        compute=_compute,
+    )
 #: Registry of factory functions, keyed by YAML function name.
 #: Projects can add custom factories at runtime.
 FACTORIES: dict[
@@ -187,6 +348,7 @@ FACTORIES: dict[
 ] = {
     "scale": scale_rule,
     "sum": sum_rule,
+    "mean": mean_rule,
     "ratio": ratio_rule,
     "difference": difference_rule,
     "inverse": inverse_rule,
@@ -194,4 +356,8 @@ FACTORIES: dict[
     "rolling_sum": rolling_sum_rule,
     "delta": delta_rule,
     "cumsum": cumsum_rule,
+    "resample": resample_rule,
+    "weighted_average": weighted_average_rule,
+    "index": index_rule,
+    "forward_fill": forward_fill_rule,
 }

tesorotools_python-0.0.31/src/tesorotools/providers/ecb.py ADDED Viewed

@@ -0,0 +1,215 @@
+"""ECB SDMX REST data provider.
+Downloads time series from the ECB public statistical data
+warehouse via its SDMX 2.1 REST API. No authentication
+required.
+Install with the ``ecb`` optional extra::
+    uv pip install "tesorotools-python[ecb]"
+API reference
+-------------
+Endpoint:
+    https://data-api.ecb.europa.eu/service/data/{dataflow}/{key}
+Where ``{dataflow}`` is the dataset identifier (e.g. ``"MIR"``,
+``"BSI"``, ``"FM"``) and ``{key}`` is the dot-separated series
+key (e.g. ``"M.ES.B.L22.A.R.A.2250.EUR.N"``).
+Query parameters used here:
+    ``format``       : ``"csvdata"`` (flat CSV with one row per obs)
+    ``startPeriod``  : optional, ISO date or year-month
+Code convention
+---------------
+This provider accepts codes in the ECB full-key form
+``"{dataflow}.{key}"``, e.g.
+``"MIR.M.ES.B.L22.A.R.A.2250.EUR.N"``.  The first dot
+separates the dataflow from the key.  This matches how R
+packages (``ecb::get_data``) address series and keeps
+catalog entries unambiguous.
+"""
+from __future__ import annotations
+import csv
+import io
+import logging
+from typing import cast
+import pandas as pd
+import requests
+from tesorotools.providers.base import DataProvider
+logger = logging.getLogger(__name__)
+_BASE_URL = "https://data-api.ecb.europa.eu/service/data"
+_PING_URL = "https://data-api.ecb.europa.eu/service/dataflow/ECB"
+DEFAULT_TIMEOUT = 60
+def _split_code(code: str) -> tuple[str, str]:
+    """Split an ECB code into ``(dataflow, key)``.
+    ``"MIR.M.ES.B.L22.A.R.A.2250.EUR.N"`` becomes
+    ``("MIR", "M.ES.B.L22.A.R.A.2250.EUR.N")``.
+    """
+    dataflow, _, key = code.partition(".")
+    if not key:
+        raise ValueError(
+            f"ECB code must include a dataflow and a key: {code!r}"
+        )
+    return dataflow, key
+def _parse_period(period: str) -> pd.Timestamp:
+    """Parse an ECB TIME_PERIOD string into a timestamp.
+    Handles the most common formats:
+    - ``"2025"``        annual   -> Jan 1
+    - ``"2025-Q1"``     quarter  -> Jan 1 (start of quarter)
+    - ``"2025-01"``     monthly  -> Jan 1
+    - ``"2025-01-15"``  daily    -> that day
+    - ``"2025-W01"``    weekly   -> Monday of that ISO week
+    """
+    if len(period) == 4 and period.isdigit():
+        return pd.Timestamp(f"{period}-01-01")
+    if "Q" in period:
+        year_str, q_str = period.split("-Q")
+        month = (int(q_str) - 1) * 3 + 1
+        return pd.Timestamp(f"{year_str}-{month:02d}-01")
+    if "W" in period:
+        year_str, w_str = period.split("-W")
+        return pd.Timestamp.fromisocalendar(int(year_str), int(w_str), 1)
+    if len(period) == 7:  # YYYY-MM
+        return pd.Timestamp(f"{period}-01")
+    return pd.Timestamp(period)
+class EcbProvider(DataProvider):
+    """Provider that downloads series from the ECB SDMX API.
+    One HTTP request per series (ECB allows multiple keys per
+    request using ``+``-separated alternatives within a
+    position, but bundling arbitrary keys is not supported).
+    Parameters
+    ----------
+    timeout
+        Max seconds per HTTP request.
+    session
+        Optional pre-built ``requests.Session``.  Useful for
+        tests or for custom retry policies.
+    """
+    def __init__(
+        self,
+        *,
+        timeout: int = DEFAULT_TIMEOUT,
+        session: requests.Session | None = None,
+    ) -> None:
+        self._timeout = timeout
+        self._session = session or requests.Session()
+    def is_available(self) -> bool:
+        """Check whether the ECB API responds.
+        Hits a lightweight dataflow endpoint.
+        """
+        try:
+            r = self._session.get(_PING_URL, timeout=self._timeout)
+            return r.ok
+        except requests.RequestException:
+            return False
+    def fetch(
+        self,
+        codes: list[str],
+        start: str | None = None,
+        end: str | None = None,
+    ) -> pd.DataFrame:
+        """Download series for ``codes`` and return a DataFrame.
+        Parameters
+        ----------
+        codes
+            List of ECB full-key codes (``"{dataflow}.{key}"``).
+        start
+            Start period (ISO format, e.g. ``"2022-01"``).
+            If ``None`` the full history is returned.
+        end
+            End period.  If ``None`` up to latest.
+        Returns
+        -------
+        pd.DataFrame
+            Wide DataFrame. Index = dates (tz-naive), columns =
+            the requested codes. Missing observations are NaN.
+        """
+        if not codes:
+            return pd.DataFrame()
+        frames: list[pd.Series[float]] = []
+        for code in codes:
+            series = self._fetch_one(code, start=start, end=end)
+            frames.append(series)
+        df = pd.concat(frames, axis=1)
+        df.index.name = "date"
+        df.sort_index(inplace=True)
+        return df
+    def _fetch_one(
+        self,
+        code: str,
+        start: str | None,
+        end: str | None,
+    ) -> "pd.Series[float]":
+        """Download a single series and return it as a Series."""
+        dataflow, key = _split_code(code)
+        url = f"{_BASE_URL}/{dataflow}/{key}"
+        params: dict[str, str] = {"format": "csvdata"}
+        if start is not None:
+            params["startPeriod"] = start
+        if end is not None:
+            params["endPeriod"] = end
+        logger.debug("GET %s params=%s", url, params)
+        r = self._session.get(url, params=params, timeout=self._timeout)
+        r.raise_for_status()
+        values = _parse_csv(r.text)
+        ts = pd.Series(values, name=code, dtype="float64")
+        ts.sort_index(inplace=True)
+        return ts
+def _parse_csv(text: str) -> dict[pd.Timestamp, float]:
+    """Parse an ECB csvdata response.
+    Returns a mapping ``timestamp -> observation``.  Empty or
+    non-numeric values are skipped.
+    The ECB csvdata format has one row per observation with
+    (at least) ``TIME_PERIOD`` and ``OBS_VALUE`` columns.
+    Other metadata columns are ignored.
+    """
+    reader = csv.DictReader(io.StringIO(text))
+    out: dict[pd.Timestamp, float] = {}
+    for row_any in reader:
+        row = cast(dict[str, str], row_any)
+        period = row.get("TIME_PERIOD") or ""
+        raw = row.get("OBS_VALUE") or ""
+        if not period or not raw:
+            continue
+        try:
+            value = float(raw)
+        except ValueError:
+            continue
+        out[_parse_period(period)] = value
+    return out