PyPI - tesorotools-python - Versions diffs - 0.0.41__tar.gz → 0.0.42__tar.gz - Mend

tesorotools-python 0.0.41 (tar.gz) → 0.0.42 (tar.gz)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (69) hide show

{tesorotools_python-0.0.41 → tesorotools_python-0.0.42}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: tesorotools-python
-Version: 0.0.41
+Version: 0.0.42
 Requires-Python: >=3.13
 Requires-Dist: babel>=2.17
 Requires-Dist: matplotlib>=3.10
@@ -16,3 +16,5 @@ Provides-Extra: bde
 Requires-Dist: requests>=2.31; extra == 'bde'
 Provides-Extra: ecb
 Requires-Dist: requests>=2.31; extra == 'ecb'
+Provides-Extra: lseg
+Requires-Dist: lseg-data>=2.1; extra == 'lseg'

{tesorotools_python-0.0.41 → tesorotools_python-0.0.42}/pyproject.toml RENAMED Viewed

@@ -1,7 +1,7 @@
 [project]
 name = "tesorotools-python"
 requires-python = ">=3.13"
-version = "0.0.41"
+version = "0.0.42"
 dependencies = [
     # database and ORM
     "psycopg[binary]>=3.1",
@@ -27,6 +27,7 @@ dependencies = [
 [project.optional-dependencies]
 bde = ["requests>=2.31"]
 ecb = ["requests>=2.31"]
+lseg = ["lseg-data>=2.1"]
 [dependency-groups]
 dev = [

{tesorotools_python-0.0.41 → tesorotools_python-0.0.42}/src/tesorotools/__init__.py RENAMED Viewed

@@ -7,9 +7,9 @@ effects) and registers their YAML tags via
 Provider subclasses gated by optional extras
 (``BdeProvider`` requires ``[bde]``, ``EcbProvider``
-requires ``[ecb]``) are exposed lazily through
-``__getattr__``; importing this module does not require the
-extras to be installed.
+requires ``[ecb]``, ``LSEGProvider`` requires ``[lseg]``)
+are exposed lazily through ``__getattr__``; importing this
+module does not require the extras to be installed.
 Third parties extend the package via ``register_artist``,
 ``register_tag``, and ``register_provider`` (and their
@@ -22,6 +22,7 @@ from typing import TYPE_CHECKING, Any
 if TYPE_CHECKING:
     from tesorotools.providers.bde import BdeProvider
     from tesorotools.providers.ecb import EcbProvider
+    from tesorotools.providers.lseg import LSEGProvider
 from tesorotools._build_context import BuildContext
 from tesorotools._registry import (
@@ -106,6 +107,7 @@ __all__ = [
     "Images",
     "Legend",
     "LinePlot",
+    "LSEGProvider",
     "RegistryProtocol",
     "Report",
     "Section",
@@ -145,4 +147,8 @@ def __getattr__(name: str) -> Any:
         from tesorotools.providers.ecb import EcbProvider
         return EcbProvider
+    if name == "LSEGProvider":
+        from tesorotools.providers.lseg import LSEGProvider
+        return LSEGProvider
     raise AttributeError(f"module 'tesorotools' has no attribute {name!r}")

{tesorotools_python-0.0.41 → tesorotools_python-0.0.42}/src/tesorotools/providers/__init__.py RENAMED Viewed

@@ -1,10 +1,9 @@
 """Public provider API.
-``BdeProvider`` and ``EcbProvider`` depend on the optional
-``[bde]`` / ``[ecb]`` extras (which install ``requests``)
-and are imported lazily through ``__getattr__``; importing
-``tesorotools.providers`` itself does not require those
-extras.
+``BdeProvider``, ``EcbProvider`` and ``LSEGProvider`` depend on
+the optional ``[bde]`` / ``[ecb]`` / ``[lseg]`` extras and are
+imported lazily through ``__getattr__``; importing
+``tesorotools.providers`` itself does not require those extras.
 """
 from typing import TYPE_CHECKING, Any
@@ -18,11 +17,13 @@ from tesorotools.providers.base import (
 if TYPE_CHECKING:
     from tesorotools.providers.bde import BdeProvider
     from tesorotools.providers.ecb import EcbProvider
+    from tesorotools.providers.lseg import LSEGProvider
 __all__ = [
     "BdeProvider",
     "DataProvider",
     "EcbProvider",
+    "LSEGProvider",
     "RegistryProtocol",
     "bootstrap_providers",
 ]
@@ -37,6 +38,10 @@ def __getattr__(name: str) -> Any:
         from tesorotools.providers.ecb import EcbProvider
         return EcbProvider
+    if name == "LSEGProvider":
+        from tesorotools.providers.lseg import LSEGProvider
+        return LSEGProvider
     raise AttributeError(
         f"module 'tesorotools.providers' has no attribute {name!r}"
     )

tesorotools_python-0.0.42/src/tesorotools/providers/lseg.py ADDED Viewed

@@ -0,0 +1,792 @@
+"""LSEG Data Library provider.
+Wraps ``lseg-data`` (formerly Refinitiv Data Library) behind the
+project's :class:`DataProvider` interface.
+Install with the ``lseg`` optional extra::
+    uv pip install "tesorotools-python[lseg]"
+Registry metadata
+-----------------
+Each instrument MUST declare an ``lseg`` block in its registry entry
+with a ``history_field`` value (no default). The provider issues a
+single per-RIC ``get_history`` call requesting only that one field.
+Live mode additionally honours an optional ``snapshot_field`` (typically
+``CF_LAST`` for prices, ``CF_YIELD`` for yields).
+Modes
+-----
+:meth:`LSEGProvider.build_for` returns two instances:
+* ``lseg_close``: history-only, multi-day range supported. The standard
+  daily-close path.
+* ``lseg_live``: snapshot only, requires ``start == end``. Uses
+  ``ld.get_data`` against the declared ``snapshot_field`` and falls
+  back to history-grouped for RICs without one.
+Both share the same fetch surface so they plug straight into
+:func:`tesorotools.bootstrap_providers`.
+Availability
+------------
+Workspace ships only on Windows. :meth:`is_available` inspects
+``tasklist`` for a running Workspace/Refinitiv process and returns
+``False`` everywhere else; :meth:`build_for` then raises so a missing
+session never silently downgrades to an empty result.
+"""
+from __future__ import annotations
+import logging
+import subprocess
+import tempfile
+import time
+import warnings
+from pathlib import Path
+from typing import TYPE_CHECKING, Any, Callable, ClassVar, Literal, cast
+import lseg.data as ld
+import pandas as pd
+from tesorotools.providers.base import DataProvider
+if TYPE_CHECKING:
+    from tesorotools._build_context import BuildContext
+# The lseg-data SDK ships without type stubs, so its top-level symbols
+# surface as partially-unknown. Cast once to a typed alias and silence
+# the unknown-member access at the source attribute lookup so call sites
+# stay clean. The ``| None`` reflects the SDK's observed behaviour
+# (occasional ``None`` returns on transient failures); defensive
+# ``is None`` checks at call sites match it.
+_ld_get_history = cast(
+    Callable[..., "pd.DataFrame | None"],
+    ld.get_history,  # pyright: ignore[reportUnknownMemberType]
+)
+_ld_get_data = cast(
+    Callable[..., "pd.DataFrame | None"],
+    ld.get_data,  # pyright: ignore[reportUnknownMemberType]
+)
+# lseg-data 2.1.1 still uses pandas .fillna with implicit downcasting,
+# which pandas 2.2+ marks as a FutureWarning. Cosmetic; remove this
+# filter when the SDK is patched.
+warnings.filterwarnings(
+    "ignore",
+    message="Downcasting object dtype arrays",
+    category=FutureWarning,
+    module=r"lseg\.data\..*",
+)
+logger = logging.getLogger(__name__)
+# Maximum number of dates sliced into one batch by
+# ``_download_batched`` (i.e. per ``get_history`` range request in the
+# multi-day close path).
+_STEP_DATAPOINTS: int = 2_500
+# Seconds passed to ``time.sleep`` between batches and between retries.
+_DEFAULT_COOLDOWN: int = 10
+# Attempts per batch in ``_download_batch`` before the last error is
+# re-raised to the caller.
+_MAX_RETRIES: int = 150
+# Retries per individual RIC in the daily snapshot loop. Smaller than
+# :data:`_MAX_RETRIES` because a single RIC failing should not stall the
+# whole run for tens of minutes — failures are isolated and the missing
+# column is left NaN for the caller to handle.
+_PER_RIC_MAX_RETRIES: int = 5
+# Name of the index column in the cached CSV partials; written by
+# ``_download_batch`` and used as ``index_col`` when reading them back.
+_CSV_DATE_TITLE: str = "Date"
+# Max RICs per ``ld.get_data`` call in the snapshot overlay. Larger
+# universes occasionally return ``<NA>`` for instruments that resolve
+# cleanly when queried alone (observed: MSCI_WORLD/.MIWO00000PUS and
+# MSCI_LATAM dropping out intermittently). Chunking is a defence in
+# depth; a per-RIC rescue pass catches anything that still slips
+# through.
+_GET_DATA_CHUNK: int = 50
+# Process names that indicate a usable Workspace session is running
+# locally. Includes both the legacy Refinitiv name and the rebranded
+# LSEG names so the check survives the rebrand. Matched as lowercase
+# substrings of the ``tasklist`` output by ``is_available``.
+_WORKSPACE_PROCESSES: tuple[str, ...] = (
+    "lsegworkspace.exe",
+    "refinitivworkspace.exe",
+    "workspace.exe",
+)
+class LSEGProvider(DataProvider):
+    """LSEG Data Library data provider."""
+    PROVIDER_NAME: ClassVar[str] = "lseg"
+    @classmethod
+    def build_for(cls, ctx: "BuildContext") -> dict[str, "DataProvider"]:
+        """Build the live + close LSEG instances.
+        Returns a dict with two keys, ``"lseg_close"`` and
+        ``"lseg_live"``, each backed by an :class:`LSEGProvider` with
+        the matching ``mode``. If the registry asks for no LSEG codes
+        in this consumer the dict is empty.
+        Raises
+        ------
+        ValueError
+            If a registry entry lacks ``history_field`` (raised by
+            :meth:`_field_maps`, before any availability check).
+        RuntimeError
+            If ``ctx.mock`` is set, or if Workspace is not running
+            locally. Consumers that want a mock fallback must wrap this
+            method themselves; this class does not synthesise data.
+        """
+        cids = ctx.registry.all_cids_for_provider(
+            ctx.consumer, cls.PROVIDER_NAME
+        )
+        if not cids:
+            return {}
+        history_fields, snapshot_fields = cls._field_maps(ctx.registry, cids)
+        # Report the real cause: a mock request is not the same failure
+        # as a missing Workspace process, and needs no process probe.
+        if ctx.mock:
+            raise RuntimeError(
+                "LSEG provider cannot be built in mock mode: "
+                "it does not synthesise data"
+            )
+        probe = cls(history_fields=history_fields, mode="close")
+        if not probe.is_available():
+            raise RuntimeError("LSEG Workspace is not running")
+        return {
+            "lseg_close": cls(
+                history_fields=history_fields,
+                snapshot_fields=snapshot_fields,
+                mode="close",
+            ),
+            "lseg_live": cls(
+                history_fields=history_fields,
+                snapshot_fields=snapshot_fields,
+                mode="live",
+            ),
+        }
+    @staticmethod
+    def _field_maps(
+        registry: Any, cids: list[str]
+    ) -> tuple[dict[str, str], dict[str, str]]:
+        """Collect the per-RIC field declarations from the registry.
+        Reads the ``lseg`` metadata of every canonical id in ``cids``
+        and returns ``(history_fields, snapshot_fields)``, both keyed by
+        the RIC found under ``"code"``. ``history_field`` is mandatory
+        (``ValueError`` when missing or empty); ``snapshot_field`` is
+        only recorded when present and non-empty.
+        """
+        history_by_ric: dict[str, str] = {}
+        snapshot_by_ric: dict[str, str] = {}
+        for cid in cids:
+            entry = registry.get_provider_meta(cid, "lseg")
+            ric = entry["code"]
+            declared_history = entry.get("history_field")
+            if not declared_history:
+                raise ValueError(
+                    f"{cid}: 'history_field' is required in the lseg "
+                    "block (no default)."
+                )
+            history_by_ric[ric] = declared_history
+            declared_snapshot = entry.get("snapshot_field")
+            if declared_snapshot:
+                snapshot_by_ric[ric] = declared_snapshot
+        return history_by_ric, snapshot_by_ric
+    def __init__(
+        self,
+        history_fields: dict[str, str],
+        snapshot_fields: dict[str, str] | None = None,
+        skip_session: bool = False,
+        mode: Literal["live", "close"] = "close",
+        cache_dir: Path | None = None,
+    ) -> None:
+        """Create a provider bound to a fixed set of RIC field maps.
+        Parameters
+        ----------
+        history_fields
+            ``{ric: field}`` used by the close (history) path, e.g.
+            ``"YLDTOMAT"``, ``"SETTLE"``, ``"TRDPRC_1"``. ``fetch``
+            silently ignores any RIC that is not a key of this map.
+        snapshot_fields
+            Optional ``{ric: field}`` used by the live (snapshot) path
+            for RICs whose live tick differs from the consolidated
+            history (notably futures, where ``history_field=SETTLE`` is
+            yesterday's settle and the live tick lives in ``CF_LAST``).
+            RICs absent from this map use ``history_fields`` in live
+            mode as well.
+        skip_session
+            When true the provider is marked as already connected, so
+            ``fetch`` never opens an LSEG session. Mostly for tests that
+            monkey-patch the SDK and never want a real connection.
+        mode
+            ``"live"`` (snapshot, ``start==end``) or ``"close"``
+            (history, multi-day range supported).
+        cache_dir
+            Optional persistent cache for the multi-day close path.
+            With ``None``, ``fetch`` works in a fresh tempdir per call
+            (no resume). Otherwise CSV partials are kept across runs so
+            an interrupted download resumes from disk.
+        """
+        # Both maps are copied so later mutation by the caller cannot
+        # change what this instance fetches.
+        self._history_field_per_ric: dict[str, str] = dict(history_fields)
+        self._snapshot_field_per_ric: dict[str, str] = (
+            dict(snapshot_fields) if snapshot_fields else {}
+        )
+        self._mode: Literal["live", "close"] = mode
+        self._cache_dir: Path | None = cache_dir
+        # "Already opened" doubles as the skip flag: _open_session only
+        # connects while this is False.
+        self._session_opened = skip_session
+    # ------------------------------------------------------------------
+    # Session lifecycle
+    # ------------------------------------------------------------------
+    def _open_session(self) -> None:
+        """Open the LSEG session once; later calls are no-ops."""
+        if self._session_opened:
+            return
+        ld.open_session()
+        self._session_opened = True
+    def is_available(self) -> bool:
+        """Report whether a Workspace process shows up in ``tasklist``.
+        Runs ``tasklist`` with a 10 second timeout and looks for any of
+        :data:`_WORKSPACE_PROCESSES` in its lowercased output. Any
+        failure along the way (command missing, timeout, ...) is treated
+        as "not available" rather than propagated.
+        """
+        try:
+            listing = subprocess.run(
+                ["tasklist"],
+                capture_output=True,
+                text=True,
+                timeout=10,
+            ).stdout.lower()
+            for process_name in _WORKSPACE_PROCESSES:
+                if process_name in listing:
+                    return True
+            return False
+        except Exception:
+            return False
+    # ------------------------------------------------------------------
+    # Tesorotools-style fetch
+    # ------------------------------------------------------------------
+    def fetch(
+        self,
+        codes: list[str],
+        start: str | None = None,
+        end: str | None = None,
+    ) -> pd.DataFrame:
+        """Daily data for ``codes`` between ``start`` and ``end``.
+        Returns columns labelled with raw RICs (no canonical-ID rename),
+        per the tesorotools schema, indexed by a tz-naive, normalised
+        ``DatetimeIndex`` named ``"date"``.
+        Dispatch by ``self._mode``:
+        - ``mode="close"``: history-only, multi-day range supported.
+          Reuses the per-RIC batched download with caching. ``start``
+          defaults to ``"1900-01-01"`` and ``end`` to today.
+        - ``mode="live"``: snapshot only, requires ``start == end``.
+          Snapshot grouped + history fallback for RICs without
+          ``snapshot_field``.
+        Codes without a declared ``history_field`` are skipped. When
+        none of ``codes`` is known an empty frame with ``codes`` as
+        columns is returned and no session is opened.
+        Raises
+        ------
+        ValueError
+            In live mode when ``start`` / ``end`` are missing or denote
+            different timestamps.
+        """
+        if not codes:
+            return pd.DataFrame()
+        known = [c for c in codes if c in self._history_field_per_ric]
+        if not known:
+            return pd.DataFrame(columns=pd.Index(codes))
+        # Only connect once there is something to download.
+        self._open_session()
+        if self._mode == "live":
+            # Compare parsed timestamps, not raw strings, so equivalent
+            # spellings of the same date are accepted.
+            if (
+                start is None
+                or end is None
+                or pd.Timestamp(start) != pd.Timestamp(end)
+            ):
+                raise ValueError(
+                    "LSEGProvider.fetch with mode='live' requires "
+                    f"start == end (got start={start!r}, end={end!r})"
+                )
+            target = pd.Timestamp(start)
+            df = self._download_live_for_rics(target, known)
+            df.index = pd.DatetimeIndex(df.index).tz_localize(None).normalize()
+            df.index.name = "date"
+            return df
+        # mode == "close"
+        if start is None:
+            start = "1900-01-01"
+        if end is None:
+            end = pd.Timestamp.now().strftime("%Y-%m-%d")
+        # Single-day close snapshot routes via _download_history_grouped,
+        # which uses ``count=1, end=target`` semantics. The
+        # interday-summaries endpoint returns 0 rows for ``start==end``
+        # queries on bid/mid/settle/yield-type fields, so the multi-day
+        # batched path below would otherwise return empty. ``count=1``
+        # returns the latest row at-or-before ``target`` and the method
+        # filters to ``target.normalize()`` (NaN if target is a market
+        # holiday).
+        single_day: pd.Timestamp | None = None
+        dates_to_download: list[str] = []
+        if pd.Timestamp(start) == pd.Timestamp(end):
+            single_day = pd.Timestamp(start)
+        else:
+            dates_to_download = list(
+                pd.date_range(start=start, end=end, freq="B").astype("str")
+            )
+            if not dates_to_download:
+                return pd.DataFrame(columns=pd.Index(known))
+            # A range that contains exactly one business day (e.g.
+            # Fri..Sun) would make the batched path issue the same
+            # ``start==end`` query and stall in its retry loop, so it
+            # takes the single-day route as well.
+            if len(dates_to_download) == 1:
+                single_day = pd.Timestamp(dates_to_download[0])
+        if single_day is not None:
+            df = self._download_history_grouped(single_day, known)
+            df.index = pd.DatetimeIndex(df.index).tz_localize(None).normalize()
+            df.index.name = "date"
+            return df
+        # Multi-day close range — per-RIC batched download with caching.
+        if self._cache_dir is not None:
+            csv_path = self._cache_dir / "csv"
+            csv_path.mkdir(parents=True, exist_ok=True)
+            data = self._fetch_range(known, dates_to_download, csv_path)
+        else:
+            with tempfile.TemporaryDirectory(prefix="lseg_fetch_") as tmp:
+                csv_path = Path(tmp) / "csv"
+                csv_path.mkdir(parents=True, exist_ok=True)
+                data = self._fetch_range(known, dates_to_download, csv_path)
+        data.index = pd.DatetimeIndex(data.index).tz_localize(None).normalize()
+        data.index.name = "date"
+        # A persistent cache directory may hold partials written by
+        # earlier runs for other RICs or other date ranges, and the
+        # unify step reads every CSV it finds. Keep only what this call
+        # asked for, with columns in request order.
+        first_day = pd.Timestamp(dates_to_download[0]).normalize()
+        last_day = pd.Timestamp(dates_to_download[-1]).normalize()
+        in_range = (data.index >= first_day) & (data.index <= last_day)
+        requested = list(dict.fromkeys(known))
+        return data.loc[in_range].reindex(columns=requested)
+    def _fetch_range(
+        self,
+        rics: list[str],
+        dates_to_download: list[str],
+        csv_path: Path,
+    ) -> pd.DataFrame:
+        """Download every RIC into ``csv_path`` and merge the partials.
+        Calls :meth:`_download_batched` once per RIC, in order, then
+        returns :meth:`_unify_batches` over the directory. A trailing
+        cooldown is forced after each RIC except the last one.
+        """
+        last_position = len(rics) - 1
+        for position, ric in enumerate(rics):
+            more_rics_follow = position < last_position
+            self._download_batched(
+                dates_to_download,
+                _STEP_DATAPOINTS,
+                csv_path,
+                ric,
+                more_rics_follow,
+                _DEFAULT_COOLDOWN,
+            )
+        return self._unify_batches(csv_path)
+    def _download_live_for_rics(
+        self, target: pd.Timestamp, rics: list[str]
+    ) -> pd.DataFrame:
+        """Live values for ``rics`` as a one-row frame at ``target``.
+        RICs with a declared ``snapshot_field`` go through
+        :meth:`_download_snapshot_grouped`; the others fall back to
+        :meth:`_download_history_grouped`. Columns of the result follow
+        the order of ``rics``. With no RICs at all the result is an
+        empty-column frame indexed at ``target``.
+        """
+        with_snapshot: list[str] = []
+        without_snapshot: list[str] = []
+        for ric in rics:
+            if ric in self._snapshot_field_per_ric:
+                with_snapshot.append(ric)
+            else:
+                without_snapshot.append(ric)
+        frames: list[pd.DataFrame] = []
+        if with_snapshot:
+            frames.append(
+                self._download_snapshot_grouped(with_snapshot, target)
+            )
+        if without_snapshot:
+            frames.append(
+                self._download_history_grouped(target, without_snapshot)
+            )
+        if not frames:
+            return pd.DataFrame(index=[target])
+        merged = pd.concat(frames, axis=1)
+        # Restore the caller's ordering after the snapshot/history split.
+        in_request_order = [ric for ric in rics if ric in merged.columns]
+        return merged[in_request_order]
+    # ------------------------------------------------------------------
+    # Internal: bulk history (close path)
+    # ------------------------------------------------------------------
+    def _download_history_grouped(
+        self, target: pd.Timestamp, rics: list[str]
+    ) -> pd.DataFrame:
+        """Bulk ``ld.get_history`` per ``history_field`` group with per-RIC rescue.
+        Two-stage strategy:
+        1. **Bulk per group**: groups ``rics`` by ``history_field`` and
+           issues one multi-RIC ``ld.get_history`` call per group. The
+           SDK fans out HTTP requests internally so wallclock drops
+           ~2.6× vs sequential per-RIC.
+        2. **Per-RIC rescue**: any RIC that came back NaN — either
+           because its bulk call raised, or because it was silently
+           dropped from the response (a known intermittent issue for a
+           handful of RICs at any universe size, e.g. MSCI_WORLD,
+           MSCI_LATAM) — gets retried via :meth:`_download_per_ric`
+           with isolated retries.
+        ``count=1, end=target`` semantics (vs ``start==end``): lseg-data
+        2.1.1's interday-summaries endpoint returns 0 rows for
+        ``start==end`` queries on bid/mid/settle/yield-type fields. With
+        ``count=1`` the API returns the latest row at-or-before
+        ``target``; we keep only the row matching ``target.normalize()``
+        and drop the rest to NaN (e.g. when target is a holiday for a
+        given market).
+        Returns a one-row frame indexed at ``target`` with one column
+        per entry of ``rics``; unresolved RICs hold NaN.
+        """
+        end = target.strftime("%Y-%m-%d")
+        target_day = target.normalize()
+        by_field: dict[str, list[str]] = {}
+        for ric in rics:
+            by_field.setdefault(self._history_field_per_ric[ric], []).append(
+                ric
+            )
+        values: dict[str, float] = {ric: float("nan") for ric in rics}
+        for field, group in by_field.items():
+            try:
+                df = _ld_get_history(
+                    universe=group,
+                    fields=[field],
+                    end=end,
+                    count=1,
+                    interval="daily",
+                )
+            except Exception as exc:
+                # The group's RICs stay NaN and are picked up by the
+                # rescue pass below.
+                logger.warning(
+                    "LSEG bulk %s (%d RICs) failed: %s — falling back per-RIC",
+                    field,
+                    len(group),
+                    exc,
+                )
+                continue
+            if df is None or df.empty:
+                continue
+            new_index = pd.DatetimeIndex(pd.to_datetime(df.index))
+            df.index = new_index
+            mask = new_index.normalize() == target_day
+            if not mask.any():
+                continue
+            row = df.loc[mask].iloc[0]
+            # Multi-RIC + single-field response: columns are flat RIC
+            # labels in the typical case, but lseg-data sometimes
+            # returns a (RIC, field) MultiIndex. A single-RIC call comes
+            # back with the column labelled by the field instead (the
+            # shape _download_per_ric reads); accept it here so a
+            # one-RIC group does not need a second, identical request.
+            column_for: dict[str, Any]
+            if isinstance(df.columns, pd.MultiIndex):
+                column_for = {ric: (ric, field) for ric in group}
+            elif len(group) == 1 and group[0] not in df.columns:
+                column_for = {group[0]: field}
+            else:
+                column_for = {ric: ric for ric in group}
+            for ric, key in column_for.items():
+                if key in df.columns and pd.notna(row.get(key)):
+                    values[ric] = float(row[key])
+        still_nan = [r for r in rics if pd.isna(values[r])]
+        if still_nan:
+            logger.info(
+                "LSEG history rescue: per-RIC for %d/%d RIC(s)",
+                len(still_nan),
+                len(rics),
+            )
+            rescue_df = self._download_per_ric(target, still_nan)
+            for ric in still_nan:
+                if ric in rescue_df.columns:
+                    v = rescue_df[ric].iloc[0]
+                    if pd.notna(v):
+                        values[ric] = float(v)
+        return pd.DataFrame(
+            {ric: [values[ric]] for ric in rics}, index=[target]
+        )
+    def _download_per_ric(
+        self, target: pd.Timestamp, rics: list[str]
+    ) -> pd.DataFrame:
+        """Per-RIC sequential rescue.
+        Used as fallback by :meth:`_download_history_grouped` for RICs
+        that the bulk call returned NaN for (or whose group raised).
+        Each RIC gets up to :data:`_PER_RIC_MAX_RETRIES` attempts with
+        a :data:`_DEFAULT_COOLDOWN` wait between retries. A failing RIC
+        does NOT cause the rest of the universe to be re-fetched — only
+        its own column is left NaN for the caller to handle.
+        ``UserRequestError.90006 — universe does not support the
+        following fields`` is treated as deterministic: the RIC's
+        ``history_field`` declaration is wrong; retries cannot recover.
+        Logged and the column stays NaN.
+        Only raised exceptions are retried: a ``None`` / empty response,
+        or one lacking the declared field column, ends the attempts for
+        that RIC immediately with NaN.
+        Returns a one-row frame indexed at ``target`` with one column
+        per entry of ``rics``.
+        """
+        end = target.strftime("%Y-%m-%d")
+        values: dict[str, float] = {}
+        for ric in rics:
+            field = self._history_field_per_ric[ric]
+            val = float("nan")
+            last_exc: BaseException | None = None
+            # for/else: the ``else`` branch runs only when every attempt
+            # ended in ``continue`` (i.e. raised); any ``break`` below
+            # skips the "exhausted" warning.
+            for attempt in range(1, _PER_RIC_MAX_RETRIES + 1):
+                try:
+                    df = _ld_get_history(
+                        universe=[ric],
+                        end=end,
+                        count=1,
+                        interval="daily",
+                        fields=[field],
+                    )
+                except Exception as exc:
+                    # Matched on the message text because that is all
+                    # this module knows about the SDK error.
+                    if "does not support the following fields" in str(exc):
+                        logger.warning(
+                            "LSEG per-RIC %s: declared history_field=%s "
+                            "not supported (no retries); fix registry",
+                            ric,
+                            field,
+                        )
+                        break
+                    last_exc = exc
+                    logger.warning(
+                        "LSEG per-RIC %s (%s) attempt %d/%d failed: %s",
+                        ric,
+                        field,
+                        attempt,
+                        _PER_RIC_MAX_RETRIES,
+                        exc,
+                    )
+                    # No point sleeping after the final attempt.
+                    if attempt < _PER_RIC_MAX_RETRIES:
+                        time.sleep(_DEFAULT_COOLDOWN)
+                    continue
+                # A response without usable data is not retried.
+                if df is None or df.empty or field not in df.columns:
+                    break
+                new_index = pd.DatetimeIndex(pd.to_datetime(df.index))
+                df.index = new_index
+                # ``count=1`` may return a row dated before ``target``;
+                # only a row on the target day counts.
+                mask = new_index.normalize() == target.normalize()
+                if mask.any():
+                    matching = df.loc[mask, field]
+                    v = matching.iloc[0]
+                    val = float(v) if pd.notna(v) else float("nan")
+                break
+            else:
+                logger.warning(
+                    "LSEG per-RIC %s exhausted %d retries (last error: %s)",
+                    ric,
+                    _PER_RIC_MAX_RETRIES,
+                    last_exc,
+                )
+            values[ric] = val
+        return pd.DataFrame(
+            {ric: [values[ric]] for ric in rics}, index=[target]
+        )
+    # ------------------------------------------------------------------
+    # Live snapshot (mode="live" only)
+    # ------------------------------------------------------------------
+    def _download_snapshot_grouped(
+        self, rics: list[str], target: pd.Timestamp
+    ) -> pd.DataFrame:
+        """Snapshot values via ``ld.get_data``, grouped by field and chunked.
+        ``rics`` are bucketed by their declared ``snapshot_field``
+        (typically ``CF_LAST`` for prices, ``CF_YIELD`` for yields).
+        Each bucket is resolved in two passes:
+        1. **Bulk** — one call per slice of at most
+           :data:`_GET_DATA_CHUNK` RICs.
+        2. **Rescue** — one call per RIC that is still NaN afterwards.
+        The result is a one-row DataFrame indexed at ``target`` with a
+        column for every entry of ``rics``; anything that could not be
+        resolved stays NaN for the caller to deal with.
+        """
+        results: dict[str, float] = dict.fromkeys(rics, float("nan"))
+        grouped: dict[str, list[str]] = {}
+        for ric in rics:
+            grouped.setdefault(self._snapshot_field_per_ric[ric], []).append(
+                ric
+            )
+        for field, members in grouped.items():
+            # Pass 1: bulk, chunk by chunk. A failing chunk is logged and
+            # left for the rescue pass.
+            for offset in range(0, len(members), _GET_DATA_CHUNK):
+                batch = members[offset : offset + _GET_DATA_CHUNK]
+                try:
+                    reply = _ld_get_data(universe=batch, fields=[field])
+                except Exception as exc:
+                    logger.warning(
+                        "LSEG %s call failed for chunk %d-%d: %s",
+                        field,
+                        offset,
+                        offset + len(batch) - 1,
+                        exc,
+                    )
+                    continue
+                if reply is None or reply.empty:
+                    continue
+                for _, record in reply.iterrows():
+                    instrument = record.get("Instrument")
+                    quote = record.get(field)
+                    if instrument in results and pd.notna(quote):
+                        results[instrument] = float(quote)
+            # Pass 2: individual retry for whatever is still missing.
+            missing = [m for m in members if pd.isna(results[m])]
+            if not missing:
+                continue
+            logger.info(
+                "LSEG %s rescue: retrying %d RIC(s) individually",
+                field,
+                len(missing),
+            )
+            for ric in missing:
+                try:
+                    reply = _ld_get_data(universe=[ric], fields=[field])
+                except Exception as exc:
+                    logger.warning(
+                        "LSEG %s rescue failed for %s: %s",
+                        field,
+                        ric,
+                        exc,
+                    )
+                    continue
+                if reply is None or reply.empty:
+                    continue
+                quote = reply.iloc[0].get(field)
+                if pd.notna(quote):
+                    results[ric] = float(quote)
+        return pd.DataFrame(
+            {ric: [results[ric]] for ric in rics}, index=[target]
+        )
+    @staticmethod
+    def _normalize_history(
+        df: pd.DataFrame,
+        rics: list[str],
+        field_per_ric: dict[str, str],
+    ) -> pd.DataFrame:
+        """Reduce ``ld.get_history`` output to one column per RIC.
+        Handles three response shapes:
+        - **MultiIndex columns** ``(RIC, field)`` — multi-RIC bulk call.
+          Pick ``field_per_ric[ric]`` for each RIC.
+        - **Flat columns labelled by field** — single-RIC call. The
+          declared field becomes the only data column, renamed to the
+          RIC.
+        - **Flat columns labelled by RIC** — single-field call against
+          multiple RICs (degenerate). Columns are returned as-is.
+        In every shape the result is sorted by index and repeated index
+        labels are collapsed to their first occurrence. Rows are never
+        dropped for having the same *values* as another date: an
+        unchanged price or yield on a later day is legitimate data.
+        An empty input, or one lacking the expected columns, yields an
+        empty frame with the RICs as columns.
+        """
+        if df.empty:
+            return pd.DataFrame(columns=pd.Index(rics))
+        if isinstance(df.columns, pd.MultiIndex):
+            picked: dict[str, pd.Series[float]] = {}
+            for ric in rics:
+                key = (ric, field_per_ric[ric])
+                if key in df.columns:
+                    picked[ric] = df[key]
+            if not picked:
+                return pd.DataFrame(columns=pd.Index(rics))
+            reduced = pd.DataFrame(picked)
+        elif len(rics) == 1:
+            ric = rics[0]
+            field = field_per_ric[ric]
+            if field not in df.columns:
+                return pd.DataFrame(columns=pd.Index([ric]))
+            reduced = pd.DataFrame({ric: df[field]})
+        else:
+            reduced = df
+        # De-duplicate on the index labels. DataFrame.drop_duplicates
+        # ignores the index and compares row values, which would discard
+        # every date whose value repeats an earlier one.
+        unique_dates = ~reduced.index.duplicated(keep="first")
+        return reduced[unique_dates].sort_index()
+    # ------------------------------------------------------------------
+    # Internal: per-RIC batched range download (close path)
+    # ------------------------------------------------------------------
+    def _download_batched(
+        self,
+        dates_to_download: list[str],
+        step_datapoints: int,
+        csv_path: Path,
+        ric: str,
+        force_wait: bool,
+        cooldown: int,
+    ) -> None:
+        """Download ``ric`` over ``dates_to_download`` into CSV partials.
+        The dates are consumed in slices of ``step_datapoints``. Each
+        slice maps to ``<ric>_from_<start>_to_<end>.csv`` under
+        ``csv_path``; a slice whose file already holds rows is skipped
+        (this is what lets an interrupted run resume), otherwise it is
+        fetched with :meth:`_download_batch` and written out.
+        ``cooldown`` seconds are slept after every download that is
+        followed by another slice, and after the last one as well when
+        ``force_wait`` is true.
+        A cached file that is empty or cannot be parsed is treated as
+        missing and downloaded again, so a partial left behind by an
+        interrupted write can neither crash the run nor stop the loop
+        from advancing.
+        """
+        n_downloaded = 0
+        while n_downloaded < len(dates_to_download):
+            batch_dates = dates_to_download[
+                n_downloaded : n_downloaded + step_datapoints
+            ]
+            start, end = batch_dates[0], batch_dates[-1]
+            path = csv_path / f"{ric}_from_{start}_to_{end}.csv"
+            cached_rows = self._count_cached_rows(path)
+            if cached_rows:
+                logger.info("%s already exists, skipping...", path.name)
+                n_downloaded += cached_rows
+                continue
+            logger.info("downloading %s from %s to %s ...", ric, start, end)
+            batch = self._download_batch(start, end, ric, cooldown)
+            index = batch.index
+            n_downloaded += len(index)
+            # The file is named after the dates actually present in the
+            # batch index rather than the requested strings.
+            actual_start = index[0].strftime("%Y-%m-%d")
+            actual_end = index[-1].strftime("%Y-%m-%d")
+            actual_path = (
+                csv_path / f"{ric}_from_{actual_start}_to_{actual_end}.csv"
+            )
+            batch.to_csv(actual_path)
+            if n_downloaded < len(dates_to_download) or force_wait:
+                logger.info("waiting %ds...", cooldown)
+                time.sleep(cooldown)
+    @staticmethod
+    def _count_cached_rows(path: Path) -> int:
+        """Rows in the cached partial at ``path``; 0 when it is unusable."""
+        if not path.exists():
+            return 0
+        try:
+            cached = pd.read_csv(path, index_col=_CSV_DATE_TITLE)
+        except ValueError as exc:
+            # pandas' EmptyDataError and ParserError both derive from
+            # ValueError, as does the error for a missing index column.
+            logger.warning(
+                "%s is unreadable (%s), downloading again", path.name, exc
+            )
+            return 0
+        return len(cached.index)
+    def _download_batch(
+        self,
+        start_date: str,
+        end_date: str,
+        ric: str,
+        cooldown: int = _DEFAULT_COOLDOWN,
+    ) -> pd.DataFrame:
+        """Fetch one RIC's daily history for ``start_date..end_date``.
+        The response is reduced to a single column named after the RIC,
+        stripped of weekend rows and repeated dates, and reindexed onto
+        the full business-day range, so days without data are NaN. The
+        index is named :data:`_CSV_DATE_TITLE`.
+        Failures (including a ``None`` / empty response) are retried up
+        to :data:`_MAX_RETRIES` times with ``cooldown`` seconds between
+        attempts; the last error is re-raised. Errors that retrying
+        cannot fix are raised at once: a RIC with no declared history
+        field (``KeyError``) and the SDK's "does not support the
+        following fields" rejection, which :meth:`_download_per_ric`
+        also treats as deterministic.
+        """
+        # Resolved outside the retry loop: a missing declaration is a
+        # configuration error, not a transient one.
+        field = self._history_field_per_ric[ric]
+        for attempt in range(1, _MAX_RETRIES + 1):
+            try:
+                df = _ld_get_history(
+                    universe=[ric],
+                    start=start_date,
+                    end=end_date,
+                    interval="daily",
+                    fields=[field],
+                )
+                if df is None or df.empty:
+                    raise RuntimeError(
+                        f"LSEG returned empty for {ric} "
+                        f"{start_date}..{end_date}"
+                    )
+                df = self._normalize_history(
+                    df, [ric], self._history_field_per_ric
+                )
+                dates = pd.DatetimeIndex(pd.to_datetime(df.index))
+                df.index = dates
+                bdays_mask = ~dates.weekday.isin([5, 6])
+                df = df[bdays_mask]
+                df = df[~df.index.duplicated(keep="first")]
+                full_range = pd.date_range(
+                    start=start_date, end=end_date, freq="B"
+                )
+                df = df.reindex(full_range).sort_index()
+                df.index.name = _CSV_DATE_TITLE
+                return df
+            except Exception as exc:
+                logger.warning(
+                    "LSEG batch error %s %s..%s (attempt %d/%d): %s",
+                    ric,
+                    start_date,
+                    end_date,
+                    attempt,
+                    _MAX_RETRIES,
+                    exc,
+                )
+                unsupported_field = (
+                    "does not support the following fields" in str(exc)
+                )
+                if unsupported_field or attempt == _MAX_RETRIES:
+                    raise
+            logger.info("waiting %ds due to LSEG error...", cooldown)
+            time.sleep(cooldown)
+        # The loop always returns or raises; this satisfies type checkers.
+        raise RuntimeError("unreachable")
+    @staticmethod
+    def _unify_batches(csv_path: Path) -> pd.DataFrame:
+        """Merge every CSV partial in ``csv_path`` into one wide frame.
+        Partials are grouped by the name of their first data column,
+        stacked vertically within each group (first occurrence wins on
+        a repeated index label), and the groups are then joined side by
+        side and sorted by index. The index holds the labels as read
+        from the CSVs.
+        Files are visited in sorted order so the column order does not
+        depend on directory enumeration. Non-CSV files and CSVs without
+        a data column are ignored; when nothing usable is found an
+        empty DataFrame is returned.
+        """
+        same_col_groups: dict[str, list[pd.DataFrame]] = {}
+        for csv_file in sorted(csv_path.iterdir()):
+            if csv_file.suffix != ".csv":
+                continue
+            df = pd.read_csv(csv_file, index_col=_CSV_DATE_TITLE)
+            if df.columns.empty:
+                continue
+            same_col_groups.setdefault(df.columns[0], []).append(df)
+        # pd.concat refuses an empty list, so answer that case directly.
+        if not same_col_groups:
+            return pd.DataFrame()
+        vertical_dfs: list[pd.DataFrame] = []
+        for dfs in same_col_groups.values():
+            concatenated = pd.concat(dfs, axis=0)
+            concatenated = concatenated[
+                ~concatenated.index.duplicated(keep="first")
+            ]
+            vertical_dfs.append(concatenated)
+        return pd.concat(vertical_dfs, axis=1).sort_index()