tesorotools-python 0.0.41__tar.gz → 0.0.42__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (69) hide show
  1. {tesorotools_python-0.0.41 → tesorotools_python-0.0.42}/PKG-INFO +3 -1
  2. {tesorotools_python-0.0.41 → tesorotools_python-0.0.42}/pyproject.toml +2 -1
  3. {tesorotools_python-0.0.41 → tesorotools_python-0.0.42}/src/tesorotools/__init__.py +9 -3
  4. {tesorotools_python-0.0.41 → tesorotools_python-0.0.42}/src/tesorotools/providers/__init__.py +10 -5
  5. tesorotools_python-0.0.42/src/tesorotools/providers/lseg.py +792 -0
  6. {tesorotools_python-0.0.41 → tesorotools_python-0.0.42}/.gitignore +0 -0
  7. {tesorotools_python-0.0.41 → tesorotools_python-0.0.42}/src/tesorotools/_build_context.py +0 -0
  8. {tesorotools_python-0.0.41 → tesorotools_python-0.0.42}/src/tesorotools/_registry.py +0 -0
  9. {tesorotools_python-0.0.41 → tesorotools_python-0.0.42}/src/tesorotools/artists/__init__.py +0 -0
  10. {tesorotools_python-0.0.41 → tesorotools_python-0.0.42}/src/tesorotools/artists/_common.py +0 -0
  11. {tesorotools_python-0.0.41 → tesorotools_python-0.0.42}/src/tesorotools/artists/barh_plot.py +0 -0
  12. {tesorotools_python-0.0.41 → tesorotools_python-0.0.42}/src/tesorotools/artists/line_plot.py +0 -0
  13. {tesorotools_python-0.0.41 → tesorotools_python-0.0.42}/src/tesorotools/artists/stacked.py +0 -0
  14. {tesorotools_python-0.0.41 → tesorotools_python-0.0.42}/src/tesorotools/artists/type_curve.py +0 -0
  15. {tesorotools_python-0.0.41 → tesorotools_python-0.0.42}/src/tesorotools/assets/README.md +0 -0
  16. {tesorotools_python-0.0.41 → tesorotools_python-0.0.42}/src/tesorotools/assets/fonts/CabinetGrotesk-Black.otf +0 -0
  17. {tesorotools_python-0.0.41 → tesorotools_python-0.0.42}/src/tesorotools/assets/fonts/CabinetGrotesk-Bold.otf +0 -0
  18. {tesorotools_python-0.0.41 → tesorotools_python-0.0.42}/src/tesorotools/assets/fonts/CabinetGrotesk-Extrabold.otf +0 -0
  19. {tesorotools_python-0.0.41 → tesorotools_python-0.0.42}/src/tesorotools/assets/fonts/CabinetGrotesk-Extralight.otf +0 -0
  20. {tesorotools_python-0.0.41 → tesorotools_python-0.0.42}/src/tesorotools/assets/fonts/CabinetGrotesk-Light.otf +0 -0
  21. {tesorotools_python-0.0.41 → tesorotools_python-0.0.42}/src/tesorotools/assets/fonts/CabinetGrotesk-Medium.otf +0 -0
  22. {tesorotools_python-0.0.41 → tesorotools_python-0.0.42}/src/tesorotools/assets/fonts/CabinetGrotesk-Regular.otf +0 -0
  23. {tesorotools_python-0.0.41 → tesorotools_python-0.0.42}/src/tesorotools/assets/fonts/CabinetGrotesk-Thin.otf +0 -0
  24. {tesorotools_python-0.0.41 → tesorotools_python-0.0.42}/src/tesorotools/assets/fonts/README.md +0 -0
  25. {tesorotools_python-0.0.41 → tesorotools_python-0.0.42}/src/tesorotools/assets/plots.yaml +0 -0
  26. {tesorotools_python-0.0.41 → tesorotools_python-0.0.42}/src/tesorotools/assets/tesoro.mplstyle +0 -0
  27. {tesorotools_python-0.0.41 → tesorotools_python-0.0.42}/src/tesorotools/data_sources/__init__.py +0 -0
  28. {tesorotools_python-0.0.41 → tesorotools_python-0.0.42}/src/tesorotools/data_sources/debug.py +0 -0
  29. {tesorotools_python-0.0.41 → tesorotools_python-0.0.42}/src/tesorotools/database/__init__.py +0 -0
  30. {tesorotools_python-0.0.41 → tesorotools_python-0.0.42}/src/tesorotools/database/local.py +0 -0
  31. {tesorotools_python-0.0.41 → tesorotools_python-0.0.42}/src/tesorotools/database/push.py +0 -0
  32. {tesorotools_python-0.0.41 → tesorotools_python-0.0.42}/src/tesorotools/database/shared.py +0 -0
  33. {tesorotools_python-0.0.41 → tesorotools_python-0.0.42}/src/tesorotools/dependencies/__init__.py +0 -0
  34. {tesorotools_python-0.0.41 → tesorotools_python-0.0.42}/src/tesorotools/dependencies/node.py +0 -0
  35. {tesorotools_python-0.0.41 → tesorotools_python-0.0.42}/src/tesorotools/dependencies/resolution.py +0 -0
  36. {tesorotools_python-0.0.41 → tesorotools_python-0.0.42}/src/tesorotools/driver.py +0 -0
  37. {tesorotools_python-0.0.41 → tesorotools_python-0.0.42}/src/tesorotools/manifest.py +0 -0
  38. {tesorotools_python-0.0.41 → tesorotools_python-0.0.42}/src/tesorotools/offsets/__init__.py +0 -0
  39. {tesorotools_python-0.0.41 → tesorotools_python-0.0.42}/src/tesorotools/offsets/offsets.py +0 -0
  40. {tesorotools_python-0.0.41 → tesorotools_python-0.0.42}/src/tesorotools/offsets/outliers.py +0 -0
  41. {tesorotools_python-0.0.41 → tesorotools_python-0.0.42}/src/tesorotools/orchestration.py +0 -0
  42. {tesorotools_python-0.0.41 → tesorotools_python-0.0.42}/src/tesorotools/pipeline/__init__.py +0 -0
  43. {tesorotools_python-0.0.41 → tesorotools_python-0.0.42}/src/tesorotools/pipeline/diagnose.py +0 -0
  44. {tesorotools_python-0.0.41 → tesorotools_python-0.0.42}/src/tesorotools/pipeline/engine.py +0 -0
  45. {tesorotools_python-0.0.41 → tesorotools_python-0.0.42}/src/tesorotools/pipeline/rules.py +0 -0
  46. {tesorotools_python-0.0.41 → tesorotools_python-0.0.42}/src/tesorotools/providers/base.py +0 -0
  47. {tesorotools_python-0.0.41 → tesorotools_python-0.0.42}/src/tesorotools/providers/bde.py +0 -0
  48. {tesorotools_python-0.0.41 → tesorotools_python-0.0.42}/src/tesorotools/providers/ecb.py +0 -0
  49. {tesorotools_python-0.0.41 → tesorotools_python-0.0.42}/src/tesorotools/py.typed +0 -0
  50. {tesorotools_python-0.0.41 → tesorotools_python-0.0.42}/src/tesorotools/render/__init__.py +0 -0
  51. {tesorotools_python-0.0.41 → tesorotools_python-0.0.42}/src/tesorotools/render/content/__init__.py +0 -0
  52. {tesorotools_python-0.0.41 → tesorotools_python-0.0.42}/src/tesorotools/render/content/content.py +0 -0
  53. {tesorotools_python-0.0.41 → tesorotools_python-0.0.42}/src/tesorotools/render/content/images.py +0 -0
  54. {tesorotools_python-0.0.41 → tesorotools_python-0.0.42}/src/tesorotools/render/content/section.py +0 -0
  55. {tesorotools_python-0.0.41 → tesorotools_python-0.0.42}/src/tesorotools/render/content/subtitle.py +0 -0
  56. {tesorotools_python-0.0.41 → tesorotools_python-0.0.42}/src/tesorotools/render/content/table.py +0 -0
  57. {tesorotools_python-0.0.41 → tesorotools_python-0.0.42}/src/tesorotools/render/content/text.py +0 -0
  58. {tesorotools_python-0.0.41 → tesorotools_python-0.0.42}/src/tesorotools/render/content/title.py +0 -0
  59. {tesorotools_python-0.0.41 → tesorotools_python-0.0.42}/src/tesorotools/render/report.py +0 -0
  60. {tesorotools_python-0.0.41 → tesorotools_python-0.0.42}/src/tesorotools/testing/__init__.py +0 -0
  61. {tesorotools_python-0.0.41 → tesorotools_python-0.0.42}/src/tesorotools/testing/compare.py +0 -0
  62. {tesorotools_python-0.0.41 → tesorotools_python-0.0.42}/src/tesorotools/utils/__init__.py +0 -0
  63. {tesorotools_python-0.0.41 → tesorotools_python-0.0.42}/src/tesorotools/utils/config.py +0 -0
  64. {tesorotools_python-0.0.41 → tesorotools_python-0.0.42}/src/tesorotools/utils/format.py +0 -0
  65. {tesorotools_python-0.0.41 → tesorotools_python-0.0.42}/src/tesorotools/utils/globals.py +0 -0
  66. {tesorotools_python-0.0.41 → tesorotools_python-0.0.42}/src/tesorotools/utils/matplotlib.py +0 -0
  67. {tesorotools_python-0.0.41 → tesorotools_python-0.0.42}/src/tesorotools/utils/series.py +0 -0
  68. {tesorotools_python-0.0.41 → tesorotools_python-0.0.42}/src/tesorotools/utils/shortcuts.py +0 -0
  69. {tesorotools_python-0.0.41 → tesorotools_python-0.0.42}/src/tesorotools/utils/template.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: tesorotools-python
3
- Version: 0.0.41
3
+ Version: 0.0.42
4
4
  Requires-Python: >=3.13
5
5
  Requires-Dist: babel>=2.17
6
6
  Requires-Dist: matplotlib>=3.10
@@ -16,3 +16,5 @@ Provides-Extra: bde
16
16
  Requires-Dist: requests>=2.31; extra == 'bde'
17
17
  Provides-Extra: ecb
18
18
  Requires-Dist: requests>=2.31; extra == 'ecb'
19
+ Provides-Extra: lseg
20
+ Requires-Dist: lseg-data>=2.1; extra == 'lseg'
@@ -1,7 +1,7 @@
1
1
  [project]
2
2
  name = "tesorotools-python"
3
3
  requires-python = ">=3.13"
4
- version = "0.0.41"
4
+ version = "0.0.42"
5
5
  dependencies = [
6
6
  # database and ORM
7
7
  "psycopg[binary]>=3.1",
@@ -27,6 +27,7 @@ dependencies = [
27
27
  [project.optional-dependencies]
28
28
  bde = ["requests>=2.31"]
29
29
  ecb = ["requests>=2.31"]
30
+ lseg = ["lseg-data>=2.1"]
30
31
 
31
32
  [dependency-groups]
32
33
  dev = [
@@ -7,9 +7,9 @@ effects) and registers their YAML tags via
7
7
 
8
8
  Provider subclasses gated by optional extras
9
9
  (``BdeProvider`` requires ``[bde]``, ``EcbProvider``
10
- requires ``[ecb]``) are exposed lazily through
11
- ``__getattr__``; importing this module does not require the
12
- extras to be installed.
10
+ requires ``[ecb]``, ``LSEGProvider`` requires ``[lseg]``)
11
+ are exposed lazily through ``__getattr__``; importing this
12
+ module does not require the extras to be installed.
13
13
 
14
14
  Third parties extend the package via ``register_artist``,
15
15
  ``register_tag``, and ``register_provider`` (and their
@@ -22,6 +22,7 @@ from typing import TYPE_CHECKING, Any
22
22
  if TYPE_CHECKING:
23
23
  from tesorotools.providers.bde import BdeProvider
24
24
  from tesorotools.providers.ecb import EcbProvider
25
+ from tesorotools.providers.lseg import LSEGProvider
25
26
 
26
27
  from tesorotools._build_context import BuildContext
27
28
  from tesorotools._registry import (
@@ -106,6 +107,7 @@ __all__ = [
106
107
  "Images",
107
108
  "Legend",
108
109
  "LinePlot",
110
+ "LSEGProvider",
109
111
  "RegistryProtocol",
110
112
  "Report",
111
113
  "Section",
@@ -145,4 +147,8 @@ def __getattr__(name: str) -> Any:
145
147
  from tesorotools.providers.ecb import EcbProvider
146
148
 
147
149
  return EcbProvider
150
+ if name == "LSEGProvider":
151
+ from tesorotools.providers.lseg import LSEGProvider
152
+
153
+ return LSEGProvider
148
154
  raise AttributeError(f"module 'tesorotools' has no attribute {name!r}")
@@ -1,10 +1,9 @@
1
1
  """Public provider API.
2
2
 
3
- ``BdeProvider`` and ``EcbProvider`` depend on the optional
4
- ``[bde]`` / ``[ecb]`` extras (which install ``requests``)
5
- and are imported lazily through ``__getattr__``; importing
6
- ``tesorotools.providers`` itself does not require those
7
- extras.
3
+ ``BdeProvider``, ``EcbProvider`` and ``LSEGProvider`` depend on
4
+ the optional ``[bde]`` / ``[ecb]`` / ``[lseg]`` extras and are
5
+ imported lazily through ``__getattr__``; importing
6
+ ``tesorotools.providers`` itself does not require those extras.
8
7
  """
9
8
 
10
9
  from typing import TYPE_CHECKING, Any
@@ -18,11 +17,13 @@ from tesorotools.providers.base import (
18
17
  if TYPE_CHECKING:
19
18
  from tesorotools.providers.bde import BdeProvider
20
19
  from tesorotools.providers.ecb import EcbProvider
20
+ from tesorotools.providers.lseg import LSEGProvider
21
21
 
22
22
  __all__ = [
23
23
  "BdeProvider",
24
24
  "DataProvider",
25
25
  "EcbProvider",
26
+ "LSEGProvider",
26
27
  "RegistryProtocol",
27
28
  "bootstrap_providers",
28
29
  ]
@@ -37,6 +38,10 @@ def __getattr__(name: str) -> Any:
37
38
  from tesorotools.providers.ecb import EcbProvider
38
39
 
39
40
  return EcbProvider
41
+ if name == "LSEGProvider":
42
+ from tesorotools.providers.lseg import LSEGProvider
43
+
44
+ return LSEGProvider
40
45
  raise AttributeError(
41
46
  f"module 'tesorotools.providers' has no attribute {name!r}"
42
47
  )
@@ -0,0 +1,792 @@
1
+ """LSEG Data Library provider.
2
+
3
+ Wraps ``lseg-data`` (formerly Refinitiv Data Library) behind the
4
+ project's :class:`DataProvider` interface.
5
+
6
+ Install with the ``lseg`` optional extra::
7
+
8
+ uv pip install "tesorotools-python[lseg]"
9
+
10
+ Registry metadata
11
+ -----------------
12
+ Each instrument MUST declare an ``lseg`` block in its registry entry
13
+ with a ``history_field`` value (no default). The provider issues a
14
+ single per-RIC ``get_history`` call requesting only that one field.
15
+ Live mode additionally honours an optional ``snapshot_field`` (typically
16
+ ``CF_LAST`` for prices, ``CF_YIELD`` for yields).
17
+
18
+ Modes
19
+ -----
20
+ :meth:`LSEGProvider.build_for` returns two instances:
21
+
22
+ * ``lseg_close``: history-only, multi-day range supported. The standard
23
+ daily-close path.
24
+ * ``lseg_live``: snapshot only, requires ``start == end``. Uses
25
+ ``ld.get_data`` against the declared ``snapshot_field`` and falls
26
+ back to history-grouped for RICs without one.
27
+
28
+ Both share the same fetch surface so they plug straight into
29
+ :func:`tesorotools.bootstrap_providers`.
30
+
31
+ Availability
32
+ ------------
33
+ Workspace ships only on Windows. :meth:`is_available` inspects
34
+ ``tasklist`` for a running Workspace/Refinitiv process and returns
35
+ ``False`` everywhere else; :meth:`build_for` then raises so a missing
36
+ session never silently downgrades to an empty result.
37
+ """
38
+
39
+ from __future__ import annotations
40
+
41
+ import logging
42
+ import subprocess
43
+ import tempfile
44
+ import time
45
+ import warnings
46
+ from pathlib import Path
47
+ from typing import TYPE_CHECKING, Any, Callable, ClassVar, Literal, cast
48
+
49
+ import lseg.data as ld
50
+ import pandas as pd
51
+
52
+ from tesorotools.providers.base import DataProvider
53
+
54
+ if TYPE_CHECKING:
55
+ from tesorotools._build_context import BuildContext
56
+
57
+ # The lseg-data SDK ships without type stubs, so its top-level symbols
58
+ # surface as partially-unknown. Cast once to a typed alias and silence
59
+ # the unknown-member access at the source attribute lookup so call sites
60
+ # stay clean. The ``| None`` reflects the SDK's observed behaviour
61
+ # (occasional ``None`` returns on transient failures); defensive
62
+ # ``is None`` checks at call sites match it.
63
+ _ld_get_history = cast(
64
+ Callable[..., "pd.DataFrame | None"],
65
+ ld.get_history, # pyright: ignore[reportUnknownMemberType]
66
+ )
67
+ _ld_get_data = cast(
68
+ Callable[..., "pd.DataFrame | None"],
69
+ ld.get_data, # pyright: ignore[reportUnknownMemberType]
70
+ )
71
+
72
+ # lseg-data 2.1.1 still uses pandas .fillna with implicit downcasting,
73
+ # which pandas 2.2+ marks as a FutureWarning. Cosmetic; remove this
74
+ # filter when the SDK is patched.
75
+ warnings.filterwarnings(
76
+ "ignore",
77
+ message="Downcasting object dtype arrays",
78
+ category=FutureWarning,
79
+ module=r"lseg\.data\..*",
80
+ )
81
+
82
+ logger = logging.getLogger(__name__)
83
+
84
+
85
_STEP_DATAPOINTS: int = 2500
_DEFAULT_COOLDOWN: int = 10
_MAX_RETRIES: int = 150
# Retry budget for one RIC inside the daily snapshot loop. Kept well
# below :data:`_MAX_RETRIES`: a single misbehaving RIC must not hold up
# the whole run for tens of minutes. Failures stay isolated and the
# affected column is simply left NaN for the caller to deal with.
_PER_RIC_MAX_RETRIES: int = 5
_CSV_DATE_TITLE: str = "Date"

# Upper bound on the number of RICs sent in one ``ld.get_data`` call by
# the snapshot overlay. With larger universes the response occasionally
# carries ``<NA>`` for instruments that resolve fine when requested on
# their own (seen intermittently with MSCI_WORLD/.MIWO00000PUS and
# MSCI_LATAM). Chunking is defence in depth only; whatever still slips
# through is picked up by a per-RIC rescue pass.
_GET_DATA_CHUNK: int = 50

# Executable names whose presence means a usable Workspace session is
# running on this machine. Both the legacy Refinitiv name and the
# rebranded LSEG names are listed so the check survives the rebrand.
_WORKSPACE_PROCESSES: tuple[str, ...] = (
    "lsegworkspace.exe",
    "refinitivworkspace.exe",
    "workspace.exe",
)
111
+
112
+
113
+ class LSEGProvider(DataProvider):
114
+ """LSEG Data Library data provider."""
115
+
116
+ PROVIDER_NAME: ClassVar[str] = "lseg"
117
+
118
+ @classmethod
119
+ def build_for(cls, ctx: "BuildContext") -> dict[str, "DataProvider"]:
120
+ """Build the live + close LSEG instances.
121
+
122
+ Returns a dict with two keys, ``"lseg_close"`` and
123
+ ``"lseg_live"``, each backed by an :class:`LSEGProvider` with
124
+ the matching ``mode``. If the registry asks for no LSEG codes
125
+ in this consumer the dict is empty.
126
+
127
+ Raises
128
+ ------
129
+ RuntimeError
130
+ If Workspace is not running locally. Consumers that want a
131
+ mock fallback must wrap this method themselves; this class
132
+ does not synthesise data.
133
+ """
134
+ cids = ctx.registry.all_cids_for_provider(
135
+ ctx.consumer, cls.PROVIDER_NAME
136
+ )
137
+ if not cids:
138
+ return {}
139
+ history_fields, snapshot_fields = cls._field_maps(ctx.registry, cids)
140
+
141
+ probe = cls(history_fields=history_fields, mode="close")
142
+ if ctx.mock or not probe.is_available():
143
+ raise RuntimeError("LSEG Workspace is not running")
144
+
145
+ return {
146
+ "lseg_close": cls(
147
+ history_fields=history_fields,
148
+ snapshot_fields=snapshot_fields,
149
+ mode="close",
150
+ ),
151
+ "lseg_live": cls(
152
+ history_fields=history_fields,
153
+ snapshot_fields=snapshot_fields,
154
+ mode="live",
155
+ ),
156
+ }
157
+
158
+ @staticmethod
159
+ def _field_maps(
160
+ registry: Any, cids: list[str]
161
+ ) -> tuple[dict[str, str], dict[str, str]]:
162
+ """Build ``(history_fields, snapshot_fields)`` from registry."""
163
+ history_fields: dict[str, str] = {}
164
+ snapshot_fields: dict[str, str] = {}
165
+ for cid in cids:
166
+ meta = registry.get_provider_meta(cid, "lseg")
167
+ ric = meta["code"]
168
+ field = meta.get("history_field")
169
+ if not field:
170
+ raise ValueError(
171
+ f"{cid}: 'history_field' is required in the lseg "
172
+ "block (no default)."
173
+ )
174
+ history_fields[ric] = field
175
+ snap = meta.get("snapshot_field")
176
+ if snap:
177
+ snapshot_fields[ric] = snap
178
+ return history_fields, snapshot_fields
179
+
180
+ def __init__(
181
+ self,
182
+ history_fields: dict[str, str],
183
+ snapshot_fields: dict[str, str] | None = None,
184
+ skip_session: bool = False,
185
+ mode: Literal["live", "close"] = "close",
186
+ cache_dir: Path | None = None,
187
+ ) -> None:
188
+ """Initialise an LSEG provider.
189
+
190
+ Parameters
191
+ ----------
192
+ history_fields
193
+ ``{ric: field}``. The field used by the close (history)
194
+ path for each RIC, e.g. ``"YLDTOMAT"``, ``"SETTLE"``,
195
+ ``"TRDPRC_1"``. Required for every RIC that ``fetch`` may
196
+ be asked about — RICs not in this map are silently skipped.
197
+ snapshot_fields
198
+ ``{ric: field}``. Optional. The field used by the live
199
+ (snapshot) path for RICs whose live tick differs from the
200
+ consolidated history (notably futures, where
201
+ ``history_field=SETTLE`` is yesterday's settle and the
202
+ live tick lives in ``CF_LAST``). RICs not present here
203
+ fall back to ``history_fields`` in live mode too.
204
+ skip_session
205
+ Skip opening an LSEG session on first ``fetch``. Mostly
206
+ for tests that monkey-patch the SDK and never want a real
207
+ connection.
208
+ mode
209
+ ``"live"`` (snapshot, ``start==end``) or ``"close"``
210
+ (history, multi-day range supported).
211
+ cache_dir
212
+ Optional persistent cache for the multi-day close path.
213
+ When ``None``, ``fetch`` uses a fresh tempdir per call
214
+ (no resume). When set, CSV partials are kept across runs
215
+ so an interrupted download resumes from disk.
216
+ """
217
+ self._session_opened = skip_session
218
+ self._mode: Literal["live", "close"] = mode
219
+ self._cache_dir: Path | None = cache_dir
220
+ self._history_field_per_ric: dict[str, str] = dict(history_fields)
221
+ self._snapshot_field_per_ric: dict[str, str] = dict(
222
+ snapshot_fields or {}
223
+ )
224
+
225
+ # ------------------------------------------------------------------
226
+ # Session lifecycle
227
+ # ------------------------------------------------------------------
228
+
229
+ def _open_session(self) -> None:
230
+ if not self._session_opened:
231
+ ld.open_session()
232
+ self._session_opened = True
233
+
234
def is_available(self) -> bool:
    """Report whether a Workspace process shows up in ``tasklist``.

    Runs ``tasklist`` with a 10-second timeout and checks whether any
    name in :data:`_WORKSPACE_PROCESSES` occurs in its lower-cased
    standard output.

    The probe is best-effort. If ``tasklist`` cannot be started, times
    out, or its output cannot be decoded, the reason is logged at
    debug level and ``False`` is returned instead of raising.
    """
    # Only the subprocess call sits inside the ``try`` so that an
    # unrelated programming error is not mistaken for "not available".
    try:
        result = subprocess.run(
            ["tasklist"],
            capture_output=True,
            text=True,
            timeout=10,
        )
    except (OSError, subprocess.SubprocessError, ValueError) as exc:
        # OSError: executable missing or not runnable;
        # SubprocessError: timeout; ValueError: undecodable output.
        logger.debug("LSEG availability probe failed: %s", exc)
        return False
    stdout_lower = result.stdout.lower()
    return any(name in stdout_lower for name in _WORKSPACE_PROCESSES)
246
+
247
+ # ------------------------------------------------------------------
248
+ # Tesorotools-style fetch
249
+ # ------------------------------------------------------------------
250
+
251
+ def fetch(
252
+ self,
253
+ codes: list[str],
254
+ start: str | None = None,
255
+ end: str | None = None,
256
+ ) -> pd.DataFrame:
257
+ """Daily data for ``codes`` between ``start`` and ``end``.
258
+
259
+ Returns columns labelled with raw RICs (no canonical-ID rename),
260
+ per the tesorotools schema.
261
+
262
+ Dispatch by ``self._mode``:
263
+
264
+ - ``mode="close"``: history-only, multi-day range supported.
265
+ Reuses the per-RIC batched download with caching.
266
+ - ``mode="live"``: snapshot only, requires ``start == end``.
267
+ Snapshot grouped + history fallback for RICs without
268
+ ``snapshot_field``.
269
+ """
270
+ if not codes:
271
+ return pd.DataFrame()
272
+
273
+ self._open_session()
274
+ known = [c for c in codes if c in self._history_field_per_ric]
275
+ if not known:
276
+ return pd.DataFrame(columns=pd.Index(codes))
277
+
278
+ if self._mode == "live":
279
+ if start is None or end is None or start != end:
280
+ raise ValueError(
281
+ "LSEGProvider.fetch with mode='live' requires "
282
+ f"start == end (got start={start!r}, end={end!r})"
283
+ )
284
+ target = pd.Timestamp(start)
285
+ df = self._download_live_for_rics(target, known)
286
+ df.index = pd.DatetimeIndex(df.index).tz_localize(None).normalize()
287
+ df.index.name = "date"
288
+ return df
289
+
290
+ # mode == "close"
291
+ if start is None:
292
+ start = "1900-01-01"
293
+ if end is None:
294
+ end = pd.Timestamp.now().strftime("%Y-%m-%d")
295
+
296
+ # Single-day close snapshot routes via _download_history_grouped,
297
+ # which uses ``count=1, end=target`` semantics. The
298
+ # interday-summaries endpoint returns 0 rows for ``start==end``
299
+ # queries on bid/mid/settle/yield-type fields, so the multi-day
300
+ # batched path below would otherwise return empty. ``count=1``
301
+ # returns the latest row at-or-before ``target`` and the method
302
+ # filters to ``target.normalize()`` (NaN if target is a market
303
+ # holiday).
304
+ if start == end:
305
+ target = pd.Timestamp(start)
306
+ df = self._download_history_grouped(target, known)
307
+ df.index = pd.DatetimeIndex(df.index).tz_localize(None).normalize()
308
+ df.index.name = "date"
309
+ return df
310
+
311
+ # Multi-day close range — per-RIC batched download with caching.
312
+ dates_to_download = list(
313
+ pd.date_range(start=start, end=end, freq="B").astype("str")
314
+ )
315
+ if not dates_to_download:
316
+ return pd.DataFrame(columns=pd.Index(known))
317
+
318
+ if self._cache_dir is not None:
319
+ csv_path = self._cache_dir / "csv"
320
+ csv_path.mkdir(parents=True, exist_ok=True)
321
+ data = self._fetch_range(known, dates_to_download, csv_path)
322
+ else:
323
+ with tempfile.TemporaryDirectory(prefix="lseg_fetch_") as tmp:
324
+ csv_path = Path(tmp) / "csv"
325
+ csv_path.mkdir(parents=True, exist_ok=True)
326
+ data = self._fetch_range(known, dates_to_download, csv_path)
327
+
328
+ data.index = pd.DatetimeIndex(data.index).tz_localize(None).normalize()
329
+ data.index.name = "date"
330
+ return data
331
+
332
def _fetch_range(
    self,
    rics: list[str],
    dates_to_download: list[str],
    csv_path: Path,
) -> pd.DataFrame:
    """Download every RIC in turn, then merge the partial results.

    Calls ``_download_batched`` once per RIC, in list order, and
    finally returns whatever ``_unify_batches(csv_path)`` produces.
    The fifth argument passed to ``_download_batched`` is true for
    every RIC except the last one — presumably so a cooldown is
    enforced between RICs but not after the final one (TODO confirm
    against ``_download_batched``).
    """
    final_position = len(rics) - 1
    for position, ric in enumerate(rics):
        self._download_batched(
            dates_to_download,
            _STEP_DATAPOINTS,
            csv_path,
            ric,
            position < final_position,
            _DEFAULT_COOLDOWN,
        )
    return self._unify_batches(csv_path)
350
+
351
+ def _download_live_for_rics(
352
+ self, target: pd.Timestamp, rics: list[str]
353
+ ) -> pd.DataFrame:
354
+ """Live snapshot for an arbitrary RIC subset.
355
+
356
+ Snapshot grouped (for RICs with ``snapshot_field``) + history
357
+ grouped fallback (for RICs without one).
358
+ """
359
+ snap_rics = [r for r in rics if r in self._snapshot_field_per_ric]
360
+ hist_rics = [r for r in rics if r not in self._snapshot_field_per_ric]
361
+
362
+ parts: list[pd.DataFrame] = []
363
+ if snap_rics:
364
+ parts.append(self._download_snapshot_grouped(snap_rics, target))
365
+ if hist_rics:
366
+ parts.append(self._download_history_grouped(target, hist_rics))
367
+
368
+ if not parts:
369
+ return pd.DataFrame(index=[target])
370
+ combined = pd.concat(parts, axis=1)
371
+ ordered = [r for r in rics if r in combined.columns]
372
+ return combined[ordered]
373
+
374
+ # ------------------------------------------------------------------
375
+ # Internal: bulk history (close path)
376
+ # ------------------------------------------------------------------
377
+
378
def _download_history_grouped(
    self, target: pd.Timestamp, rics: list[str]
) -> pd.DataFrame:
    """Bulk ``ld.get_history`` per ``history_field`` group, then rescue.

    Two-stage strategy:

    1. **Bulk per group**: ``rics`` are grouped by their declared
       ``history_field`` and one multi-RIC ``ld.get_history`` call is
       issued per group.
    2. **Per-RIC rescue**: every RIC that is still NaN afterwards —
       because its bulk call raised, returned nothing, had no row for
       the target day, or simply omitted the RIC — is retried through
       :meth:`_download_per_ric`.

    Requests use ``count=1, end=target`` rather than ``start == end``:
    lseg-data 2.1.1's interday-summaries endpoint returns 0 rows for
    ``start == end`` queries on bid/mid/settle/yield-type fields. With
    ``count=1`` the API returns the latest row at or before ``target``;
    only a row whose date equals ``target.normalize()`` is used, so a
    RIC with no row on that day (e.g. a market holiday) stays NaN.

    Returns
    -------
    pd.DataFrame
        One row indexed at ``target`` with one float column per RIC
        in ``rics``; unresolved RICs hold NaN.
    """
    end = target.strftime("%Y-%m-%d")
    target_day = target.normalize()

    by_field: dict[str, list[str]] = {}
    for ric in rics:
        by_field.setdefault(self._history_field_per_ric[ric], []).append(ric)

    values: dict[str, float] = {ric: float("nan") for ric in rics}

    for field, group in by_field.items():
        try:
            df = _ld_get_history(
                universe=group,
                fields=[field],
                end=end,
                count=1,
                interval="daily",
            )
        except Exception as exc:
            # The group's RICs stay NaN and are therefore picked up by
            # the rescue stage below; no separate bookkeeping needed.
            logger.warning(
                "LSEG bulk %s (%d RICs) failed: %s — falling back per-RIC",
                field,
                len(group),
                exc,
            )
            continue

        if df is None or df.empty:
            continue
        new_index = pd.DatetimeIndex(pd.to_datetime(df.index))
        df.index = new_index
        mask = new_index.normalize() == target_day
        if not mask.any():
            continue
        row = df.loc[mask].iloc[0]

        # Multi-RIC + single-field response: columns are flat RIC
        # labels in the typical case, but lseg-data sometimes returns
        # a (RIC, field) MultiIndex. Only the lookup key differs.
        has_field_level = isinstance(df.columns, pd.MultiIndex)
        for ric in group:
            key = (ric, field) if has_field_level else ric
            if key in df.columns and pd.notna(row.get(key)):
                values[ric] = float(row[key])

    still_nan = [r for r in rics if pd.isna(values[r])]
    if still_nan:
        logger.info(
            "LSEG history rescue: per-RIC for %d/%d RIC(s)",
            len(still_nan),
            len(rics),
        )
        rescue_df = self._download_per_ric(target, still_nan)
        for ric in still_nan:
            if ric in rescue_df.columns:
                v = rescue_df[ric].iloc[0]
                if pd.notna(v):
                    values[ric] = float(v)

    return pd.DataFrame(
        {ric: [values[ric]] for ric in rics}, index=[target]
    )
474
+
475
def _download_per_ric(
    self, target: pd.Timestamp, rics: list[str]
) -> pd.DataFrame:
    """Sequential one-RIC-at-a-time rescue download.

    Fallback used by :meth:`_download_history_grouped` for RICs that
    are still NaN after the bulk stage. Each RIC is requested on its
    own with ``count=1, end=target``:

    - A call that raises is retried, up to
      :data:`_PER_RIC_MAX_RETRIES` attempts in total, sleeping
      :data:`_DEFAULT_COOLDOWN` seconds between attempts.
    - An error whose text contains ``does not support the following
      fields`` is treated as deterministic (the registry declares the
      wrong ``history_field``): it is logged once and not retried.
    - A call that returns ``None``, an empty frame, or a frame
      without the requested field column ends the attempts for that
      RIC without retrying.

    A failing RIC only leaves its own column NaN; the other RICs are
    unaffected.

    Returns
    -------
    pd.DataFrame
        One row indexed at ``target`` with one column per RIC.
    """
    end = target.strftime("%Y-%m-%d")
    wanted_day = target.normalize()
    missing = float("nan")

    def rescue_one(ric: str) -> float:
        field = self._history_field_per_ric[ric]
        last_exc: BaseException | None = None

        attempt = 0
        while attempt < _PER_RIC_MAX_RETRIES:
            attempt += 1
            try:
                df = _ld_get_history(
                    universe=[ric],
                    end=end,
                    count=1,
                    interval="daily",
                    fields=[field],
                )
            except Exception as exc:
                if "does not support the following fields" in str(exc):
                    logger.warning(
                        "LSEG per-RIC %s: declared history_field=%s "
                        "not supported (no retries); fix registry",
                        ric,
                        field,
                    )
                    return missing
                last_exc = exc
                logger.warning(
                    "LSEG per-RIC %s (%s) attempt %d/%d failed: %s",
                    ric,
                    field,
                    attempt,
                    _PER_RIC_MAX_RETRIES,
                    exc,
                )
                # No point sleeping after the final attempt.
                if attempt < _PER_RIC_MAX_RETRIES:
                    time.sleep(_DEFAULT_COOLDOWN)
                continue

            # The call went through: whatever it returned is final.
            if df is None or df.empty or field not in df.columns:
                return missing
            stamps = pd.DatetimeIndex(pd.to_datetime(df.index))
            df.index = stamps
            on_target = stamps.normalize() == wanted_day
            if not on_target.any():
                return missing
            first = df.loc[on_target, field].iloc[0]
            return float(first) if pd.notna(first) else missing

        # Reached only when every attempt raised.
        logger.warning(
            "LSEG per-RIC %s exhausted %d retries (last error: %s)",
            ric,
            _PER_RIC_MAX_RETRIES,
            last_exc,
        )
        return missing

    values: dict[str, float] = {}
    for ric in rics:
        values[ric] = rescue_one(ric)

    return pd.DataFrame(
        {ric: [values[ric]] for ric in rics}, index=[target]
    )
556
+
557
+ # ------------------------------------------------------------------
558
+ # Live snapshot (mode="live" only)
559
+ # ------------------------------------------------------------------
560
+
561
def _download_snapshot_grouped(
    self, rics: list[str], target: pd.Timestamp
) -> pd.DataFrame:
    """Snapshot values via ``ld.get_data``, grouped by field and chunked.

    ``rics`` are grouped by their declared ``snapshot_field``
    (typically ``CF_LAST`` for prices, ``CF_YIELD`` for yields). Each
    group is processed in two stages:

    1. **Bulk** — one ``ld.get_data`` call per chunk of at most
       :data:`_GET_DATA_CHUNK` RICs. A chunk whose call raises is
       logged and skipped.
    2. **Rescue** — every RIC of the group that is still NaN is
       requested once more on its own.

    Returns
    -------
    pd.DataFrame
        One row indexed at ``target`` with one column per RIC in
        ``rics``. RICs that neither stage could resolve stay NaN for
        the caller to handle.
    """
    values: dict[str, float] = dict.fromkeys(rics, float("nan"))

    groups: dict[str, list[str]] = {}
    for ric in rics:
        snapshot_field = self._snapshot_field_per_ric[ric]
        if snapshot_field not in groups:
            groups[snapshot_field] = []
        groups[snapshot_field].append(ric)

    for field, members in groups.items():
        # Stage 1: chunked bulk requests.
        for first in range(0, len(members), _GET_DATA_CHUNK):
            chunk = members[first : first + _GET_DATA_CHUNK]
            try:
                snap = _ld_get_data(universe=chunk, fields=[field])
            except Exception as exc:
                logger.warning(
                    "LSEG %s call failed for chunk %d-%d: %s",
                    field,
                    first,
                    first + len(chunk) - 1,
                    exc,
                )
                snap = None
            if snap is None or snap.empty:
                continue
            for _, record in snap.iterrows():
                name = record.get("Instrument")
                quote = record.get(field)
                # Ignore rows for instruments that were not asked for.
                if name in values and pd.notna(quote):
                    values[name] = float(quote)

        # Stage 2: one more attempt, alone, for whatever is missing.
        unresolved = [r for r in members if pd.isna(values[r])]
        if not unresolved:
            continue
        logger.info(
            "LSEG %s rescue: retrying %d RIC(s) individually",
            field,
            len(unresolved),
        )
        for ric in unresolved:
            try:
                snap = _ld_get_data(universe=[ric], fields=[field])
            except Exception as exc:
                logger.warning(
                    "LSEG %s rescue failed for %s: %s",
                    field,
                    ric,
                    exc,
                )
                snap = None
            if snap is None or snap.empty:
                continue
            quote = snap.iloc[0].get(field)
            if pd.notna(quote):
                values[ric] = float(quote)

    return pd.DataFrame(
        {ric: [values[ric]] for ric in rics}, index=[target]
    )
635
+
636
@staticmethod
def _normalize_history(
    df: pd.DataFrame,
    rics: list[str],
    field_per_ric: dict[str, str],
) -> pd.DataFrame:
    """Reduce ``ld.get_history`` output to one column per RIC.

    Handles three response shapes:

    - **MultiIndex columns** ``(RIC, field)`` — multi-RIC bulk call.
      Pick ``field_per_ric[ric]`` for each RIC; RICs whose
      ``(RIC, field)`` column is absent are omitted.
    - **Flat columns labelled by field** — single-RIC call. The
      declared field becomes the only data column, renamed to the
      RIC.
    - **Flat columns labelled by RIC** — single-field call against
      multiple RICs (degenerate). Columns are returned unchanged.

    In every shape, rows are de-duplicated on their *index label*
    (first occurrence wins) and sorted by index. De-duplication must
    not compare row values: two different dates that happen to carry
    the same value are both legitimate observations.

    Args:
        df: Raw frame to normalise.
        rics: RICs that were requested.
        field_per_ric: Field declared for each RIC.

    Returns:
        A frame with one column per resolved RIC, or an empty frame
        with the requested RIC columns when nothing could be resolved.

    Raises:
        KeyError: If a requested RIC is missing from ``field_per_ric``.
    """
    if df.empty:
        return pd.DataFrame(columns=pd.Index(rics))

    def _dedupe_by_index(frame: pd.DataFrame) -> pd.DataFrame:
        """Keep the first row per index label, then sort by index."""
        return frame[~frame.index.duplicated(keep="first")].sort_index()

    if isinstance(df.columns, pd.MultiIndex):
        picked: dict[str, pd.Series] = {}
        for ric in rics:
            key = (ric, field_per_ric[ric])
            if key in df.columns:
                picked[ric] = df[key]
        if not picked:
            return pd.DataFrame(columns=pd.Index(rics))
        return _dedupe_by_index(pd.DataFrame(picked))

    if len(rics) == 1:
        ric = rics[0]
        field = field_per_ric[ric]
        if field not in df.columns:
            return pd.DataFrame(columns=pd.Index([ric]))
        return _dedupe_by_index(pd.DataFrame({ric: df[field]}))

    return _dedupe_by_index(df)
682
+
683
+ # ------------------------------------------------------------------
684
+ # Internal: per-RIC batched range download (close path)
685
+ # ------------------------------------------------------------------
686
+
687
def _download_batched(
    self,
    dates_to_download: list[str],
    step_datapoints: int,
    csv_path: Path,
    ric: str,
    force_wait: bool,
    cooldown: int,
) -> None:
    """Download ``ric`` over ``dates_to_download`` in cached CSV batches.

    Walks ``dates_to_download`` in slices of ``step_datapoints``. For
    each slice, a CSV named after the slice's first and last date is
    looked up in ``csv_path``; if present it is reused, otherwise the
    range is fetched via ``self._download_batch`` and written to a CSV
    named after the first and last index label actually returned.

    The cursor advances by the number of rows in the batch. A batch
    with zero rows (empty cached file or empty download) advances the
    cursor by the slice length instead, so the loop always terminates.

    Args:
        dates_to_download: Date strings to cover, in order.
        step_datapoints: Maximum number of dates per batch; must be >= 1
            when there is anything to download.
        csv_path: Directory holding the per-batch CSV files.
        ric: Instrument to download.
        force_wait: Sleep ``cooldown`` seconds even after the last
            downloaded batch.
        cooldown: Seconds to sleep between downloads; also forwarded to
            ``self._download_batch``.

    Raises:
        ValueError: If ``step_datapoints`` is not positive while
            ``dates_to_download`` is non-empty.
    """
    total = len(dates_to_download)
    if total and step_datapoints < 1:
        raise ValueError(
            f"step_datapoints must be >= 1, got {step_datapoints}"
        )

    n_downloaded = 0
    while n_downloaded < total:
        batch_dates = dates_to_download[
            n_downloaded : n_downloaded + step_datapoints
        ]
        start, end = batch_dates[0], batch_dates[-1]
        path = csv_path / f"{ric}_from_{start}_to_{end}.csv"

        if path.exists():
            logger.info("%s already exists, skipping...", path.name)
            cached = pd.read_csv(path, index_col=_CSV_DATE_TITLE)
            # ``or len(batch_dates)``: an empty cached file must still
            # move the cursor forward, otherwise this loop never ends.
            n_downloaded += len(cached.index) or len(batch_dates)
            continue

        logger.info("downloading %s from %s to %s ...", ric, start, end)
        batch = self._download_batch(start, end, ric, cooldown)
        index = batch.index
        if len(index):
            actual_start = index[0].strftime("%Y-%m-%d")
            actual_end = index[-1].strftime("%Y-%m-%d")
            actual_path = (
                csv_path / f"{ric}_from_{actual_start}_to_{actual_end}.csv"
            )
            batch.to_csv(actual_path)
        else:
            # Nothing to name the file after; skip caching this slice.
            logger.warning(
                "LSEG returned no rows for %s from %s to %s; nothing cached",
                ric,
                start,
                end,
            )
        n_downloaded += len(index) or len(batch_dates)

        # A request was issued, so respect the cooldown before the next
        # one (or unconditionally when the caller asks for it).
        if n_downloaded < total or force_wait:
            logger.info("waiting %ds...", cooldown)
            time.sleep(cooldown)
721
+
722
def _download_batch(
    self,
    start_date: str,
    end_date: str,
    ric: str,
    cooldown: int = _DEFAULT_COOLDOWN,
) -> pd.DataFrame:
    """Fetch daily history for one RIC over ``start_date``..``end_date``.

    The raw response is reduced to a single column named after the RIC,
    weekend rows and repeated dates are removed, and the result is
    reindexed onto the business-day range between the two dates, so
    dates with no observation appear as NaN rows.

    The request (and the post-processing of its response) is attempted
    up to ``_MAX_RETRIES`` times, sleeping ``cooldown`` seconds between
    attempts.

    Args:
        start_date: First date of the range.
        end_date: Last date of the range.
        ric: Instrument to download; must be a key of
            ``self._history_field_per_ric``.
        cooldown: Seconds to sleep after a failed attempt.

    Returns:
        A one-column frame indexed by business day, with the index
        named ``_CSV_DATE_TITLE``.

    Raises:
        KeyError: Immediately, if ``ric`` has no declared history field.
        Exception: Whatever the last attempt raised, once every retry
            has failed (``RuntimeError`` for an empty response).
    """
    # Resolved once, outside the retry loop: an unknown RIC is a caller
    # error, not a transient failure, and must not burn retries/sleeps.
    field = self._history_field_per_ric[ric]

    for attempt in range(1, _MAX_RETRIES + 1):
        try:
            df = _ld_get_history(
                universe=[ric],
                start=start_date,
                end=end_date,
                interval="daily",
                fields=[field],
            )
            if df is None or df.empty:
                raise RuntimeError(
                    f"LSEG returned empty for {ric} "
                    f"{start_date}..{end_date}"
                )
            df = self._normalize_history(
                df, [ric], self._history_field_per_ric
            )
            dates = pd.DatetimeIndex(pd.to_datetime(df.index))
            df.index = dates
            # Drop Saturday (5) / Sunday (6) rows before aligning.
            bdays_mask = ~dates.weekday.isin([5, 6])
            df = df[bdays_mask]
            df = df[~df.index.duplicated(keep="first")]
            full_range = pd.date_range(
                start=start_date, end=end_date, freq="B"
            )
            df = df.reindex(full_range).sort_index()
            df.index.name = _CSV_DATE_TITLE
            return df
        except Exception as exc:
            logger.warning(
                "LSEG batch error %s %s..%s (attempt %d/%d): %s",
                ric,
                start_date,
                end_date,
                attempt,
                _MAX_RETRIES,
                exc,
            )
            if attempt == _MAX_RETRIES:
                raise
            logger.info("waiting %ds due to LSEG error...", cooldown)
            time.sleep(cooldown)
    # Only reached when the loop body never runs (``_MAX_RETRIES`` < 1).
    raise RuntimeError("unreachable")
773
+
774
@staticmethod
def _unify_batches(csv_path: Path) -> pd.DataFrame:
    """Merge every per-batch CSV in ``csv_path`` into one wide frame.

    Files are grouped by the name of their first data column. Each
    group is stacked vertically with repeated index labels removed
    (first occurrence wins), and the groups are then joined side by
    side and sorted by index.

    Files are visited in sorted path order, so both the "first
    occurrence" kept for a repeated date and the resulting column order
    are deterministic rather than dependent on directory listing order.

    Args:
        csv_path: Directory containing the batch CSV files. Entries
            whose suffix is not ``.csv`` are ignored.

    Returns:
        The combined frame, or an empty frame when the directory holds
        no usable CSV.
    """
    same_col_groups: dict[str, list[pd.DataFrame]] = {}
    for csv_file in sorted(csv_path.iterdir()):
        if csv_file.suffix != ".csv":
            continue
        df = pd.read_csv(csv_file, index_col=_CSV_DATE_TITLE)
        if df.columns.empty:
            # Only the date column is present: nothing to group under.
            logger.warning("%s has no data column, skipping...", csv_file.name)
            continue
        same_col_groups.setdefault(df.columns[0], []).append(df)

    # ``pd.concat`` rejects an empty list, so short-circuit here.
    if not same_col_groups:
        return pd.DataFrame()

    vertical_dfs: list[pd.DataFrame] = []
    for dfs in same_col_groups.values():
        concatenated = pd.concat(dfs, axis=0)
        vertical_dfs.append(
            concatenated[~concatenated.index.duplicated(keep="first")]
        )

    return pd.concat(vertical_dfs, axis=1).sort_index()