tesorotools-python 0.0.30__tar.gz → 0.0.32__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (67)
  1. {tesorotools_python-0.0.30 → tesorotools_python-0.0.32}/PKG-INFO +1 -1
  2. {tesorotools_python-0.0.30 → tesorotools_python-0.0.32}/pyproject.toml +1 -1
  3. {tesorotools_python-0.0.30 → tesorotools_python-0.0.32}/src/tesorotools/artists/line_plot.py +14 -9
  4. {tesorotools_python-0.0.30 → tesorotools_python-0.0.32}/src/tesorotools/artists/stacked.py +4 -2
  5. {tesorotools_python-0.0.30 → tesorotools_python-0.0.32}/src/tesorotools/database/__init__.py +3 -0
  6. tesorotools_python-0.0.32/src/tesorotools/database/shared.py +177 -0
  7. tesorotools_python-0.0.32/src/tesorotools/manifest.py +120 -0
  8. {tesorotools_python-0.0.30 → tesorotools_python-0.0.32}/src/tesorotools/pipeline/rules.py +16 -0
  9. {tesorotools_python-0.0.30 → tesorotools_python-0.0.32}/.gitignore +0 -0
  10. {tesorotools_python-0.0.30 → tesorotools_python-0.0.32}/src/tesorotools/__init__.py +0 -0
  11. {tesorotools_python-0.0.30 → tesorotools_python-0.0.32}/src/tesorotools/artists/__init__.py +0 -0
  12. {tesorotools_python-0.0.30 → tesorotools_python-0.0.32}/src/tesorotools/artists/barh.md +0 -0
  13. {tesorotools_python-0.0.30 → tesorotools_python-0.0.32}/src/tesorotools/artists/barh_plot.py +0 -0
  14. {tesorotools_python-0.0.30 → tesorotools_python-0.0.32}/src/tesorotools/artists/table.py +0 -0
  15. {tesorotools_python-0.0.30 → tesorotools_python-0.0.32}/src/tesorotools/artists/type_curve.py +0 -0
  16. {tesorotools_python-0.0.30 → tesorotools_python-0.0.32}/src/tesorotools/assets/README.md +0 -0
  17. {tesorotools_python-0.0.30 → tesorotools_python-0.0.32}/src/tesorotools/assets/fonts/CabinetGrotesk-Black.otf +0 -0
  18. {tesorotools_python-0.0.30 → tesorotools_python-0.0.32}/src/tesorotools/assets/fonts/CabinetGrotesk-Bold.otf +0 -0
  19. {tesorotools_python-0.0.30 → tesorotools_python-0.0.32}/src/tesorotools/assets/fonts/CabinetGrotesk-Extrabold.otf +0 -0
  20. {tesorotools_python-0.0.30 → tesorotools_python-0.0.32}/src/tesorotools/assets/fonts/CabinetGrotesk-Extralight.otf +0 -0
  21. {tesorotools_python-0.0.30 → tesorotools_python-0.0.32}/src/tesorotools/assets/fonts/CabinetGrotesk-Light.otf +0 -0
  22. {tesorotools_python-0.0.30 → tesorotools_python-0.0.32}/src/tesorotools/assets/fonts/CabinetGrotesk-Medium.otf +0 -0
  23. {tesorotools_python-0.0.30 → tesorotools_python-0.0.32}/src/tesorotools/assets/fonts/CabinetGrotesk-Regular.otf +0 -0
  24. {tesorotools_python-0.0.30 → tesorotools_python-0.0.32}/src/tesorotools/assets/fonts/CabinetGrotesk-Thin.otf +0 -0
  25. {tesorotools_python-0.0.30 → tesorotools_python-0.0.32}/src/tesorotools/assets/fonts/README.md +0 -0
  26. {tesorotools_python-0.0.30 → tesorotools_python-0.0.32}/src/tesorotools/assets/plots.yaml +0 -0
  27. {tesorotools_python-0.0.30 → tesorotools_python-0.0.32}/src/tesorotools/assets/tesoro.mplstyle +0 -0
  28. {tesorotools_python-0.0.30 → tesorotools_python-0.0.32}/src/tesorotools/convert.py +0 -0
  29. {tesorotools_python-0.0.30 → tesorotools_python-0.0.32}/src/tesorotools/data_sources/README.md +0 -0
  30. {tesorotools_python-0.0.30 → tesorotools_python-0.0.32}/src/tesorotools/data_sources/__init__.py +0 -0
  31. {tesorotools_python-0.0.30 → tesorotools_python-0.0.32}/src/tesorotools/data_sources/debug.py +0 -0
  32. {tesorotools_python-0.0.30 → tesorotools_python-0.0.32}/src/tesorotools/data_sources/lseg.py +0 -0
  33. {tesorotools_python-0.0.30 → tesorotools_python-0.0.32}/src/tesorotools/database/local.py +0 -0
  34. {tesorotools_python-0.0.30 → tesorotools_python-0.0.32}/src/tesorotools/database/push.py +0 -0
  35. {tesorotools_python-0.0.30 → tesorotools_python-0.0.32}/src/tesorotools/dependencies/__init__.py +0 -0
  36. {tesorotools_python-0.0.30 → tesorotools_python-0.0.32}/src/tesorotools/dependencies/node.py +0 -0
  37. {tesorotools_python-0.0.30 → tesorotools_python-0.0.32}/src/tesorotools/dependencies/resolution.py +0 -0
  38. {tesorotools_python-0.0.30 → tesorotools_python-0.0.32}/src/tesorotools/main.py +0 -0
  39. {tesorotools_python-0.0.30 → tesorotools_python-0.0.32}/src/tesorotools/offsets/__init__.py +0 -0
  40. {tesorotools_python-0.0.30 → tesorotools_python-0.0.32}/src/tesorotools/offsets/offsets.py +0 -0
  41. {tesorotools_python-0.0.30 → tesorotools_python-0.0.32}/src/tesorotools/offsets/outliers.py +0 -0
  42. {tesorotools_python-0.0.30 → tesorotools_python-0.0.32}/src/tesorotools/pipeline/__init__.py +0 -0
  43. {tesorotools_python-0.0.30 → tesorotools_python-0.0.32}/src/tesorotools/pipeline/diagnose.py +0 -0
  44. {tesorotools_python-0.0.30 → tesorotools_python-0.0.32}/src/tesorotools/pipeline/engine.py +0 -0
  45. {tesorotools_python-0.0.30 → tesorotools_python-0.0.32}/src/tesorotools/providers/__init__.py +0 -0
  46. {tesorotools_python-0.0.30 → tesorotools_python-0.0.32}/src/tesorotools/providers/base.py +0 -0
  47. {tesorotools_python-0.0.30 → tesorotools_python-0.0.32}/src/tesorotools/providers/bde.py +0 -0
  48. {tesorotools_python-0.0.30 → tesorotools_python-0.0.32}/src/tesorotools/providers/ecb.py +0 -0
  49. {tesorotools_python-0.0.30 → tesorotools_python-0.0.32}/src/tesorotools/py.typed +0 -0
  50. {tesorotools_python-0.0.30 → tesorotools_python-0.0.32}/src/tesorotools/render/__init__.py +0 -0
  51. {tesorotools_python-0.0.30 → tesorotools_python-0.0.32}/src/tesorotools/render/content/__init__.py +0 -0
  52. {tesorotools_python-0.0.30 → tesorotools_python-0.0.32}/src/tesorotools/render/content/content.py +0 -0
  53. {tesorotools_python-0.0.30 → tesorotools_python-0.0.32}/src/tesorotools/render/content/images.py +0 -0
  54. {tesorotools_python-0.0.30 → tesorotools_python-0.0.32}/src/tesorotools/render/content/section.py +0 -0
  55. {tesorotools_python-0.0.30 → tesorotools_python-0.0.32}/src/tesorotools/render/content/subtitle.py +0 -0
  56. {tesorotools_python-0.0.30 → tesorotools_python-0.0.32}/src/tesorotools/render/content/table.py +0 -0
  57. {tesorotools_python-0.0.30 → tesorotools_python-0.0.32}/src/tesorotools/render/content/text.py +0 -0
  58. {tesorotools_python-0.0.30 → tesorotools_python-0.0.32}/src/tesorotools/render/content/title.py +0 -0
  59. {tesorotools_python-0.0.30 → tesorotools_python-0.0.32}/src/tesorotools/render/report.py +0 -0
  60. {tesorotools_python-0.0.30 → tesorotools_python-0.0.32}/src/tesorotools/utils/__init__.py +0 -0
  61. {tesorotools_python-0.0.30 → tesorotools_python-0.0.32}/src/tesorotools/utils/config.py +0 -0
  62. {tesorotools_python-0.0.30 → tesorotools_python-0.0.32}/src/tesorotools/utils/format.py +0 -0
  63. {tesorotools_python-0.0.30 → tesorotools_python-0.0.32}/src/tesorotools/utils/globals.py +0 -0
  64. {tesorotools_python-0.0.30 → tesorotools_python-0.0.32}/src/tesorotools/utils/matplotlib.py +0 -0
  65. {tesorotools_python-0.0.30 → tesorotools_python-0.0.32}/src/tesorotools/utils/series.py +0 -0
  66. {tesorotools_python-0.0.30 → tesorotools_python-0.0.32}/src/tesorotools/utils/shortcuts.py +0 -0
  67. {tesorotools_python-0.0.30 → tesorotools_python-0.0.32}/src/tesorotools/utils/template.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: tesorotools-python
3
- Version: 0.0.30
3
+ Version: 0.0.32
4
4
  Requires-Python: >=3.13
5
5
  Requires-Dist: babel>=2.17
6
6
  Requires-Dist: eikon>=1.1
@@ -1,7 +1,7 @@
1
1
  [project]
2
2
  name = "tesorotools-python"
3
3
  requires-python = ">=3.13"
4
- version = "0.0.30"
4
+ version = "0.0.32"
5
5
  dependencies = [
6
6
  # database and ORM
7
7
  "psycopg[binary]>=3.1",
@@ -102,11 +102,17 @@ def annotate_last_values(
102
102
  *,
103
103
  decimals: int,
104
104
  units: str,
105
+ labels: dict[str, str] | None = None,
105
106
  series_styles: dict[str, dict[str, Any]] | None = None,
106
107
  annotate_color: str | None = None,
107
108
  ) -> None:
108
109
  """Label the last non-NaN value of each column on the right.
109
110
 
111
+ ``plot_data.columns`` and ``series_styles`` keys must be
112
+ canonical series IDs. ``labels`` maps each ID to the
113
+ Matplotlib line label used when the axes were drawn; if
114
+ omitted, the ID itself is used as the line label.
115
+
110
116
  Colour priority (highest first): ``annotate_color``
111
117
  (global override), ``series_styles[col]['color']``,
112
118
  the Matplotlib line colour.
@@ -120,6 +126,7 @@ def annotate_last_values(
120
126
  if fig is None:
121
127
  return
122
128
  styles = series_styles or {}
129
+ label_map = labels or {}
123
130
 
124
131
  lines_by_label = {str(line.get_label()): line for line in ax.lines}
125
132
  entries: list[tuple[Any, float, str, Any]] = []
@@ -136,7 +143,7 @@ def annotate_last_values(
136
143
  if override is not None:
137
144
  color = override
138
145
  else:
139
- line = lines_by_label.get(col)
146
+ line = lines_by_label.get(label_map.get(col, col))
140
147
  color = line.get_color() if line is not None else "black"
141
148
  entries.append((last_date, last_val, col, color))
142
149
 
@@ -454,7 +461,6 @@ class LinePlot:
454
461
  plot_data: pd.DataFrame = self.data.loc[
455
462
  slice(start_date, end_date), self.series.keys()
456
463
  ]
457
- plot_data = plot_data.rename(columns=self.series)
458
464
 
459
465
  plot_data = plot_data * self.scale
460
466
 
@@ -468,12 +474,10 @@ class LinePlot:
468
474
  **fig_kw
469
475
  )
470
476
  ax = fig.add_subplot()
471
- if self.series_styles:
472
- for col in plot_data.columns:
473
- style = self.series_styles.get(col, {})
474
- plot_data[col].plot(ax=ax, label=col, **style)
475
- else:
476
- plot_data.plot(ax=ax)
477
+ styles = self.series_styles
478
+ for col in plot_data.columns:
479
+ style = styles.get(col, {}) if styles else {}
480
+ plot_data[col].plot(ax=ax, label=self.series[col], **style)
477
481
 
478
482
  assert self.format is not None
479
483
  if self.annotate:
@@ -482,6 +486,7 @@ class LinePlot:
482
486
  plot_data,
483
487
  decimals=self.format.decimals,
484
488
  units=self.format.units,
489
+ labels=self.series,
485
490
  series_styles=self.series_styles,
486
491
  annotate_color=self.annotate_color,
487
492
  )
@@ -497,7 +502,7 @@ class LinePlot:
497
502
  style_baseline(ax, reference, **AX_CONFIG["baseline"])
498
503
 
499
504
  if self.legend is not None:
500
- labels = list(plot_data.columns)
505
+ labels = [self.series[c] for c in plot_data.columns]
501
506
  ncol = (
502
507
  self.legend.ncol
503
508
  if self.legend.ncol is not None
@@ -84,7 +84,8 @@ class StackedAreaPlot:
84
84
  else self.data.index.max()
85
85
  )
86
86
 
87
- plot_data = self.data.loc[start:end, list(self.series.keys())].dropna()
87
+ plot_data = self.data.loc[start:end, list(self.series.keys())]
88
+ plot_data = plot_data.dropna(how="all").fillna(0)
88
89
  plot_data = plot_data * self.scale
89
90
 
90
91
  fig_kw = dict(FIG_CONFIG)
@@ -211,7 +212,8 @@ class StackedBarPlot:
211
212
  else self.data.index.max()
212
213
  )
213
214
  all_cols = list(self.series.keys()) + list(self.overlay_series.keys())
214
- plot_data = self.data.loc[start:end, all_cols].dropna()
215
+ plot_data = self.data.loc[start:end, all_cols]
216
+ plot_data = plot_data.dropna(how="all").fillna(0)
215
217
  return plot_data * self.scale
216
218
 
217
219
  def _format_xticks(
@@ -4,8 +4,11 @@ Contiene las utilidades para interactuar con bases de datos locales y remotas.
4
4
  """
5
5
 
6
6
  from tesorotools.database.local import LocalDatabase, ShortcutDatabase
7
+ from tesorotools.database.shared import SharedDatabase, resolve_shared_root
7
8
 
8
9
  __all__ = [
9
10
  "LocalDatabase",
11
+ "SharedDatabase",
10
12
  "ShortcutDatabase",
13
+ "resolve_shared_root",
11
14
  ]
@@ -0,0 +1,177 @@
1
+ """Resolver for shared OneDrive folders used as a team database.
2
+
3
+ Absorbed from the ``src/bbdd.py`` copies in ``cnmv_python`` and
4
+ ``epf`` — both had the same logic with only a per-project env var
5
+ name differing. The old ``ShortcutDatabase`` (Windows ``.lnk``
6
+ per user) is superseded by this resolver for the ``bbdd/``
7
+ layout: each member of the team gets the same path auto-discovered
8
+ from their OneDrive mount.
9
+
10
+ Resolution order
11
+ ----------------
12
+ 1. An explicit override environment variable (``env_var`` argument,
13
+ e.g. ``"CNMV_BBDD_ROOT"``). Used for local dev, CI and tests.
14
+ 2. ``%OneDriveCommercial%`` + glob for a SharePoint folder whose
15
+ name contains ``team_marker`` and that contains ``subdir``.
16
+ 3. ``%OneDrive%`` as a fallback.
17
+ 4. ``RuntimeError`` with a diagnostic listing every attempt.
18
+
19
+ Callers supply their own ``team_marker`` — this module does not
20
+ assume any specific SharePoint tenant. The substring match is
21
+ tolerant of the OneDrive client variant (``General - {team}``
22
+ vs. ``{team} - General`` depending on locale).
23
+ """
24
+
25
+ from __future__ import annotations
26
+
27
+ import logging
28
+ import os
29
+ from functools import cache
30
+ from pathlib import Path
31
+
32
+ logger = logging.getLogger(__name__)
33
+
34
+ #: OneDrive env vars published by the Windows client.
35
+ _ONEDRIVE_ENV_VARS = ("OneDriveCommercial", "OneDrive")
36
+
37
+
38
+ @cache
39
+ def resolve_shared_root(
40
+ *,
41
+ env_var: str,
42
+ team_marker: str,
43
+ subdir: str = "bbdd",
44
+ ) -> Path:
45
+ """Resolve the local path to a shared OneDrive folder.
46
+
47
+ Parameters
48
+ ----------
49
+ env_var
50
+ Name of the environment variable for explicit override
51
+ (e.g. ``"CNMV_BBDD_ROOT"``). Each project picks its own
52
+ to avoid collisions on shared dev machines.
53
+ team_marker
54
+ Substring matched against the SharePoint folder name.
55
+ subdir
56
+ Shared folder subdirectory under the SharePoint mount
57
+ (default ``"bbdd"``).
58
+
59
+ Returns
60
+ -------
61
+ Path
62
+ Absolute path to the existing shared folder.
63
+
64
+ Raises
65
+ ------
66
+ RuntimeError
67
+ If none of the resolution steps yields an existing
68
+ directory. The message lists every attempt so the user
69
+ can see what was tried.
70
+ """
71
+ tried: list[str] = []
72
+
73
+ override = os.environ.get(env_var)
74
+ if override:
75
+ p = Path(override)
76
+ if p.is_dir():
77
+ logger.info("shared root resolved via %s: %s", env_var, p)
78
+ return p
79
+ raise RuntimeError(
80
+ f"{env_var}={override!r} does not point to an existing directory."
81
+ )
82
+ tried.append(f"{env_var} (not set)")
83
+
84
+ for var in _ONEDRIVE_ENV_VARS:
85
+ val = os.environ.get(var)
86
+ if not val:
87
+ tried.append(f"%{var}% (not set)")
88
+ continue
89
+ root = Path(val)
90
+ if not root.is_dir():
91
+ tried.append(f"%{var}%={val!r} (not a directory)")
92
+ continue
93
+ matches = sorted(
94
+ c for c in root.glob(f"*{team_marker}*/{subdir}") if c.is_dir()
95
+ )
96
+ if matches:
97
+ chosen = matches[0]
98
+ if len(matches) > 1:
99
+ logger.warning(
100
+ "Multiple %s/ candidates under %s: %s. Picking %s.",
101
+ subdir,
102
+ root,
103
+ matches,
104
+ chosen,
105
+ )
106
+ logger.info(
107
+ "shared root auto-discovered via %%%s%%: %s", var, chosen
108
+ )
109
+ return chosen
110
+ tried.append(f"%{var}% without '*{team_marker}*/{subdir}' under {root}")
111
+
112
+ raise RuntimeError(
113
+ f"Cannot locate the {subdir}/ folder. Attempts:\n - "
114
+ + "\n - ".join(tried)
115
+ + f"\nHint: set {env_var} to the local path where OneDrive "
116
+ f"syncs the shared {subdir}/ folder."
117
+ )
118
+
119
+
120
+ class SharedDatabase:
121
+ """Per-project view over a shared OneDrive database folder.
122
+
123
+ Replaces ``ShortcutDatabase`` (``.lnk`` per user) for the
124
+ ``bbdd/`` layout: each project has a subtree with
125
+ ``raw/`` and ``processed/`` conventional subdirectories.
126
+
127
+ Parameters
128
+ ----------
129
+ project
130
+ Project name (segment under ``{shared_root}/``, e.g.
131
+ ``"cnmv"`` or ``"epf"``).
132
+ env_var
133
+ Environment variable for explicit override of the shared
134
+ root (e.g. ``"CNMV_BBDD_ROOT"``).
135
+ team_marker
136
+ Substring of the SharePoint folder name (see
137
+ :func:`resolve_shared_root`).
138
+ subdir
139
+ Shared folder subdirectory (default ``"bbdd"``).
140
+ """
141
+
142
+ def __init__(
143
+ self,
144
+ project: str,
145
+ *,
146
+ env_var: str,
147
+ team_marker: str,
148
+ subdir: str = "bbdd",
149
+ ) -> None:
150
+ self.project = project
151
+ self._env_var = env_var
152
+ self._team_marker = team_marker
153
+ self._subdir = subdir
154
+
155
+ @property
156
+ def root(self) -> Path:
157
+ """Resolved path to the shared folder."""
158
+ return resolve_shared_root(
159
+ env_var=self._env_var,
160
+ team_marker=self._team_marker,
161
+ subdir=self._subdir,
162
+ )
163
+
164
+ @property
165
+ def project_root(self) -> Path:
166
+ """``{root}/{project}/`` — subtree reserved for this project."""
167
+ return self.root / self.project
168
+
169
+ @property
170
+ def processed_root(self) -> Path:
171
+ """``{root}/{project}/processed/`` — datasets for consumers."""
172
+ return self.project_root / "processed"
173
+
174
+ @property
175
+ def raw_root(self) -> Path:
176
+ """``{root}/{project}/raw/`` — original source files."""
177
+ return self.project_root / "raw"
@@ -0,0 +1,120 @@
1
+ """YAML manifest sidecar for published parquet datasets.
2
+
3
+ Absorbed from the ``src/manifest.py`` copies in ``cnmv_python``
4
+ and ``epf``: both had identical ``write_manifest`` and a slightly
5
+ different provenance builder (kept per-project). This module
6
+ provides the shared pieces — the YAML writer and the hashing
7
+ helper.
8
+
9
+ A bare ``data.parquet`` in a shared folder is opaque to consumers:
10
+ schema and provenance are not visible without opening the file.
11
+ The manifest pins both as a YAML sidecar at write time::
12
+
13
+ bbdd/cnmv/processed/capa1/concepto=inversiones/ejercicio=2024/
14
+ data.parquet
15
+ _MANIFEST.yaml
16
+
17
+ Structure
18
+ ---------
19
+ .. code-block:: yaml
20
+
21
+ dataset: cnmv/capa1/inversiones/2024
22
+ layer: 1
23
+ row_count: 120013
24
+ parquet_file: data.parquet
25
+ generated_at: 2026-04-21T15:30:00+00:00
26
+ generated_by: cnmv_python
27
+ schema:
28
+ - {name: tipo_fichero, dtype: string[pyarrow]}
29
+ - ...
30
+ provenance: # optional, caller-supplied
31
+ ...
32
+ """
33
+
34
+ from __future__ import annotations
35
+
36
+ import datetime as dt
37
+ import hashlib
38
+ import logging
39
+ from pathlib import Path
40
+ from typing import Any
41
+
42
+ import pandas as pd
43
+ import yaml
44
+
45
+ logger = logging.getLogger(__name__)
46
+
47
+ MANIFEST_FILENAME = "_MANIFEST.yaml"
48
+
49
+
50
+ def sha256_file(path: Path, chunk: int = 1 << 20) -> str:
51
+ """Compute the SHA-256 of a file, streaming in 1 MiB chunks."""
52
+ h = hashlib.sha256()
53
+ with path.open("rb") as f:
54
+ while True:
55
+ block = f.read(chunk)
56
+ if not block:
57
+ break
58
+ h.update(block)
59
+ return h.hexdigest()
60
+
61
+
62
+ def write_manifest(
63
+ parquet_path: Path,
64
+ df: pd.DataFrame,
65
+ dataset: str,
66
+ layer: int,
67
+ provenance: dict[str, Any] | None = None,
68
+ generated_by: str = "",
69
+ ) -> Path:
70
+ """Write ``_MANIFEST.yaml`` next to *parquet_path*.
71
+
72
+ Parameters
73
+ ----------
74
+ parquet_path
75
+ Full path to the ``data.parquet`` that was just written.
76
+ The manifest is placed in the same directory.
77
+ df
78
+ DataFrame used only to derive ``schema`` and ``row_count``
79
+ — not persisted.
80
+ dataset
81
+ Canonical dataset name (e.g. ``"cnmv/capa1/inversiones/2024"``).
82
+ layer
83
+ Convention: ``0`` = raw, ``1`` = processed.
84
+ provenance
85
+ Optional project-specific dict (source files, hashes, ...).
86
+ Omitted from the YAML when ``None``.
87
+ generated_by
88
+ Identifier of the generating project (e.g. ``"cnmv_python"``).
89
+
90
+ Returns
91
+ -------
92
+ Path
93
+ The path of the written manifest.
94
+ """
95
+ payload: dict[str, Any] = {
96
+ "dataset": dataset,
97
+ "layer": layer,
98
+ "row_count": int(len(df)),
99
+ "parquet_file": parquet_path.name,
100
+ "generated_at": dt.datetime.now(dt.UTC).isoformat(timespec="seconds"),
101
+ "generated_by": generated_by,
102
+ "schema": [
103
+ {"name": str(col), "dtype": str(df[col].dtype)}
104
+ for col in df.columns
105
+ ],
106
+ }
107
+ if provenance:
108
+ payload["provenance"] = provenance
109
+
110
+ out_path = parquet_path.parent / MANIFEST_FILENAME
111
+ with out_path.open("w", encoding="utf-8") as f:
112
+ yaml.safe_dump(
113
+ payload,
114
+ f,
115
+ sort_keys=False,
116
+ allow_unicode=True,
117
+ default_flow_style=False,
118
+ )
119
+ logger.debug("Manifest written: %s", out_path)
120
+ return out_path
@@ -45,6 +45,21 @@ def sum_rule(output: str, sources: list[str]) -> TransformationRule:
45
45
  )
46
46
 
47
47
 
48
+ def mean_rule(output: str, sources: list[str]) -> TransformationRule:
49
+ """Row-wise arithmetic mean of multiple columns.
50
+
51
+ Equivalent to ``=AVERAGE(...)`` in Excel: NaN components
52
+ are skipped (not treated as zero), so a row with any
53
+ non-NaN value yields the mean of whatever is present.
54
+ A row of all NaN yields NaN.
55
+ """
56
+ return TransformationRule(
57
+ output_name=output,
58
+ dependencies=list(sources),
59
+ compute=lambda df, cols=list(sources): df[cols].mean(axis=1),
60
+ )
61
+
62
+
48
63
  def ratio_rule(
49
64
  output: str, numerator: str, denominator: str
50
65
  ) -> TransformationRule:
@@ -333,6 +348,7 @@ FACTORIES: dict[
333
348
  ] = {
334
349
  "scale": scale_rule,
335
350
  "sum": sum_rule,
351
+ "mean": mean_rule,
336
352
  "ratio": ratio_rule,
337
353
  "difference": difference_rule,
338
354
  "inverse": inverse_rule,