tesorotools-python 0.0.31__tar.gz → 0.0.32__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (67) hide show
  1. {tesorotools_python-0.0.31 → tesorotools_python-0.0.32}/PKG-INFO +1 -1
  2. {tesorotools_python-0.0.31 → tesorotools_python-0.0.32}/pyproject.toml +1 -1
  3. {tesorotools_python-0.0.31 → tesorotools_python-0.0.32}/src/tesorotools/database/__init__.py +3 -0
  4. tesorotools_python-0.0.32/src/tesorotools/database/shared.py +177 -0
  5. tesorotools_python-0.0.32/src/tesorotools/manifest.py +120 -0
  6. {tesorotools_python-0.0.31 → tesorotools_python-0.0.32}/.gitignore +0 -0
  7. {tesorotools_python-0.0.31 → tesorotools_python-0.0.32}/src/tesorotools/__init__.py +0 -0
  8. {tesorotools_python-0.0.31 → tesorotools_python-0.0.32}/src/tesorotools/artists/__init__.py +0 -0
  9. {tesorotools_python-0.0.31 → tesorotools_python-0.0.32}/src/tesorotools/artists/barh.md +0 -0
  10. {tesorotools_python-0.0.31 → tesorotools_python-0.0.32}/src/tesorotools/artists/barh_plot.py +0 -0
  11. {tesorotools_python-0.0.31 → tesorotools_python-0.0.32}/src/tesorotools/artists/line_plot.py +0 -0
  12. {tesorotools_python-0.0.31 → tesorotools_python-0.0.32}/src/tesorotools/artists/stacked.py +0 -0
  13. {tesorotools_python-0.0.31 → tesorotools_python-0.0.32}/src/tesorotools/artists/table.py +0 -0
  14. {tesorotools_python-0.0.31 → tesorotools_python-0.0.32}/src/tesorotools/artists/type_curve.py +0 -0
  15. {tesorotools_python-0.0.31 → tesorotools_python-0.0.32}/src/tesorotools/assets/README.md +0 -0
  16. {tesorotools_python-0.0.31 → tesorotools_python-0.0.32}/src/tesorotools/assets/fonts/CabinetGrotesk-Black.otf +0 -0
  17. {tesorotools_python-0.0.31 → tesorotools_python-0.0.32}/src/tesorotools/assets/fonts/CabinetGrotesk-Bold.otf +0 -0
  18. {tesorotools_python-0.0.31 → tesorotools_python-0.0.32}/src/tesorotools/assets/fonts/CabinetGrotesk-Extrabold.otf +0 -0
  19. {tesorotools_python-0.0.31 → tesorotools_python-0.0.32}/src/tesorotools/assets/fonts/CabinetGrotesk-Extralight.otf +0 -0
  20. {tesorotools_python-0.0.31 → tesorotools_python-0.0.32}/src/tesorotools/assets/fonts/CabinetGrotesk-Light.otf +0 -0
  21. {tesorotools_python-0.0.31 → tesorotools_python-0.0.32}/src/tesorotools/assets/fonts/CabinetGrotesk-Medium.otf +0 -0
  22. {tesorotools_python-0.0.31 → tesorotools_python-0.0.32}/src/tesorotools/assets/fonts/CabinetGrotesk-Regular.otf +0 -0
  23. {tesorotools_python-0.0.31 → tesorotools_python-0.0.32}/src/tesorotools/assets/fonts/CabinetGrotesk-Thin.otf +0 -0
  24. {tesorotools_python-0.0.31 → tesorotools_python-0.0.32}/src/tesorotools/assets/fonts/README.md +0 -0
  25. {tesorotools_python-0.0.31 → tesorotools_python-0.0.32}/src/tesorotools/assets/plots.yaml +0 -0
  26. {tesorotools_python-0.0.31 → tesorotools_python-0.0.32}/src/tesorotools/assets/tesoro.mplstyle +0 -0
  27. {tesorotools_python-0.0.31 → tesorotools_python-0.0.32}/src/tesorotools/convert.py +0 -0
  28. {tesorotools_python-0.0.31 → tesorotools_python-0.0.32}/src/tesorotools/data_sources/README.md +0 -0
  29. {tesorotools_python-0.0.31 → tesorotools_python-0.0.32}/src/tesorotools/data_sources/__init__.py +0 -0
  30. {tesorotools_python-0.0.31 → tesorotools_python-0.0.32}/src/tesorotools/data_sources/debug.py +0 -0
  31. {tesorotools_python-0.0.31 → tesorotools_python-0.0.32}/src/tesorotools/data_sources/lseg.py +0 -0
  32. {tesorotools_python-0.0.31 → tesorotools_python-0.0.32}/src/tesorotools/database/local.py +0 -0
  33. {tesorotools_python-0.0.31 → tesorotools_python-0.0.32}/src/tesorotools/database/push.py +0 -0
  34. {tesorotools_python-0.0.31 → tesorotools_python-0.0.32}/src/tesorotools/dependencies/__init__.py +0 -0
  35. {tesorotools_python-0.0.31 → tesorotools_python-0.0.32}/src/tesorotools/dependencies/node.py +0 -0
  36. {tesorotools_python-0.0.31 → tesorotools_python-0.0.32}/src/tesorotools/dependencies/resolution.py +0 -0
  37. {tesorotools_python-0.0.31 → tesorotools_python-0.0.32}/src/tesorotools/main.py +0 -0
  38. {tesorotools_python-0.0.31 → tesorotools_python-0.0.32}/src/tesorotools/offsets/__init__.py +0 -0
  39. {tesorotools_python-0.0.31 → tesorotools_python-0.0.32}/src/tesorotools/offsets/offsets.py +0 -0
  40. {tesorotools_python-0.0.31 → tesorotools_python-0.0.32}/src/tesorotools/offsets/outliers.py +0 -0
  41. {tesorotools_python-0.0.31 → tesorotools_python-0.0.32}/src/tesorotools/pipeline/__init__.py +0 -0
  42. {tesorotools_python-0.0.31 → tesorotools_python-0.0.32}/src/tesorotools/pipeline/diagnose.py +0 -0
  43. {tesorotools_python-0.0.31 → tesorotools_python-0.0.32}/src/tesorotools/pipeline/engine.py +0 -0
  44. {tesorotools_python-0.0.31 → tesorotools_python-0.0.32}/src/tesorotools/pipeline/rules.py +0 -0
  45. {tesorotools_python-0.0.31 → tesorotools_python-0.0.32}/src/tesorotools/providers/__init__.py +0 -0
  46. {tesorotools_python-0.0.31 → tesorotools_python-0.0.32}/src/tesorotools/providers/base.py +0 -0
  47. {tesorotools_python-0.0.31 → tesorotools_python-0.0.32}/src/tesorotools/providers/bde.py +0 -0
  48. {tesorotools_python-0.0.31 → tesorotools_python-0.0.32}/src/tesorotools/providers/ecb.py +0 -0
  49. {tesorotools_python-0.0.31 → tesorotools_python-0.0.32}/src/tesorotools/py.typed +0 -0
  50. {tesorotools_python-0.0.31 → tesorotools_python-0.0.32}/src/tesorotools/render/__init__.py +0 -0
  51. {tesorotools_python-0.0.31 → tesorotools_python-0.0.32}/src/tesorotools/render/content/__init__.py +0 -0
  52. {tesorotools_python-0.0.31 → tesorotools_python-0.0.32}/src/tesorotools/render/content/content.py +0 -0
  53. {tesorotools_python-0.0.31 → tesorotools_python-0.0.32}/src/tesorotools/render/content/images.py +0 -0
  54. {tesorotools_python-0.0.31 → tesorotools_python-0.0.32}/src/tesorotools/render/content/section.py +0 -0
  55. {tesorotools_python-0.0.31 → tesorotools_python-0.0.32}/src/tesorotools/render/content/subtitle.py +0 -0
  56. {tesorotools_python-0.0.31 → tesorotools_python-0.0.32}/src/tesorotools/render/content/table.py +0 -0
  57. {tesorotools_python-0.0.31 → tesorotools_python-0.0.32}/src/tesorotools/render/content/text.py +0 -0
  58. {tesorotools_python-0.0.31 → tesorotools_python-0.0.32}/src/tesorotools/render/content/title.py +0 -0
  59. {tesorotools_python-0.0.31 → tesorotools_python-0.0.32}/src/tesorotools/render/report.py +0 -0
  60. {tesorotools_python-0.0.31 → tesorotools_python-0.0.32}/src/tesorotools/utils/__init__.py +0 -0
  61. {tesorotools_python-0.0.31 → tesorotools_python-0.0.32}/src/tesorotools/utils/config.py +0 -0
  62. {tesorotools_python-0.0.31 → tesorotools_python-0.0.32}/src/tesorotools/utils/format.py +0 -0
  63. {tesorotools_python-0.0.31 → tesorotools_python-0.0.32}/src/tesorotools/utils/globals.py +0 -0
  64. {tesorotools_python-0.0.31 → tesorotools_python-0.0.32}/src/tesorotools/utils/matplotlib.py +0 -0
  65. {tesorotools_python-0.0.31 → tesorotools_python-0.0.32}/src/tesorotools/utils/series.py +0 -0
  66. {tesorotools_python-0.0.31 → tesorotools_python-0.0.32}/src/tesorotools/utils/shortcuts.py +0 -0
  67. {tesorotools_python-0.0.31 → tesorotools_python-0.0.32}/src/tesorotools/utils/template.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: tesorotools-python
3
- Version: 0.0.31
3
+ Version: 0.0.32
4
4
  Requires-Python: >=3.13
5
5
  Requires-Dist: babel>=2.17
6
6
  Requires-Dist: eikon>=1.1
@@ -1,7 +1,7 @@
1
1
  [project]
2
2
  name = "tesorotools-python"
3
3
  requires-python = ">=3.13"
4
- version = "0.0.31"
4
+ version = "0.0.32"
5
5
  dependencies = [
6
6
  # database and ORM
7
7
  "psycopg[binary]>=3.1",
@@ -4,8 +4,11 @@ Contiene las utilidades para interactuar con bases de datos locales y remotas.
4
4
  """
5
5
 
6
6
  from tesorotools.database.local import LocalDatabase, ShortcutDatabase
7
+ from tesorotools.database.shared import SharedDatabase, resolve_shared_root
7
8
 
8
9
  __all__ = [
9
10
  "LocalDatabase",
11
+ "SharedDatabase",
10
12
  "ShortcutDatabase",
13
+ "resolve_shared_root",
11
14
  ]
@@ -0,0 +1,177 @@
1
+ """Resolver for shared OneDrive folders used as a team database.
2
+
3
+ Absorbed from the ``src/bbdd.py`` copies in ``cnmv_python`` and
4
+ ``epf`` — both had the same logic with only a per-project env var
5
+ name differing. The old ``ShortcutDatabase`` (Windows ``.lnk``
6
+ per user) is superseded by this resolver for the ``bbdd/``
7
+ layout: each member of the team gets the same path auto-discovered
8
+ from their OneDrive mount.
9
+
10
+ Resolution order
11
+ ----------------
12
+ 1. An explicit override environment variable (``env_var`` argument,
13
+ e.g. ``"CNMV_BBDD_ROOT"``). Used for local dev, CI and tests.
14
+ 2. ``%OneDriveCommercial%`` + glob for a SharePoint folder whose
15
+ name contains ``team_marker`` and that contains ``subdir``.
16
+ 3. ``%OneDrive%`` as a fallback.
17
+ 4. ``RuntimeError`` with a diagnostic listing every attempt.
18
+
19
+ Callers supply their own ``team_marker`` — this module does not
20
+ assume any specific SharePoint tenant. The substring match is
21
+ tolerant of the OneDrive client variant (``General - {team}``
22
+ vs. ``{team} - General`` depending on locale).
23
+ """
24
+
25
+ from __future__ import annotations
26
+
27
+ import logging
28
+ import os
29
+ from functools import cache
30
+ from pathlib import Path
31
+
32
+ logger = logging.getLogger(__name__)
33
+
34
+ #: OneDrive env vars published by the Windows client.
35
+ _ONEDRIVE_ENV_VARS = ("OneDriveCommercial", "OneDrive")
36
+
37
+
38
@cache
def resolve_shared_root(
    *,
    env_var: str,
    team_marker: str,
    subdir: str = "bbdd",
) -> Path:
    """Resolve the local path to a shared OneDrive folder.

    Resolution order:

    1. The explicit override stored in the ``env_var`` environment
       variable. If it is set but does not point to a directory the
       function fails immediately instead of falling back.
    2. For each variable in ``_ONEDRIVE_ENV_VARS``, in order, the first
       (sorted) directory matching ``<mount>/*{team_marker}*/{subdir}``.

    The result is memoised per argument combination by
    ``functools.cache``: environment changes made after a successful call
    are not seen until ``resolve_shared_root.cache_clear()`` is called.
    Calls that raise are not cached.

    Parameters
    ----------
    env_var
        Name of the environment variable for explicit override
        (e.g. ``"CNMV_BBDD_ROOT"``). Each project picks its own
        to avoid collisions on shared dev machines.
    team_marker
        Substring matched against the SharePoint folder name. It is
        taken literally: glob metacharacters such as ``[``, ``*`` or
        ``?`` in the marker match themselves.
    subdir
        Shared folder subdirectory under the SharePoint mount
        (default ``"bbdd"``). Also taken literally.

    Returns
    -------
    Path
        Absolute path to the existing shared folder.

    Raises
    ------
    RuntimeError
        If the override variable is set but is not an existing
        directory, or if none of the resolution steps yields an
        existing directory. In the latter case the message lists every
        attempt so the user can see what was tried.
    """
    # Function-scope import so the module's import block stays untouched.
    from glob import escape as glob_escape

    tried: list[str] = []

    override = os.environ.get(env_var)
    if override:
        p = Path(override)
        if p.is_dir():
            # The result is cached, so pin a relative override to the
            # current working directory now rather than at use time.
            p = p.absolute()
            logger.info("shared root resolved via %s: %s", env_var, p)
            return p
        raise RuntimeError(
            f"{env_var}={override!r} does not point to an existing directory."
        )
    tried.append(f"{env_var} (not set)")

    # Escape both pieces so the documented *substring* semantics hold even
    # when the folder name contains glob metacharacters (e.g. "Team [ES]").
    pattern = f"*{glob_escape(team_marker)}*/{glob_escape(subdir)}"

    for var in _ONEDRIVE_ENV_VARS:
        val = os.environ.get(var)
        if not val:
            tried.append(f"%{var}% (not set)")
            continue
        root = Path(val)
        if not root.is_dir():
            tried.append(f"%{var}%={val!r} (not a directory)")
            continue
        matches = sorted(c for c in root.glob(pattern) if c.is_dir())
        if matches:
            # Sorting makes the pick deterministic when several folders match.
            chosen = matches[0].absolute()
            if len(matches) > 1:
                logger.warning(
                    "Multiple %s/ candidates under %s: %s. Picking %s.",
                    subdir,
                    root,
                    matches,
                    chosen,
                )
            logger.info(
                "shared root auto-discovered via %%%s%%: %s", var, chosen
            )
            return chosen
        tried.append(f"%{var}% without '*{team_marker}*/{subdir}' under {root}")

    raise RuntimeError(
        f"Cannot locate the {subdir}/ folder. Attempts:\n  - "
        + "\n  - ".join(tried)
        + f"\nHint: set {env_var} to the local path where OneDrive "
        f"syncs the shared {subdir}/ folder."
    )
118
+
119
+
120
class SharedDatabase:
    """Project-scoped accessor for a team database kept in shared OneDrive.

    Intended as the replacement for ``ShortcutDatabase`` (one ``.lnk``
    per user) under the ``bbdd/`` layout, where every project owns a
    subtree holding the conventional ``raw/`` and ``processed/``
    directories. Nothing is resolved at construction time; the shared
    root is looked up each time :attr:`root` is read.

    Parameters
    ----------
    project
        Name of the project, i.e. the path segment directly below the
        shared root (``"cnmv"``, ``"epf"``, ...).
    env_var
        Name of the environment variable that explicitly overrides the
        shared root (e.g. ``"CNMV_BBDD_ROOT"``).
    team_marker
        Substring of the SharePoint folder name, forwarded to
        :func:`resolve_shared_root`.
    subdir
        Subdirectory of the SharePoint mount that holds the shared
        folder (default ``"bbdd"``).
    """

    def __init__(
        self,
        project: str,
        *,
        env_var: str,
        team_marker: str,
        subdir: str = "bbdd",
    ) -> None:
        # Only the lookup parameters are stored; no filesystem access here.
        self._subdir = subdir
        self._team_marker = team_marker
        self._env_var = env_var
        self.project = project

    @property
    def root(self) -> Path:
        """Shared folder path as returned by :func:`resolve_shared_root`."""
        lookup = {
            "env_var": self._env_var,
            "team_marker": self._team_marker,
            "subdir": self._subdir,
        }
        return resolve_shared_root(**lookup)

    @property
    def project_root(self) -> Path:
        """``{root}/{project}/`` — the subtree that belongs to this project."""
        shared = self.root
        return shared.joinpath(self.project)

    @property
    def processed_root(self) -> Path:
        """``{root}/{project}/processed/`` — datasets meant for consumers."""
        return self.project_root.joinpath("processed")

    @property
    def raw_root(self) -> Path:
        """``{root}/{project}/raw/`` — source files as originally received."""
        return self.project_root.joinpath("raw")
@@ -0,0 +1,120 @@
1
+ """YAML manifest sidecar for published parquet datasets.
2
+
3
+ Absorbed from the ``src/manifest.py`` copies in ``cnmv_python``
4
+ and ``epf``: both had identical ``write_manifest`` and a slightly
5
+ different provenance builder (kept per-project). This module
6
+ provides the shared pieces — the YAML writer and the hashing
7
+ helper.
8
+
9
+ A bare ``data.parquet`` in a shared folder is opaque to consumers:
10
+ schema and provenance are not visible without opening the file.
11
+ The manifest pins both as a YAML sidecar at write time::
12
+
13
+ bbdd/cnmv/processed/capa1/concepto=inversiones/ejercicio=2024/
14
+ data.parquet
15
+ _MANIFEST.yaml
16
+
17
+ Structure
18
+ ---------
19
+ .. code-block:: yaml
20
+
21
+ dataset: cnmv/capa1/inversiones/2024
22
+ layer: 1
23
+ row_count: 120013
24
+ parquet_file: data.parquet
25
+ generated_at: 2026-04-21T15:30:00+00:00
26
+ generated_by: cnmv_python
27
+ schema:
28
+ - {name: tipo_fichero, dtype: string[pyarrow]}
29
+ - ...
30
+ provenance: # optional, caller-supplied
31
+ ...
32
+ """
33
+
34
+ from __future__ import annotations
35
+
36
+ import datetime as dt
37
+ import hashlib
38
+ import logging
39
+ from pathlib import Path
40
+ from typing import Any
41
+
42
+ import pandas as pd
43
+ import yaml
44
+
45
+ logger = logging.getLogger(__name__)
46
+
47
+ MANIFEST_FILENAME = "_MANIFEST.yaml"
48
+
49
+
50
def sha256_file(path: Path, chunk: int = 1 << 20) -> str:
    """Compute the SHA-256 of a file, streaming it in chunks.

    Parameters
    ----------
    path
        File to hash; opened in binary mode.
    chunk
        Number of bytes requested per read (default 1 MiB). Must not be
        ``0``. A negative value follows ``read(-1)`` semantics, i.e. the
        whole file is consumed in a single read.

    Returns
    -------
    str
        Lowercase hexadecimal digest.

    Raises
    ------
    ValueError
        If ``chunk`` is ``0``. A zero-byte read always returns ``b""``,
        which would end the loop at once and yield the digest of empty
        input regardless of the file's content.
    """
    if chunk == 0:
        raise ValueError("chunk must be non-zero; a 0-byte read hashes nothing")

    h = hashlib.sha256()
    with path.open("rb") as f:
        # An empty read marks end of file.
        while block := f.read(chunk):
            h.update(block)
    return h.hexdigest()
60
+
61
+
62
def write_manifest(
    parquet_path: Path,
    df: pd.DataFrame,
    dataset: str,
    layer: int,
    provenance: dict[str, Any] | None = None,
    generated_by: str = "",
) -> Path:
    """Write ``_MANIFEST.yaml`` next to *parquet_path*.

    The payload is serialised to YAML in memory first and only then
    written, so a serialisation failure (for instance a value in
    *provenance* that ``yaml.safe_dump`` cannot represent) neither
    creates an empty manifest nor truncates an existing one.

    Parameters
    ----------
    parquet_path
        Full path to the ``data.parquet`` that was just written.
        The manifest is placed in the same directory; only the file
        name is recorded in the manifest.
    df
        DataFrame used only to derive ``schema`` and ``row_count``
        — not persisted.
    dataset
        Canonical dataset name (e.g. ``"cnmv/capa1/inversiones/2024"``).
    layer
        Convention: ``0`` = raw, ``1`` = processed.
    provenance
        Optional project-specific dict (source files, hashes, ...).
        Omitted from the YAML when ``None`` or empty.
    generated_by
        Identifier of the generating project (e.g. ``"cnmv_python"``).

    Returns
    -------
    Path
        The path of the written manifest.
    """
    payload: dict[str, Any] = {
        "dataset": dataset,
        "layer": layer,
        "row_count": len(df),
        "parquet_file": parquet_path.name,
        # Timezone-aware UTC timestamp, truncated to whole seconds.
        "generated_at": dt.datetime.now(dt.UTC).isoformat(timespec="seconds"),
        "generated_by": generated_by,
        # df.dtypes yields one (label, dtype) pair per column even when
        # labels repeat; df[label] would return a DataFrame in that case
        # and has no single ``dtype``.
        "schema": [
            {"name": str(name), "dtype": str(dtype)}
            for name, dtype in df.dtypes.items()
        ],
    }
    if provenance:
        payload["provenance"] = provenance

    # Without a stream argument safe_dump returns the document as a string.
    text = yaml.safe_dump(
        payload,
        sort_keys=False,
        allow_unicode=True,
        default_flow_style=False,
    )

    out_path = parquet_path.parent / MANIFEST_FILENAME
    out_path.write_text(text, encoding="utf-8")
    logger.debug("Manifest written: %s", out_path)
    return out_path