tesorotools-python 0.0.0__py2.py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (38) hide show
  1. tesorotools/__init__.py +0 -0
  2. tesorotools/artists/__init__.py +5 -0
  3. tesorotools/artists/barh_plot.py +310 -0
  4. tesorotools/artists/line_plot.py +114 -0
  5. tesorotools/artists/table.py +199 -0
  6. tesorotools/artists/type_curve.py +216 -0
  7. tesorotools/convert.py +93 -0
  8. tesorotools/data_sources/__init__.py +0 -0
  9. tesorotools/data_sources/debug.py +26 -0
  10. tesorotools/data_sources/eikon.py +117 -0
  11. tesorotools/database/__init__.py +0 -0
  12. tesorotools/database/push.py +70 -0
  13. tesorotools/dependencies/__init__.py +0 -0
  14. tesorotools/dependencies/functions.py +11 -0
  15. tesorotools/dependencies/node.py +34 -0
  16. tesorotools/dependencies/resolution.py +118 -0
  17. tesorotools/main.py +37 -0
  18. tesorotools/offsets/__init__.py +0 -0
  19. tesorotools/offsets/offsets.py +439 -0
  20. tesorotools/offsets/outliers.py +15 -0
  21. tesorotools/render/__init__.py +11 -0
  22. tesorotools/render/content/__init__.py +0 -0
  23. tesorotools/render/content/content.py +17 -0
  24. tesorotools/render/content/images.py +147 -0
  25. tesorotools/render/content/section.py +53 -0
  26. tesorotools/render/content/table.py +283 -0
  27. tesorotools/render/headline.py +40 -0
  28. tesorotools/render/introduction.py +49 -0
  29. tesorotools/render/report.py +29 -0
  30. tesorotools/utils/__init__.py +0 -0
  31. tesorotools/utils/config.py +35 -0
  32. tesorotools/utils/globals.py +12 -0
  33. tesorotools/utils/matplotlib.py +38 -0
  34. tesorotools/utils/series.py +40 -0
  35. tesorotools/utils/template.py +126 -0
  36. tesorotools_python-0.0.0.dist-info/METADATA +13 -0
  37. tesorotools_python-0.0.0.dist-info/RECORD +38 -0
  38. tesorotools_python-0.0.0.dist-info/WHEEL +5 -0
@@ -0,0 +1,216 @@
1
+ # TODO: ensure stylesheet data and fonts are loaded only once
2
+
3
+ from pathlib import Path
4
+ from typing import Any
5
+
6
+ import matplotlib.pyplot as plt
7
+ import pandas as pd
8
+ from matplotlib.ticker import FuncFormatter
9
+ from pandas import Timestamp
10
+
11
+ from tesorotools.utils.config import merge
12
+ from tesorotools.utils.globals import DEBUG
13
+ from tesorotools.utils.matplotlib import (
14
+ PLOT_CONFIG,
15
+ format_annotation,
16
+ load_fonts,
17
+ )
18
+
19
# Sections of the shared plot configuration used by this module,
# looked up once when the module is imported.
TYPE_CURVE_CONFIG: dict[str, Any] = PLOT_CONFIG["type_curve"]
AX_CONFIG: dict[str, Any] = PLOT_CONFIG["ax"]
FIG_CONFIG: dict[str, Any] = PLOT_CONFIG["figure"]

# Executed as an import-time side effect of this module.
load_fonts()
24
+
25
+
26
def _style_spines(
    ax: plt.Axes,
    decimals: int,
    units: str,
    *,
    color: str,
    linewidth: str,
):
    """Style the frame, grid and ticks of ``ax``.

    Draws the horizontal grid, colours every spine and tick mark with
    ``color``, moves the y ticks to the right-hand side, formats the y tick
    labels through ``format_annotation(value, decimals, units)`` and rotates
    the x tick labels by 45 degrees.

    NOTE(review): ``linewidth`` is annotated as ``str`` but is forwarded
    unchanged to ``Spine.set_linewidth`` -- confirm the intended type.
    """

    def _tick_label(value, _position):
        return format_annotation(value, decimals, units)

    ax.grid(visible=True, axis="y")
    for frame_side in ax.spines.values():
        frame_side.set_color(color)
        frame_side.set_linewidth(linewidth)

    y_axis = ax.yaxis
    y_axis.tick_right()
    y_axis.set_major_formatter(FuncFormatter(_tick_label))

    ax.tick_params(axis="both", which="major")
    # Re-apply the current ticks/labels only to attach rotation and alignment.
    positions = ax.get_xticks()
    labels = ax.get_xticklabels()
    ax.set_xticks(positions, labels, rotation=45, ha="right")

    for tick_mark in [*ax.get_xticklines(), *ax.get_yticklines()]:
        tick_mark.set_markeredgecolor(color)
50
+
51
+
52
def _style_baseline(ax: plt.Axes, **baseline_config):
    """Make sure y=0 is visible on ``ax`` and highlight it.

    The y limits are widened so that they always contain zero. When zero
    coincides with the bottom or top limit, the matching spine takes the
    baseline colour; otherwise a horizontal line is drawn at y=0 using every
    entry of ``baseline_config`` as keyword arguments.

    Raises:
        KeyError: if ``baseline_config`` has no ``"color"`` entry.
    """
    color: str = baseline_config["color"]

    lower, upper = ax.get_ylim()
    ax.set_ylim(bottom=min(0, lower), top=max(0, upper))
    # Read the limits back instead of reusing the values passed in.
    lower, upper = ax.get_ylim()

    if lower != 0 and upper != 0:
        ax.axhline(y=0, **baseline_config)
        return

    edge = "bottom" if lower == 0 else "top"
    ax.spines[edge].set_edgecolor(color)
63
+
64
+
65
+ def _format_data(data: pd.DataFrame) -> dict[str, Any]:
66
+ # metadata
67
+ date_index: pd.DatetimeIndex = data.index
68
+ current_date: Timestamp = date_index.max()
69
+ current_year: int = current_date.year
70
+ last_year: int = (current_date - pd.DateOffset(years=1)).year
71
+
72
+ # current data
73
+ current_data: pd.Series = data.loc[current_date, :]
74
+ current_data.name = "current_data"
75
+
76
+ # current year
77
+ current_year_data: pd.DataFrame = data.loc[
78
+ date_index.year == current_year, :
79
+ ]
80
+ current_year_max: pd.Series = current_year_data.max()
81
+ current_year_max.name = "current_year_max"
82
+ current_year_min: pd.Series = current_year_data.min()
83
+ current_year_min.name = "current_year_min"
84
+
85
+ # last year
86
+ last_year_data: pd.DataFrame = data.loc[date_index.year == last_year, :]
87
+ last_year_max: pd.Series = last_year_data.max()
88
+ last_year_max.name = "last_year_max"
89
+ last_year_min: pd.Series = last_year_data.min()
90
+ last_year_min.name = "last_year_min"
91
+
92
+ formatted_data: pd.DataFrame = pd.concat(
93
+ [
94
+ last_year_max,
95
+ last_year_min,
96
+ current_year_max,
97
+ current_year_min,
98
+ current_data,
99
+ ],
100
+ axis=1,
101
+ )
102
+
103
+ return {
104
+ "data": formatted_data,
105
+ "current_date": current_date.strftime("%d/%m/%Y"),
106
+ "current_year": current_year,
107
+ "last_year": last_year,
108
+ }
109
+
110
+
111
def _plot_current_data(
    ax: plt.Axes,
    data: pd.DataFrame,
    date_fmt: str,
    *,
    linewidth: float,
    marker: str,
    points_to_mark: list[str],
    color: str,
    decimals: int,
    units: str,
):
    """Draw the current curve on ``ax`` and annotate selected points.

    ``data`` is plotted as a line labelled ``date_fmt``. Every label listed
    in ``points_to_mark`` is looked up in ``data``, drawn with ``marker`` and
    annotated 10 points above with ``format_annotation(value, decimals,
    units)``.

    NOTE(review): the caller in this module passes a single column (a
    Series) even though the annotation says ``DataFrame``.
    """
    data.plot(ax=ax, color=color, linewidth=linewidth, label=date_fmt)

    annotation_style = {
        "textcoords": "offset points",
        "xytext": (0, 10),
        "ha": "center",
    }
    for due in points_to_mark:
        level = data.loc[due]
        ax.plot(due, level, marker=marker, color=color)
        text = format_annotation(level, decimals, units)
        ax.annotate(text, (due, level), **annotation_style)
144
+
145
+
146
def plot_type_curve(
    data: pd.DataFrame,
    out_file: Path,
    **config,
):
    """Render one type curve and save it to ``out_file``.

    The figure shows the min/max band of the previous year, the min/max band
    of the current year and the curve at the most recent date found in
    ``data`` (see ``_format_data``).

    Args:
        data: dated frame, one column per due period.
        out_file: destination passed to ``Figure.savefig``.
        **config: per-plot overrides, combined with ``TYPE_CURVE_CONFIG``
            through ``merge``.

    The figure is always closed before returning, even when drawing or
    saving fails, so repeated calls do not accumulate open figures.
    """
    config: dict[str, Any] = merge(config, TYPE_CURVE_CONFIG)

    # Values are scaled by 100 when the y axis unit is "p.b."
    # (presumably basis points -- confirm against the configuration).
    if config["yaxis"]["units"] == "p.b.":
        data = data * 100

    formatted_assets: dict[str, Any] = _format_data(data)
    formatted_data: pd.DataFrame = formatted_assets["data"]
    due_index: pd.Index = formatted_data.index

    fig = plt.figure(**FIG_CONFIG)
    try:
        ax: plt.Axes = fig.add_subplot()

        # One shaded min/max band per year, previous year first.
        for period in ("last", "current"):
            band_config: dict[str, Any] = config[period]
            year = formatted_assets[f"{period}_year"]
            ax.fill_between(
                due_index,
                formatted_data[f"{period}_year_min"],
                formatted_data[f"{period}_year_max"],
                alpha=band_config["alpha"],
                color=band_config["color"],
                edgecolor=None,
                label=f"Rango {year}",
            )

        _plot_current_data(
            ax,
            formatted_data["current_data"],
            formatted_assets["current_date"],
            **config["line"],
        )
        _style_spines(ax, **config["yaxis"], **AX_CONFIG["spines"])
        _style_baseline(ax, **AX_CONFIG["baseline"])
        ax.legend(
            loc="upper center",
            bbox_to_anchor=(0.5, config["legend_sep"]),
            ncol=3,
        )
        fig.savefig(out_file)
    finally:
        # pyplot keeps every figure alive until it is explicitly closed.
        plt.close(fig)
199
+
200
+
201
+ # data is expected to be simple time series data: columns are series and rows represent dates
202
def plot_type_curves(data: pd.DataFrame, config_dicts: dict[str, Any]):
    """Plot one type curve per entry of ``config_dicts``.

    Entries whose name starts with ``"."`` are skipped. For the others, the
    columns named by the keys of ``config["series"]`` are selected from
    ``data``, renamed with the corresponding values and handed to
    ``plot_type_curve`` together with the whole entry as keyword config. The
    image is written to ``DEBUG / "type_curve" / "<name>.png"``.

    Raises:
        ValueError: if an entry lists fewer than two series.
    """
    for name, config in config_dicts.items():
        if name.startswith("."):  # aux entries
            continue

        series: dict[str, str] = config["series"]
        if len(series) < 2:
            raise ValueError(
                f"In plot {name}: A type curve must have at least two due periods. Given periods: {series.keys()}"
            )

        selected: pd.DataFrame = data.loc[:, series.keys()].rename(
            columns=series
        )
        plot_type_curve(
            data=selected,
            out_file=DEBUG / "type_curve" / f"{name}.png",
            **config,
        )
tesorotools/convert.py ADDED
@@ -0,0 +1,93 @@
1
+ # this file may migrate to the utils package
2
+
3
+ from pathlib import Path
4
+
5
+ import pandas as pd
6
+
7
+ from tesorotools.artists.barh_plot import plot_barh_charts_from_flash
8
+ from tesorotools.artists.line_plot import plot_line_charts
9
+ from tesorotools.artists.table import generate_tables_from_flash
10
+ from tesorotools.artists.type_curve import plot_type_curves
11
+ from tesorotools.dependencies.resolution import (
12
+ compute_derivate_series,
13
+ concat_derivate_series,
14
+ resolve_series,
15
+ )
16
+ from tesorotools.offsets.offsets import trim
17
+ from tesorotools.utils.config import read_config
18
+
19
+
20
def index_replace(old: str, new: str, index: pd.MultiIndex) -> pd.MultiIndex:
    """Return a copy of ``index`` with ``old`` replaced by ``new`` in its labels.

    The replacement is a plain ``str.replace`` (substring match) applied to
    every string label of every level; non-string labels are kept untouched.
    Level names are carried over to the result, which also lets an empty
    index be rebuilt (the number of levels cannot be inferred from an empty
    list of tuples).
    """
    replaced = [
        tuple(
            label.replace(old, new) if isinstance(label, str) else label
            for label in entry
        )
        for entry in index
    ]
    return pd.MultiIndex.from_tuples(replaced, names=list(index.names))
32
+
33
+
34
def cheap_convert(old_file: Path) -> pd.DataFrame:
    """Read a feather file in the old layout and relabel its row index.

    Two slices of the frame are kept and concatenated:

    * rows whose second level is one of ``day``/``mtd``/``week``/``year``,
      with the labels ``day``, ``week``, ``year`` and ``roll_var`` rewritten
      to ``bday``, ``ftd``, ``ytd`` and ``roll_std``;
    * rows labelled ``("no", "absolute", "value")`` on levels 2-4, with
      ``absolute`` rewritten to ``no``.
    """
    source = pd.read_feather(old_file)
    everything = slice(None)

    plain_values = source.loc[
        (everything, "no", "absolute", "value"), :
    ].copy()
    offset_rows = source.loc[
        (everything, ["day", "mtd", "week", "year"], everything, everything),
        :,
    ]

    # Applied in this order; each pass is a substring replacement.
    relabelling = (
        ("day", "bday"),
        ("week", "ftd"),
        ("year", "ytd"),
        ("roll_var", "roll_std"),
    )
    for before, after in relabelling:
        offset_rows.index = index_replace(before, after, offset_rows.index)

    plain_values.index = index_replace("absolute", "no", plain_values.index)

    return pd.concat([offset_rows, plain_values])
52
+
53
+
54
if __name__ == "__main__":
    # Ad-hoc driver: optionally rebuild "derivates.feather" from the old
    # flash file, then render every chart and table from it.
    preprocess = False
    examples_dir = Path("examples")
    derivates_file = "derivates.feather"

    barh_config_dicts = read_config(examples_dir / "barh_plots.yaml")
    line_config_dicts = read_config(examples_dir / "line_plots.yaml")
    type_config_dicts = read_config(examples_dir / "type_curves.yaml")
    table_config_dicts = read_config(examples_dir / "tables.yaml")

    if preprocess:
        old_file: Path = Path("debug") / "flash.feather"
        dependencies_cfg = read_config(examples_dir / "dependencies.yaml")
        flash: pd.DataFrame = cheap_convert(old_file)

        document_configs = [
            barh_config_dicts,
            line_config_dicts,
            type_config_dicts,
            table_config_dicts,
        ]
        resolved_dict = resolve_series(document_configs, dependencies_cfg)

        independent_full_df = flash.loc[:, list(resolved_dict["independent"])]
        independent_trimmed_df = trim(independent_full_df)
        dependent_trimmed_df = compute_derivate_series(
            resolved_dict["dependent"], independent_trimmed_df
        )

        offsets_config = read_config(examples_dir / "offsets.yaml")
        full_df = concat_derivate_series(
            independent_full_df,
            dependent_trimmed_df,
            offsets_config,
            force_trim=True,
        )
        full_df.to_feather(derivates_file)

    # Always render from the file on disk, whether or not it was just rebuilt.
    full_df = pd.read_feather(derivates_file)
    trimmed_df = trim(full_df)
    plot_barh_charts_from_flash(full_df, barh_config_dicts)
    plot_line_charts(trimmed_df, line_config_dicts)
    plot_type_curves(trimmed_df, type_config_dicts)
    generate_tables_from_flash(full_df, table_config_dicts)
File without changes
@@ -0,0 +1,26 @@
1
+ import numpy as np
2
+ import pandas as pd
3
+
4
+ from tesorotools.utils.config import read_config
5
+ from tesorotools.utils.globals import DEBUG, EXAMPLES
6
+
7
# Mock data source: the "debug" section of examples/data.yaml, read once at
# import time. get_series() uses it to check that a requested id is known.
CATALOG: dict[str, str] = read_config(EXAMPLES / "data.yaml")["debug"]
9
+
10
+
11
def get_series(start: str, end: str, series: list[str]) -> pd.DataFrame:
    """Return random data for the requested series (mock data source).

    ``series`` is a list of source-agnostic ids. Every id must be present in
    ``CATALOG`` with a non-``None`` value; the mapped ids themselves are not
    used. The result has one row per calendar day between ``start`` and
    ``end`` and one column per requested id, filled with standard normal
    noise.

    Raises:
        KeyError: if an id is missing from the debug configuration.
    """
    for agnostic_id in series:
        if CATALOG.get(agnostic_id) is None:
            raise KeyError(f"{agnostic_id} not found in debug configuration")

    dates: pd.DatetimeIndex = pd.date_range(start=start, end=end).sort_values()
    noise = np.random.randn(len(dates), len(series))
    return pd.DataFrame(data=noise, index=dates, columns=series)
@@ -0,0 +1,117 @@
1
+ import time
2
+ from pathlib import Path
3
+
4
+ import eikon as ek
5
+ import pandas as pd
6
+
7
+
8
def get_series(
    api_key: str,
    series_id_list: list[str],
    start_date: str,
    end_date: str,
    freq: str = "B",
    fields: list[str] | None = None,
    cooldown: int = 60,
    datapoint_limit: int = 2_000,
    cache_path: Path | None = None,
) -> pd.DataFrame:
    """Download data from Eikon, given that you have a valid API key.

    The date range is split into consecutive chunks so that a single request
    stays within ``datapoint_limit``; each chunk is fetched with
    ``block_download`` and the chunks are joined with
    ``concat_partial_data``.

    Args:
        api_key: Eikon application key.
        series_id_list: identifiers to download; must not be empty.
        start_date, end_date: bounds handed to ``pandas.date_range``.
        freq: frequency of the generated dates.
        fields: fields to request; a default set is used when ``None``.
        cooldown: seconds to wait between two requests.
        datapoint_limit: budget used to size each chunk.
        cache_path: optional directory. When given, every chunk is stored
            there as ``from_<start>_to_<end>.csv`` and chunks already on
            disk are not downloaded again.

    Raises:
        ValueError: if ``series_id_list`` is empty.
    """
    if not series_id_list:
        raise ValueError("series_id_list must contain at least one series id")

    ek.set_app_key(api_key)
    if fields is None:
        fields = ["TIMESTAMP", "CLOSE", "CF_LAST", "CF_YIELD"]
    if cache_path is not None:
        # block_download writes straight into this directory.
        cache_path.mkdir(parents=True, exist_ok=True)

    dates_list: list[str] = list(
        pd.date_range(start=start_date, end=end_date, freq=freq).astype("str")
    )

    # One field is left out of the budget (presumably TIMESTAMP -- confirm).
    # Both clamps keep a single-field request or a very long series list from
    # producing a zero divisor or a zero-length chunk.
    counted_fields: int = max(1, len(fields) - 1)
    download_step: int = max(
        1, datapoint_limit // (len(series_id_list) * counted_fields)
    )

    partial_data: list[pd.DataFrame] = []
    for offset in range(0, len(dates_list), download_step):
        chunk_dates = dates_list[offset : offset + download_step]
        start, end = chunk_dates[0], chunk_dates[-1]
        cache_file_path: Path | None = (
            cache_path / f"from_{start}_to_{end}.csv"
            if cache_path is not None
            else None
        )
        if cache_file_path is not None and cache_file_path.exists():
            continue  # chunk already cached: no request, no cooldown

        data: pd.DataFrame = block_download(
            series_id_list,
            start_date=start,
            end_date=end,
            freq=freq,
            fields=fields,
            cooldown=cooldown,
            file_path=cache_file_path,
            debug=True,
        )
        # With a cache directory the chunk is read back from disk later on.
        if cache_file_path is None:
            partial_data.append(data)
        if offset + download_step < len(dates_list):
            print(f"Waiting {cooldown} seconds for Eikon to cool down...")
            time.sleep(cooldown)

    return concat_partial_data(cache_path, partial_data)
65
+
66
+
67
def block_download(
    series_id_list: list[str],
    start_date: str,
    end_date: str,
    freq: str = "B",
    fields: list[str] | None = None,
    cooldown: int = 60,
    file_path: Path | None = None,
    debug: bool = False,
):
    """Download one block of time series, retrying until Eikon answers.

    ``ek.get_timeseries`` is called in a loop; an ``EikonError`` (or a
    ``None`` answer, which is treated as one) is reported on stdout and the
    request is retried after ``cooldown`` seconds, with no upper bound on the
    number of attempts.

    The result is de-duplicated on its index (first occurrence wins) and
    sorted by index. Rows are NOT de-duplicated on their values: identical
    values on different dates are legitimate observations and must be kept.
    When the answer has a single column it is renamed after
    ``series_id_list``. If ``file_path`` is given the frame is also written
    there as CSV.
    """
    interval = "daily" if freq == "B" else freq

    while True:
        try:
            data: pd.DataFrame | None = ek.get_timeseries(
                rics=series_id_list,
                start_date=start_date,
                end_date=end_date,
                fields=fields,
                interval=interval,
                debug=debug,
            )
            if data is None:
                raise ek.EikonError(
                    code=404, message="Service temporarily unavailable"
                )
        except ek.EikonError as e:
            print(f"Eikon error: {e}")
            print("This is probably not our fault")
            print(f"Waiting {cooldown} seconds for Eikon to cool down...")
            time.sleep(cooldown)
        else:
            break

    data = data[~data.index.duplicated(keep="first")]
    data = data.sort_index()
    if len(data.columns) == 1:
        data.columns = series_id_list
    if file_path is not None:
        data.to_csv(file_path)
    return data
106
+
107
+
108
def concat_partial_data(
    cache_path: Path, partial_data: list[pd.DataFrame]
) -> pd.DataFrame:
    """Concatenate in-memory chunks and, optionally, cached CSV chunks.

    Args:
        cache_path: directory holding cached chunks, or ``None``. Every
            ``*.csv`` file in it is read with its ``Date`` column as index.
            Files are read in sorted name order so the result does not depend
            on the order in which the file system lists them.
        partial_data: chunks already in memory; the list is not modified.

    NOTE(review): the index of cached chunks is read back as plain strings
    (no date parsing) -- confirm that this is what callers expect.
    """
    dfs = list(partial_data)  # work on a copy, never on the caller's list
    if cache_path is not None:
        dfs.extend(
            pd.read_csv(chunk, index_col="Date")
            for chunk in sorted(cache_path.glob("*.csv"))
        )
    return pd.concat(dfs)
File without changes
@@ -0,0 +1,70 @@
1
+ import json
2
+
3
+ import pandas as pd
4
+ from sqlalchemy import create_engine
5
+
6
+
7
def get_connection_string(
    engine: str,
    driver: str,
    username: str,
    password: str,
    host: str,
    database: str,
) -> str:
    """Build a ``engine+driver://user:password@host/database`` URL.

    ``username`` and ``password`` are expected as raw text and are
    URL-escaped here, so characters such as ``@``, ``:`` or ``/`` in the
    credentials cannot be mistaken for URL delimiters.
    """
    from urllib.parse import quote_plus

    credentials = f"{quote_plus(username)}:{quote_plus(password)}"
    return f"{engine}+{driver}://{credentials}@{host}/{database}"
16
+
17
+
18
def flash_to_database_format(data: pd.DataFrame) -> pd.DataFrame:
    """Transform the data in *flash* format to data in *database* format.

    The columns of ``data`` are stacked into a ``name``/``value`` pair of
    columns and the row index is turned into regular columns. The
    ``offset``, ``difference_type`` and ``stat`` index levels are folded
    into a single JSON string column called ``dimensions``; a ``value_meta``
    column filled with ``pd.NA`` is appended.

    ``data`` itself is left untouched.
    """
    dimension_levels = ["offset", "difference_type", "stat"]

    # rename_axis works on a new frame instead of renaming the caller's columns.
    stacked_data = (
        data.rename_axis(columns="name").stack().to_frame(name="value")
    )
    database_data = stacked_data.reset_index()

    # Walking the three columns in lockstep avoids building one Series per row.
    database_data["dimensions"] = [
        json.dumps(dict(zip(dimension_levels, labels)))
        for labels in zip(
            *(database_data[level] for level in dimension_levels)
        )
    ]
    database_data = database_data.drop(columns=dimension_levels)
    database_data["value_meta"] = pd.NA
    return database_data
42
+
43
+
44
def database_to_flash_format():
    """Placeholder for the inverse of ``flash_to_database_format``.

    Not implemented yet: takes no arguments and returns ``None``.
    """
    return None
46
+
47
+
48
def push_to_database(data: pd.DataFrame, conn_string: str, table: str) -> None:
    """Append ``data`` to ``table`` in the database behind ``conn_string``.

    Rows are written in batches of 1000 and the frame index is not stored.
    The engine created for the call is disposed afterwards, whether or not
    the write succeeds, so its pooled connections are released.
    """
    engine = create_engine(url=conn_string)
    try:
        data.to_sql(
            name=table,
            con=engine,
            if_exists="append",
            chunksize=1000,
            index=False,
        )
    finally:
        engine.dispose()
53
+
54
+
55
+ # this may be just as a datasource (maybe a common protocol?)
56
+ # check the api long video for further reference
57
def pull_from_database(
    conn_string: str, start: str, end: str, series: list[str]
):
    """Read raw values of ``series`` between ``start`` and ``end``.

    Queries the ``measurements`` table for rows whose ``dimensions`` contain
    ``{"offset": "no", "stat": "value"}``, ordered by date, and returns them
    as a frame with ``date``, ``name`` and ``value`` columns.

    The dates and series names are sent as bound parameters rather than
    being pasted into the SQL text.

    NOTE(review): still experimental. The ``@>`` containment operator is
    PostgreSQL-specific, and the table/column names are taken from the
    original hard-coded test query -- confirm against the real schema.
    """
    from sqlalchemy import bindparam, text

    query = text(
        """
        SELECT date, name, value
        FROM measurements
        WHERE
            date BETWEEN :start AND :end AND name IN :series AND
            dimensions @> '{"offset":"no", "stat":"value"}'
        ORDER BY date ASC
        """
    ).bindparams(bindparam("series", expanding=True))

    engine = create_engine(url=conn_string)
    try:
        return pd.read_sql_query(
            query,
            con=engine,
            params={"start": start, "end": end, "series": list(series)},
        )
    finally:
        engine.dispose()
File without changes
@@ -0,0 +1,11 @@
1
+ import pandas as pd
2
+
3
+
4
+ def difference(
5
+ target: float | pd.Series, reference: float | pd.Series
6
+ ) -> float | pd.Series:
7
+ return target - reference
8
+
9
+
10
+ def inverse(target: float | pd.Series) -> float | pd.Series:
11
+ return 1 / target
@@ -0,0 +1,34 @@
1
+ from collections.abc import Callable
2
+ from typing import Self
3
+
4
+ import pandas as pd
5
+
6
+ from . import functions
7
+
8
+
9
+ class Node:
10
+ def __init__(self, name: str) -> None:
11
+ self._name: str = name
12
+ self._edges: list[Self] = []
13
+
14
+ def add_edge(self, node: Self) -> None:
15
+ self._edges.append(node)
16
+
17
+ def build_edges(self, *, dependencies: list[str], function: str) -> None:
18
+ self._resolving_function: Callable[..., float | pd.Series] = getattr(
19
+ functions, function
20
+ )
21
+ for d in dependencies:
22
+ self.add_edge(Node(d))
23
+
24
+ @property
25
+ def name(self) -> str:
26
+ return self._name
27
+
28
+ @property
29
+ def edges(self) -> list[Self]:
30
+ return self._edges
31
+
32
+ @property
33
+ def resolving_function(self) -> Callable[..., float | pd.Series]:
34
+ return self._resolving_function
@@ -0,0 +1,118 @@
1
+ from typing import Any
2
+
3
+ import pandas as pd
4
+
5
+ from tesorotools.offsets.offsets import process_raw_data, trim
6
+
7
+ from .node import Node
8
+
9
+
10
def resolve(
    start: Node,
    resolved: list[Node],
    unresolved: list[str],
    independent: set[str],
    dependencies_cfg: dict[str, Any],
):
    """Resolve ``start`` and, depth first, everything it depends on.

    A series without an entry in ``dependencies_cfg`` is independent: its
    name is added to ``independent`` and nothing else happens. Otherwise the
    node's edges are built from its configuration, every dependency is
    resolved first, and the node is then appended to ``resolved``, so a
    node always comes after the nodes it depends on.

    A series is resolved at most once. Nodes are matched by name, because
    every edge is a freshly created ``Node`` object and distinct objects
    never compare equal.

    Args:
        start: node to resolve.
        resolved: output list of dependent nodes, in resolution order.
        unresolved: names on the current resolution path (cycle detection).
        independent: output set of names without dependencies.
        dependencies_cfg: ``{name: build_edges keyword arguments}``.

    Raises:
        ValueError: if a circular dependency is found.
    """
    if start.name not in dependencies_cfg:
        independent.add(start.name)
        return

    if any(done.name == start.name for done in resolved):
        return  # already resolved through another path

    start.build_edges(**dependencies_cfg[start.name])

    unresolved.append(start.name)
    for node in start.edges:
        if node.name in unresolved:
            raise ValueError(
                f"circular dependency: {start.name} <-> {node.name}"
            )
        resolve(node, resolved, unresolved, independent, dependencies_cfg)
    resolved.append(start)
    unresolved.remove(start.name)
38
+
39
+
40
def collect_document_series(
    config_dicts: list[dict[str, Any]], find: str = "series"
) -> list[str]:
    """Collect every series name used across several configurations.

    Each configuration is searched with ``collect_series``; duplicates are
    removed. The names are returned sorted, so the result (and everything
    derived from its order) is the same on every run instead of following
    set iteration order.
    """
    series: set[str] = set()
    for config_dict in config_dicts:
        series |= collect_series(config_dict, find)
    # key=str keeps the sort well defined even if a key is not a string.
    return sorted(series, key=str)
47
+
48
+
49
def resolve_series(
    config_dicts: list[dict[str, Any]], dependencies_cfg: dict[str, Any]
):
    """Split the series used by the configurations by dependency status.

    Every series name found by ``collect_document_series`` is wrapped in a
    ``Node`` and passed to ``resolve`` with a fresh ``unresolved`` list.

    Returns:
        A dict with ``"independent"`` (set of names without an entry in
        ``dependencies_cfg``) and ``"dependent"`` (list of nodes in the
        order ``resolve`` appended them).
    """
    dependent: list[Node] = []
    independent: set[str] = set()

    for series_name in collect_document_series(config_dicts):
        resolve(
            start=Node(name=series_name),
            resolved=dependent,
            unresolved=[],
            independent=independent,
            dependencies_cfg=dependencies_cfg,
        )

    return {
        "independent": independent,
        "dependent": dependent,
    }
69
+
70
+
71
def compute_derivate_series(
    dependent_nodes: list[Node], trimmed_data: pd.DataFrame
) -> pd.DataFrame:
    """Compute every derived series from its dependencies.

    Nodes are processed in the given order, which is expected to list a
    node after the nodes it depends on. For each node the resolving
    function is called with one Series per edge, in edge order. A
    dependency is taken from ``trimmed_data`` when it has a column of that
    name, otherwise from the series derived earlier in this call, so a
    derived series may depend on another derived series.

    A node name that appears more than once is computed only once. With no
    dependent nodes an empty frame sharing the index of ``trimmed_data`` is
    returned.

    Raises:
        KeyError: if a dependency is neither a column of ``trimmed_data``
            nor a series derived earlier in the list.
    """
    inferred: dict[str, pd.Series] = {}

    for node in dependent_nodes:
        if node.name in inferred:
            continue

        arguments: list[pd.Series] = []
        for dependency in node.edges:
            if dependency.name in trimmed_data.columns:
                arguments.append(trimmed_data[dependency.name])
            elif dependency.name in inferred:
                arguments.append(inferred[dependency.name])
            else:
                raise KeyError(
                    f"{node.name} depends on {dependency.name}, "
                    "which is not available"
                )

        result: pd.Series = node.resolving_function(*arguments)
        # rename() returns a new object, so an input column is never renamed.
        inferred[node.name] = result.rename(node.name)

    if not inferred:
        return pd.DataFrame(index=trimmed_data.index)
    return pd.concat(list(inferred.values()), axis=1)
84
+
85
+
86
def concat_derivate_series(
    independent_full_df: pd.DataFrame,
    derivate_trimmed_df: pd.DataFrame,
    offsets_config: dict[str, Any],
    force_trim: bool = False,
) -> pd.DataFrame:
    """Join independent and derived series side by side.

    The derived frame is always expanded with ``process_raw_data`` using
    ``offsets_config``. The independent frame is used as given unless
    ``force_trim`` is set, in which case it is trimmed and re-expanded the
    same way (useful when adding emergency fixed offsets).
    """
    independent_part: pd.DataFrame = independent_full_df
    if force_trim:
        independent_part = process_raw_data(
            trim(independent_full_df), **offsets_config
        )

    derivate_part: pd.DataFrame = process_raw_data(
        derivate_trimmed_df, **offsets_config
    )
    return pd.concat([independent_part, derivate_part], axis=1)
106
+
107
+
108
def collect_series(
    config_dict: dict[str, Any], find: str = "series"
) -> set[str]:
    """Recursively gather the keys stored under every ``find`` entry.

    Wherever ``config_dict`` (or a nested dict) has a ``find`` key, the keys
    of the mapping stored there are collected. Other dict values are
    searched recursively; the mapping found under ``find`` itself is not
    descended into.
    """
    found: set[str] = set()
    for key, value in config_dict.items():
        if key == find:
            found.update(value.keys())
        elif isinstance(value, dict):
            found.update(collect_series(value, find))
    return found