tesorotools-python 0.0.0__py2.py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- tesorotools/__init__.py +0 -0
- tesorotools/artists/__init__.py +5 -0
- tesorotools/artists/barh_plot.py +310 -0
- tesorotools/artists/line_plot.py +114 -0
- tesorotools/artists/table.py +199 -0
- tesorotools/artists/type_curve.py +216 -0
- tesorotools/convert.py +93 -0
- tesorotools/data_sources/__init__.py +0 -0
- tesorotools/data_sources/debug.py +26 -0
- tesorotools/data_sources/eikon.py +117 -0
- tesorotools/database/__init__.py +0 -0
- tesorotools/database/push.py +70 -0
- tesorotools/dependencies/__init__.py +0 -0
- tesorotools/dependencies/functions.py +11 -0
- tesorotools/dependencies/node.py +34 -0
- tesorotools/dependencies/resolution.py +118 -0
- tesorotools/main.py +37 -0
- tesorotools/offsets/__init__.py +0 -0
- tesorotools/offsets/offsets.py +439 -0
- tesorotools/offsets/outliers.py +15 -0
- tesorotools/render/__init__.py +11 -0
- tesorotools/render/content/__init__.py +0 -0
- tesorotools/render/content/content.py +17 -0
- tesorotools/render/content/images.py +147 -0
- tesorotools/render/content/section.py +53 -0
- tesorotools/render/content/table.py +283 -0
- tesorotools/render/headline.py +40 -0
- tesorotools/render/introduction.py +49 -0
- tesorotools/render/report.py +29 -0
- tesorotools/utils/__init__.py +0 -0
- tesorotools/utils/config.py +35 -0
- tesorotools/utils/globals.py +12 -0
- tesorotools/utils/matplotlib.py +38 -0
- tesorotools/utils/series.py +40 -0
- tesorotools/utils/template.py +126 -0
- tesorotools_python-0.0.0.dist-info/METADATA +13 -0
- tesorotools_python-0.0.0.dist-info/RECORD +38 -0
- tesorotools_python-0.0.0.dist-info/WHEEL +5 -0
|
@@ -0,0 +1,216 @@
|
|
|
1
|
+
# TODO: make sure stylesheet data and fonts are loaded only once
|
|
2
|
+
|
|
3
|
+
from pathlib import Path
|
|
4
|
+
from typing import Any
|
|
5
|
+
|
|
6
|
+
import matplotlib.pyplot as plt
|
|
7
|
+
import pandas as pd
|
|
8
|
+
from matplotlib.ticker import FuncFormatter
|
|
9
|
+
from pandas import Timestamp
|
|
10
|
+
|
|
11
|
+
from tesorotools.utils.config import merge
|
|
12
|
+
from tesorotools.utils.globals import DEBUG
|
|
13
|
+
from tesorotools.utils.matplotlib import (
|
|
14
|
+
PLOT_CONFIG,
|
|
15
|
+
format_annotation,
|
|
16
|
+
load_fonts,
|
|
17
|
+
)
|
|
18
|
+
|
|
19
|
+
# Default settings for type-curve plots, read from the shared plot configuration.
TYPE_CURVE_CONFIG: dict[str, Any] = PLOT_CONFIG["type_curve"]
# Axes-level settings; this module reads its "spines" and "baseline" entries.
AX_CONFIG: dict[str, Any] = PLOT_CONFIG["ax"]
# Keyword arguments forwarded verbatim to ``plt.figure``.
FIG_CONFIG: dict[str, Any] = PLOT_CONFIG["figure"]

# Executed at import time, i.e. once per process for this module.
load_fonts()
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
def _style_spines(
    ax: plt.Axes,
    decimals: int,
    units: str,
    *,
    color: str,
    linewidth: str,
):
    """Apply the common grid, spine and tick styling to ``ax``.

    Args:
        ax: Axes to style in place.
        decimals: Forwarded to ``format_annotation`` for the y tick labels.
        units: Forwarded to ``format_annotation`` for the y tick labels.
        color: Colour used for every spine and every tick mark.
        linewidth: Line width applied to every spine.
    """

    def _format_tick(value, _position):
        return format_annotation(value, decimals, units)

    ax.grid(visible=True, axis="y")
    for border in ax.spines.values():
        border.set_color(color)
        border.set_linewidth(linewidth)

    # Y ticks live on the right-hand side and use the shared number format.
    y_axis = ax.yaxis
    y_axis.tick_right()
    y_axis.set_major_formatter(FuncFormatter(_format_tick))

    ax.tick_params(axis="both", which="major")

    # Re-apply the current x ticks/labels only to rotate and right-align them.
    current_ticks = ax.get_xticks()
    current_labels = ax.get_xticklabels()
    ax.set_xticks(current_ticks, current_labels, rotation=45, ha="right")

    for tick_line in (*ax.get_xticklines(), *ax.get_yticklines()):
        tick_line.set_markeredgecolor(color)
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
def _style_baseline(ax: plt.Axes, **baseline_config):
    """Make the ``y = 0`` baseline visible on ``ax``.

    The y limits are widened so that zero is always inside the plotted range.
    If zero then coincides with the bottom (or top) limit, that spine is
    recoloured with ``baseline_config["color"]``; otherwise a horizontal line
    is drawn at ``y = 0`` using every entry of ``baseline_config``.
    """
    baseline_color: str = baseline_config["color"]

    lower, upper = ax.get_ylim()
    ax.set_ylim(bottom=min(0, lower), top=max(0, upper))
    # Read the limits back after they have been applied.
    lower, upper = ax.get_ylim()

    if lower == 0:
        ax.spines["bottom"].set_edgecolor(baseline_color)
        return
    if upper == 0:
        ax.spines["top"].set_edgecolor(baseline_color)
        return
    ax.axhline(y=0, **baseline_config)
|
|
63
|
+
|
|
64
|
+
|
|
65
|
+
def _format_data(data: pd.DataFrame) -> dict[str, Any]:
|
|
66
|
+
# metadata
|
|
67
|
+
date_index: pd.DatetimeIndex = data.index
|
|
68
|
+
current_date: Timestamp = date_index.max()
|
|
69
|
+
current_year: int = current_date.year
|
|
70
|
+
last_year: int = (current_date - pd.DateOffset(years=1)).year
|
|
71
|
+
|
|
72
|
+
# current data
|
|
73
|
+
current_data: pd.Series = data.loc[current_date, :]
|
|
74
|
+
current_data.name = "current_data"
|
|
75
|
+
|
|
76
|
+
# current year
|
|
77
|
+
current_year_data: pd.DataFrame = data.loc[
|
|
78
|
+
date_index.year == current_year, :
|
|
79
|
+
]
|
|
80
|
+
current_year_max: pd.Series = current_year_data.max()
|
|
81
|
+
current_year_max.name = "current_year_max"
|
|
82
|
+
current_year_min: pd.Series = current_year_data.min()
|
|
83
|
+
current_year_min.name = "current_year_min"
|
|
84
|
+
|
|
85
|
+
# last year
|
|
86
|
+
last_year_data: pd.DataFrame = data.loc[date_index.year == last_year, :]
|
|
87
|
+
last_year_max: pd.Series = last_year_data.max()
|
|
88
|
+
last_year_max.name = "last_year_max"
|
|
89
|
+
last_year_min: pd.Series = last_year_data.min()
|
|
90
|
+
last_year_min.name = "last_year_min"
|
|
91
|
+
|
|
92
|
+
formatted_data: pd.DataFrame = pd.concat(
|
|
93
|
+
[
|
|
94
|
+
last_year_max,
|
|
95
|
+
last_year_min,
|
|
96
|
+
current_year_max,
|
|
97
|
+
current_year_min,
|
|
98
|
+
current_data,
|
|
99
|
+
],
|
|
100
|
+
axis=1,
|
|
101
|
+
)
|
|
102
|
+
|
|
103
|
+
return {
|
|
104
|
+
"data": formatted_data,
|
|
105
|
+
"current_date": current_date.strftime("%d/%m/%Y"),
|
|
106
|
+
"current_year": current_year,
|
|
107
|
+
"last_year": last_year,
|
|
108
|
+
}
|
|
109
|
+
|
|
110
|
+
|
|
111
|
+
def _plot_current_data(
    ax: plt.Axes,
    data: pd.DataFrame,
    date_fmt: str,
    *,
    linewidth: float,
    marker: str,
    points_to_mark: list[str],
    color: str,
    decimals: int,
    units: str,
):
    """Draw the current curve on ``ax`` and highlight selected points.

    Args:
        ax: Axes to draw on.
        data: Values to plot via ``data.plot``; the caller in this file passes
            a single column (a Series) despite the annotation.
        date_fmt: Text used as the legend label of the line.
        linewidth: Width of the line.
        marker: Marker style for the highlighted points.
        points_to_mark: Index labels of ``data`` to mark and annotate.
        color: Colour of both the line and the markers.
        decimals: Forwarded to ``format_annotation`` for the point labels.
        units: Forwarded to ``format_annotation`` for the point labels.
    """
    data.plot(ax=ax, color=color, linewidth=linewidth, label=date_fmt)

    # Lazily pair each label with its value so lookups happen in plot order.
    marked = ((label, data.loc[label]) for label in points_to_mark)
    for x_pos, y_val in marked:
        ax.plot(x_pos, y_val, marker=marker, color=color)
        caption = format_annotation(y_val, decimals, units)
        ax.annotate(
            caption,
            (x_pos, y_val),
            textcoords="offset points",
            xytext=(0, 10),  # place the text 10 points above the marker
            ha="center",
        )
|
|
144
|
+
|
|
145
|
+
|
|
146
|
+
def plot_type_curve(
    data: pd.DataFrame,
    out_file: Path,
    **config,
):
    """Render one type-curve chart and save it to ``out_file``.

    The chart shows the min/max band of the previous calendar year, the
    min/max band of the current calendar year and the curve at the latest
    available date.

    Args:
        data: Date-indexed frame whose columns are the points of the curve.
        out_file: Destination passed to ``Figure.savefig``.
        **config: Plot-specific settings, merged with ``TYPE_CURVE_CONFIG``
            through ``merge``. The keys read here are ``yaxis``, ``last``,
            ``current``, ``line`` and ``legend_sep``.

    The figure is always closed before returning (also on error), so calling
    this function repeatedly does not accumulate open pyplot figures.
    """
    config: dict[str, Any] = merge(config, TYPE_CURVE_CONFIG)

    # NOTE(review): "p.b." presumably means basis points, hence the x100
    # rescaling - confirm against the configuration files.
    if config["yaxis"]["units"] == "p.b.":
        data = data * 100

    formatted_assets: dict[str, Any] = _format_data(data)
    formatted_data: pd.DataFrame = formatted_assets["data"]
    due_index: pd.Index = formatted_data.index

    fig = plt.figure(**FIG_CONFIG)
    try:
        ax: plt.Axes = fig.add_subplot()

        # Range observed during the previous calendar year.
        last_config: dict[str, Any] = config["last"]
        ax.fill_between(
            due_index,
            formatted_data["last_year_min"],
            formatted_data["last_year_max"],
            alpha=last_config["alpha"],
            color=last_config["color"],
            edgecolor=None,
            label=f"Rango {formatted_assets['last_year']}",
        )

        # Range observed so far during the current calendar year.
        current_config: dict[str, Any] = config["current"]
        ax.fill_between(
            due_index,
            formatted_data["current_year_min"],
            formatted_data["current_year_max"],
            alpha=current_config["alpha"],
            color=current_config["color"],
            edgecolor=None,
            label=f"Rango {formatted_assets['current_year']}",
        )

        _plot_current_data(
            ax,
            formatted_data["current_data"],
            formatted_assets["current_date"],
            **config["line"],
        )
        _style_spines(ax, **config["yaxis"], **AX_CONFIG["spines"])
        _style_baseline(ax, **AX_CONFIG["baseline"])
        ax.legend(
            loc="upper center",
            bbox_to_anchor=(0.5, config["legend_sep"]),
            ncol=3,
        )
        fig.savefig(out_file)
    finally:
        # pyplot keeps every figure alive until it is closed explicitly.
        plt.close(fig)
|
|
199
|
+
|
|
200
|
+
|
|
201
|
+
# data is expected to be plain time-series data: columns are series and rows are dates
|
|
202
|
+
def plot_type_curves(data: pd.DataFrame, config_dicts: dict[str, Any]):
    """Render one type-curve chart per entry of ``config_dicts``.

    Args:
        data: Frame whose columns include every key of each entry's
            ``"series"`` mapping.
        config_dicts: Mapping of plot name to plot configuration. Entries
            whose name starts with ``"."`` are skipped.

    Raises:
        ValueError: If a plot declares fewer than two series.
    """
    for name, config in config_dicts.items():
        if name.startswith("."):  # aux entries
            continue

        series: dict[str, str] = config["series"]
        if len(series) < 2:
            raise ValueError(
                f"In plot {name}: A type curve must have at least two due periods. Given periods: {series.keys()}"
            )

        # Keep only the configured columns, relabelled with the configured names.
        curve_data: pd.DataFrame = data.loc[:, series.keys()].rename(
            columns=series
        )
        target = DEBUG / "type_curve" / f"{name}.png"
        plot_type_curve(data=curve_data, out_file=target, **config)
|
tesorotools/convert.py
ADDED
|
@@ -0,0 +1,93 @@
|
|
|
1
|
+
# this file may migrate to the utils package
|
|
2
|
+
|
|
3
|
+
from pathlib import Path
|
|
4
|
+
|
|
5
|
+
import pandas as pd
|
|
6
|
+
|
|
7
|
+
from tesorotools.artists.barh_plot import plot_barh_charts_from_flash
|
|
8
|
+
from tesorotools.artists.line_plot import plot_line_charts
|
|
9
|
+
from tesorotools.artists.table import generate_tables_from_flash
|
|
10
|
+
from tesorotools.artists.type_curve import plot_type_curves
|
|
11
|
+
from tesorotools.dependencies.resolution import (
|
|
12
|
+
compute_derivate_series,
|
|
13
|
+
concat_derivate_series,
|
|
14
|
+
resolve_series,
|
|
15
|
+
)
|
|
16
|
+
from tesorotools.offsets.offsets import trim
|
|
17
|
+
from tesorotools.utils.config import read_config
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
def index_replace(old: str, new: str, index: pd.MultiIndex) -> pd.MultiIndex:
    """Return a copy of ``index`` with ``old`` replaced by ``new`` in its labels.

    The replacement is a plain ``str.replace`` (substring match) applied to
    every string label of every level; non-string labels are kept untouched.
    Level names are carried over to the result, which also lets an empty
    index round-trip instead of failing to infer the number of levels.

    Args:
        old: Substring to look for in string labels.
        new: Replacement text.
        index: Index whose entries are tuples of labels.

    Returns:
        A new ``MultiIndex`` with the same entries order and level names.
    """

    def _swap(label):
        return label.replace(old, new) if isinstance(label, str) else label

    relabelled = [tuple(_swap(label) for label in entry) for entry in index]
    return pd.MultiIndex.from_tuples(relabelled, names=index.names)
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
def cheap_convert(old_file: Path) -> pd.DataFrame:
    """Load a feather file in the old layout and relabel it to the new one.

    Rows whose second index level is one of ``day``/``mtd``/``week``/``year``
    are kept and their labels renamed (``day``->``bday``, ``week``->``ftd``,
    ``year``->``ytd``, ``roll_var``->``roll_std``). Rows labelled
    ``("no", "absolute", "value")`` are kept as well, with ``absolute``
    renamed to ``no``. Both parts are concatenated, offsets first.
    """
    source = pd.read_feather(old_file)
    everything = slice(None)

    levels = source.loc[(everything, "no", "absolute", "value"), :].copy()
    offsets = source.loc[
        (everything, ["day", "mtd", "week", "year"], everything, everything),
        :,
    ]

    # Applied one after another, in this order, as substring replacements.
    renames = (
        ("day", "bday"),
        ("week", "ftd"),
        ("year", "ytd"),
        ("roll_var", "roll_std"),
    )
    for stale, fresh in renames:
        offsets.index = index_replace(stale, fresh, offsets.index)

    levels.index = index_replace("absolute", "no", levels.index)

    return pd.concat([offsets, levels])
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
if __name__ == "__main__":
    # Set to True to rebuild "derivates.feather" from the old flash file
    # before rendering; otherwise the existing file is read as-is.
    preprocess = False
    examples_dir = Path("examples")
    derivates_file = "derivates.feather"

    barh_config_dicts = read_config(examples_dir / "barh_plots.yaml")
    line_config_dicts = read_config(examples_dir / "line_plots.yaml")
    type_config_dicts = read_config(examples_dir / "type_curves.yaml")
    table_config_dicts = read_config(examples_dir / "tables.yaml")

    if preprocess:
        old_file: Path = Path("debug") / "flash.feather"
        dependencies_cfg = read_config(examples_dir / "dependencies.yaml")
        flash: pd.DataFrame = cheap_convert(old_file)

        # Split the series used by the documents into independent (raw)
        # and dependent (derived) ones.
        document_configs = [
            barh_config_dicts,
            line_config_dicts,
            type_config_dicts,
            table_config_dicts,
        ]
        resolved_dict = resolve_series(document_configs, dependencies_cfg)

        independent_full_df = flash.loc[:, list(resolved_dict["independent"])]
        independent_trimmed_df = trim(independent_full_df)
        dependent_trimmed_df = compute_derivate_series(
            resolved_dict["dependent"], independent_trimmed_df
        )

        offsets_config = read_config(examples_dir / "offsets.yaml")
        full_df = concat_derivate_series(
            independent_full_df,
            dependent_trimmed_df,
            offsets_config,
            force_trim=True,
        )
        full_df.to_feather(derivates_file)

    # Rendering always starts from the file on disk.
    full_df = pd.read_feather(derivates_file)
    trimmed_df = trim(full_df)
    plot_barh_charts_from_flash(full_df, barh_config_dicts)
    plot_line_charts(trimmed_df, line_config_dicts)
    plot_type_curves(trimmed_df, type_config_dicts)
    generate_tables_from_flash(full_df, table_config_dicts)
|
|
File without changes
|
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
import numpy as np
|
|
2
|
+
import pandas as pd
|
|
3
|
+
|
|
4
|
+
from tesorotools.utils.config import read_config
|
|
5
|
+
from tesorotools.utils.globals import DEBUG, EXAMPLES
|
|
6
|
+
|
|
7
|
+
# Mock catalog: the "debug" section of examples/data.yaml, loaded at import time.
# get_series below only uses it to check that a requested id is known.
CATALOG: dict[str, str] = read_config(EXAMPLES / "data.yaml")["debug"]
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
def get_series(start: str, end: str, series: list[str]) -> pd.DataFrame:
    """Return mock data (standard-normal noise) for the requested series.

    Args:
        start: First date of the range, as accepted by ``pd.date_range``.
        end: Last date of the range, as accepted by ``pd.date_range``.
        series: Agnostic series ids; each must be a key of ``CATALOG``.

    Returns:
        A frame indexed by the daily date range with one column per id.

    Raises:
        KeyError: If an id is missing from the debug catalog.
    """
    # series is a list of agnostic ids; each one must be known to the catalog
    for agnostic_id in series:
        if CATALOG.get(agnostic_id) is None:
            raise KeyError(f"{agnostic_id} not found in debug configuration")

    dates: pd.DatetimeIndex = pd.date_range(start=start, end=end).sort_values()
    noise = np.random.randn(len(dates), len(series))
    return pd.DataFrame(data=noise, index=dates, columns=series)
|
|
@@ -0,0 +1,117 @@
|
|
|
1
|
+
import time
|
|
2
|
+
from pathlib import Path
|
|
3
|
+
|
|
4
|
+
import eikon as ek
|
|
5
|
+
import pandas as pd
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
def get_series(
|
|
9
|
+
api_key: str,
|
|
10
|
+
series_id_list: list[str],
|
|
11
|
+
start_date: str,
|
|
12
|
+
end_date: str,
|
|
13
|
+
freq: str = "B",
|
|
14
|
+
fields: list[str] | None = None,
|
|
15
|
+
cooldown: int = 60,
|
|
16
|
+
datapoint_limit: int = 2_000,
|
|
17
|
+
cache_path: Path | None = None,
|
|
18
|
+
) -> pd.DataFrame:
|
|
19
|
+
"""Downloads data from eikon given that tou have a valid API key"""
|
|
20
|
+
ek.set_app_key(api_key)
|
|
21
|
+
fields = (
|
|
22
|
+
["TIMESTAMP", "CLOSE", "CF_LAST", "CF_YIELD"]
|
|
23
|
+
if fields is None
|
|
24
|
+
else fields
|
|
25
|
+
)
|
|
26
|
+
|
|
27
|
+
dates_list: list[str] = list(
|
|
28
|
+
pd.date_range(start=start_date, end=end_date, freq=freq).astype("str")
|
|
29
|
+
)
|
|
30
|
+
partial_data: list[pd.DataFrame] = []
|
|
31
|
+
download_step: int = datapoint_limit // (
|
|
32
|
+
len(series_id_list) * (len(fields) - 1)
|
|
33
|
+
)
|
|
34
|
+
downloaded_dates: int = 0
|
|
35
|
+
while downloaded_dates < len(dates_list):
|
|
36
|
+
dates_to_download = dates_list[
|
|
37
|
+
downloaded_dates : downloaded_dates + download_step
|
|
38
|
+
]
|
|
39
|
+
start = dates_to_download[0]
|
|
40
|
+
end = dates_to_download[-1]
|
|
41
|
+
cache_file_path: Path = (
|
|
42
|
+
cache_path / f"from_{start}_to_{end}.csv"
|
|
43
|
+
if cache_path is not None
|
|
44
|
+
else None
|
|
45
|
+
)
|
|
46
|
+
if (cache_file_path is None) or (not cache_file_path.exists()):
|
|
47
|
+
data: pd.DataFrame = block_download(
|
|
48
|
+
series_id_list,
|
|
49
|
+
start_date=start,
|
|
50
|
+
end_date=end,
|
|
51
|
+
freq=freq,
|
|
52
|
+
fields=fields,
|
|
53
|
+
cooldown=cooldown,
|
|
54
|
+
file_path=cache_file_path,
|
|
55
|
+
debug=True,
|
|
56
|
+
)
|
|
57
|
+
if cache_file_path is None:
|
|
58
|
+
partial_data.append(data)
|
|
59
|
+
if downloaded_dates + download_step < len(dates_list):
|
|
60
|
+
print(f"Waiting {cooldown} seconds for Eikon to cool down...")
|
|
61
|
+
time.sleep(cooldown)
|
|
62
|
+
downloaded_dates += download_step
|
|
63
|
+
data = concat_partial_data(cache_path, partial_data)
|
|
64
|
+
return data
|
|
65
|
+
|
|
66
|
+
|
|
67
|
+
def block_download(
    series_id_list: list[str],
    start_date: str,
    end_date: str,
    freq: str = "B",
    fields: list[str] | None = None,
    cooldown: int = 60,
    file_path: Path | None = None,
    debug: bool = False,
):
    """Download one block of time series from Eikon, retrying on Eikon errors.

    Args:
        series_id_list: Instrument ids passed as ``rics``.
        start_date: First date of the block.
        end_date: Last date of the block.
        freq: ``"B"`` is translated to the ``"daily"`` interval; any other
            value is passed through unchanged.
        fields: Fields to request.
        cooldown: Seconds to sleep after a failed attempt.
        file_path: If given, the cleaned frame is also written there as CSV.
        debug: Forwarded to ``ek.get_timeseries``.

    Returns:
        The downloaded frame with duplicate rows dropped and index sorted.

    NOTE(review): the retry loop has no upper bound, so a persistent
    ``EikonError`` blocks forever.
    """
    interval = freq if freq != "B" else "daily"

    while True:
        try:
            data: pd.DataFrame | None = ek.get_timeseries(
                rics=series_id_list,
                start_date=start_date,
                end_date=end_date,
                fields=fields,
                interval=interval,
                debug=debug,
            )
            if data is None:
                # Treated like any other Eikon failure: wait and retry.
                raise ek.EikonError(
                    code=404, message="Service temporarily unavailable"
                )
            # NOTE(review): drop_duplicates compares row values, not dates, so
            # distinct dates with identical values are collapsed - confirm
            # this is intended.
            data = data.drop_duplicates().sort_index()
            if len(data.columns) == 1:
                data.columns = series_id_list
            if file_path is not None:
                data.to_csv(file_path)
        except ek.EikonError as e:
            print(f"Eikon error: {e}")
            print("This is probably not our fault")
            print(f"Waiting {cooldown} seconds for Eikon to cool down...")
            time.sleep(cooldown)
        else:
            return data
|
|
106
|
+
|
|
107
|
+
|
|
108
|
+
def concat_partial_data(
|
|
109
|
+
cache_path: Path, partial_data: list[pd.DataFrame]
|
|
110
|
+
) -> pd.DataFrame:
|
|
111
|
+
dfs = partial_data
|
|
112
|
+
if cache_path is not None:
|
|
113
|
+
for chunk in cache_path.iterdir():
|
|
114
|
+
df = pd.read_csv(chunk, index_col="Date")
|
|
115
|
+
dfs.append(df)
|
|
116
|
+
full = pd.concat(dfs)
|
|
117
|
+
return full
|
|
File without changes
|
|
@@ -0,0 +1,70 @@
|
|
|
1
|
+
import json
|
|
2
|
+
|
|
3
|
+
import pandas as pd
|
|
4
|
+
from sqlalchemy import create_engine
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
def get_connection_string(
    engine: str,
    driver: str,
    username: str,
    password: str,
    host: str,
    database: str,
) -> str:
    """Build a database URL of the form ``engine+driver://user:pass@host/db``.

    The user name and password are percent-encoded so that characters such as
    ``@``, ``:`` or ``/`` cannot be mistaken for URL delimiters. Credentials
    without special characters produce exactly the same string as before.

    Args:
        engine: Database backend name (e.g. ``postgresql``).
        driver: DBAPI driver name.
        username: Plain (not pre-encoded) user name.
        password: Plain (not pre-encoded) password.
        host: Host, optionally with ``:port``.
        database: Database name.

    Returns:
        The connection URL.
    """
    from urllib.parse import quote

    safe_user = quote(username, safe="")
    safe_password = quote(password, safe="")
    return f"{engine}+{driver}://{safe_user}:{safe_password}@{host}/{database}"
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
def flash_to_database_format(data: pd.DataFrame) -> pd.DataFrame:
    """Transform data in *flash* format into data in *database* format.

    The wide frame is stacked into one row per (index entry, column) pair.
    The ``offset``, ``difference_type`` and ``stat`` index levels are folded
    into a single JSON string column named ``dimensions``.

    Args:
        data: Wide frame whose index has (at least) levels named ``offset``,
            ``difference_type`` and ``stat``; each column is one series.
            The frame is not modified.

    Returns:
        A long frame with the remaining index levels, ``name`` (the original
        column label), ``value``, ``dimensions`` and an all-missing
        ``value_meta`` column.
    """
    dimension_columns = ["offset", "difference_type", "stat"]

    # rename_axis returns a new frame, so the caller's columns keep their name.
    named = data.rename_axis(columns="name")
    database_data = named.stack().to_frame(name="value").reset_index()

    # Built column-wise: much faster than a row-wise apply and also valid
    # when the frame has no rows.
    database_data["dimensions"] = [
        json.dumps(dict(zip(dimension_columns, values)))
        for values in zip(*(database_data[col] for col in dimension_columns))
    ]
    database_data = database_data.drop(columns=dimension_columns)
    database_data["value_meta"] = pd.NA
    return database_data
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
def database_to_flash_format():
    """Placeholder: not implemented yet, does nothing and returns ``None``.

    NOTE(review): presumably meant as the inverse of
    ``flash_to_database_format`` - confirm before implementing.
    """
    return None
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
def push_to_database(data: pd.DataFrame, conn_string: str, table: str) -> None:
    """Append ``data`` to ``table`` in the database behind ``conn_string``.

    Args:
        data: Rows to insert; the frame's index is not written.
        conn_string: Database URL used to create the engine.
        table: Name of the destination table; rows are appended if it exists.

    Rows are sent in chunks of 1000. The engine created here is disposed
    before returning (also on error) so its pooled connections are released.
    """
    engine = create_engine(url=conn_string)
    try:
        data.to_sql(
            name=table,
            con=engine,
            if_exists="append",
            chunksize=1000,
            index=False,
        )
    finally:
        engine.dispose()
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
# this may be just as a datasource (maybe a common protocol?)
|
|
56
|
+
# check the api long video for further reference
|
|
57
|
+
def pull_from_database(
    conn_string: str, start: str, end: str, series: list[str]
):
    """Read the plain values of ``series`` between ``start`` and ``end``.

    The previous version called ``pd.read_sql_query`` without a connection
    (which always raised ``TypeError``), ignored its arguments and discarded
    the result. This version runs the same query shape with the arguments
    bound as parameters and returns the rows.

    Args:
        conn_string: Database URL used to create the engine.
        start: First date (inclusive) of the range.
        end: Last date (inclusive) of the range.
        series: Values of the ``name`` column to select.

    Returns:
        A frame with columns ``date``, ``name`` and ``value`` ordered by date
        and name, restricted to rows whose ``dimensions`` contain
        ``{"offset": "no", "stat": "value"}``.

    NOTE(review): the ``@>`` containment operator assumes a PostgreSQL JSONB
    ``dimensions`` column - confirm against the schema.
    """
    from sqlalchemy import bindparam, text

    query = text(
        """
        SELECT date, name, value
        FROM measurements
        WHERE
            date BETWEEN :start AND :end AND name IN :series AND
            dimensions @> '{"offset":"no", "stat":"value"}'
        ORDER BY date ASC, name ASC
        """
    ).bindparams(bindparam("series", expanding=True))

    engine = create_engine(url=conn_string)
    try:
        with engine.connect() as connection:
            return pd.read_sql_query(
                query,
                con=connection,
                params={"start": start, "end": end, "series": list(series)},
            )
    finally:
        engine.dispose()
|
|
File without changes
|
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
from collections.abc import Callable
|
|
2
|
+
from typing import Self
|
|
3
|
+
|
|
4
|
+
import pandas as pd
|
|
5
|
+
|
|
6
|
+
from . import functions
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
class Node:
|
|
10
|
+
def __init__(self, name: str) -> None:
|
|
11
|
+
self._name: str = name
|
|
12
|
+
self._edges: list[Self] = []
|
|
13
|
+
|
|
14
|
+
def add_edge(self, node: Self) -> None:
|
|
15
|
+
self._edges.append(node)
|
|
16
|
+
|
|
17
|
+
def build_edges(self, *, dependencies: list[str], function: str) -> None:
|
|
18
|
+
self._resolving_function: Callable[..., float | pd.Series] = getattr(
|
|
19
|
+
functions, function
|
|
20
|
+
)
|
|
21
|
+
for d in dependencies:
|
|
22
|
+
self.add_edge(Node(d))
|
|
23
|
+
|
|
24
|
+
@property
|
|
25
|
+
def name(self) -> str:
|
|
26
|
+
return self._name
|
|
27
|
+
|
|
28
|
+
@property
|
|
29
|
+
def edges(self) -> list[Self]:
|
|
30
|
+
return self._edges
|
|
31
|
+
|
|
32
|
+
@property
|
|
33
|
+
def resolving_function(self) -> Callable[..., float | pd.Series]:
|
|
34
|
+
return self._resolving_function
|
|
@@ -0,0 +1,118 @@
|
|
|
1
|
+
from typing import Any
|
|
2
|
+
|
|
3
|
+
import pandas as pd
|
|
4
|
+
|
|
5
|
+
from tesorotools.offsets.offsets import process_raw_data, trim
|
|
6
|
+
|
|
7
|
+
from .node import Node
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
def resolve(
    start: Node,
    resolved: list[Node],
    unresolved: list[str],
    independent: set[str],
    dependencies_cfg: dict[str, Any],
):
    """Depth-first resolution of ``start`` and everything it depends on.

    Args:
        start: Node to resolve.
        resolved: Output list of dependent nodes; every node is appended
            after all of its dependencies, and at most once per name.
        unresolved: Names currently being resolved (used to detect cycles).
        independent: Output set with the names that have no entry in
            ``dependencies_cfg``.
        dependencies_cfg: Mapping of series name to the keyword arguments of
            ``Node.build_edges``.

    Raises:
        ValueError: If a circular dependency is found.
    """
    if start.name not in dependencies_cfg:
        independent.add(start.name)
        return

    # Compare by name: edges are freshly built Node objects, so an identity
    # check would never recognise a series that was already resolved.
    if any(done.name == start.name for done in resolved):
        return

    config: dict[str, Any] = dependencies_cfg[start.name]
    start.build_edges(**config)

    unresolved.append(start.name)
    for node in start.edges:
        if node.name in unresolved:
            raise ValueError(
                f"circular dependency: {start.name} <-> {node.name}"
            )
        resolve(node, resolved, unresolved, independent, dependencies_cfg)
    resolved.append(start)
    unresolved.remove(start.name)
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
def collect_document_series(
    config_dicts: list[dict[str, Any]], find: str = "series"
) -> list[str]:
    """Collect the distinct series names used by several configurations.

    Args:
        config_dicts: Configuration dictionaries to scan with
            ``collect_series``.
        find: Key whose mapping holds the series names.

    Returns:
        The distinct names, sorted by their string form so the result is the
        same on every run (the order of a set is not).
    """
    series: set[str] = set()
    for config_dict in config_dicts:
        series |= collect_series(config_dict, find)
    return sorted(series, key=str)
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
def resolve_series(
    config_dicts: list[dict[str, Any]], dependencies_cfg: dict[str, Any]
):
    """Classify every series used by the documents and order the derived ones.

    Args:
        config_dicts: Document configurations scanned for series names.
        dependencies_cfg: Mapping of derived series name to its
            ``Node.build_edges`` keyword arguments.

    Returns:
        A dict with:
        * ``"independent"``: set of names without an entry in
          ``dependencies_cfg``;
        * ``"dependent"``: list of nodes, each appearing once and after the
          nodes it depends on.

    Raises:
        ValueError: If ``resolve`` finds a circular dependency.
    """
    series: list[str] = collect_document_series(config_dicts)
    independent_nodes: set[str] = set()
    resolved: list[Node] = []

    for name in series:
        # A series may already have been resolved as a dependency of an
        # earlier one; resolving it again would duplicate it.
        if any(done.name == name for done in resolved):
            continue
        resolve(
            start=Node(name=name),
            resolved=resolved,
            unresolved=[],
            independent=independent_nodes,
            dependencies_cfg=dependencies_cfg,
        )

    # Keep the first occurrence of every name; the first occurrence always
    # comes after the first occurrence of each of its dependencies.
    unique: dict[str, Node] = {}
    for node in resolved:
        unique.setdefault(node.name, node)

    return {
        "independent": independent_nodes,
        "dependent": list(unique.values()),
    }
|
|
69
|
+
|
|
70
|
+
|
|
71
|
+
def compute_derivate_series(
    dependent_nodes: list[Node], trimmed_data: pd.DataFrame
):
    """Compute every derived series from its dependencies.

    Args:
        dependent_nodes: Nodes to compute. A node that depends on another
            derived series must come after it in the list.
        trimmed_data: Frame holding the independent series as columns.

    Returns:
        A frame with one column per derived series, named after its node.
        When there is nothing to compute, an empty frame sharing the index of
        ``trimmed_data`` is returned.

    Raises:
        KeyError: If a dependency is neither a column of ``trimmed_data`` nor
            a previously computed derived series.
    """
    computed: dict[str, pd.Series] = {}
    for node in dependent_nodes:
        # Columns of the input take precedence; derived series computed
        # earlier in the loop cover dependencies on other derived series.
        arguments: list[pd.Series] = [
            trimmed_data[dep.name]
            if dep.name in trimmed_data.columns
            else computed[dep.name]
            for dep in node.edges
        ]
        inferred: pd.Series = node.resolving_function(*arguments)
        # rename returns a new object, so an input returned unchanged by the
        # resolving function is not relabelled in place.
        computed[node.name] = inferred.rename(node.name)

    if not computed:
        return pd.DataFrame(index=trimmed_data.index)
    return pd.concat(list(computed.values()), axis=1)
|
|
84
|
+
|
|
85
|
+
|
|
86
|
+
def concat_derivate_series(
    independent_full_df: pd.DataFrame,
    derivate_trimmed_df: pd.DataFrame,
    offsets_config: dict[str, Any],
    force_trim: bool = False,
) -> pd.DataFrame:
    """Join the independent series with the processed derived series.

    Args:
        independent_full_df: Independent series, used as given unless
            ``force_trim`` is set.
        derivate_trimmed_df: Derived series, always passed through
            ``process_raw_data``.
        offsets_config: Keyword arguments for ``process_raw_data``.
        force_trim: If true, the independent frame is trimmed and processed
            again with ``offsets_config`` before joining.

    Returns:
        Both frames concatenated column-wise, independent series first.
    """
    # useful when adding emergency fixed offsets
    independent_part: pd.DataFrame = (
        process_raw_data(trim(independent_full_df), **offsets_config)
        if force_trim
        else independent_full_df
    )
    derivate_part: pd.DataFrame = process_raw_data(
        derivate_trimmed_df, **offsets_config
    )
    return pd.concat([independent_part, derivate_part], axis=1)
|
|
106
|
+
|
|
107
|
+
|
|
108
|
+
def collect_series(
    config_dict: dict[str, Any], find: str = "series"
) -> set[str]:
    """Gather the keys of every ``find`` mapping nested in ``config_dict``.

    Dictionaries are explored at any depth, except for the value stored under
    ``find`` itself: its keys are collected but it is not explored further.

    Args:
        config_dict: Possibly nested configuration dictionary.
        find: Key whose mapping holds the series names.

    Returns:
        The set of collected keys.
    """
    found: set[str] = set()
    pending: list[dict[str, Any]] = [config_dict]
    while pending:
        current = pending.pop()
        if find in current:
            found.update(current[find].keys())
        pending.extend(
            value
            for key, value in current.items()
            if key != find and isinstance(value, dict)
        )
    return found
|