tesorotools-python 0.0.18__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- tesorotools/__init__.py +6 -0
- tesorotools/artists/__init__.py +5 -0
- tesorotools/artists/barh_plot.py +310 -0
- tesorotools/artists/line_plot.py +245 -0
- tesorotools/artists/table.py +200 -0
- tesorotools/artists/type_curve.py +218 -0
- tesorotools/assets/README.md +5 -0
- tesorotools/assets/fonts/CabinetGrotesk-Black.otf +0 -0
- tesorotools/assets/fonts/CabinetGrotesk-Bold.otf +0 -0
- tesorotools/assets/fonts/CabinetGrotesk-Extrabold.otf +0 -0
- tesorotools/assets/fonts/CabinetGrotesk-Extralight.otf +0 -0
- tesorotools/assets/fonts/CabinetGrotesk-Light.otf +0 -0
- tesorotools/assets/fonts/CabinetGrotesk-Medium.otf +0 -0
- tesorotools/assets/fonts/CabinetGrotesk-Regular.otf +0 -0
- tesorotools/assets/fonts/CabinetGrotesk-Thin.otf +0 -0
- tesorotools/assets/fonts/README.md +1 -0
- tesorotools/assets/plots.yaml +43 -0
- tesorotools/assets/tesoro.mplstyle +21 -0
- tesorotools/convert.py +99 -0
- tesorotools/data_sources/README.md +14 -0
- tesorotools/data_sources/__init__.py +0 -0
- tesorotools/data_sources/debug.py +26 -0
- tesorotools/data_sources/lseg.py +117 -0
- tesorotools/database/__init__.py +0 -0
- tesorotools/database/push.py +70 -0
- tesorotools/dependencies/__init__.py +0 -0
- tesorotools/dependencies/functions.py +11 -0
- tesorotools/dependencies/node.py +34 -0
- tesorotools/dependencies/resolution.py +118 -0
- tesorotools/main.py +37 -0
- tesorotools/offsets/__init__.py +0 -0
- tesorotools/offsets/offsets.py +439 -0
- tesorotools/offsets/outliers.py +15 -0
- tesorotools/render/__init__.py +17 -0
- tesorotools/render/content/__init__.py +0 -0
- tesorotools/render/content/content.py +17 -0
- tesorotools/render/content/images.py +147 -0
- tesorotools/render/content/section.py +53 -0
- tesorotools/render/content/subtitle.py +53 -0
- tesorotools/render/content/table.py +308 -0
- tesorotools/render/content/text.py +23 -0
- tesorotools/render/content/title.py +40 -0
- tesorotools/render/report.py +31 -0
- tesorotools/utils/__init__.py +0 -0
- tesorotools/utils/config.py +35 -0
- tesorotools/utils/globals.py +14 -0
- tesorotools/utils/matplotlib.py +38 -0
- tesorotools/utils/series.py +40 -0
- tesorotools/utils/shortcuts.py +32 -0
- tesorotools/utils/template.py +126 -0
- tesorotools_python-0.0.18.dist-info/METADATA +16 -0
- tesorotools_python-0.0.18.dist-info/RECORD +53 -0
- tesorotools_python-0.0.18.dist-info/WHEEL +4 -0
|
@@ -0,0 +1,439 @@
|
|
|
1
|
+
from collections.abc import Callable
|
|
2
|
+
from datetime import datetime
|
|
3
|
+
from enum import Enum
|
|
4
|
+
from typing import Any
|
|
5
|
+
|
|
6
|
+
import numpy as np
|
|
7
|
+
import pandas as pd
|
|
8
|
+
|
|
9
|
+
# stats and stat functions
|
|
10
|
+
type StatFunction = Callable[[pd.Series], np.float64]
|
|
11
|
+
type StatRollingFunction = Callable[..., float]
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
class Stat(Enum):
    """Statistics that can be attached to every observation.

    Each member exposes two callables: ``stat_function`` reduces a whole
    window of data to the statistic, while ``update_function`` derives the
    statistic from keyword arguments describing an incremental step.
    """

    VALUE = "value"
    ROLL_AVG = "roll_avg"
    ROLL_STD = "roll_std"

    @property
    def stat_function(self) -> StatFunction:
        """Return the callable that reduces a window to this statistic."""
        if self == Stat.VALUE:
            # the "value" of a window is its last element
            return lambda x: x.iloc[-1]
        if self == Stat.ROLL_AVG:
            return lambda x: np.mean(x)
        if self == Stat.ROLL_STD:
            # population standard deviation (ddof=0)
            return lambda x: np.std(x, ddof=0)

    @property
    def update_function(self) -> StatRollingFunction:
        """Return the keyword-only callable used for incremental updates."""
        if self == Stat.VALUE:
            # the updated value is just the newest observation
            return lambda **kwargs: kwargs["newest_point"]
        if self == Stat.ROLL_AVG:
            return lambda **kwargs: _update_rolling_avg(**kwargs)
        if self == Stat.ROLL_STD:
            return lambda **kwargs: _update_rolling_std(**kwargs)
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
# offsets and offset functions
|
|
41
|
+
type OffsetFunction = Callable[[pd.DatetimeIndex], pd.DatetimeIndex]
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
def _fixed_date_offset_function(reference_date: datetime) -> OffsetFunction:
    """Build an offset function that maps every date to ``reference_date``.

    The returned callable takes an index of dates and returns a
    ``pd.DatetimeIndex`` of the same length filled with ``reference_date``.
    """

    def constant_offset(dates):
        repeated = [reference_date for _ in range(len(dates))]
        return pd.DatetimeIndex(repeated)

    return constant_offset
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
class FloatingOffset(Enum):
    """Offsets whose reference date moves with the date being evaluated.

    ``NO`` marks the absence of an offset and has no offset function
    (``offset_function`` returns ``None`` for it).
    """

    NO = "no"
    BDAY = "bday"
    FTD = "ftd"
    MTD = "mtd"
    YTD = "ytd"

    @property
    def offset_function(self) -> OffsetFunction:
        """Return the callable mapping dates to their reference dates."""
        one_business_day = pd.tseries.offsets.BDay(1)

        if self == FloatingOffset.BDAY:
            # previous business day
            return lambda dates: dates - one_business_day

        if self == FloatingOffset.FTD:
            # roll back with a weekly offset anchored on weekday 4
            return lambda dates: dates - pd.tseries.offsets.Week(weekday=4)

        if self == FloatingOffset.MTD:

            def month_to_date(dates):
                # month begin offset first, then one more business day back
                month_start = dates - pd.offsets.MonthBegin()
                return month_start - pd.tseries.offsets.BDay(1)

            return month_to_date

        if self == FloatingOffset.YTD:

            def year_to_date(dates):
                # year begin offset first, then one more business day back
                year_start = dates - pd.offsets.YearBegin()
                return year_start - pd.tseries.offsets.BDay(1)

            return year_to_date
|
|
74
|
+
|
|
75
|
+
|
|
76
|
+
# differences and difference functions
|
|
77
|
+
type DifferenceFunction = Callable[[pd.Series, pd.Series], pd.Series]
|
|
78
|
+
|
|
79
|
+
|
|
80
|
+
class Difference(Enum):
    """Ways of comparing a series against its offset counterpart.

    ``NO`` marks the absence of a comparison and has no difference
    function (``difference_function`` returns ``None`` for it).
    """

    NO = "no"
    ABS = "absolute"
    REL = "relative"

    @property
    def difference_function(self) -> DifferenceFunction:
        """Return the callable combining the original and offset series."""
        if self == Difference.ABS:

            def absolute(original, offset):
                return original - offset

            return absolute

        if self == Difference.REL:

            def relative(original, offset):
                # change expressed as a fraction of the offset value
                return (original - offset) / offset

            return relative
|
|
92
|
+
|
|
93
|
+
|
|
94
|
+
def process_raw_data(raw_data: pd.DataFrame, **config) -> pd.DataFrame:
    """Enrich every column of ``raw_data`` with offsets, differences and stats.

    Args:
        raw_data: frame whose index can be parsed by ``pd.to_datetime``.
            Its index is replaced in place by the parsed dates (the caller's
            object is modified), then a sorted copy is processed.
        **config: must provide the keys ``"window"``, ``"offsets"``,
            ``"differences"`` and ``"stats"``.

    Returns:
        The result of applying ``_process_raw_data`` to every column.
    """
    # preprocess index
    raw_data.index = pd.to_datetime(raw_data.index)
    raw_data = raw_data.sort_index()
    # Take the dates *after* sorting: the stats are written positionally
    # into templates built from ``dates``, so both must share one order.
    dates: pd.DatetimeIndex = raw_data.index

    # parse config and compute common values
    window: int = min(config["window"], len(raw_data.index))
    offsets_dict = _parse_offsets(config["offsets"], dates)
    differences_dict = _parse_differences(config["differences"])
    stats_dict = _parse_stats(config["stats"])

    names = ["date", "offset", "difference_type"]
    columns = stats_dict.keys()

    offset_df, raw_df = _result_templates(
        dates,
        offsets_dict.keys(),
        differences_dict.keys(),
        names,
        columns,
        columns_name="stat",
    )

    # computing enriched data
    enriched_data: pd.DataFrame = raw_data.apply(
        lambda x: _process_raw_data(
            x,
            offsets_dict,
            differences_dict,
            stats_dict,
            raw_df=raw_df,
            offset_df=offset_df,
            window=window,
        )
    )
    return enriched_data
|
|
131
|
+
|
|
132
|
+
|
|
133
|
+
def _parse_offsets(
    offsets_cfg: list[str | datetime], dates: pd.DatetimeIndex
) -> dict[str, pd.DatetimeIndex]:
    """Turn the configured offsets into reference dates for ``dates``.

    Entries that are values (or members) of ``FloatingOffset`` use the
    matching floating offset function; any other entry is treated as a fixed
    reference date. The "no offset" entry is skipped because it has no
    offset function.

    Args:
        offsets_cfg: configured offsets (floating offset names or dates).
        dates: dates for which the reference dates are computed.

    Returns:
        Mapping from offset label to the reference dates. Floating offsets
        are keyed by the config entry itself, fixed dates by ``str(entry)``.
    """
    offsets: dict[str | datetime, OffsetFunction] = {}
    for offset_cfg in offsets_cfg:
        if offset_cfg in FloatingOffset:
            offset_enum = FloatingOffset(offset_cfg)
            # Compare the resolved member, not the raw config string: an
            # identity check of a ``str`` against the enum never matches.
            if offset_enum is FloatingOffset.NO:
                continue
            offsets[offset_cfg] = offset_enum.offset_function
        else:
            offsets[str(offset_cfg)] = _fixed_date_offset_function(offset_cfg)
    offsets_dict: dict[str, pd.DatetimeIndex] = {
        label: offset_function(dates)
        for label, offset_function in offsets.items()
    }
    return offsets_dict
|
|
150
|
+
|
|
151
|
+
|
|
152
|
+
def _parse_differences(
    differences_cfg: list[str],
) -> dict[str, DifferenceFunction]:
    """Map every configured difference to its difference function.

    The "no difference" entry is skipped because it has no difference
    function. Entries that are not valid ``Difference`` values raise
    ``ValueError`` (from the enum lookup).

    Args:
        differences_cfg: configured difference names.

    Returns:
        Mapping from the config entry to its difference function.
    """
    differences: dict[str, DifferenceFunction] = {}
    for diff_str in differences_cfg:
        diff_enum = Difference(diff_str)
        # Compare the resolved member, not the raw config string: an
        # identity check of a ``str`` against the enum never matches.
        if diff_enum is Difference.NO:
            continue
        differences[diff_str] = diff_enum.difference_function
    return differences
|
|
161
|
+
|
|
162
|
+
|
|
163
|
+
def _parse_stats(
    stats_cfg: list[str], update: bool = False
) -> dict[str, StatFunction]:
    """Map every configured stat name to its callable.

    With ``update`` set, the incremental ``update_function`` of each stat is
    used; otherwise the window-based ``stat_function``. The plain value stat
    is always present in the result, whether configured or not.
    """
    accessor = "update_function" if update else "stat_function"
    stats: dict[str, StatFunction] = {}
    for stat_name in stats_cfg:
        stats[stat_name] = getattr(Stat(stat_name), accessor)
    # the raw value is needed downstream even if it was not requested
    stats[Stat.VALUE.value] = getattr(Stat.VALUE, accessor)
    return stats
|
|
177
|
+
|
|
178
|
+
|
|
179
|
+
def _result_templates(
    dates: pd.DatetimeIndex,
    offsets: list[str],
    differences: list[str],
    names: list[str],
    columns: list[str],
    columns_name: str,
) -> tuple[pd.DataFrame, pd.DataFrame]:
    """Create the two empty frames that receive the computed stats.

    Both frames are indexed by the product (date, offset, difference) with
    level names ``names`` and have ``columns`` as columns, named
    ``columns_name``.

    Returns:
        ``(offset_df, raw_df)``: the first covers every configured offset
        and difference, the second only the "no offset" / "no difference"
        combination.
    """

    def empty_frame(offset_labels, difference_labels) -> pd.DataFrame:
        index = pd.MultiIndex.from_product(
            [dates, offset_labels, difference_labels],
            names=names,
        )
        frame = pd.DataFrame(index=index, columns=columns)
        frame.columns.name = columns_name
        return frame

    offset_df = empty_frame(offsets, differences)
    raw_df = empty_frame([FloatingOffset.NO.value], [Difference.NO.value])
    return offset_df, raw_df
|
|
218
|
+
|
|
219
|
+
|
|
220
|
+
def _process_raw_data(
    column: pd.Series,
    offsets_dict: dict[str, pd.DatetimeIndex],
    differences_dict: dict[str, DifferenceFunction],
    stats_dict: dict[str, StatFunction],
    *,
    raw_df: pd.DataFrame,
    offset_df: pd.DataFrame,
    window: int,
) -> pd.Series:
    """Compute the rolling stats of one column, with and without offsets.

    Args:
        column: the series to process.
        offsets_dict: offset label -> reference dates, one per row of
            ``column`` (they are re-labelled with ``column.index``).
        differences_dict: difference label -> difference function.
        stats_dict: stat label -> function applied to each rolling window.
        raw_df: template filled in place with the stats of ``column``.
        offset_df: template filled in place with the stats of every
            (offset, difference) combination.
        window: rolling window size.

    Returns:
        The stacked offset stats followed by the stacked raw stats, coerced
        to numeric values.
    """

    stat_functions = list(stats_dict.values())

    # for every non trivial offset and difference apply all the stats
    for offset_str, offset_idx in offsets_dict.items():
        for diff_str, diff_func in differences_dict.items():
            # value of the column at each reference date, re-labelled with
            # the original dates so both series align element-wise
            offset_data = column.reindex(offset_idx)
            offset_data.index = column.index
            diff_data = diff_func(column, offset_data)
            stat_data: pd.DataFrame = diff_data.rolling(
                window=window, min_periods=0
            ).agg(stat_functions)
            # positional write: relies on the template rows for this
            # (offset, difference) pair being in the same order as the data
            offset_df.loc[(slice(None), offset_str, diff_str), :] = (
                stat_data.values
            )
    offset_series: pd.Series = offset_df.stack()

    # apply the stats to the original data
    stat_data: pd.DataFrame = column.rolling(window=window, min_periods=0).agg(
        stat_functions
    )
    raw_df.loc[(slice(None)), :] = stat_data.values
    raw_series: pd.Series = raw_df.stack()

    result = pd.concat([offset_series, raw_series])
    # values that cannot be parsed as numbers become NaN
    result = result.apply(pd.to_numeric, errors="coerce")
    return result
|
|
257
|
+
|
|
258
|
+
|
|
259
|
+
def trim(full_data: pd.DataFrame | pd.Series) -> pd.DataFrame | pd.Series:
    """Keep only the raw values of ``full_data``.

    Selects the entries with no offset, no difference and the plain value
    stat, then replaces the index with its first level.
    """
    raw_key = (
        slice(None),
        FloatingOffset.NO.value,
        Difference.NO.value,
        Stat.VALUE.value,
    )
    is_frame = isinstance(full_data, pd.DataFrame)
    trimmed_data = full_data.loc[raw_key, :] if is_frame else full_data.loc[raw_key]
    # only the first index level is kept as the new index
    first_level = trimmed_data.index.get_level_values(level=0)
    trimmed_data.index = first_level
    return trimmed_data
|
|
272
|
+
|
|
273
|
+
|
|
274
|
+
def add_rows(
    old_full_df: pd.DataFrame,
    new_trimmed_df: pd.DataFrame,
    offsets_cfg: dict[str, Any],
):
    """Append the enriched version of ``new_trimmed_df`` to ``old_full_df``.

    Args:
        old_full_df: already enriched data; its first index level holds the
            dates.
        new_trimmed_df: new raw rows, indexed by date, with one column per
            column of ``old_full_df``.
        offsets_cfg: must provide the keys ``"window"``, ``"offsets"``,
            ``"differences"`` and ``"stats"``.

    Returns:
        The concatenation of ``old_full_df`` and the enriched new rows.
    """
    # NOTE: the following sanity checks are not implemented (they should
    # raise when they fail):
    #   - same columns
    #   - simple index
    #   - datetime index
    #   - sorted index
    #   - dates not in the old df

    old_dates: pd.DatetimeIndex = old_full_df.index.get_level_values(
        level=0
    ).unique()
    new_dates = new_trimmed_df.index
    total_points = len(old_dates) + len(new_dates)
    window: int = min(offsets_cfg["window"], total_points)

    offsets_dict = _parse_offsets(offsets_cfg["offsets"], new_dates)
    differences_dict = _parse_differences(offsets_cfg["differences"])
    stats_dict = _parse_stats(offsets_cfg["stats"], update=True)

    offset_df, raw_df = _result_templates(
        new_dates,
        offsets_dict.keys(),
        differences_dict.keys(),
        ["date", "offset", "difference_type"],
        stats_dict.keys(),
        columns_name="stat",
    )

    def update_column(new_column: pd.Series) -> pd.Series:
        # pair every new column with the old enriched column of that name
        return _process_new_data(
            new_column,
            old_full_df.loc[:, new_column.name],
            offsets_dict,
            differences_dict,
            stats_dict,
            raw_df=raw_df,
            offset_df=offset_df,
            window=window,
        )

    updated_data: pd.DataFrame = new_trimmed_df.apply(update_column)
    return pd.concat([old_full_df, updated_data])
|
|
320
|
+
|
|
321
|
+
|
|
322
|
+
def _process_new_data(
    new_trimmed_column: pd.Series,
    old_full_column: pd.Series,
    offsets_dict: dict[str, pd.DatetimeIndex],
    differences_dict: dict[str, DifferenceFunction],
    stats_dict: dict[str, StatFunction],
    *,
    raw_df: pd.DataFrame,
    offset_df: pd.DataFrame,
    window: int,
) -> pd.Series:
    """Compute the stats of the new rows of one column incrementally.

    Args:
        new_trimmed_column: the new raw values.
        old_full_column: the already enriched column the new values extend.
        offsets_dict: offset label -> reference dates, one per new row
            (they are re-labelled with ``new_trimmed_column.index``).
        differences_dict: difference label -> difference function.
        stats_dict: stat label -> update function, forwarded to
            ``_rolling_update``.
        raw_df: template filled in place with the stats of the new values.
        offset_df: template filled in place with the stats of every
            (offset, difference) combination.
        window: rolling window size.

    Returns:
        The stacked offset stats followed by the stacked raw stats, coerced
        to numeric values.
    """

    # old and new raw values together, so that reference dates falling in
    # the old data can be looked up
    old_trimmed_column: pd.Series = trim(old_full_column)
    fused_trimmed_column: pd.Series = pd.concat(
        [old_trimmed_column, new_trimmed_column]
    )

    # for every non trivial offset and difference apply all the stats
    for offset_str, offset_idx in offsets_dict.items():
        for diff_str, diff_func in differences_dict.items():
            offset_data = fused_trimmed_column.reindex(offset_idx)
            offset_data.index = new_trimmed_column.index
            diff_data = diff_func(new_trimmed_column, offset_data)
            # previously stored stats for this (offset, difference) pair
            old_diff_stats: pd.Series = old_full_column.loc[
                (slice(None), offset_str, diff_str)
            ]
            stat_data: pd.DataFrame = _rolling_update(
                window, diff_data, old_diff_stats, stats_dict
            )
            offset_df.loc[(stat_data.index.values, offset_str, diff_str), :] = (
                stat_data.values
            )
    offset_series: pd.Series = offset_df.stack()

    # apply the stats to the original data
    old_stats: pd.Series = old_full_column.loc[
        (slice(None), FloatingOffset.NO.value, Difference.NO.value)
    ]
    stat_data: pd.DataFrame = _rolling_update(
        window, new_trimmed_column, old_stats, stats_dict
    )
    raw_df.loc[(slice(None)), :] = stat_data.values
    raw_series: pd.Series = raw_df.stack()

    result = pd.concat([offset_series, raw_series])
    # values that cannot be parsed as numbers become NaN
    result = result.apply(pd.to_numeric, errors="coerce")
    return result
|
|
369
|
+
|
|
370
|
+
|
|
371
|
+
def _rolling_update(
    window: int,
    new_trimmed_data: pd.Series,
    old_full_data: pd.Series,
    stats_dict: dict[str, StatRollingFunction],
):
    """Extend stored rolling stats with the stats of new observations.

    Args:
        window: maximum number of points in a rolling window.
        new_trimmed_data: the new observations, in chronological order.
        old_full_data: previously computed stats; its last index level holds
            the stat names (it is unstacked into columns).
        stats_dict: stat label -> update function. Each function is called
            with the keyword arguments ``window``, ``expanding``,
            ``old_stat``, ``oldest_point``, ``newest_point`` and
            ``other_stats``.

    Returns:
        A frame with one row per new observation and one column per stat.
    """
    result_df: pd.DataFrame = old_full_data.unstack(level=-1)
    fused_values = pd.concat(
        [result_df.loc[:, Stat.VALUE.value], new_trimmed_data]
    )
    result_df = result_df.reindex(fused_values.index)
    for index, value in new_trimmed_data.items():
        current_position = fused_values.index.get_loc(index)
        # point that drops out of the window once the window is sliding
        previous_position = max(current_position - window, 0)
        previous_value = fused_values.iloc[previous_position]
        # number of points in the window, the new one included
        actual_window = min(window, current_position + 1)
        # The window keeps growing until there are more than ``window``
        # points in total. This includes the step on which it becomes
        # exactly full: nothing leaves the window on that step, so the
        # sliding formulas must not be used yet.
        expanding = current_position < window
        for f_name, function in stats_dict.items():
            result_df.loc[index, f_name] = function(
                window=actual_window,
                expanding=expanding,
                old_stat=result_df.iloc[current_position - 1][f_name],
                oldest_point=previous_value,
                newest_point=value,
                other_stats=result_df.iloc[current_position - 1],
            )
    return result_df.loc[new_trimmed_data.index, :]
|
|
398
|
+
|
|
399
|
+
|
|
400
|
+
def _update_rolling_std(
    window: int,
    expanding: bool,
    old_stat: float,
    oldest_point: float,
    newest_point: float,
    other_stats: pd.Series,
    **_,
) -> float:
    """Update a population standard deviation with one new observation.

    Args:
        window: number of points in the window *including* the new one
            (the same convention ``_update_rolling_avg`` uses).
        expanding: ``True`` while the window is still growing, ``False``
            once the oldest point leaves for every new point entering.
        old_stat: standard deviation before the update.
        oldest_point: point leaving the window (used only when sliding).
        newest_point: point entering the window.
        other_stats: stats before the update; the previous rolling mean is
            read from it.

    Returns:
        The updated standard deviation (``ddof=0``).
    """
    old_rolling_mean: float = other_stats[Stat.ROLL_AVG.value]
    if expanding:
        # https://math.stackexchange.com/questions/374881/recursive-formula-for-variance
        # With n = previous_count points before the update:
        #   var_new = n / (n + 1) * (var_old + (mean_old - x) ** 2 / (n + 1))
        # ``window`` already counts the new point, i.e. window == n + 1.
        previous_count = window - 1
        if previous_count <= 0:
            # a single observation has no dispersion
            return 0.0
        new_rolling_var = (
            old_stat**2 + (old_rolling_mean - newest_point) ** 2 / window
        ) * (previous_count / window)
    else:
        # https://jonisalonen.com/2014/efficient-and-accurate-rolling-standard-deviation/

        delta = newest_point - oldest_point
        new_rolling_mean = old_rolling_mean + (delta / window)
        new_rolling_var = old_stat**2 + (delta / window) * (
            newest_point - new_rolling_mean + oldest_point - old_rolling_mean
        )
    new_rolling_var = max(new_rolling_var, 0)  # avoid complex numbers
    return new_rolling_var**0.5
|
|
426
|
+
|
|
427
|
+
|
|
428
|
+
def _update_rolling_avg(
|
|
429
|
+
window: int,
|
|
430
|
+
expanding: bool,
|
|
431
|
+
old_stat: float,
|
|
432
|
+
oldest_point: float,
|
|
433
|
+
newest_point: float,
|
|
434
|
+
**_,
|
|
435
|
+
) -> float:
|
|
436
|
+
if expanding:
|
|
437
|
+
return ((window - 1) * old_stat + newest_point) / window
|
|
438
|
+
else:
|
|
439
|
+
return old_stat + (newest_point - oldest_point) / window
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
import pandas as pd
|
|
2
|
+
|
|
3
|
+
from .offsets import Stat
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
def flag_outliers(data: pd.DataFrame) -> pd.Series:
    """Return how far each value is from its rolling mean, in rolling stds.

    The result is ``(value - rolling average) / rolling std``, computed from
    the columns named after the corresponding ``Stat`` members.
    """
    deviation = data[Stat.VALUE.value] - data[Stat.ROLL_AVG.value]
    spread = data[Stat.ROLL_STD.value]
    return deviation / spread
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
def flag_outliers_with_limit(data: pd.DataFrame, limit: float) -> pd.Series:
    """Flag the rows whose standardized deviation exceeds ``limit``.

    The deviation is ``(value - rolling average) / rolling std``; the result
    is ``True`` where it is strictly greater than ``limit``.
    """
    deviation = data[Stat.VALUE.value] - data[Stat.ROLL_AVG.value]
    standardized = deviation / data[Stat.ROLL_STD.value]
    return standardized > limit
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
from tesorotools.render.content.images import Image, Images
|
|
2
|
+
from tesorotools.render.content.section import Section
|
|
3
|
+
from tesorotools.render.content.subtitle import Subtitle
|
|
4
|
+
from tesorotools.render.content.table import Table
|
|
5
|
+
from tesorotools.render.content.text import Text
|
|
6
|
+
from tesorotools.render.content.title import Title
|
|
7
|
+
from tesorotools.render.report import Report
|
|
8
|
+
from tesorotools.utils.template import TemplateLoader
|
|
9
|
+
|
|
10
|
+
# Register, in this order, the YAML tag handled by every renderable class.
for _tag, _constructor in (
    ("!report", Report.from_yaml),
    ("!section", Section.from_yaml),
    ("!image", Image.from_yaml),
    ("!images", Images.from_yaml),
    ("!table", Table.from_yaml),
    ("!text", Text.from_yaml),
    ("!title", Title.from_yaml),
    ("!subtitle", Subtitle.from_yaml),
):
    TemplateLoader.add_constructor(_tag, _constructor)
|
|
File without changes
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
from typing import Protocol, Self
|
|
2
|
+
|
|
3
|
+
from docx.document import Document
|
|
4
|
+
from yaml import Loader, MappingNode
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
class Content(Protocol):
    """Structural interface of the elements that make up a report.

    Any class providing these members satisfies the protocol; no
    inheritance is required.
    """

    # Receive a document and return a document.
    def render(self, document: Document) -> Document: ...

    # Alternate constructor with the signature of a YAML constructor:
    # it receives the loader and the mapping node to build from.
    @classmethod
    def from_yaml(cls, loader: Loader, node: MappingNode) -> Self: ...

    # Integer level of the content, readable and writable.
    @property
    def level(self) -> int: ...

    @level.setter
    def level(self, level: int) -> None: ...
|
|
@@ -0,0 +1,147 @@
|
|
|
1
|
+
from pathlib import Path
|
|
2
|
+
from typing import Any, Self
|
|
3
|
+
|
|
4
|
+
from docx.document import Document
|
|
5
|
+
from docx.enum.text import WD_ALIGN_PARAGRAPH
|
|
6
|
+
from docx.shared import Inches
|
|
7
|
+
from docx.table import Table, _Cell
|
|
8
|
+
from docx.text.paragraph import Paragraph
|
|
9
|
+
from docx.text.run import Run
|
|
10
|
+
from yaml import MappingNode
|
|
11
|
+
|
|
12
|
+
from tesorotools.utils.template import TemplateLoader
|
|
13
|
+
|
|
14
|
+
CENTER = WD_ALIGN_PARAGRAPH.CENTER
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
def _style_container_table(table: Table) -> None:
    """Center every paragraph of every cell and make its runs bold."""
    paragraphs = (
        paragraph
        for row in table.rows
        for cell in row.cells
        for paragraph in cell.paragraphs
    )
    for paragraph in paragraphs:
        paragraph.alignment = CENTER
        # only runs that already exist are affected
        for run in paragraph.runs:
            run.bold = True
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
def _fill_titles(
    cell: _Cell, title: str | None, subtitle: str | None, bold: bool = True
):
    """Write a title, and optionally a subtitle, in a container table cell.

    The title goes in a run of the first paragraph of ``cell``, bold or not
    according to ``bold``. When ``subtitle`` is given it follows after a
    break, in a non-bold italic run of the same paragraph.
    """
    paragraph: Paragraph = cell.paragraphs[0]
    heading: Run = paragraph.add_run(title)
    heading.bold = bold
    if subtitle is None:
        return
    heading.add_break()
    caption: Run = paragraph.add_run(subtitle)
    caption.bold = False
    caption.italic = True
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
class Image:
    """A single picture with an optional title and subtitle."""

    def __init__(
        self,
        file: Path,
        title: str | None = None,
        subtitle: str | None = None,
        width: int = 4,
    ):
        self._file: Path = file
        self._title: str | None = title
        self._subtitle: str | None = subtitle
        # picture width, in inches
        self._width = width

    @classmethod
    def from_yaml(cls, loader: TemplateLoader, node: MappingNode) -> Self:
        """Build an image from a YAML mapping.

        The ``id`` entry is joined to the loader's ``"image"`` import
        location to form the file path; every other entry is forwarded to
        the constructor as a keyword argument.
        """
        settings: dict[str, Any] = loader.construct_mapping(node, deep=True)
        image_root = loader.imports["image"]
        image_id = settings.pop("id")
        return cls(file=image_root / image_id, **settings)

    def render(self, document: Document) -> Document:
        """Add the image to ``document`` inside a centered 2x1 table.

        The first row holds the title and subtitle, the second the picture.
        """
        # container table: titles on top, picture below
        layout: Table = document.add_table(2, 1)
        layout.alignment = CENTER
        _style_container_table(layout)

        # titles
        _fill_titles(
            layout.cell(0, 0), title=self._title, subtitle=self._subtitle
        )

        # picture
        picture_paragraph: Paragraph = layout.cell(1, 0).paragraphs[0]
        picture_run: Run = picture_paragraph.add_run()
        picture_run.add_picture(str(self._file), width=Inches(self._width))

        return document
|
|
80
|
+
|
|
81
|
+
|
|
82
|
+
class Images:
    """Multiple images side by side"""

    def __init__(
        self,
        id: str,
        images: list[Image],
        title: str | None = None,
        subtitle: str | None = None,
        width: int = 3,
    ) -> None:
        self._id: str = id
        self._images: list[Image] = images
        self._title: str | None = title
        self._subtitle: str | None = subtitle
        # width of every picture, in inches
        self._width: int = width

    @classmethod
    def from_yaml(cls, loader: TemplateLoader, node: MappingNode) -> Self:
        """Build the group from a YAML mapping.

        Entries whose value is an ``Image`` become the images of the group,
        in mapping order; the remaining entries (``id`` and any optional
        setting) are forwarded to the constructor as keyword arguments.
        """
        images_cfg: dict[str, Any] = loader.construct_mapping(node, deep=True)
        images_dict: dict[str, Image] = {
            key: value
            for key, value in images_cfg.items()
            if isinstance(value, Image)
        }
        other_dict: dict[str, Any] = {
            key: value
            for key, value in images_cfg.items()
            if key not in images_dict
        }
        return cls(
            id=other_dict.pop("id"),
            images=list(images_dict.values()),
            **other_dict,
        )

    def render(self, document: Document) -> Document:
        """Add the images to ``document`` inside a centered table.

        The table has one column per image. Its first row, merged across
        all columns, holds the group title; its last row holds the
        pictures. With more than one image a middle row holds the title of
        each image; a single image uses a two-row table without that row.
        """
        # add container table
        columns: int = len(self._images)
        rows: int = 2 if columns == 1 else 3
        container_table: Table = document.add_table(rows, columns)
        container_table.alignment = CENTER
        _style_container_table(container_table)

        # merge title cells if necessary
        title_cell: _Cell = container_table.cell(0, 0)
        for idx, _ in enumerate(container_table.columns):
            title_cell.merge(container_table.cell(0, idx))

        # fill container table titles
        _fill_titles(title_cell, title=self._title, subtitle=self._subtitle)

        # fill container table subtitles: that row only exists in the
        # three-row layout
        if rows == 3:
            for idx, subtitle_cell in enumerate(container_table.rows[1].cells):
                _fill_titles(
                    subtitle_cell,
                    title=self._images[idx]._title,
                    subtitle=self._images[idx]._subtitle,
                    bold=False,
                )

        # fill container table plots: always the last row, so the two-row
        # layout no longer indexes a row that does not exist
        picture_cells = container_table.rows[rows - 1].cells
        for idx, content_cell in enumerate(picture_cells):
            content_par: Paragraph = content_cell.paragraphs[0]
            content_run: Run = content_par.add_run()
            content_run.add_picture(
                str(self._images[idx]._file), width=Inches(self._width)
            )
        return document
|
|
@@ -0,0 +1,53 @@
|
|
|
1
|
+
from typing import Any, Self
|
|
2
|
+
|
|
3
|
+
from docx.document import Document
|
|
4
|
+
from yaml import Loader, MappingNode
|
|
5
|
+
|
|
6
|
+
from tesorotools.render.content.content import Content
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
class Section:
    """A titled group of contents, possibly containing other sections."""

    def __init__(
        self,
        id: str,
        title: str | None = None,
        contents: dict[str, Content] | None = None,
    ) -> None:
        self._id: str = id
        self._title: str = "" if title is None else title
        self._contents: dict[str, Content] = (
            {} if contents is None else contents
        )
        # heading level; sections start at the top level
        self._level = 1

    @property
    def level(self) -> int:
        """Heading level used when the section is rendered."""
        return self._level

    @level.setter
    def level(self, level: int) -> None:
        self._level = level

    @classmethod
    def from_yaml(cls, loader: Loader, node: MappingNode) -> Self:
        """Build a section from a YAML mapping.

        ``id`` is required and ``title`` optional; every remaining entry is
        taken as a content of the section. The levels of nested sections
        are adjusted before returning.
        """
        entries: dict[str, Any] = loader.construct_mapping(node, deep=True)
        section_id: str = entries.pop("id")
        section_title: str = entries.pop("title", None)
        # whatever is left after removing id and title are the contents
        section: Self = cls(
            id=section_id, title=section_title, contents=entries
        )
        section.nest()
        return section

    def render(self, document: Document) -> Document:
        """Add the heading and every content of the section to ``document``.

        An empty paragraph is added before each content.
        """
        # Use the "Heading `level`" style from the base document
        document.add_heading(self._title, level=self._level)
        for content in self._contents.values():
            document.add_paragraph()
            document = content.render(document)
        return document

    def nest(self):
        """Push every directly or indirectly contained section one level down."""
        subsections = [
            content
            for content in self._contents.values()
            if isinstance(content, Section)
        ]
        for subsection in subsections:
            subsection.level = subsection.level + 1
            subsection.nest()
|