tesorotools-python 0.0.18__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- tesorotools/__init__.py +6 -0
- tesorotools/artists/__init__.py +5 -0
- tesorotools/artists/barh_plot.py +310 -0
- tesorotools/artists/line_plot.py +245 -0
- tesorotools/artists/table.py +200 -0
- tesorotools/artists/type_curve.py +218 -0
- tesorotools/assets/README.md +5 -0
- tesorotools/assets/fonts/CabinetGrotesk-Black.otf +0 -0
- tesorotools/assets/fonts/CabinetGrotesk-Bold.otf +0 -0
- tesorotools/assets/fonts/CabinetGrotesk-Extrabold.otf +0 -0
- tesorotools/assets/fonts/CabinetGrotesk-Extralight.otf +0 -0
- tesorotools/assets/fonts/CabinetGrotesk-Light.otf +0 -0
- tesorotools/assets/fonts/CabinetGrotesk-Medium.otf +0 -0
- tesorotools/assets/fonts/CabinetGrotesk-Regular.otf +0 -0
- tesorotools/assets/fonts/CabinetGrotesk-Thin.otf +0 -0
- tesorotools/assets/fonts/README.md +1 -0
- tesorotools/assets/plots.yaml +43 -0
- tesorotools/assets/tesoro.mplstyle +21 -0
- tesorotools/convert.py +99 -0
- tesorotools/data_sources/README.md +14 -0
- tesorotools/data_sources/__init__.py +0 -0
- tesorotools/data_sources/debug.py +26 -0
- tesorotools/data_sources/lseg.py +117 -0
- tesorotools/database/__init__.py +0 -0
- tesorotools/database/push.py +70 -0
- tesorotools/dependencies/__init__.py +0 -0
- tesorotools/dependencies/functions.py +11 -0
- tesorotools/dependencies/node.py +34 -0
- tesorotools/dependencies/resolution.py +118 -0
- tesorotools/main.py +37 -0
- tesorotools/offsets/__init__.py +0 -0
- tesorotools/offsets/offsets.py +439 -0
- tesorotools/offsets/outliers.py +15 -0
- tesorotools/render/__init__.py +17 -0
- tesorotools/render/content/__init__.py +0 -0
- tesorotools/render/content/content.py +17 -0
- tesorotools/render/content/images.py +147 -0
- tesorotools/render/content/section.py +53 -0
- tesorotools/render/content/subtitle.py +53 -0
- tesorotools/render/content/table.py +308 -0
- tesorotools/render/content/text.py +23 -0
- tesorotools/render/content/title.py +40 -0
- tesorotools/render/report.py +31 -0
- tesorotools/utils/__init__.py +0 -0
- tesorotools/utils/config.py +35 -0
- tesorotools/utils/globals.py +14 -0
- tesorotools/utils/matplotlib.py +38 -0
- tesorotools/utils/series.py +40 -0
- tesorotools/utils/shortcuts.py +32 -0
- tesorotools/utils/template.py +126 -0
- tesorotools_python-0.0.18.dist-info/METADATA +16 -0
- tesorotools_python-0.0.18.dist-info/RECORD +53 -0
- tesorotools_python-0.0.18.dist-info/WHEEL +4 -0
|
@@ -0,0 +1,117 @@
|
|
|
1
|
+
import time
|
|
2
|
+
from pathlib import Path
|
|
3
|
+
|
|
4
|
+
import lseg.data as ld
|
|
5
|
+
import pandas as pd
|
|
6
|
+
|
|
7
|
+
# there should be a better way for testing this
|
|
8
|
+
|
|
9
|
+
# for the debt stuff we usually use B_YLD_1 as our value
|
|
10
|
+
# for the rest it is usually BID
|
|
11
|
+
# sometimes TRD_PRC_1
|
|
12
|
+
# OFF_CLOSE
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
def get_series(
    api_key: str,
    series_id_list: list[str],
    start_date: str,
    end_date: str,
    freq: str = "B",
    fields: list[str] | None = None,
    cooldown: int = 60,
    datapoint_limit: int = 2_000,
    cache_path: Path | None = None,
) -> pd.DataFrame:
    """Download historical data from LSEG in chunks, given a valid API key.

    The dates between ``start_date`` and ``end_date`` are split into
    consecutive blocks so that each request stays within
    ``datapoint_limit``. Every block is fetched with ``block_download`` and
    the function sleeps ``cooldown`` seconds between downloads.

    Args:
        api_key: Application key used to open the LSEG session.
        series_id_list: Instruments to download.
        start_date: First date of the requested range.
        end_date: Last date of the requested range.
        freq: Pandas frequency used to enumerate the dates.
        fields: Fields to request; ``["TIMESTAMP", "CLOSE"]`` when ``None``.
        cooldown: Seconds to wait between consecutive downloads.
        datapoint_limit: Maximum number of values requested per block.
        cache_path: Directory where each block is stored as
            ``from_<start>_to_<end>.csv``. Blocks whose file already exists
            are not downloaded again.

    Returns:
        Without ``cache_path``: every downloaded block, concatenated in
        chronological order. With ``cache_path``: the last block downloaded
        during this call (an empty frame when every block was already
        cached); the complete history lives in the cache directory and can
        be assembled with ``concat_partial_data``.
    """
    # NOTE(review): the session opened here is never closed by this function.
    ld.open_session(app_key=api_key)
    fields = ["TIMESTAMP", "CLOSE"] if fields is None else fields

    dates_list: list[str] = list(
        pd.date_range(start=start_date, end=end_date, freq=freq).astype("str")
    )

    # One field is left out of the count (presumably the timestamp one --
    # TODO confirm). Both guards below avoid a ZeroDivisionError (single
    # field / empty universe) and a zero-length step, which used to crash on
    # the first empty slice.
    values_per_date: int = len(series_id_list) * (len(fields) - 1)
    download_step: int = max(1, datapoint_limit // max(1, values_per_date))

    partial_data: list[pd.DataFrame] = []
    # Defined up front so the final return never hits an unbound name when
    # there is nothing to download.
    data: pd.DataFrame = pd.DataFrame()
    for first in range(0, len(dates_list), download_step):
        dates_to_download = dates_list[first : first + download_step]
        start, end = dates_to_download[0], dates_to_download[-1]
        cache_file_path: Path | None = (
            cache_path / f"from_{start}_to_{end}.csv"
            if cache_path is not None
            else None
        )
        if cache_file_path is not None and cache_file_path.exists():
            # Already on disk: nothing to download for this block.
            continue

        data = block_download(
            series_id_list,
            start_date=start,
            end_date=end,
            freq=freq,
            fields=fields,
            cooldown=cooldown,
            file_path=cache_file_path,
        )
        if cache_file_path is None:
            partial_data.append(data)
        if first + download_step < len(dates_list):
            print(f"Waiting {cooldown} seconds for LSEG to cool down...")
            time.sleep(cooldown)

    if cache_path is None and partial_data:
        # Without a cache the chunks only exist in memory, so returning just
        # the last one would silently drop the rest of the range.
        return pd.concat(partial_data)
    return data
|
|
67
|
+
|
|
68
|
+
|
|
69
|
+
def block_download(
    series_id_list: list[str],
    start_date: str,
    end_date: str,
    freq: str = "B",
    fields: list[str] | None = None,
    cooldown: int = 60,
    file_path: Path | None = None,
):
    """Download one block of history from LSEG, retrying until it succeeds.

    The request is repeated every ``cooldown`` seconds for as long as LSEG
    raises an ``LDError`` or returns no data. The resulting frame has its
    duplicated rows dropped and its index sorted; when it holds a single
    column, that column is renamed after ``series_id_list``. If
    ``file_path`` is given, the frame is also written there as CSV.
    """
    # LSEG expects "daily" where pandas uses the business-day alias "B".
    interval = freq if freq != "B" else "daily"

    data: pd.DataFrame | None = None
    while data is None:
        try:
            data = ld.get_history(
                universe=series_id_list,
                start=start_date,
                end=end_date,
                fields=fields,
                interval=interval,
            )
            if data is None:
                # Treat an empty answer like any other LSEG failure so it
                # goes through the same wait-and-retry path.
                raise ld.errors.LDError(
                    code=404, message="Service temporarily unavailable"
                )
        except ld.errors.LDError as e:
            print(f"LSEG error: {e}")
            print("This is probably not our fault")
            print(f"Waiting {cooldown} seconds for LSEG to cool down...")
            time.sleep(cooldown)

    data = data.drop_duplicates().sort_index()
    if len(data.columns) == 1:
        data.columns = series_id_list
    if file_path is not None:
        data.to_csv(file_path)
    return data
|
|
106
|
+
|
|
107
|
+
|
|
108
|
+
def concat_partial_data(
    cache_path: Path, partial_data: list[pd.DataFrame]
) -> pd.DataFrame:
    """Concatenate in-memory chunks with the chunks stored in a cache folder.

    Args:
        cache_path: Directory whose entries are read as CSV files indexed by
            their ``Date`` column; ``None`` to skip the cache.
        partial_data: Chunks already held in memory. They come first in the
            result and the list itself is left untouched.

    Returns:
        All chunks stacked row-wise, or an empty frame when there are none.
    """
    # Work on a copy: appending to the caller's list would make repeated
    # calls accumulate the cached chunks.
    dfs: list[pd.DataFrame] = list(partial_data)
    if cache_path is not None:
        # iterdir() yields entries in arbitrary order; sorting by path makes
        # the row order of the result deterministic.
        dfs.extend(
            pd.read_csv(chunk, index_col="Date")
            for chunk in sorted(cache_path.iterdir())
        )
    if not dfs:
        # pd.concat raises ValueError on an empty list.
        return pd.DataFrame()
    return pd.concat(dfs)
|
|
File without changes
|
|
@@ -0,0 +1,70 @@
|
|
|
1
|
+
import json
|
|
2
|
+
|
|
3
|
+
import pandas as pd
|
|
4
|
+
from sqlalchemy import create_engine
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
def get_connection_string(
    engine: str,
    driver: str,
    username: str,
    password: str,
    host: str,
    database: str,
) -> str:
    """Build a ``engine+driver://username:password@host/database`` URL.

    The pieces are inserted verbatim: no escaping is applied to any of them.
    """
    dialect = f"{engine}+{driver}"
    credentials = f"{username}:{password}"
    location = f"{host}/{database}"
    return f"{dialect}://{credentials}@{location}"
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
def flash_to_database_format(data: pd.DataFrame) -> pd.DataFrame:
    """Transform the data in *flash* format to data in *database* format.

    The columns of ``data`` are stacked into a ``name`` column next to a
    ``value`` column, and the ``offset``, ``difference_type`` and ``stat``
    index levels are packed into a JSON string stored in ``dimensions``.
    An all-missing ``value_meta`` column is appended.

    Args:
        data: Frame in flash format. Its index must provide the ``offset``,
            ``difference_type`` and ``stat`` levels. The frame is not
            modified.

    Returns:
        A new frame in database format, one row per stacked value.
    """
    dimension_columns = ["offset", "difference_type", "stat"]

    # rename_axis returns a new frame, so the caller's column index keeps
    # its original name (assigning to data.columns.name would change it).
    named = data.rename_axis("name", axis="columns")
    database_data = named.stack().to_frame(name="value").reset_index()

    # Built column-wise: much cheaper than a row-wise DataFrame.apply.
    database_data["dimensions"] = [
        json.dumps({"offset": offset, "difference_type": diff, "stat": stat})
        for offset, diff, stat in zip(
            database_data["offset"],
            database_data["difference_type"],
            database_data["stat"],
        )
    ]
    database_data = database_data.drop(columns=dimension_columns)
    database_data["value_meta"] = pd.NA
    return database_data
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
def database_to_flash_format():
    """Placeholder for the inverse of ``flash_to_database_format``.

    Not implemented yet: it takes no arguments and returns ``None``.
    """
    return None
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
def push_to_database(data: pd.DataFrame, conn_string: str, table: str) -> None:
    """Append ``data`` to ``table`` in the database behind ``conn_string``.

    Rows are written in batches of 1000 and the frame's index is not stored.

    Args:
        data: Frame whose rows are appended.
        conn_string: SQLAlchemy connection URL.
        table: Name of the destination table.
    """
    engine = create_engine(url=conn_string)
    try:
        data.to_sql(
            name=table,
            con=engine,
            if_exists="append",
            chunksize=1000,
            index=False,
        )
    finally:
        # The engine is private to this call: release its connection pool
        # instead of leaving connections open until garbage collection.
        engine.dispose()
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
# this may be just as a datasource (maybe a common protocol?)
|
|
56
|
+
# check the api long video for further reference
|
|
57
|
+
def pull_from_database(
    conn_string: str, start: str, end: str, series: list[str]
):
    """Read the values of ``series`` between ``start`` and ``end``.

    Args:
        conn_string: SQLAlchemy connection URL.
        start: First date of the range (inclusive).
        end: Last date of the range (inclusive).
        series: Names of the series to fetch.

    Returns:
        A frame with ``date``, ``name`` and ``value`` columns ordered by
        date.
    """
    # Local import: these names are only needed by this function.
    from sqlalchemy import bindparam, text

    # NOTE(review): the table name and the ``dimensions`` filter are still
    # hard-coded, as in the original experiment; ``@>`` looks like the
    # PostgreSQL jsonb containment operator -- confirm the target backend.
    query = text(
        """
        SELECT date, name, value
        FROM measurements
        WHERE
            date BETWEEN :start AND :end AND name IN :series AND
            dimensions @> '{"offset":"no", "stat":"value"}'
        ORDER BY date ASC
        """
    ).bindparams(bindparam("series", expanding=True))

    engine = create_engine(url=conn_string)
    try:
        # Values travel as bound parameters rather than being formatted into
        # the SQL text.
        return pd.read_sql_query(
            query,
            con=engine,
            params={"start": start, "end": end, "series": list(series)},
        )
    finally:
        engine.dispose()
|
|
File without changes
|
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
from collections.abc import Callable
|
|
2
|
+
from typing import Self
|
|
3
|
+
|
|
4
|
+
import pandas as pd
|
|
5
|
+
|
|
6
|
+
from . import functions
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
class Node:
|
|
10
|
+
def __init__(self, name: str) -> None:
|
|
11
|
+
self._name: str = name
|
|
12
|
+
self._edges: list[Self] = []
|
|
13
|
+
|
|
14
|
+
def add_edge(self, node: Self) -> None:
|
|
15
|
+
self._edges.append(node)
|
|
16
|
+
|
|
17
|
+
def build_edges(self, *, dependencies: list[str], function: str) -> None:
|
|
18
|
+
self._resolving_function: Callable[..., float | pd.Series] = getattr(
|
|
19
|
+
functions, function
|
|
20
|
+
)
|
|
21
|
+
for d in dependencies:
|
|
22
|
+
self.add_edge(Node(d))
|
|
23
|
+
|
|
24
|
+
@property
|
|
25
|
+
def name(self) -> str:
|
|
26
|
+
return self._name
|
|
27
|
+
|
|
28
|
+
@property
|
|
29
|
+
def edges(self) -> list[Self]:
|
|
30
|
+
return self._edges
|
|
31
|
+
|
|
32
|
+
@property
|
|
33
|
+
def resolving_function(self) -> Callable[..., float | pd.Series]:
|
|
34
|
+
return self._resolving_function
|
|
@@ -0,0 +1,118 @@
|
|
|
1
|
+
from typing import Any
|
|
2
|
+
|
|
3
|
+
import pandas as pd
|
|
4
|
+
|
|
5
|
+
from tesorotools.offsets.offsets import process_raw_data, trim
|
|
6
|
+
|
|
7
|
+
from .node import Node
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
def resolve(
    start: Node,
    resolved: list[Node],
    unresolved: list[str],
    independent: set[str],
    dependencies_cfg: dict[str, Any],
):
    """Resolve ``start`` and, depth first, everything it depends on.

    Args:
        start: Node to resolve.
        resolved: Output list of dependent nodes, filled so that every node
            appears after the nodes it depends on and only once per name.
        unresolved: Names currently being resolved, used to detect cycles.
        independent: Output set with the names that have no entry in
            ``dependencies_cfg``.
        dependencies_cfg: Maps a dependent series name to the keyword
            arguments of ``Node.build_edges``.

    Raises:
        ValueError: If a circular dependency is found.
    """
    if start.name not in dependencies_cfg:
        independent.add(start.name)
        return

    # Edges are always freshly created Node objects, so membership has to be
    # checked by name: an identity check never matches and the same series
    # would be appended once per place it is referenced.
    if any(done.name == start.name for done in resolved):
        return

    start.build_edges(**dependencies_cfg[start.name])
    unresolved.append(start.name)
    for node in start.edges:
        if node.name in unresolved:
            raise ValueError(
                f"circular dependency: {start.name} <-> {node.name}"
            )
        resolve(node, resolved, unresolved, independent, dependencies_cfg)
    resolved.append(start)
    unresolved.remove(start.name)
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
def collect_document_series(
    config_dicts: list[dict[str, Any]], find: str = "series"
) -> list[str]:
    """Gather the distinct series named under ``find`` across all configs.

    Each configuration is scanned with ``collect_series``; the result has no
    duplicates and no guaranteed order.
    """
    per_config = (collect_series(config, find) for config in config_dicts)
    found: set[str] = set().union(*per_config)
    return list(found)
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
def resolve_series(
    config_dicts: list[dict[str, Any]], dependencies_cfg: dict[str, Any]
):
    """Split the series used by a document into independent and dependent.

    Every series found by ``collect_document_series`` is run through
    ``resolve`` with a fresh cycle-detection list.

    Returns:
        A dict with the names of the independent series under
        ``"independent"`` and the dependent nodes, in resolution order,
        under ``"dependent"``.
    """
    independent_names: set[str] = set()
    dependent_nodes: list[Node] = []
    for series_name in collect_document_series(config_dicts):
        resolve(
            start=Node(name=series_name),
            resolved=dependent_nodes,
            unresolved=[],
            independent=independent_names,
            dependencies_cfg=dependencies_cfg,
        )

    result = {}
    result["independent"] = independent_names
    result["dependent"] = dependent_nodes
    return result
|
|
69
|
+
|
|
70
|
+
|
|
71
|
+
def compute_derivate_series(
    dependent_nodes: list[Node], trimmed_data: pd.DataFrame
):
    """Compute every derived series from the series it depends on.

    Args:
        dependent_nodes: Nodes to compute, ordered so that each node comes
            after the nodes it depends on.
        trimmed_data: Frame holding the already available series, one per
            column.

    Returns:
        A frame with one column per derived series, named after its node.
        It has no columns when ``dependent_nodes`` is empty.

    Raises:
        KeyError: If a dependency is neither a column of ``trimmed_data``
            nor a previously computed derived series.
    """
    inferred_by_name: dict[str, pd.Series] = {}
    for node in dependent_nodes:
        arguments: list[pd.Series] = []
        for dependency in node.edges:
            if dependency.name in trimmed_data.columns:
                arguments.append(trimmed_data[dependency.name])
            elif dependency.name in inferred_by_name:
                # Derived series may depend on other derived series; those
                # are not columns of trimmed_data, so reuse the result
                # computed earlier in this loop.
                arguments.append(inferred_by_name[dependency.name])
            else:
                raise KeyError(
                    f"{dependency.name} (needed by {node.name}) is not "
                    "available"
                )
        inferred: pd.Series = node.resolving_function(*arguments)
        # rename() returns a new Series, so an input series handed back
        # unchanged by the resolving function is not renamed in place.
        inferred_by_name[node.name] = inferred.rename(node.name)

    if not inferred_by_name:
        # pd.concat raises ValueError on an empty list.
        return pd.DataFrame(index=trimmed_data.index)
    inferred_df: pd.DataFrame = pd.concat(
        list(inferred_by_name.values()), axis=1
    )
    return inferred_df
|
|
84
|
+
|
|
85
|
+
|
|
86
|
+
def concat_derivate_series(
    independent_full_df: pd.DataFrame,
    derivate_trimmed_df: pd.DataFrame,
    offsets_config: dict[str, Any],
    force_trim: bool = False,
) -> pd.DataFrame:
    """Process the derived series and place them next to the independent ones.

    ``derivate_trimmed_df`` always goes through ``process_raw_data`` with
    ``offsets_config``. ``independent_full_df`` is used as given unless
    ``force_trim`` is set, in which case it is trimmed and processed again
    first (useful when adding emergency fixed offsets).

    Returns:
        Both frames concatenated column-wise, independent series first.
    """
    left: pd.DataFrame = (
        process_raw_data(trim(independent_full_df), **offsets_config)
        if force_trim
        else independent_full_df
    )
    right: pd.DataFrame = process_raw_data(
        derivate_trimmed_df, **offsets_config
    )
    return pd.concat([left, right], axis=1)
|
|
106
|
+
|
|
107
|
+
|
|
108
|
+
def collect_series(
    config_dict: dict[str, Any], find: str = "series"
) -> set[str]:
    """Collect the keys stored under every ``find`` entry of a nested config.

    The keys of ``config_dict[find]`` are taken when present, and every
    other dict-valued entry is searched recursively. The value stored under
    ``find`` itself is never searched any deeper.
    """
    found: set[str] = (
        set(config_dict[find].keys()) if find in config_dict else set()
    )
    for key, value in config_dict.items():
        if key == find or not isinstance(value, dict):
            continue
        found.update(collect_series(value, find))
    return found
|
tesorotools/main.py
ADDED
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
from pathlib import Path
|
|
2
|
+
|
|
3
|
+
import docx
|
|
4
|
+
import pandas as pd
|
|
5
|
+
|
|
6
|
+
from tesorotools.artists.table import generate_tables_from_flash
|
|
7
|
+
from tesorotools.render.content.table import render_table
|
|
8
|
+
from tesorotools.utils.config import read_config
|
|
9
|
+
from tesorotools.utils.globals import DEBUG, EXAMPLES
|
|
10
|
+
|
|
11
|
+
if __name__ == "__main__":
    # Manual check of the table pipeline: build the tables from a flash
    # file, then render each of them into a Word document.
    config_dicts = read_config(EXAMPLES / "tables.yaml")
    # Not used further down; still read, as before.
    offsets_config = read_config(EXAMPLES / "offsets.yaml")

    generate_tables_from_flash(
        pd.read_feather("derivates.feather"), config_dicts
    )

    document = docx.Document("template.docx")
    tables_dir: Path = DEBUG / "table"
    for table_path in tables_dir.iterdir():
        stem = table_path.stem
        # The "*color" and "*shade" files are companions of a table file and
        # are loaded together with it below.
        if not stem.endswith(("color", "shade")):
            table_dict = config_dicts[stem]
            table: pd.DataFrame = pd.read_feather(table_path)
            color: pd.DataFrame = pd.read_feather(
                tables_dir / f"{stem}_color.feather"
            )
            shade: pd.DataFrame = pd.read_feather(
                tables_dir / f"{stem}_shade.feather"
            )
            render_table(table, color, shade, document, **table_dict)
            document.add_paragraph()
    document.save("test.docx")
|
|
File without changes
|