tesorotools-python 0.0.18__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (53) hide show
  1. tesorotools/__init__.py +6 -0
  2. tesorotools/artists/__init__.py +5 -0
  3. tesorotools/artists/barh_plot.py +310 -0
  4. tesorotools/artists/line_plot.py +245 -0
  5. tesorotools/artists/table.py +200 -0
  6. tesorotools/artists/type_curve.py +218 -0
  7. tesorotools/assets/README.md +5 -0
  8. tesorotools/assets/fonts/CabinetGrotesk-Black.otf +0 -0
  9. tesorotools/assets/fonts/CabinetGrotesk-Bold.otf +0 -0
  10. tesorotools/assets/fonts/CabinetGrotesk-Extrabold.otf +0 -0
  11. tesorotools/assets/fonts/CabinetGrotesk-Extralight.otf +0 -0
  12. tesorotools/assets/fonts/CabinetGrotesk-Light.otf +0 -0
  13. tesorotools/assets/fonts/CabinetGrotesk-Medium.otf +0 -0
  14. tesorotools/assets/fonts/CabinetGrotesk-Regular.otf +0 -0
  15. tesorotools/assets/fonts/CabinetGrotesk-Thin.otf +0 -0
  16. tesorotools/assets/fonts/README.md +1 -0
  17. tesorotools/assets/plots.yaml +43 -0
  18. tesorotools/assets/tesoro.mplstyle +21 -0
  19. tesorotools/convert.py +99 -0
  20. tesorotools/data_sources/README.md +14 -0
  21. tesorotools/data_sources/__init__.py +0 -0
  22. tesorotools/data_sources/debug.py +26 -0
  23. tesorotools/data_sources/lseg.py +117 -0
  24. tesorotools/database/__init__.py +0 -0
  25. tesorotools/database/push.py +70 -0
  26. tesorotools/dependencies/__init__.py +0 -0
  27. tesorotools/dependencies/functions.py +11 -0
  28. tesorotools/dependencies/node.py +34 -0
  29. tesorotools/dependencies/resolution.py +118 -0
  30. tesorotools/main.py +37 -0
  31. tesorotools/offsets/__init__.py +0 -0
  32. tesorotools/offsets/offsets.py +439 -0
  33. tesorotools/offsets/outliers.py +15 -0
  34. tesorotools/render/__init__.py +17 -0
  35. tesorotools/render/content/__init__.py +0 -0
  36. tesorotools/render/content/content.py +17 -0
  37. tesorotools/render/content/images.py +147 -0
  38. tesorotools/render/content/section.py +53 -0
  39. tesorotools/render/content/subtitle.py +53 -0
  40. tesorotools/render/content/table.py +308 -0
  41. tesorotools/render/content/text.py +23 -0
  42. tesorotools/render/content/title.py +40 -0
  43. tesorotools/render/report.py +31 -0
  44. tesorotools/utils/__init__.py +0 -0
  45. tesorotools/utils/config.py +35 -0
  46. tesorotools/utils/globals.py +14 -0
  47. tesorotools/utils/matplotlib.py +38 -0
  48. tesorotools/utils/series.py +40 -0
  49. tesorotools/utils/shortcuts.py +32 -0
  50. tesorotools/utils/template.py +126 -0
  51. tesorotools_python-0.0.18.dist-info/METADATA +16 -0
  52. tesorotools_python-0.0.18.dist-info/RECORD +53 -0
  53. tesorotools_python-0.0.18.dist-info/WHEEL +4 -0
@@ -0,0 +1,117 @@
1
+ import time
2
+ from pathlib import Path
3
+
4
+ import lseg.data as ld
5
+ import pandas as pd
6
+
7
+ # there should be a better way for testing this
8
+
9
+ # for the debt stuff we usually use B_YLD_1 as our value
10
+ # for the rest stuff is usually BID
11
+ # sometimes TRD_PRC_1
12
+ # OFF_CLOSE
13
+
14
+
15
def get_series(
    api_key: str,
    series_id_list: list[str],
    start_date: str,
    end_date: str,
    freq: str = "B",
    fields: list[str] | None = None,
    cooldown: int = 60,
    datapoint_limit: int = 2_000,
    cache_path: Path | None = None,
) -> pd.DataFrame:
    """Download data from LSEG block by block, given a valid API key.

    The requested date range is split into consecutive blocks sized so that
    one request stays within ``datapoint_limit``. Each block is fetched with
    ``block_download`` and the function sleeps ``cooldown`` seconds between
    two actual downloads.

    Args:
        api_key: Application key used to open the LSEG session.
        series_id_list: Identifiers of the series to download.
        start_date: First date of the range (parsed by ``pd.date_range``).
        end_date: Last date of the range (parsed by ``pd.date_range``).
        freq: Frequency passed to ``pd.date_range`` and ``block_download``.
        fields: Fields to request; defaults to ``["TIMESTAMP", "CLOSE"]``.
        cooldown: Seconds to wait between two downloads.
        datapoint_limit: Maximum number of data points per request.
        cache_path: Optional directory. When given, each block is stored as
            ``from_<start>_to_<end>.csv`` and blocks whose file already
            exists are not downloaded again.

    Returns:
        The concatenation of every block, in chronological order. An empty
        frame is returned when the date range contains no dates.

    Raises:
        ValueError: If ``series_id_list`` is empty.
    """
    if not series_id_list:
        raise ValueError("series_id_list must contain at least one series")

    # NOTE(review): the session opened here is never closed by this module.
    ld.open_session(app_key=api_key)
    fields = ["TIMESTAMP", "CLOSE"] if fields is None else fields

    dates_list: list[str] = list(
        pd.date_range(start=start_date, end=end_date, freq=freq).astype("str")
    )

    # One field is left out of the budget (presumably TIMESTAMP, which is not
    # a value -- TODO confirm). Both terms are clamped to 1 so a single-field
    # request or a tight limit cannot produce a zero division or a zero step.
    points_per_date: int = len(series_id_list) * max(len(fields) - 1, 1)
    download_step: int = max(datapoint_limit // points_per_date, 1)

    chunks: list[pd.DataFrame] = []
    for block_start in range(0, len(dates_list), download_step):
        dates_to_download = dates_list[block_start : block_start + download_step]
        start = dates_to_download[0]
        end = dates_to_download[-1]
        cache_file_path: Path | None = (
            cache_path / f"from_{start}_to_{end}.csv"
            if cache_path is not None
            else None
        )

        if (cache_file_path is None) or (not cache_file_path.exists()):
            downloaded: pd.DataFrame = block_download(
                series_id_list,
                start_date=start,
                end_date=end,
                freq=freq,
                fields=fields,
                cooldown=cooldown,
                file_path=cache_file_path,
            )
            if cache_file_path is None:
                chunks.append(downloaded)
            # only wait when a request was made and more blocks remain
            if block_start + download_step < len(dates_list):
                print(f"Waiting {cooldown} seconds for LSEG to cool down...")
                time.sleep(cooldown)

        if cache_file_path is not None:
            # With a cache every block is read back from disk, so freshly
            # downloaded and previously cached blocks share one format.
            # NOTE(review): assumes the cached CSV has a single header row
            # and an index column named "Date" -- confirm for multi-field
            # downloads.
            chunks.append(
                pd.read_csv(cache_file_path, index_col="Date", parse_dates=True)
            )

    if not chunks:
        return pd.DataFrame()
    return pd.concat(chunks)
67
+
68
+
69
def block_download(
    series_id_list: list[str],
    start_date: str,
    end_date: str,
    freq: str = "B",
    fields: list[str] | None = None,
    cooldown: int = 60,
    file_path: Path | None = None,
) -> pd.DataFrame:
    """Download one block of history from LSEG, retrying until it succeeds.

    The request is repeated every ``cooldown`` seconds for as long as LSEG
    raises ``LDError`` or returns ``None``. An LSEG session must already be
    open.

    Args:
        series_id_list: Identifiers passed as the request ``universe``.
        start_date: First date of the block.
        end_date: Last date of the block.
        freq: ``"B"`` is translated to the ``"daily"`` interval; any other
            value is passed through unchanged.
        fields: Fields to request.
        cooldown: Seconds to wait before retrying after an LSEG error.
        file_path: Optional CSV file in which the block is also stored.

    Returns:
        The downloaded block, sorted by index, without repeated rows.
    """
    interval = "daily" if freq == "B" else freq

    while True:
        # only the request itself can raise an LSEG error
        try:
            data: pd.DataFrame | None = ld.get_history(
                universe=series_id_list,
                start=start_date,
                end=end_date,
                fields=fields,
                interval=interval,
            )
            if data is None:
                raise ld.errors.LDError(
                    code=404, message="Service temporarily unavailable"
                )
        except ld.errors.LDError as e:
            print(f"LSEG error: {e}")
            print("This is probably not our fault")
            print(f"Waiting {cooldown} seconds for LSEG to cool down...")
            time.sleep(cooldown)
        else:
            break

    # DataFrame.drop_duplicates() ignores the index, so it would also drop
    # different dates that happen to carry the same values. Only rows that
    # repeat both the index and the values are removed here.
    repeated = data.reset_index().duplicated().to_numpy()
    data = data[~repeated].sort_index()

    if len(data.columns) == 1:
        data.columns = series_id_list
    if file_path is not None:
        # the cache directory may not exist yet
        file_path.parent.mkdir(parents=True, exist_ok=True)
        data.to_csv(file_path)
    return data
106
+
107
+
108
+ def concat_partial_data(
109
+ cache_path: Path, partial_data: list[pd.DataFrame]
110
+ ) -> pd.DataFrame:
111
+ dfs = partial_data
112
+ if cache_path is not None:
113
+ for chunk in cache_path.iterdir():
114
+ df = pd.read_csv(chunk, index_col="Date")
115
+ dfs.append(df)
116
+ full = pd.concat(dfs)
117
+ return full
File without changes
@@ -0,0 +1,70 @@
1
+ import json
2
+
3
+ import pandas as pd
4
+ from sqlalchemy import create_engine
5
+
6
+
7
def get_connection_string(
    engine: str,
    driver: str,
    username: str,
    password: str,
    host: str,
    database: str,
) -> str:
    """Build a database URL of the form ``engine+driver://user:pass@host/db``.

    The user name and the password are URL-escaped, so characters such as
    ``@``, ``:`` or ``/`` inside the credentials cannot be mistaken for URL
    delimiters. Pass the raw, unescaped credentials.
    """
    from urllib.parse import quote_plus

    user = quote_plus(username)
    secret = quote_plus(password)
    return f"{engine}+{driver}://{user}:{secret}@{host}/{database}"
16
+
17
+
18
def flash_to_database_format(data: pd.DataFrame) -> pd.DataFrame:
    """Transform the data in *flash* format to data in *database* format.

    The columns of ``data`` are stacked into a ``name`` / ``value`` pair of
    columns. The ``offset``, ``difference_type`` and ``stat`` index levels
    are folded into one JSON string stored in ``dimensions``, and an empty
    ``value_meta`` column is added.

    Args:
        data: Frame in *flash* format. Its index must provide the levels
            ``offset``, ``difference_type`` and ``stat``. The frame is not
            modified.

    Returns:
        A new frame with one row per (index entry, column) pair.
    """
    dimension_columns = ["offset", "difference_type", "stat"]

    # rename_axis returns a new frame, so the caller's columns stay untouched
    named = data.rename_axis(columns="name")
    database_data = named.stack().to_frame(name="value").reset_index()

    # Building the JSON strings column-wise avoids a row-wise ``apply``,
    # which is what made this step slow on large frames.
    database_data["dimensions"] = [
        json.dumps(dict(zip(dimension_columns, values)))
        for values in zip(
            *(database_data[column] for column in dimension_columns)
        )
    ]
    database_data = database_data.drop(columns=dimension_columns)
    database_data["value_meta"] = pd.NA
    return database_data
42
+
43
+
44
def database_to_flash_format():
    """Placeholder for the *database* to *flash* conversion.

    Not implemented yet: the function takes no arguments and returns
    ``None``.
    """
    return None
46
+
47
+
48
def push_to_database(data: pd.DataFrame, conn_string: str, table: str) -> None:
    """Append ``data`` to ``table`` in the database behind ``conn_string``.

    Rows are written in chunks of 1000 and the frame index is not stored.
    The engine created for the upload is disposed afterwards, so its
    connection pool does not outlive the call, even if the upload fails.

    Args:
        data: Rows to append.
        conn_string: Database URL understood by ``create_engine``.
        table: Name of the destination table.
    """
    engine = create_engine(url=conn_string)
    try:
        data.to_sql(
            name=table,
            con=engine,
            if_exists="append",
            chunksize=1000,
            index=False,
        )
    finally:
        engine.dispose()
53
+
54
+
55
+ # this could simply be another data source (maybe sharing a common protocol?)
56
+ # check the api long video for further reference
57
def pull_from_database(
    conn_string: str, start: str, end: str, series: list[str]
) -> pd.DataFrame:
    """Read raw values of ``series`` between ``start`` and ``end``.

    Only the rows of the ``measurements`` table whose ``dimensions`` contain
    ``{"offset": "no", "stat": "value"}`` are returned. Dates and series
    names are sent as bound parameters, never interpolated into the SQL.

    Args:
        conn_string: Database URL understood by ``create_engine``.
        start: First date of the range (inclusive).
        end: Last date of the range (inclusive).
        series: Names of the series to read.

    Returns:
        A frame with the columns ``date``, ``name`` and ``value``, ordered
        by date.
    """
    from sqlalchemy import bindparam, text

    # NOTE(review): ``@>`` is the PostgreSQL jsonb containment operator, so
    # this query presumably targets PostgreSQL only -- confirm.
    query = text(
        """
        SELECT date, name, value
        FROM measurements
        WHERE
            date BETWEEN :start AND :end AND name IN :series AND
            dimensions @> '{"offset":"no", "stat":"value"}'
        ORDER BY date ASC
        """
    ).bindparams(bindparam("series", expanding=True))

    engine = create_engine(url=conn_string)
    try:
        with engine.connect() as connection:
            return pd.read_sql_query(
                query,
                connection,
                params={"start": start, "end": end, "series": list(series)},
            )
    finally:
        engine.dispose()
File without changes
@@ -0,0 +1,11 @@
1
+ import pandas as pd
2
+
3
+
4
+ def difference(
5
+ target: float | pd.Series, reference: float | pd.Series
6
+ ) -> float | pd.Series:
7
+ return target - reference
8
+
9
+
10
+ def inverse(target: float | pd.Series) -> float | pd.Series:
11
+ return 1 / target
@@ -0,0 +1,34 @@
1
+ from collections.abc import Callable
2
+ from typing import Self
3
+
4
+ import pandas as pd
5
+
6
+ from . import functions
7
+
8
+
9
+ class Node:
10
+ def __init__(self, name: str) -> None:
11
+ self._name: str = name
12
+ self._edges: list[Self] = []
13
+
14
+ def add_edge(self, node: Self) -> None:
15
+ self._edges.append(node)
16
+
17
+ def build_edges(self, *, dependencies: list[str], function: str) -> None:
18
+ self._resolving_function: Callable[..., float | pd.Series] = getattr(
19
+ functions, function
20
+ )
21
+ for d in dependencies:
22
+ self.add_edge(Node(d))
23
+
24
+ @property
25
+ def name(self) -> str:
26
+ return self._name
27
+
28
+ @property
29
+ def edges(self) -> list[Self]:
30
+ return self._edges
31
+
32
+ @property
33
+ def resolving_function(self) -> Callable[..., float | pd.Series]:
34
+ return self._resolving_function
@@ -0,0 +1,118 @@
1
+ from typing import Any
2
+
3
+ import pandas as pd
4
+
5
+ from tesorotools.offsets.offsets import process_raw_data, trim
6
+
7
+ from .node import Node
8
+
9
+
10
def resolve(
    start: "Node",
    resolved: "list[Node]",
    unresolved: list[str],
    independent: set[str],
    dependencies_cfg: dict[str, Any],
) -> None:
    """Resolve ``start`` and, recursively, everything it depends on.

    Dependent nodes are appended to ``resolved`` after all their
    dependencies, and each name is appended only once. Names that have no
    entry in ``dependencies_cfg`` are added to ``independent``.

    Args:
        start: Node to resolve.
        resolved: Output list of dependent nodes in dependency order.
        unresolved: Names currently being resolved (the recursion path),
            used to detect circular dependencies.
        independent: Output set of names without dependencies.
        dependencies_cfg: Maps a dependent name to the keyword arguments of
            ``Node.build_edges``.

    Raises:
        ValueError: If a circular dependency is found.
    """
    # Nodes are compared by name: edges are rebuilt as new objects on every
    # call, so an identity comparison would never find a resolved node.
    if any(done.name == start.name for done in resolved):
        return

    if start.name not in dependencies_cfg:
        independent.add(start.name)
        return

    config: dict[str, Any] = dependencies_cfg[start.name]
    start.build_edges(**config)

    unresolved.append(start.name)
    for node in start.edges:
        if node.name in unresolved:
            raise ValueError(
                f"circular dependency: {start.name} <-> {node.name}"
            )
        resolve(node, resolved, unresolved, independent, dependencies_cfg)
    resolved.append(start)
    unresolved.remove(start.name)
38
+
39
+
40
def collect_document_series(
    config_dicts: list[dict[str, Any]], find: str = "series"
) -> list[str]:
    """Gather the series names used by every configuration in a document.

    Each item of ``config_dicts`` is scanned with ``collect_series`` and the
    names found are merged without repetitions. The order of the returned
    list is not defined.
    """
    names: set[str] = set()
    for document_config in config_dicts:
        found_here = collect_series(document_config, find)
        names = names | found_here
    return list(names)
47
+
48
+
49
def resolve_series(
    config_dicts: list[dict[str, Any]], dependencies_cfg: dict[str, Any]
):
    """Split the series of a document into independent and dependent ones.

    Every series name found by ``collect_document_series`` is wrapped in a
    ``Node`` and passed to ``resolve`` with a fresh ``unresolved`` list.

    Returns:
        A dictionary with the set of independent names under
        ``"independent"`` and the list of resolved nodes under
        ``"dependent"``.
    """
    independent_names: set[str] = set()
    ordered_dependents: list[Node] = []
    starting_nodes = [
        Node(name=series_name)
        for series_name in collect_document_series(config_dicts)
    ]
    for starting_node in starting_nodes:
        resolve(
            start=starting_node,
            resolved=ordered_dependents,
            unresolved=[],
            independent=independent_names,
            dependencies_cfg=dependencies_cfg,
        )

    result = {
        "independent": independent_names,
        "dependent": ordered_dependents,
    }
    return result
69
+
70
+
71
def compute_derivate_series(
    dependent_nodes: "list[Node]", trimmed_data: pd.DataFrame
) -> pd.DataFrame:
    """Compute every derived series from the series it depends on.

    Nodes are processed in the given order, which must list a node after its
    dependencies. A dependency is read from ``trimmed_data`` when it is one
    of its columns. Otherwise it is taken from the series derived earlier in
    this call, so a derived series may depend on another derived series.

    Args:
        dependent_nodes: Nodes to compute, in dependency order.
        trimmed_data: Frame holding the independent series as columns.

    Returns:
        A frame with one column per derived series, named after its node.
        With no nodes, an empty frame sharing the index of ``trimmed_data``.

    Raises:
        KeyError: If a dependency is neither a column of ``trimmed_data``
            nor an already derived series.
    """
    # keyed by name, so a node listed twice still yields a single column
    computed: dict[str, pd.Series] = {}
    for node in dependent_nodes:
        arguments: list[pd.Series] = []
        for dependency in node.edges:
            if dependency.name in trimmed_data.columns:
                arguments.append(trimmed_data.loc[:, dependency.name])
            elif dependency.name in computed:
                arguments.append(computed[dependency.name])
            else:
                raise KeyError(
                    f"{dependency.name!r}, needed by {node.name!r}, is "
                    "neither in the data nor already derived"
                )
        inferred: pd.Series = node.resolving_function(*arguments)
        # rename() returns a new series, leaving the function result intact
        computed[node.name] = inferred.rename(node.name)

    if not computed:
        return pd.DataFrame(index=trimmed_data.index)
    inferred_df: pd.DataFrame = pd.concat(list(computed.values()), axis=1)
    return inferred_df
84
+
85
+
86
def concat_derivate_series(
    independent_full_df: pd.DataFrame,
    derivate_trimmed_df: pd.DataFrame,
    offsets_config: dict[str, Any],
    force_trim: bool = False,
) -> pd.DataFrame:
    """Join the independent series with the processed derived series.

    ``derivate_trimmed_df`` always goes through ``process_raw_data`` with
    ``offsets_config`` as keyword arguments. ``independent_full_df`` is used
    as given unless ``force_trim`` is set, in which case it is passed
    through ``trim`` and ``process_raw_data`` first (useful when adding
    emergency fixed offsets).

    Returns:
        Both frames concatenated column-wise, independent series first.
    """
    if force_trim:
        independent_part = process_raw_data(
            trim(independent_full_df), **offsets_config
        )
    else:
        independent_part = independent_full_df

    derived_part = process_raw_data(derivate_trimmed_df, **offsets_config)
    return pd.concat([independent_part, derived_part], axis=1)
106
+
107
+
108
def collect_series(
    config_dict: dict[str, Any], find: str = "series"
) -> set[str]:
    """Collect the keys stored under ``find`` anywhere in a nested config.

    The keys of the mapping found under ``find`` are collected, then every
    other dictionary value is searched recursively. The mapping stored
    under ``find`` itself is not searched any further.
    """
    own_names: set[str] = (
        set(config_dict[find].keys()) if find in config_dict else set()
    )
    nested_names = [
        collect_series(value, find)
        for key, value in config_dict.items()
        if key != find and isinstance(value, dict)
    ]
    return own_names.union(*nested_names)
tesorotools/main.py ADDED
@@ -0,0 +1,37 @@
1
+ from pathlib import Path
2
+
3
+ import docx
4
+ import pandas as pd
5
+
6
+ from tesorotools.artists.table import generate_tables_from_flash
7
+ from tesorotools.render.content.table import render_table
8
+ from tesorotools.utils.config import read_config
9
+ from tesorotools.utils.globals import DEBUG, EXAMPLES
10
+
11
if __name__ == "__main__":
    # Manual check of the table pipeline: build the tables from a flash
    # file, then render each of them into a Word document.
    # test tables here
    table_config_file: Path = EXAMPLES / "tables.yaml"
    offsets_config_file: Path = EXAMPLES / "offsets.yaml"

    config_dicts = read_config(table_config_file)
    # NOTE(review): offsets_config is read but not used below.
    offsets_config = read_config(offsets_config_file)

    # Both input files are looked up in the current working directory.
    flash = pd.read_feather("derivates.feather")
    generate_tables_from_flash(flash, config_dicts)

    document = docx.Document("template.docx")
    # presumably generate_tables_from_flash wrote these files -- TODO confirm
    for table_path in (DEBUG / "table").iterdir():
        # the "_color" and "_shade" companions are loaded with their table
        if table_path.stem.endswith(("color", "shade")):
            continue
        # the file stem doubles as the key of the table configuration
        table_dict = config_dicts[table_path.stem]
        table: pd.DataFrame = pd.read_feather(table_path)
        color: pd.DataFrame = pd.read_feather(
            table_path.parent / f"{table_path.stem}_color.feather"
        )
        shade: pd.DataFrame = pd.read_feather(
            table_path.parent / f"{table_path.stem}_shade.feather"
        )

        render_table(table, color, shade, document, **table_dict)
        # empty paragraph after each rendered table
        document.add_paragraph()
    document.save("test.docx")
File without changes