tda-finance-mapper 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- tda_finance_mapper-0.1.0/LICENSE +21 -0
- tda_finance_mapper-0.1.0/PKG-INFO +299 -0
- tda_finance_mapper-0.1.0/README.md +258 -0
- tda_finance_mapper-0.1.0/pyproject.toml +85 -0
- tda_finance_mapper-0.1.0/setup.cfg +4 -0
- tda_finance_mapper-0.1.0/src/tda_finance/__init__.py +3 -0
- tda_finance_mapper-0.1.0/src/tda_finance/data_preprocessing/__init__.py +1 -0
- tda_finance_mapper-0.1.0/src/tda_finance/data_preprocessing/preprocess_kf49.py +100 -0
- tda_finance_mapper-0.1.0/src/tda_finance/data_preprocessing/preprocess_sp500_crsp.py +149 -0
- tda_finance_mapper-0.1.0/src/tda_finance/experiments/__init__.py +1 -0
- tda_finance_mapper-0.1.0/src/tda_finance/experiments/run_mapper_ph_experiments.py +331 -0
- tda_finance_mapper-0.1.0/src/tda_finance/portfolio/__init__.py +1 -0
- tda_finance_mapper-0.1.0/src/tda_finance/portfolio/backtest_engine.py +520 -0
- tda_finance_mapper-0.1.0/src/tda_finance/tda/__init__.py +1 -0
- tda_finance_mapper-0.1.0/src/tda_finance/tda/mapper_clustering.py +567 -0
- tda_finance_mapper-0.1.0/src/tda_finance/tda/persistence_diagrams.py +219 -0
- tda_finance_mapper-0.1.0/src/tda_finance/tda/persistence_features.py +83 -0
- tda_finance_mapper-0.1.0/src/tda_finance/tda/regime_detection.py +118 -0
- tda_finance_mapper-0.1.0/src/tda_finance_mapper.egg-info/PKG-INFO +299 -0
- tda_finance_mapper-0.1.0/src/tda_finance_mapper.egg-info/SOURCES.txt +21 -0
- tda_finance_mapper-0.1.0/src/tda_finance_mapper.egg-info/dependency_links.txt +1 -0
- tda_finance_mapper-0.1.0/src/tda_finance_mapper.egg-info/requires.txt +22 -0
- tda_finance_mapper-0.1.0/src/tda_finance_mapper.egg-info/top_level.txt +1 -0
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Jaime Corzo Galdó
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
@@ -0,0 +1,299 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: tda-finance-mapper
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Aplicación de análisis topológico de datos a carteras financieras.
|
|
5
|
+
Author-email: Jaime Corzo Galdó <jcggranada04@gmail.com>
|
|
6
|
+
License-Expression: MIT
|
|
7
|
+
Project-URL: Homepage, https://github.com/jaimecrz3/tda-finance-mapper
|
|
8
|
+
Project-URL: Repository, https://github.com/jaimecrz3/tda-finance-mapper
|
|
9
|
+
Project-URL: Documentation, https://github.com/jaimecrz3/tda-finance-mapper
|
|
10
|
+
Project-URL: Issues, https://github.com/jaimecrz3/tda-finance-mapper/issues
|
|
11
|
+
Keywords: topological-data-analysis,finance,mapper,persistent-homology,portfolio-construction
|
|
12
|
+
Classifier: Programming Language :: Python :: 3
|
|
13
|
+
Classifier: Operating System :: OS Independent
|
|
14
|
+
Classifier: Intended Audience :: Science/Research
|
|
15
|
+
Classifier: Topic :: Scientific/Engineering :: Information Analysis
|
|
16
|
+
Requires-Python: >=3.10
|
|
17
|
+
Description-Content-Type: text/markdown
|
|
18
|
+
License-File: LICENSE
|
|
19
|
+
Requires-Dist: numpy
|
|
20
|
+
Requires-Dist: pandas
|
|
21
|
+
Requires-Dist: scikit-learn>=1.2
|
|
22
|
+
Requires-Dist: umap-learn
|
|
23
|
+
Requires-Dist: kmapper
|
|
24
|
+
Requires-Dist: networkx
|
|
25
|
+
Requires-Dist: ripser
|
|
26
|
+
Requires-Dist: persim
|
|
27
|
+
Requires-Dist: gudhi
|
|
28
|
+
Requires-Dist: matplotlib
|
|
29
|
+
Requires-Dist: pyarrow
|
|
30
|
+
Requires-Dist: pandas-datareader
|
|
31
|
+
Provides-Extra: dev
|
|
32
|
+
Requires-Dist: flake8; extra == "dev"
|
|
33
|
+
Requires-Dist: flake8-pyproject; extra == "dev"
|
|
34
|
+
Requires-Dist: pyright; extra == "dev"
|
|
35
|
+
Requires-Dist: pytest; extra == "dev"
|
|
36
|
+
Requires-Dist: build; extra == "dev"
|
|
37
|
+
Requires-Dist: twine; extra == "dev"
|
|
38
|
+
Requires-Dist: sphinx; extra == "dev"
|
|
39
|
+
Requires-Dist: sphinx-rtd-theme; extra == "dev"
|
|
40
|
+
Dynamic: license-file
|
|
41
|
+
|
|
42
|
+
# TDA Finance Mapper
|
|
43
|
+
|
|
44
|
+
`tda-finance-mapper` is a Python package developed as part of an academic project on the application of Topological Data Analysis (TDA) to financial data.
|
|
45
|
+
|
|
46
|
+
The package provides tools to build Mapper-based portfolio strategies, compute persistent-homology regime signals and evaluate the resulting portfolios through causal backtesting.
|
|
47
|
+
|
|
48
|
+
## Overview
|
|
49
|
+
|
|
50
|
+
The project studies whether topological information extracted from financial return windows can be used for:
|
|
51
|
+
|
|
52
|
+
1. market-structure analysis;
|
|
53
|
+
2. regime detection;
|
|
54
|
+
3. portfolio construction;
|
|
55
|
+
4. comparison against a simple equal-weight benchmark.
|
|
56
|
+
|
|
57
|
+
The main implemented models are:
|
|
58
|
+
|
|
59
|
+
- **Mapper portfolio**: assets are represented by recent return vectors, a Mapper graph is built, and the graph structure is transformed into portfolio weights.
|
|
60
|
+
- **Mapper + persistent homology**: Mapper remains the main portfolio construction method, while persistent homology acts as a regime-control signal.
|
|
61
|
+
- **Equal-weight benchmark**: all available assets receive the same weight and are evaluated with the same backtesting protocol.
|
|
62
|
+
|
|
63
|
+
## Disclaimer
|
|
64
|
+
|
|
65
|
+
This project is for academic and research purposes only. It is not financial advice, investment advice or a production trading system. The results are intended to illustrate and evaluate a methodological pipeline, not to recommend real investment decisions.
|
|
66
|
+
|
|
67
|
+
## Installation
|
|
68
|
+
|
|
69
|
+
Clone the repository:
|
|
70
|
+
|
|
71
|
+
```bash
|
|
72
|
+
git clone https://github.com/jaimecrz3/tda-finance-mapper.git
|
|
73
|
+
cd tda-finance-mapper
|
|
74
|
+
```
|
|
75
|
+
|
|
76
|
+
Create and activate a virtual environment:
|
|
77
|
+
|
|
78
|
+
```bash
|
|
79
|
+
python -m venv .venv
|
|
80
|
+
source .venv/bin/activate
|
|
81
|
+
```
|
|
82
|
+
|
|
83
|
+
On Windows:
|
|
84
|
+
|
|
85
|
+
```bash
|
|
86
|
+
.venv\Scripts\activate
|
|
87
|
+
```
|
|
88
|
+
|
|
89
|
+
Install the package in editable mode:
|
|
90
|
+
|
|
91
|
+
```bash
|
|
92
|
+
pip install -r requirements.txt
|
|
93
|
+
```
|
|
94
|
+
|
|
95
|
+
The `requirements.txt` file installs the package with:
|
|
96
|
+
|
|
97
|
+
```txt
|
|
98
|
+
-e .
|
|
99
|
+
```
|
|
100
|
+
|
|
101
|
+
This means that changes made in the source code are immediately available without reinstalling the package.
|
|
102
|
+
|
|
103
|
+
For development tools such as `flake8`, `pytest`, `sphinx`, `build` and `twine`, install the optional development dependencies:
|
|
104
|
+
|
|
105
|
+
```bash
|
|
106
|
+
pip install -e .[dev]
|
|
107
|
+
```
|
|
108
|
+
|
|
109
|
+
## Project structure
|
|
110
|
+
|
|
111
|
+
```text
|
|
112
|
+
tda-finance-mapper/
|
|
113
|
+
├── data/
|
|
114
|
+
├── docs/
|
|
115
|
+
├── results_49_Industry_Portfolios/
|
|
116
|
+
├── results_SP500_CRSP/
|
|
117
|
+
├── scripts/
|
|
118
|
+
├── src/
|
|
119
|
+
│ └── tda_finance/
|
|
120
|
+
│ ├── data_preprocessing/
|
|
121
|
+
│ ├── experiments/
|
|
122
|
+
│ ├── portfolio/
|
|
123
|
+
│ └── tda/
|
|
124
|
+
├── pyproject.toml
|
|
125
|
+
├── requirements.txt
|
|
126
|
+
├── LICENSE
|
|
127
|
+
└── README.md
|
|
128
|
+
```
|
|
129
|
+
|
|
130
|
+
The main package is located under `src/tda_finance`.
|
|
131
|
+
|
|
132
|
+
The `scripts/` folder contains auxiliary scripts used during data preparation. These scripts are not part of the main package API.
|
|
133
|
+
|
|
134
|
+
## Main modules
|
|
135
|
+
|
|
136
|
+
### `tda_finance.tda.mapper_clustering`
|
|
137
|
+
|
|
138
|
+
Builds Mapper graphs from financial price windows and converts Mapper clusters into portfolio weights.
|
|
139
|
+
|
|
140
|
+
### `tda_finance.tda.persistence_diagrams`
|
|
141
|
+
|
|
142
|
+
Computes correlation-based distance matrices and persistent-homology diagrams.
|
|
143
|
+
|
|
144
|
+
### `tda_finance.tda.persistence_features`
|
|
145
|
+
|
|
146
|
+
Extracts summary features from persistence diagrams.
|
|
147
|
+
|
|
148
|
+
### `tda_finance.tda.regime_detection`
|
|
149
|
+
|
|
150
|
+
Computes persistence-landscape norms and detects topological anomalies.
|
|
151
|
+
|
|
152
|
+
### `tda_finance.portfolio.backtest_engine`
|
|
153
|
+
|
|
154
|
+
Runs causal long-only backtests, computes portfolio returns, turnover and performance metrics.
|
|
155
|
+
|
|
156
|
+
### `tda_finance.data_preprocessing`
|
|
157
|
+
|
|
158
|
+
Contains utilities to preprocess Kenneth French 49 Industry Portfolios and S&P 500 CRSP monthly data.
|
|
159
|
+
|
|
160
|
+
## Minimal API example
|
|
161
|
+
|
|
162
|
+
The following example shows the basic use of the package API with a generic price matrix.
|
|
163
|
+
|
|
164
|
+
```python
|
|
165
|
+
import pandas as pd
|
|
166
|
+
|
|
167
|
+
from tda_finance.portfolio.backtest_engine import backtest_tda, perf_summary
|
|
168
|
+
from tda_finance.tda.mapper_clustering import MapperParams
|
|
169
|
+
|
|
170
|
+
prices = pd.read_csv(
|
|
171
|
+
"data/prices.csv",
|
|
172
|
+
index_col=0,
|
|
173
|
+
parse_dates=True,
|
|
174
|
+
)
|
|
175
|
+
|
|
176
|
+
params = MapperParams(
|
|
177
|
+
pca_var=0.80,
|
|
178
|
+
umap_dim=1,
|
|
179
|
+
n_cubes=12,
|
|
180
|
+
perc_overlap=0.25,
|
|
181
|
+
clusterer="haca",
|
|
182
|
+
haca_distance_threshold=0.6,
|
|
183
|
+
haca_linkage="average",
|
|
184
|
+
random_state=1,
|
|
185
|
+
)
|
|
186
|
+
|
|
187
|
+
result = backtest_tda(
|
|
188
|
+
prices=prices,
|
|
189
|
+
lookback_days=60,
|
|
190
|
+
rebalance_days=3,
|
|
191
|
+
params=params,
|
|
192
|
+
tc_bps=5.0,
|
|
193
|
+
use_ph_control=False,
|
|
194
|
+
)
|
|
195
|
+
|
|
196
|
+
metrics = perf_summary(
|
|
197
|
+
result["port_ret"],
|
|
198
|
+
periods_per_year=12,
|
|
199
|
+
)
|
|
200
|
+
|
|
201
|
+
print(metrics)
|
|
202
|
+
```
|
|
203
|
+
|
|
204
|
+
This is only a minimal usage example. The full experimental protocol is implemented in the experiment script described below.
|
|
205
|
+
|
|
206
|
+
## Running the final experiments
|
|
207
|
+
|
|
208
|
+
The final experiments can be run from the project root with:
|
|
209
|
+
|
|
210
|
+
```bash
|
|
211
|
+
python -m tda_finance.experiments.run_mapper_ph_experiments
|
|
212
|
+
```
|
|
213
|
+
|
|
214
|
+
The script compares:
|
|
215
|
+
|
|
216
|
+
1. Mapper;
|
|
217
|
+
2. Mapper with persistent-homology regime control;
|
|
218
|
+
3. equal-weight benchmark.
|
|
219
|
+
|
|
220
|
+
The selected dataset is configured inside the script.
|
|
221
|
+
|
|
222
|
+
## Data preparation
|
|
223
|
+
|
|
224
|
+
Auxiliary scripts used to prepare the S&P 500 CRSP data are located in:
|
|
225
|
+
|
|
226
|
+
```text
|
|
227
|
+
scripts/data_preparation/
|
|
228
|
+
```
|
|
229
|
+
|
|
230
|
+
Example:
|
|
231
|
+
|
|
232
|
+
```bash
|
|
233
|
+
python scripts/data_preparation/prepare_prices_sp500.py
|
|
234
|
+
python scripts/data_preparation/make_monthly_sp500.py
|
|
235
|
+
```
|
|
236
|
+
|
|
237
|
+
These scripts are included to make the data preparation process more transparent, but they are not part of the main package API.
|
|
238
|
+
|
|
239
|
+
## Results
|
|
240
|
+
|
|
241
|
+
The complete experimental results are discussed in the accompanying TFG (Trabajo de Fin de Grado, i.e. bachelor's thesis) report.
|
|
242
|
+
|
|
243
|
+
The repository also stores generated CSV outputs in:
|
|
244
|
+
|
|
245
|
+
```text
|
|
246
|
+
results_49_Industry_Portfolios/
|
|
247
|
+
results_SP500_CRSP/
|
|
248
|
+
```
|
|
249
|
+
|
|
250
|
+
These files include summary metrics, diagnostic outputs and NAV curves used in the experimental analysis.
|
|
251
|
+
|
|
252
|
+
## Reproducibility
|
|
253
|
+
|
|
254
|
+
The experiments use fixed random seeds where stochastic methods are involved, especially in dimensionality reduction.
|
|
255
|
+
|
|
256
|
+
For exact reproducibility of the Python environment used to run the experiments, a lock file can be generated with:
|
|
257
|
+
|
|
258
|
+
```bash
|
|
259
|
+
pip freeze > requirements-lock.txt
|
|
260
|
+
```
|
|
261
|
+
|
|
262
|
+
This file records the exact package versions installed in the environment. It is mainly useful for reproducing the results of the TFG, not for publishing the package to PyPI.
|
|
263
|
+
|
|
264
|
+
## Development and packaging checks
|
|
265
|
+
|
|
266
|
+
The following commands are useful during development. They require the optional development dependencies:
|
|
267
|
+
|
|
268
|
+
```bash
|
|
269
|
+
pip install -e .[dev]
|
|
270
|
+
```
|
|
271
|
+
|
|
272
|
+
Run style checks:
|
|
273
|
+
|
|
274
|
+
```bash
|
|
275
|
+
python -m flake8 src scripts
|
|
276
|
+
```
|
|
277
|
+
|
|
278
|
+
Run a basic import check:
|
|
279
|
+
|
|
280
|
+
```bash
|
|
281
|
+
python -c "from tda_finance.tda.mapper_clustering import MapperParams; print(MapperParams())"
|
|
282
|
+
```
|
|
283
|
+
|
|
284
|
+
Build the package locally:
|
|
285
|
+
|
|
286
|
+
```bash
|
|
287
|
+
python -m build
|
|
288
|
+
```
|
|
289
|
+
|
|
290
|
+
Check the distribution before uploading to PyPI:
|
|
291
|
+
|
|
292
|
+
```bash
|
|
293
|
+
python -m twine check dist/*
|
|
294
|
+
```
|
|
295
|
+
|
|
296
|
+
## License
|
|
297
|
+
|
|
298
|
+
This project is released under the MIT License. See the `LICENSE` file for details.
|
|
299
|
+
|
|
@@ -0,0 +1,258 @@
|
|
|
1
|
+
# TDA Finance Mapper
|
|
2
|
+
|
|
3
|
+
`tda-finance-mapper` is a Python package developed as part of an academic project on the application of Topological Data Analysis (TDA) to financial data.
|
|
4
|
+
|
|
5
|
+
The package provides tools to build Mapper-based portfolio strategies, compute persistent-homology regime signals and evaluate the resulting portfolios through causal backtesting.
|
|
6
|
+
|
|
7
|
+
## Overview
|
|
8
|
+
|
|
9
|
+
The project studies whether topological information extracted from financial return windows can be used for:
|
|
10
|
+
|
|
11
|
+
1. market-structure analysis;
|
|
12
|
+
2. regime detection;
|
|
13
|
+
3. portfolio construction;
|
|
14
|
+
4. comparison against a simple equal-weight benchmark.
|
|
15
|
+
|
|
16
|
+
The main implemented models are:
|
|
17
|
+
|
|
18
|
+
- **Mapper portfolio**: assets are represented by recent return vectors, a Mapper graph is built, and the graph structure is transformed into portfolio weights.
|
|
19
|
+
- **Mapper + persistent homology**: Mapper remains the main portfolio construction method, while persistent homology acts as a regime-control signal.
|
|
20
|
+
- **Equal-weight benchmark**: all available assets receive the same weight and are evaluated with the same backtesting protocol.
|
|
21
|
+
|
|
22
|
+
## Disclaimer
|
|
23
|
+
|
|
24
|
+
This project is for academic and research purposes only. It is not financial advice, investment advice or a production trading system. The results are intended to illustrate and evaluate a methodological pipeline, not to recommend real investment decisions.
|
|
25
|
+
|
|
26
|
+
## Installation
|
|
27
|
+
|
|
28
|
+
Clone the repository:
|
|
29
|
+
|
|
30
|
+
```bash
|
|
31
|
+
git clone https://github.com/jaimecrz3/tda-finance-mapper.git
|
|
32
|
+
cd tda-finance-mapper
|
|
33
|
+
```
|
|
34
|
+
|
|
35
|
+
Create and activate a virtual environment:
|
|
36
|
+
|
|
37
|
+
```bash
|
|
38
|
+
python -m venv .venv
|
|
39
|
+
source .venv/bin/activate
|
|
40
|
+
```
|
|
41
|
+
|
|
42
|
+
On Windows:
|
|
43
|
+
|
|
44
|
+
```bash
|
|
45
|
+
.venv\Scripts\activate
|
|
46
|
+
```
|
|
47
|
+
|
|
48
|
+
Install the package in editable mode:
|
|
49
|
+
|
|
50
|
+
```bash
|
|
51
|
+
pip install -r requirements.txt
|
|
52
|
+
```
|
|
53
|
+
|
|
54
|
+
The `requirements.txt` file installs the package with:
|
|
55
|
+
|
|
56
|
+
```txt
|
|
57
|
+
-e .
|
|
58
|
+
```
|
|
59
|
+
|
|
60
|
+
This means that changes made in the source code are immediately available without reinstalling the package.
|
|
61
|
+
|
|
62
|
+
For development tools such as `flake8`, `pytest`, `sphinx`, `build` and `twine`, install the optional development dependencies:
|
|
63
|
+
|
|
64
|
+
```bash
|
|
65
|
+
pip install -e .[dev]
|
|
66
|
+
```
|
|
67
|
+
|
|
68
|
+
## Project structure
|
|
69
|
+
|
|
70
|
+
```text
|
|
71
|
+
tda-finance-mapper/
|
|
72
|
+
├── data/
|
|
73
|
+
├── docs/
|
|
74
|
+
├── results_49_Industry_Portfolios/
|
|
75
|
+
├── results_SP500_CRSP/
|
|
76
|
+
├── scripts/
|
|
77
|
+
├── src/
|
|
78
|
+
│ └── tda_finance/
|
|
79
|
+
│ ├── data_preprocessing/
|
|
80
|
+
│ ├── experiments/
|
|
81
|
+
│ ├── portfolio/
|
|
82
|
+
│ └── tda/
|
|
83
|
+
├── pyproject.toml
|
|
84
|
+
├── requirements.txt
|
|
85
|
+
├── LICENSE
|
|
86
|
+
└── README.md
|
|
87
|
+
```
|
|
88
|
+
|
|
89
|
+
The main package is located under `src/tda_finance`.
|
|
90
|
+
|
|
91
|
+
The `scripts/` folder contains auxiliary scripts used during data preparation. These scripts are not part of the main package API.
|
|
92
|
+
|
|
93
|
+
## Main modules
|
|
94
|
+
|
|
95
|
+
### `tda_finance.tda.mapper_clustering`
|
|
96
|
+
|
|
97
|
+
Builds Mapper graphs from financial price windows and converts Mapper clusters into portfolio weights.
|
|
98
|
+
|
|
99
|
+
### `tda_finance.tda.persistence_diagrams`
|
|
100
|
+
|
|
101
|
+
Computes correlation-based distance matrices and persistent-homology diagrams.
|
|
102
|
+
|
|
103
|
+
### `tda_finance.tda.persistence_features`
|
|
104
|
+
|
|
105
|
+
Extracts summary features from persistence diagrams.
|
|
106
|
+
|
|
107
|
+
### `tda_finance.tda.regime_detection`
|
|
108
|
+
|
|
109
|
+
Computes persistence-landscape norms and detects topological anomalies.
|
|
110
|
+
|
|
111
|
+
### `tda_finance.portfolio.backtest_engine`
|
|
112
|
+
|
|
113
|
+
Runs causal long-only backtests, computes portfolio returns, turnover and performance metrics.
|
|
114
|
+
|
|
115
|
+
### `tda_finance.data_preprocessing`
|
|
116
|
+
|
|
117
|
+
Contains utilities to preprocess Kenneth French 49 Industry Portfolios and S&P 500 CRSP monthly data.
|
|
118
|
+
|
|
119
|
+
## Minimal API example
|
|
120
|
+
|
|
121
|
+
The following example shows the basic use of the package API with a generic price matrix.
|
|
122
|
+
|
|
123
|
+
```python
|
|
124
|
+
import pandas as pd
|
|
125
|
+
|
|
126
|
+
from tda_finance.portfolio.backtest_engine import backtest_tda, perf_summary
|
|
127
|
+
from tda_finance.tda.mapper_clustering import MapperParams
|
|
128
|
+
|
|
129
|
+
prices = pd.read_csv(
|
|
130
|
+
"data/prices.csv",
|
|
131
|
+
index_col=0,
|
|
132
|
+
parse_dates=True,
|
|
133
|
+
)
|
|
134
|
+
|
|
135
|
+
params = MapperParams(
|
|
136
|
+
pca_var=0.80,
|
|
137
|
+
umap_dim=1,
|
|
138
|
+
n_cubes=12,
|
|
139
|
+
perc_overlap=0.25,
|
|
140
|
+
clusterer="haca",
|
|
141
|
+
haca_distance_threshold=0.6,
|
|
142
|
+
haca_linkage="average",
|
|
143
|
+
random_state=1,
|
|
144
|
+
)
|
|
145
|
+
|
|
146
|
+
result = backtest_tda(
|
|
147
|
+
prices=prices,
|
|
148
|
+
lookback_days=60,
|
|
149
|
+
rebalance_days=3,
|
|
150
|
+
params=params,
|
|
151
|
+
tc_bps=5.0,
|
|
152
|
+
use_ph_control=False,
|
|
153
|
+
)
|
|
154
|
+
|
|
155
|
+
metrics = perf_summary(
|
|
156
|
+
result["port_ret"],
|
|
157
|
+
periods_per_year=12,
|
|
158
|
+
)
|
|
159
|
+
|
|
160
|
+
print(metrics)
|
|
161
|
+
```
|
|
162
|
+
|
|
163
|
+
This is only a minimal usage example. The full experimental protocol is implemented in the experiment script described below.
|
|
164
|
+
|
|
165
|
+
## Running the final experiments
|
|
166
|
+
|
|
167
|
+
The final experiments can be run from the project root with:
|
|
168
|
+
|
|
169
|
+
```bash
|
|
170
|
+
python -m tda_finance.experiments.run_mapper_ph_experiments
|
|
171
|
+
```
|
|
172
|
+
|
|
173
|
+
The script compares:
|
|
174
|
+
|
|
175
|
+
1. Mapper;
|
|
176
|
+
2. Mapper with persistent-homology regime control;
|
|
177
|
+
3. equal-weight benchmark.
|
|
178
|
+
|
|
179
|
+
The selected dataset is configured inside the script.
|
|
180
|
+
|
|
181
|
+
## Data preparation
|
|
182
|
+
|
|
183
|
+
Auxiliary scripts used to prepare the S&P 500 CRSP data are located in:
|
|
184
|
+
|
|
185
|
+
```text
|
|
186
|
+
scripts/data_preparation/
|
|
187
|
+
```
|
|
188
|
+
|
|
189
|
+
Example:
|
|
190
|
+
|
|
191
|
+
```bash
|
|
192
|
+
python scripts/data_preparation/prepare_prices_sp500.py
|
|
193
|
+
python scripts/data_preparation/make_monthly_sp500.py
|
|
194
|
+
```
|
|
195
|
+
|
|
196
|
+
These scripts are included to make the data preparation process more transparent, but they are not part of the main package API.
|
|
197
|
+
|
|
198
|
+
## Results
|
|
199
|
+
|
|
200
|
+
The complete experimental results are discussed in the accompanying TFG (Trabajo de Fin de Grado, i.e. bachelor's thesis) report.
|
|
201
|
+
|
|
202
|
+
The repository also stores generated CSV outputs in:
|
|
203
|
+
|
|
204
|
+
```text
|
|
205
|
+
results_49_Industry_Portfolios/
|
|
206
|
+
results_SP500_CRSP/
|
|
207
|
+
```
|
|
208
|
+
|
|
209
|
+
These files include summary metrics, diagnostic outputs and NAV curves used in the experimental analysis.
|
|
210
|
+
|
|
211
|
+
## Reproducibility
|
|
212
|
+
|
|
213
|
+
The experiments use fixed random seeds where stochastic methods are involved, especially in dimensionality reduction.
|
|
214
|
+
|
|
215
|
+
For exact reproducibility of the Python environment used to run the experiments, a lock file can be generated with:
|
|
216
|
+
|
|
217
|
+
```bash
|
|
218
|
+
pip freeze > requirements-lock.txt
|
|
219
|
+
```
|
|
220
|
+
|
|
221
|
+
This file records the exact package versions installed in the environment. It is mainly useful for reproducing the results of the TFG, not for publishing the package to PyPI.
|
|
222
|
+
|
|
223
|
+
## Development and packaging checks
|
|
224
|
+
|
|
225
|
+
The following commands are useful during development. They require the optional development dependencies:
|
|
226
|
+
|
|
227
|
+
```bash
|
|
228
|
+
pip install -e .[dev]
|
|
229
|
+
```
|
|
230
|
+
|
|
231
|
+
Run style checks:
|
|
232
|
+
|
|
233
|
+
```bash
|
|
234
|
+
python -m flake8 src scripts
|
|
235
|
+
```
|
|
236
|
+
|
|
237
|
+
Run a basic import check:
|
|
238
|
+
|
|
239
|
+
```bash
|
|
240
|
+
python -c "from tda_finance.tda.mapper_clustering import MapperParams; print(MapperParams())"
|
|
241
|
+
```
|
|
242
|
+
|
|
243
|
+
Build the package locally:
|
|
244
|
+
|
|
245
|
+
```bash
|
|
246
|
+
python -m build
|
|
247
|
+
```
|
|
248
|
+
|
|
249
|
+
Check the distribution before uploading to PyPI:
|
|
250
|
+
|
|
251
|
+
```bash
|
|
252
|
+
python -m twine check dist/*
|
|
253
|
+
```
|
|
254
|
+
|
|
255
|
+
## License
|
|
256
|
+
|
|
257
|
+
This project is released under the MIT License. See the `LICENSE` file for details.
|
|
258
|
+
|
|
@@ -0,0 +1,85 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["setuptools>=77.0.3", "wheel"]
|
|
3
|
+
build-backend = "setuptools.build_meta"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
name = "tda-finance-mapper"
|
|
7
|
+
version = "0.1.0"
|
|
8
|
+
description = "Aplicación de análisis topológico de datos a carteras financieras."
|
|
9
|
+
readme = "README.md"
|
|
10
|
+
requires-python = ">=3.10"
|
|
11
|
+
authors = [
|
|
12
|
+
{ name = "Jaime Corzo Galdó", email = "jcggranada04@gmail.com" }
|
|
13
|
+
]
|
|
14
|
+
license = "MIT"
|
|
15
|
+
license-files = ["LICENSE"]
|
|
16
|
+
keywords = [
|
|
17
|
+
"topological-data-analysis",
|
|
18
|
+
"finance",
|
|
19
|
+
"mapper",
|
|
20
|
+
"persistent-homology",
|
|
21
|
+
"portfolio-construction",
|
|
22
|
+
]
|
|
23
|
+
classifiers = [
|
|
24
|
+
"Programming Language :: Python :: 3",
|
|
25
|
+
"Operating System :: OS Independent",
|
|
26
|
+
"Intended Audience :: Science/Research",
|
|
27
|
+
"Topic :: Scientific/Engineering :: Information Analysis",
|
|
28
|
+
]
|
|
29
|
+
|
|
30
|
+
dependencies = [
|
|
31
|
+
"numpy",
|
|
32
|
+
"pandas",
|
|
33
|
+
"scikit-learn>=1.2",
|
|
34
|
+
"umap-learn",
|
|
35
|
+
"kmapper",
|
|
36
|
+
"networkx",
|
|
37
|
+
"ripser",
|
|
38
|
+
"persim",
|
|
39
|
+
"gudhi",
|
|
40
|
+
"matplotlib",
|
|
41
|
+
"pyarrow",
|
|
42
|
+
"pandas-datareader",
|
|
43
|
+
]
|
|
44
|
+
|
|
45
|
+
[project.optional-dependencies]
|
|
46
|
+
dev = [
|
|
47
|
+
"flake8",
|
|
48
|
+
"flake8-pyproject",
|
|
49
|
+
"pyright",
|
|
50
|
+
"pytest",
|
|
51
|
+
"build",
|
|
52
|
+
"twine",
|
|
53
|
+
"sphinx",
|
|
54
|
+
"sphinx-rtd-theme",
|
|
55
|
+
]
|
|
56
|
+
|
|
57
|
+
[project.urls]
|
|
58
|
+
Homepage = "https://github.com/jaimecrz3/tda-finance-mapper"
|
|
59
|
+
Repository = "https://github.com/jaimecrz3/tda-finance-mapper"
|
|
60
|
+
Documentation = "https://github.com/jaimecrz3/tda-finance-mapper"
|
|
61
|
+
Issues = "https://github.com/jaimecrz3/tda-finance-mapper/issues"
|
|
62
|
+
|
|
63
|
+
[tool.setuptools.packages.find]
|
|
64
|
+
where = ["src"]
|
|
65
|
+
|
|
66
|
+
[tool.pyright]
|
|
67
|
+
include = ["src"]
|
|
68
|
+
typeCheckingMode = "basic"
|
|
69
|
+
pythonVersion = "3.12"
|
|
70
|
+
|
|
71
|
+
[tool.flake8]
|
|
72
|
+
max-line-length = 119
|
|
73
|
+
extend-ignore = ["E203", "W503"]
|
|
74
|
+
exclude = [
|
|
75
|
+
".git",
|
|
76
|
+
"__pycache__",
|
|
77
|
+
"build",
|
|
78
|
+
"dist",
|
|
79
|
+
".venv",
|
|
80
|
+
"venv",
|
|
81
|
+
"env",
|
|
82
|
+
"docs/_build",
|
|
83
|
+
"*.egg-info"
|
|
84
|
+
]
|
|
85
|
+
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
"""Data preprocessing utilities for financial datasets."""
|