tra-algorithm 1.0.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- tra_algorithm-1.0.0/LICENSE +21 -0
- tra_algorithm-1.0.0/MANIFEST.in +18 -0
- tra_algorithm-1.0.0/PKG-INFO +295 -0
- tra_algorithm-1.0.0/README.md +253 -0
- tra_algorithm-1.0.0/docs/CHANGELOG.md +98 -0
- tra_algorithm-1.0.0/docs/README.md +0 -0
- tra_algorithm-1.0.0/docs/examples/basic_usage.py +213 -0
- tra_algorithm-1.0.0/pyproject.toml +66 -0
- tra_algorithm-1.0.0/requirements.txt +18 -0
- tra_algorithm-1.0.0/setup.cfg +109 -0
- tra_algorithm-1.0.0/setup.py +66 -0
- tra_algorithm-1.0.0/tests/__init__.py +0 -0
- tra_algorithm-1.0.0/tests/test_core.py +145 -0
- tra_algorithm-1.0.0/tests/test_utils.py +80 -0
- tra_algorithm-1.0.0/tra_algorithm/__init__.py +98 -0
- tra_algorithm-1.0.0/tra_algorithm/core.py +1067 -0
- tra_algorithm-1.0.0/tra_algorithm/examples.py +882 -0
- tra_algorithm-1.0.0/tra_algorithm/utils.py +439 -0
- tra_algorithm-1.0.0/tra_algorithm/version.py +10 -0
- tra_algorithm-1.0.0/tra_algorithm.egg-info/PKG-INFO +295 -0
- tra_algorithm-1.0.0/tra_algorithm.egg-info/SOURCES.txt +24 -0
- tra_algorithm-1.0.0/tra_algorithm.egg-info/dependency_links.txt +1 -0
- tra_algorithm-1.0.0/tra_algorithm.egg-info/not-zip-safe +1 -0
- tra_algorithm-1.0.0/tra_algorithm.egg-info/requires.txt +14 -0
- tra_algorithm-1.0.0/tra_algorithm.egg-info/top_level.txt +2 -0
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2025 Dasari Ranga Eswar
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
include README.md
|
|
2
|
+
include LICENSE
|
|
3
|
+
include docs/CHANGELOG.md
|
|
4
|
+
include requirements.txt
|
|
5
|
+
include setup.cfg
|
|
6
|
+
include pyproject.toml
|
|
7
|
+
recursive-include tra_algorithm *.py
|
|
8
|
+
recursive-include docs *.md *.rst *.py *.txt
|
|
9
|
+
recursive-include tests *.py
|
|
10
|
+
recursive-exclude * __pycache__
|
|
11
|
+
recursive-exclude * *.py[co]
|
|
12
|
+
recursive-exclude * *.so
|
|
13
|
+
recursive-exclude * .DS_Store
|
|
14
|
+
global-exclude *.pyc
|
|
15
|
+
global-exclude *.pyo
|
|
16
|
+
global-exclude *~
|
|
17
|
+
global-exclude *.orig
|
|
18
|
+
global-exclude *.rej
|
|
@@ -0,0 +1,295 @@
|
|
|
1
|
+
Metadata-Version: 2.1
|
|
2
|
+
Name: tra-algorithm
|
|
3
|
+
Version: 1.0.0
|
|
4
|
+
Summary: Track/Rail Algorithm (TRA) - A novel machine learning algorithm for dynamic model selection
|
|
5
|
+
Home-page: https://github.com/eswaroy/tra_algorithm
|
|
6
|
+
Author: Dasari Ranga Eswar
|
|
7
|
+
Author-email: TRA Algorithm Team <contact@tra-algorithm.com>
|
|
8
|
+
License: MIT
|
|
9
|
+
Project-URL: Homepage, https://github.com/eswaroy/tra_algorithm
|
|
10
|
+
Project-URL: Bug Reports, https://github.com/eswaroy/tra_algorithm/issues
|
|
11
|
+
Project-URL: Source, https://github.com/eswaroy/tra_algorithm
|
|
12
|
+
Project-URL: Documentation, https://tra_algorithm.readthedocs.io/
|
|
13
|
+
Classifier: Development Status :: 4 - Beta
|
|
14
|
+
Classifier: Intended Audience :: Developers
|
|
15
|
+
Classifier: Intended Audience :: Science/Research
|
|
16
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
17
|
+
Classifier: Operating System :: OS Independent
|
|
18
|
+
Classifier: Programming Language :: Python :: 3
|
|
19
|
+
Classifier: Programming Language :: Python :: 3.7
|
|
20
|
+
Classifier: Programming Language :: Python :: 3.8
|
|
21
|
+
Classifier: Programming Language :: Python :: 3.9
|
|
22
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
23
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
24
|
+
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
|
25
|
+
Classifier: Topic :: Software Development :: Libraries :: Python Modules
|
|
26
|
+
Requires-Python: >=3.7
|
|
27
|
+
Description-Content-Type: text/markdown
|
|
28
|
+
License-File: LICENSE
|
|
29
|
+
Requires-Dist: numpy>=1.19.0
|
|
30
|
+
Requires-Dist: pandas>=1.1.0
|
|
31
|
+
Requires-Dist: scikit-learn>=1.0.0
|
|
32
|
+
Requires-Dist: matplotlib>=3.3.0
|
|
33
|
+
Requires-Dist: joblib>=1.0.0
|
|
34
|
+
Requires-Dist: networkx>=2.5
|
|
35
|
+
Provides-Extra: dev
|
|
36
|
+
Requires-Dist: pytest>=6.0; extra == "dev"
|
|
37
|
+
Requires-Dist: pytest-cov>=2.10; extra == "dev"
|
|
38
|
+
Requires-Dist: black>=21.0; extra == "dev"
|
|
39
|
+
Requires-Dist: flake8>=3.8; extra == "dev"
|
|
40
|
+
Requires-Dist: sphinx>=4.0; extra == "dev"
|
|
41
|
+
Requires-Dist: sphinx-rtd-theme>=0.5; extra == "dev"
|
|
42
|
+
|
|
43
|
+
# TRA Algorithm - Track/Rail Algorithm
|
|
44
|
+
|
|
45
|
+
[PyPI version](https://badge.fury.io/py/tra-algorithm)
|
|
46
|
+
[Python versions](https://pypi.org/project/tra-algorithm/)
|
|
47
|
+
[License: MIT](https://opensource.org/licenses/MIT)
|
|
48
|
+
[Build Status](https://github.com/eswaroy/tra_algorithm/actions)
|
|
49
|
+
[Coverage](https://codecov.io/gh/eswaroy/tra_algorithm)
|
|
50
|
+
|
|
51
|
+
## Overview
|
|
52
|
+
|
|
53
|
+
The **Track/Rail Algorithm (TRA)** is a novel ensemble machine learning method that dynamically routes data through specialized "tracks" based on signal conditions. Unlike traditional ensemble methods that combine predictions, TRA creates multiple specialized models (tracks) and intelligently switches between them during prediction based on real-time signal evaluation.
|
|
54
|
+
|
|
55
|
+
## Key Features
|
|
56
|
+
|
|
57
|
+
- 🚄 **Dynamic Track Switching**: Intelligent routing of data through specialized models
|
|
58
|
+
- ⚡ **Parallel Processing**: Optimized signal evaluation with concurrent processing
|
|
59
|
+
- 🎯 **Adaptive Learning**: Self-optimizing parameters based on performance feedback
|
|
60
|
+
- 🔧 **Memory Optimization**: Automatic pruning of underused tracks
|
|
61
|
+
- 📊 **Rich Visualization**: Comprehensive model structure and performance visualization
|
|
62
|
+
- 🧪 **Dual Task Support**: Both classification and regression tasks
|
|
63
|
+
- 📈 **Performance Monitoring**: Detailed statistics and reporting
|
|
64
|
+
|
|
65
|
+
## Installation
|
|
66
|
+
|
|
67
|
+
Install TRA Algorithm using pip:
|
|
68
|
+
|
|
69
|
+
```bash
|
|
70
|
+
pip install tra-algorithm
|
|
71
|
+
```
|
|
72
|
+
|
|
73
|
+
For development installation:
|
|
74
|
+
|
|
75
|
+
```bash
|
|
76
|
+
git clone https://github.com/eswaroy/tra_algorithm.git
|
|
77
|
+
cd tra_algorithm
|
|
78
|
+
pip install -e ".[dev]"
|
|
79
|
+
```
|
|
80
|
+
|
|
81
|
+
## Quick Start
|
|
82
|
+
|
|
83
|
+
### Classification Example
|
|
84
|
+
|
|
85
|
+
```python
|
|
86
|
+
from tra_algorithm import OptimizedTRA
|
|
87
|
+
from sklearn.datasets import make_classification
|
|
88
|
+
from sklearn.model_selection import train_test_split
|
|
89
|
+
|
|
90
|
+
# Create sample data
|
|
91
|
+
X, y = make_classification(n_samples=1000, n_features=20, n_classes=3, random_state=42)
|
|
92
|
+
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
|
|
93
|
+
|
|
94
|
+
# Initialize and train TRA
|
|
95
|
+
tra = OptimizedTRA(
|
|
96
|
+
task_type="classification",
|
|
97
|
+
n_tracks=5,
|
|
98
|
+
random_state=42,
|
|
99
|
+
parallel_signals=True,
|
|
100
|
+
enable_track_pruning=True
|
|
101
|
+
)
|
|
102
|
+
|
|
103
|
+
tra.fit(X_train, y_train)
|
|
104
|
+
|
|
105
|
+
# Make predictions
|
|
106
|
+
y_pred = tra.predict(X_test)
|
|
107
|
+
y_proba = tra.predict_proba(X_test)
|
|
108
|
+
|
|
109
|
+
# Evaluate performance
|
|
110
|
+
accuracy = tra.score(X_test, y_test)
|
|
111
|
+
print(f"Accuracy: {accuracy:.4f}")
|
|
112
|
+
```
|
|
113
|
+
|
|
114
|
+
### Regression Example
|
|
115
|
+
|
|
116
|
+
```python
|
|
117
|
+
from tra_algorithm import OptimizedTRA
|
|
118
|
+
from sklearn.datasets import make_regression
from sklearn.model_selection import train_test_split
|
|
119
|
+
|
|
120
|
+
# Create sample data
|
|
121
|
+
X, y = make_regression(n_samples=1000, n_features=15, random_state=42)
|
|
122
|
+
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
|
|
123
|
+
|
|
124
|
+
# Initialize and train TRA for regression
|
|
125
|
+
tra = OptimizedTRA(
|
|
126
|
+
task_type="regression",
|
|
127
|
+
n_tracks=4,
|
|
128
|
+
signal_threshold=0.15,
|
|
129
|
+
feature_selection=True
|
|
130
|
+
)
|
|
131
|
+
|
|
132
|
+
tra.fit(X_train, y_train)
|
|
133
|
+
y_pred = tra.predict(X_test)
|
|
134
|
+
|
|
135
|
+
# Get performance metrics
|
|
136
|
+
mse_score = -tra.score(X_test, y_test) # Negative MSE
|
|
137
|
+
print(f"MSE: {mse_score:.4f}")
|
|
138
|
+
```
|
|
139
|
+
|
|
140
|
+
## Advanced Features
|
|
141
|
+
|
|
142
|
+
### Model Visualization
|
|
143
|
+
|
|
144
|
+
```python
|
|
145
|
+
# Visualize the TRA structure
|
|
146
|
+
tra.visualize("tra_structure.png", figsize=(12, 8))
|
|
147
|
+
|
|
148
|
+
# Get detailed performance report
|
|
149
|
+
print(tra.get_performance_report())
|
|
150
|
+
|
|
151
|
+
# Get track statistics
|
|
152
|
+
stats = tra.get_track_statistics()
|
|
153
|
+
```
|
|
154
|
+
|
|
155
|
+
### Parameter Optimization
|
|
156
|
+
|
|
157
|
+
```python
|
|
158
|
+
# Optimize parameters using validation data
|
|
159
|
+
X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.2)
|
|
160
|
+
tra.fit(X_train, y_train)
|
|
161
|
+
optimization_results = tra.optimize_parameters(X_val, y_val)
|
|
162
|
+
```
|
|
163
|
+
|
|
164
|
+
### Model Persistence
|
|
165
|
+
|
|
166
|
+
```python
|
|
167
|
+
# Save and load models
|
|
168
|
+
tra.save_model("my_tra_model.joblib")
|
|
169
|
+
loaded_tra = OptimizedTRA.load_model("my_tra_model.joblib")
|
|
170
|
+
```
|
|
171
|
+
|
|
172
|
+
## Algorithm Details
|
|
173
|
+
|
|
174
|
+
### How TRA Works
|
|
175
|
+
|
|
176
|
+
1. **Track Creation**: Multiple specialized models (tracks) are trained on different bootstrap samples
|
|
177
|
+
2. **Signal Generation**: Signals are created between tracks to detect when switching is beneficial
|
|
178
|
+
3. **Dynamic Routing**: During prediction, data is routed through tracks based on signal evaluation
|
|
179
|
+
4. **Performance Optimization**: Tracks and signals are continuously monitored and optimized
|
|
180
|
+
|
|
181
|
+
### Key Components
|
|
182
|
+
|
|
183
|
+
- **Tracks**: Specialized models trained on different data subsets
|
|
184
|
+
- **Signals**: Conditions that trigger switching between tracks
|
|
185
|
+
- **Records**: Individual data points with routing history
|
|
186
|
+
- **Enhanced Signal Conditions**: Advanced switching logic with regression optimization
|
|
187
|
+
|
|
188
|
+
## Parameters
|
|
189
|
+
|
|
190
|
+
### Main Parameters
|
|
191
|
+
|
|
192
|
+
- `task_type`: "classification" or "regression"
|
|
193
|
+
- `n_tracks`: Number of specialized tracks to create (default: 3)
|
|
194
|
+
- `signal_threshold`: Threshold for track switching (default: 0.1)
|
|
195
|
+
- `parallel_signals`: Enable parallel signal evaluation (default: True)
|
|
196
|
+
- `enable_track_pruning`: Enable automatic track pruning (default: True)
|
|
197
|
+
- `feature_selection`: Enable automatic feature selection (default: True)
|
|
198
|
+
- `handle_imbalanced`: Handle class imbalance (classification only, default: True)
|
|
199
|
+
|
|
200
|
+
### Performance Parameters
|
|
201
|
+
|
|
202
|
+
- `n_estimators`: Number of estimators per track (default: 50)
|
|
203
|
+
- `max_depth`: Maximum depth of track estimators (default: 6)
|
|
204
|
+
- `max_workers`: Maximum parallel workers (default: 4)
|
|
205
|
+
- `pruning_interval`: Interval for track pruning (default: 100)
|
|
206
|
+
|
|
207
|
+
## Performance Comparison
|
|
208
|
+
|
|
209
|
+
TRA has been tested against standard ensemble methods and shows competitive performance with additional benefits:
|
|
210
|
+
|
|
211
|
+
- **Adaptability**: Dynamically adjusts to data patterns
|
|
212
|
+
- **Interpretability**: Clear visualization of decision paths
|
|
213
|
+
- **Efficiency**: Optimized memory usage through track pruning
|
|
214
|
+
- **Robustness**: Handles both classification and regression effectively
|
|
215
|
+
|
|
216
|
+
## Requirements
|
|
217
|
+
|
|
218
|
+
- Python >= 3.7
|
|
219
|
+
- numpy >= 1.19.0
|
|
220
|
+
- pandas >= 1.1.0
|
|
221
|
+
- scikit-learn >= 1.0.0
|
|
222
|
+
- matplotlib >= 3.3.0
|
|
223
|
+
- joblib >= 1.0.0
|
|
224
|
+
- networkx >= 2.5 (for visualization)
|
|
225
|
+
|
|
226
|
+
## Contributing
|
|
227
|
+
|
|
228
|
+
We welcome contributions! Please see our [Contributing Guidelines](CONTRIBUTING.md) for details.
|
|
229
|
+
|
|
230
|
+
1. Fork the repository
|
|
231
|
+
2. Create a feature branch (`git checkout -b feature/amazing-feature`)
|
|
232
|
+
3. Commit your changes (`git commit -m 'Add amazing feature'`)
|
|
233
|
+
4. Push to the branch (`git push origin feature/amazing-feature`)
|
|
234
|
+
5. Open a Pull Request
|
|
235
|
+
|
|
236
|
+
## Testing
|
|
237
|
+
|
|
238
|
+
Run tests using pytest:
|
|
239
|
+
|
|
240
|
+
```bash
|
|
241
|
+
# Run all tests
|
|
242
|
+
pytest
|
|
243
|
+
|
|
244
|
+
# Run with coverage
|
|
245
|
+
pytest --cov=tra_algorithm --cov-report=html
|
|
246
|
+
|
|
247
|
+
# Run specific test file
|
|
248
|
+
pytest tests/test_core.py
|
|
249
|
+
```
|
|
250
|
+
|
|
251
|
+
## Documentation
|
|
252
|
+
|
|
253
|
+
Detailed documentation is available in the `docs/` directory. Build documentation locally:
|
|
254
|
+
|
|
255
|
+
```bash
|
|
256
|
+
cd docs
|
|
257
|
+
make html
|
|
258
|
+
```
|
|
259
|
+
|
|
260
|
+
## Changelog
|
|
261
|
+
|
|
262
|
+
See [CHANGELOG.md](docs/CHANGELOG.md) for a history of changes.
|
|
263
|
+
|
|
264
|
+
## License
|
|
265
|
+
|
|
266
|
+
This project is licensed under the MIT License - see the [LICENSE](LICENSE) file for details.
|
|
267
|
+
|
|
268
|
+
## Citation
|
|
269
|
+
|
|
270
|
+
If you use TRA Algorithm in your research, please cite:
|
|
271
|
+
|
|
272
|
+
```bibtex
|
|
273
|
+
@software{tra_algorithm,
|
|
274
|
+
title={TRA Algorithm: Track/Rail Algorithm for Dynamic Ensemble Learning},
|
|
275
|
+
author={TRA Algorithm Team},
|
|
276
|
+
year={2024},
|
|
277
|
+
url={https://github.com/eswaroy/tra_algorithm}
|
|
278
|
+
}
|
|
279
|
+
```
|
|
280
|
+
|
|
281
|
+
## Support
|
|
282
|
+
|
|
283
|
+
- 📧 Email: contact@tra-algorithm.com
|
|
284
|
+
- 🐛 Issues: [GitHub Issues](https://github.com/eswaroy/tra_algorithm/issues)
|
|
285
|
+
- 💬 Discussions: [GitHub Discussions](https://github.com/eswaroy/tra_algorithm/discussions)
|
|
286
|
+
|
|
287
|
+
## Acknowledgments
|
|
288
|
+
|
|
289
|
+
- Built on top of scikit-learn
|
|
290
|
+
- Inspired by ensemble learning research
|
|
291
|
+
- Thanks to all contributors and users
|
|
292
|
+
|
|
293
|
+
---
|
|
294
|
+
|
|
295
|
+
**Made with ❤️ by the TRA Algorithm Team**
|
|
@@ -0,0 +1,253 @@
|
|
|
1
|
+
# TRA Algorithm - Track/Rail Algorithm
|
|
2
|
+
|
|
3
|
+
[PyPI version](https://badge.fury.io/py/tra-algorithm)
|
|
4
|
+
[Python versions](https://pypi.org/project/tra-algorithm/)
|
|
5
|
+
[License: MIT](https://opensource.org/licenses/MIT)
|
|
6
|
+
[Build Status](https://github.com/eswaroy/tra_algorithm/actions)
|
|
7
|
+
[Coverage](https://codecov.io/gh/eswaroy/tra_algorithm)
|
|
8
|
+
|
|
9
|
+
## Overview
|
|
10
|
+
|
|
11
|
+
The **Track/Rail Algorithm (TRA)** is a novel ensemble machine learning method that dynamically routes data through specialized "tracks" based on signal conditions. Unlike traditional ensemble methods that combine predictions, TRA creates multiple specialized models (tracks) and intelligently switches between them during prediction based on real-time signal evaluation.
|
|
12
|
+
|
|
13
|
+
## Key Features
|
|
14
|
+
|
|
15
|
+
- 🚄 **Dynamic Track Switching**: Intelligent routing of data through specialized models
|
|
16
|
+
- ⚡ **Parallel Processing**: Optimized signal evaluation with concurrent processing
|
|
17
|
+
- 🎯 **Adaptive Learning**: Self-optimizing parameters based on performance feedback
|
|
18
|
+
- 🔧 **Memory Optimization**: Automatic pruning of underused tracks
|
|
19
|
+
- 📊 **Rich Visualization**: Comprehensive model structure and performance visualization
|
|
20
|
+
- 🧪 **Dual Task Support**: Both classification and regression tasks
|
|
21
|
+
- 📈 **Performance Monitoring**: Detailed statistics and reporting
|
|
22
|
+
|
|
23
|
+
## Installation
|
|
24
|
+
|
|
25
|
+
Install TRA Algorithm using pip:
|
|
26
|
+
|
|
27
|
+
```bash
|
|
28
|
+
pip install tra-algorithm
|
|
29
|
+
```
|
|
30
|
+
|
|
31
|
+
For development installation:
|
|
32
|
+
|
|
33
|
+
```bash
|
|
34
|
+
git clone https://github.com/eswaroy/tra_algorithm.git
|
|
35
|
+
cd tra_algorithm
|
|
36
|
+
pip install -e ".[dev]"
|
|
37
|
+
```
|
|
38
|
+
|
|
39
|
+
## Quick Start
|
|
40
|
+
|
|
41
|
+
### Classification Example
|
|
42
|
+
|
|
43
|
+
```python
|
|
44
|
+
from tra_algorithm import OptimizedTRA
|
|
45
|
+
from sklearn.datasets import make_classification
|
|
46
|
+
from sklearn.model_selection import train_test_split
|
|
47
|
+
|
|
48
|
+
# Create sample data
|
|
49
|
+
X, y = make_classification(n_samples=1000, n_features=20, n_classes=3, random_state=42)
|
|
50
|
+
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
|
|
51
|
+
|
|
52
|
+
# Initialize and train TRA
|
|
53
|
+
tra = OptimizedTRA(
|
|
54
|
+
task_type="classification",
|
|
55
|
+
n_tracks=5,
|
|
56
|
+
random_state=42,
|
|
57
|
+
parallel_signals=True,
|
|
58
|
+
enable_track_pruning=True
|
|
59
|
+
)
|
|
60
|
+
|
|
61
|
+
tra.fit(X_train, y_train)
|
|
62
|
+
|
|
63
|
+
# Make predictions
|
|
64
|
+
y_pred = tra.predict(X_test)
|
|
65
|
+
y_proba = tra.predict_proba(X_test)
|
|
66
|
+
|
|
67
|
+
# Evaluate performance
|
|
68
|
+
accuracy = tra.score(X_test, y_test)
|
|
69
|
+
print(f"Accuracy: {accuracy:.4f}")
|
|
70
|
+
```
|
|
71
|
+
|
|
72
|
+
### Regression Example
|
|
73
|
+
|
|
74
|
+
```python
|
|
75
|
+
from tra_algorithm import OptimizedTRA
|
|
76
|
+
from sklearn.datasets import make_regression
from sklearn.model_selection import train_test_split
|
|
77
|
+
|
|
78
|
+
# Create sample data
|
|
79
|
+
X, y = make_regression(n_samples=1000, n_features=15, random_state=42)
|
|
80
|
+
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
|
|
81
|
+
|
|
82
|
+
# Initialize and train TRA for regression
|
|
83
|
+
tra = OptimizedTRA(
|
|
84
|
+
task_type="regression",
|
|
85
|
+
n_tracks=4,
|
|
86
|
+
signal_threshold=0.15,
|
|
87
|
+
feature_selection=True
|
|
88
|
+
)
|
|
89
|
+
|
|
90
|
+
tra.fit(X_train, y_train)
|
|
91
|
+
y_pred = tra.predict(X_test)
|
|
92
|
+
|
|
93
|
+
# Get performance metrics
|
|
94
|
+
mse_score = -tra.score(X_test, y_test) # Negative MSE
|
|
95
|
+
print(f"MSE: {mse_score:.4f}")
|
|
96
|
+
```
|
|
97
|
+
|
|
98
|
+
## Advanced Features
|
|
99
|
+
|
|
100
|
+
### Model Visualization
|
|
101
|
+
|
|
102
|
+
```python
|
|
103
|
+
# Visualize the TRA structure
|
|
104
|
+
tra.visualize("tra_structure.png", figsize=(12, 8))
|
|
105
|
+
|
|
106
|
+
# Get detailed performance report
|
|
107
|
+
print(tra.get_performance_report())
|
|
108
|
+
|
|
109
|
+
# Get track statistics
|
|
110
|
+
stats = tra.get_track_statistics()
|
|
111
|
+
```
|
|
112
|
+
|
|
113
|
+
### Parameter Optimization
|
|
114
|
+
|
|
115
|
+
```python
|
|
116
|
+
# Optimize parameters using validation data
|
|
117
|
+
X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.2)
|
|
118
|
+
tra.fit(X_train, y_train)
|
|
119
|
+
optimization_results = tra.optimize_parameters(X_val, y_val)
|
|
120
|
+
```
|
|
121
|
+
|
|
122
|
+
### Model Persistence
|
|
123
|
+
|
|
124
|
+
```python
|
|
125
|
+
# Save and load models
|
|
126
|
+
tra.save_model("my_tra_model.joblib")
|
|
127
|
+
loaded_tra = OptimizedTRA.load_model("my_tra_model.joblib")
|
|
128
|
+
```
|
|
129
|
+
|
|
130
|
+
## Algorithm Details
|
|
131
|
+
|
|
132
|
+
### How TRA Works
|
|
133
|
+
|
|
134
|
+
1. **Track Creation**: Multiple specialized models (tracks) are trained on different bootstrap samples
|
|
135
|
+
2. **Signal Generation**: Signals are created between tracks to detect when switching is beneficial
|
|
136
|
+
3. **Dynamic Routing**: During prediction, data is routed through tracks based on signal evaluation
|
|
137
|
+
4. **Performance Optimization**: Tracks and signals are continuously monitored and optimized
|
|
138
|
+
|
|
139
|
+
### Key Components
|
|
140
|
+
|
|
141
|
+
- **Tracks**: Specialized models trained on different data subsets
|
|
142
|
+
- **Signals**: Conditions that trigger switching between tracks
|
|
143
|
+
- **Records**: Individual data points with routing history
|
|
144
|
+
- **Enhanced Signal Conditions**: Advanced switching logic with regression optimization
|
|
145
|
+
|
|
146
|
+
## Parameters
|
|
147
|
+
|
|
148
|
+
### Main Parameters
|
|
149
|
+
|
|
150
|
+
- `task_type`: "classification" or "regression"
|
|
151
|
+
- `n_tracks`: Number of specialized tracks to create (default: 3)
|
|
152
|
+
- `signal_threshold`: Threshold for track switching (default: 0.1)
|
|
153
|
+
- `parallel_signals`: Enable parallel signal evaluation (default: True)
|
|
154
|
+
- `enable_track_pruning`: Enable automatic track pruning (default: True)
|
|
155
|
+
- `feature_selection`: Enable automatic feature selection (default: True)
|
|
156
|
+
- `handle_imbalanced`: Handle class imbalance (classification only, default: True)
|
|
157
|
+
|
|
158
|
+
### Performance Parameters
|
|
159
|
+
|
|
160
|
+
- `n_estimators`: Number of estimators per track (default: 50)
|
|
161
|
+
- `max_depth`: Maximum depth of track estimators (default: 6)
|
|
162
|
+
- `max_workers`: Maximum parallel workers (default: 4)
|
|
163
|
+
- `pruning_interval`: Interval for track pruning (default: 100)
|
|
164
|
+
|
|
165
|
+
## Performance Comparison
|
|
166
|
+
|
|
167
|
+
TRA has been tested against standard ensemble methods and shows competitive performance with additional benefits:
|
|
168
|
+
|
|
169
|
+
- **Adaptability**: Dynamically adjusts to data patterns
|
|
170
|
+
- **Interpretability**: Clear visualization of decision paths
|
|
171
|
+
- **Efficiency**: Optimized memory usage through track pruning
|
|
172
|
+
- **Robustness**: Handles both classification and regression effectively
|
|
173
|
+
|
|
174
|
+
## Requirements
|
|
175
|
+
|
|
176
|
+
- Python >= 3.7
|
|
177
|
+
- numpy >= 1.19.0
|
|
178
|
+
- pandas >= 1.1.0
|
|
179
|
+
- scikit-learn >= 1.0.0
|
|
180
|
+
- matplotlib >= 3.3.0
|
|
181
|
+
- joblib >= 1.0.0
|
|
182
|
+
- networkx >= 2.5 (for visualization)
|
|
183
|
+
|
|
184
|
+
## Contributing
|
|
185
|
+
|
|
186
|
+
We welcome contributions! Please see our [Contributing Guidelines](CONTRIBUTING.md) for details.
|
|
187
|
+
|
|
188
|
+
1. Fork the repository
|
|
189
|
+
2. Create a feature branch (`git checkout -b feature/amazing-feature`)
|
|
190
|
+
3. Commit your changes (`git commit -m 'Add amazing feature'`)
|
|
191
|
+
4. Push to the branch (`git push origin feature/amazing-feature`)
|
|
192
|
+
5. Open a Pull Request
|
|
193
|
+
|
|
194
|
+
## Testing
|
|
195
|
+
|
|
196
|
+
Run tests using pytest:
|
|
197
|
+
|
|
198
|
+
```bash
|
|
199
|
+
# Run all tests
|
|
200
|
+
pytest
|
|
201
|
+
|
|
202
|
+
# Run with coverage
|
|
203
|
+
pytest --cov=tra_algorithm --cov-report=html
|
|
204
|
+
|
|
205
|
+
# Run specific test file
|
|
206
|
+
pytest tests/test_core.py
|
|
207
|
+
```
|
|
208
|
+
|
|
209
|
+
## Documentation
|
|
210
|
+
|
|
211
|
+
Detailed documentation is available in the `docs/` directory. Build documentation locally:
|
|
212
|
+
|
|
213
|
+
```bash
|
|
214
|
+
cd docs
|
|
215
|
+
make html
|
|
216
|
+
```
|
|
217
|
+
|
|
218
|
+
## Changelog
|
|
219
|
+
|
|
220
|
+
See [CHANGELOG.md](docs/CHANGELOG.md) for a history of changes.
|
|
221
|
+
|
|
222
|
+
## License
|
|
223
|
+
|
|
224
|
+
This project is licensed under the MIT License - see the [LICENSE](LICENSE) file for details.
|
|
225
|
+
|
|
226
|
+
## Citation
|
|
227
|
+
|
|
228
|
+
If you use TRA Algorithm in your research, please cite:
|
|
229
|
+
|
|
230
|
+
```bibtex
|
|
231
|
+
@software{tra_algorithm,
|
|
232
|
+
title={TRA Algorithm: Track/Rail Algorithm for Dynamic Ensemble Learning},
|
|
233
|
+
author={TRA Algorithm Team},
|
|
234
|
+
year={2024},
|
|
235
|
+
url={https://github.com/eswaroy/tra_algorithm}
|
|
236
|
+
}
|
|
237
|
+
```
|
|
238
|
+
|
|
239
|
+
## Support
|
|
240
|
+
|
|
241
|
+
- 📧 Email: contact@tra-algorithm.com
|
|
242
|
+
- 🐛 Issues: [GitHub Issues](https://github.com/eswaroy/tra_algorithm/issues)
|
|
243
|
+
- 💬 Discussions: [GitHub Discussions](https://github.com/eswaroy/tra_algorithm/discussions)
|
|
244
|
+
|
|
245
|
+
## Acknowledgments
|
|
246
|
+
|
|
247
|
+
- Built on top of scikit-learn
|
|
248
|
+
- Inspired by ensemble learning research
|
|
249
|
+
- Thanks to all contributors and users
|
|
250
|
+
|
|
251
|
+
---
|
|
252
|
+
|
|
253
|
+
**Made with ❤️ by the TRA Algorithm Team**
|
|
@@ -0,0 +1,98 @@
|
|
|
1
|
+
```markdown
|
|
2
|
+
# Changelog
|
|
3
|
+
|
|
4
|
+
All notable changes to the TRA Algorithm package will be documented in this file.
|
|
5
|
+
|
|
6
|
+
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
|
|
7
|
+
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
|
|
8
|
+
|
|
9
|
+
## [1.0.0] - 2024-12-XX
|
|
10
|
+
|
|
11
|
+
### Added
|
|
12
|
+
- Initial release of the Track/Rail Algorithm (TRA)
|
|
13
|
+
- OptimizedTRA class with support for classification and regression
|
|
14
|
+
- Multi-track architecture with intelligent signal system
|
|
15
|
+
- Parallel processing capabilities for improved performance
|
|
16
|
+
- Automatic track pruning for memory optimization
|
|
17
|
+
- Parameter optimization functionality
|
|
18
|
+
- Comprehensive performance tracking and analytics
|
|
19
|
+
- Visualization capabilities with NetworkX integration
|
|
20
|
+
- Model persistence (save/load functionality)
|
|
21
|
+
- Extensive test suite with unit tests
|
|
22
|
+
- Documentation and examples
|
|
23
|
+
- Support for feature selection and class imbalance handling
|
|
24
|
+
- Integration with scikit-learn ecosystem
|
|
25
|
+
|
|
26
|
+
### Features
|
|
27
|
+
- **Multi-Track Learning**: Create multiple specialized models that focus on different aspects of the data
|
|
28
|
+
- **Intelligent Switching**: Automatic track switching based on prediction confidence
|
|
29
|
+
- **Performance Optimization**: Parallel signal evaluation and track pruning
|
|
30
|
+
- **Analytics**: Detailed performance reports and statistics
|
|
31
|
+
- **Visualization**: Network graphs showing track relationships and performance
|
|
32
|
+
- **Scikit-learn Compatible**: Follows scikit-learn API conventions
|
|
33
|
+
|
|
34
|
+
### Supported Algorithms
|
|
35
|
+
- Random Forest based tracks for both classification and regression
|
|
36
|
+
- Enhanced signal conditions with regression-specific optimizations
|
|
37
|
+
- StandardScaler for feature normalization
|
|
38
|
+
- SelectKBest for feature selection
|
|
39
|
+
- Class weight balancing for imbalanced datasets
|
|
40
|
+
|
|
41
|
+
### Requirements
|
|
42
|
+
- Python >= 3.7
|
|
43
|
+
- NumPy >= 1.19.0
|
|
44
|
+
- Pandas >= 1.1.0
|
|
45
|
+
- Scikit-learn >= 1.0.0
|
|
46
|
+
- Matplotlib >= 3.3.0
|
|
47
|
+
- Joblib >= 1.0.0
|
|
48
|
+
- NetworkX >= 2.5 (optional, for visualization)
|
|
49
|
+
|
|
50
|
+
### Documentation
|
|
51
|
+
- Comprehensive API documentation
|
|
52
|
+
- Quick start guide with examples
|
|
53
|
+
- Advanced usage patterns
|
|
54
|
+
- Performance optimization tips
|
|
55
|
+
- Troubleshooting guide
|
|
56
|
+
|
|
57
|
+
### Testing
|
|
58
|
+
- Unit tests for all core functionality
|
|
59
|
+
- Integration tests for model persistence
|
|
60
|
+
- Performance benchmarking tests
|
|
61
|
+
- Edge case handling tests
|
|
62
|
+
|
|
63
|
+
## [Unreleased]
|
|
64
|
+
|
|
65
|
+
### Planned Features
|
|
66
|
+
- Support for additional base estimators (XGBoost, LightGBM)
|
|
67
|
+
- Advanced signal conditions (time-based, performance-based)
|
|
68
|
+
- Online learning capabilities
|
|
69
|
+
- GPU acceleration support
|
|
70
|
+
- Advanced visualization options
|
|
71
|
+
- Model interpretability features
|
|
72
|
+
- Hyperparameter optimization integration
|
|
73
|
+
- Streaming data support
|
|
74
|
+
|
|
75
|
+
### Known Issues
|
|
76
|
+
- Visualization requires NetworkX installation
|
|
77
|
+
- Large models may consume significant memory
|
|
78
|
+
- Parallel processing performance varies by system
|
|
79
|
+
|
|
80
|
+
---
|
|
81
|
+
|
|
82
|
+
## Version History Summary
|
|
83
|
+
|
|
84
|
+
- **1.0.0**: Initial release with core TRA functionality
|
|
85
|
+
- **Future releases**: Will include advanced features and optimizations based on user feedback
|
|
86
|
+
|
|
87
|
+
## Contributing
|
|
88
|
+
|
|
89
|
+
We welcome contributions! Please see our contributing guidelines for details on how to submit improvements, bug fixes, and new features.
|
|
90
|
+
|
|
91
|
+
## Support
|
|
92
|
+
|
|
93
|
+
For support, please:
|
|
94
|
+
1. Check the documentation and examples
|
|
95
|
+
2. Review known issues in this changelog
|
|
96
|
+
3. Submit issues on the project repository
|
|
97
|
+
4. Join our community discussions
|
|
98
|
+
```
|
|
File without changes
|