tra-algorithm 1.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- tests/__init__.py +0 -0
- tests/test_core.py +145 -0
- tests/test_utils.py +80 -0
- tra_algorithm/__init__.py +98 -0
- tra_algorithm/core.py +1067 -0
- tra_algorithm/examples.py +882 -0
- tra_algorithm/utils.py +439 -0
- tra_algorithm/version.py +10 -0
- tra_algorithm-1.0.0.dist-info/LICENSE +21 -0
- tra_algorithm-1.0.0.dist-info/METADATA +295 -0
- tra_algorithm-1.0.0.dist-info/RECORD +13 -0
- tra_algorithm-1.0.0.dist-info/WHEEL +5 -0
- tra_algorithm-1.0.0.dist-info/top_level.txt +2 -0
|
@@ -0,0 +1,882 @@
|
|
|
1
|
+
"""
|
|
2
|
+
TRA Algorithm Examples
|
|
3
|
+
=====================
|
|
4
|
+
|
|
5
|
+
This module contains comprehensive examples demonstrating how to use the
|
|
6
|
+
Track/Rail Algorithm (TRA) for various machine learning tasks.
|
|
7
|
+
|
|
8
|
+
Examples include:
|
|
9
|
+
- Basic classification and regression
|
|
10
|
+
- Advanced configuration options
|
|
11
|
+
- Performance optimization
|
|
12
|
+
- Custom datasets
|
|
13
|
+
- Model evaluation and visualization
|
|
14
|
+
- Real-world use cases
|
|
15
|
+
|
|
16
|
+
Author: TRA Development Team
|
|
17
|
+
License: MIT
|
|
18
|
+
"""
|
|
19
|
+
|
|
20
|
+
import numpy as np
|
|
21
|
+
import pandas as pd
|
|
22
|
+
import matplotlib.pyplot as plt
|
|
23
|
+
import seaborn as sns
|
|
24
|
+
from sklearn.datasets import (
|
|
25
|
+
make_classification, make_regression, load_iris, load_wine,
|
|
26
|
+
load_diabetes, fetch_california_housing
|
|
27
|
+
)
|
|
28
|
+
from sklearn.model_selection import train_test_split, GridSearchCV
|
|
29
|
+
from sklearn.metrics import (
|
|
30
|
+
classification_report, confusion_matrix, mean_squared_error,
|
|
31
|
+
r2_score, accuracy_score, f1_score
|
|
32
|
+
)
|
|
33
|
+
from sklearn.preprocessing import StandardScaler, LabelEncoder
|
|
34
|
+
from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor
|
|
35
|
+
import warnings
|
|
36
|
+
import time
|
|
37
|
+
import logging
|
|
38
|
+
|
|
39
|
+
# Import TRA algorithm (assuming it's in the same package)
|
|
40
|
+
try:
|
|
41
|
+
from core import OptimizedTRA
|
|
42
|
+
except ImportError:
|
|
43
|
+
from tra_algorithm.core import OptimizedTRA
|
|
44
|
+
|
|
45
|
+
# Configure logging for examples
|
|
46
|
+
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
|
|
47
|
+
logger = logging.getLogger(__name__)
|
|
48
|
+
|
|
49
|
+
# Suppress warnings for cleaner output
|
|
50
|
+
warnings.filterwarnings('ignore')
|
|
51
|
+
|
|
52
|
+
def basic_classification_example():
    """Demonstrate TRA classification on a synthetic multi-class dataset.

    Builds a 3-class dataset with ``make_classification``, fits an
    ``OptimizedTRA`` classifier, reports accuracy / weighted F1 and the
    model's track statistics, then hands back the fitted model together
    with the held-out split.

    Returns:
        tuple: (fitted OptimizedTRA classifier, X_test, y_test)
    """
    banner = "=" * 60
    print(banner)
    print("BASIC CLASSIFICATION EXAMPLE")
    print(banner)

    # Synthetic 3-class problem: 8 informative + 2 redundant features.
    X, y = make_classification(
        n_samples=1000,
        n_features=10,
        n_informative=8,
        n_redundant=2,
        n_classes=3,
        random_state=42,
    )

    print(f"Dataset shape: {X.shape}")
    print(f"Number of classes: {len(np.unique(y))}")
    print(f"Class distribution: {np.bincount(y)}")

    # Stratified split keeps class ratios identical in both halves.
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.3, random_state=42, stratify=y
    )

    print("\nTraining TRA Classifier...")
    classifier = OptimizedTRA(
        task_type="classification",
        n_tracks=4,
        random_state=42,
        n_estimators=50,
    )

    t0 = time.time()
    classifier.fit(X_train, y_train)
    elapsed = time.time() - t0

    # predict_proba is called as well so the example exercises both APIs.
    y_pred = classifier.predict(X_test)
    y_proba = classifier.predict_proba(X_test)

    acc = accuracy_score(y_test, y_pred)
    weighted_f1 = f1_score(y_test, y_pred, average='weighted')

    print("\nResults:")
    print(f"Training time: {elapsed:.2f} seconds")
    print(f"Accuracy: {acc:.4f}")
    print(f"F1-score: {weighted_f1:.4f}")

    print("\nClassification Report:")
    print(classification_report(y_test, y_pred))

    # Surface TRA-specific internals for the reader.
    stats = classifier.get_track_statistics()
    print("\nTRA Statistics:")
    print(f"Number of tracks: {stats['n_tracks']}")
    print(f"Number of signals: {stats['n_signals']}")

    return classifier, X_test, y_test
|
|
120
|
+
|
|
121
|
+
|
|
122
|
+
def basic_regression_example():
    """Demonstrate TRA regression on a synthetic dataset.

    Builds a noisy regression problem with ``make_regression``, fits an
    ``OptimizedTRA`` regressor, prints MSE / RMSE / R² plus the model's
    track statistics, and returns the fitted model with the held-out split.

    Returns:
        tuple: (fitted OptimizedTRA regressor, X_test, y_test)
    """
    banner = "=" * 60
    print("\n" + banner)
    print("BASIC REGRESSION EXAMPLE")
    print(banner)

    # Synthetic regression target with mild Gaussian noise.
    X, y = make_regression(
        n_samples=1000,
        n_features=10,
        n_informative=8,
        noise=0.1,
        random_state=42,
    )

    print(f"Dataset shape: {X.shape}")
    print(f"Target range: [{y.min():.2f}, {y.max():.2f}]")

    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.3, random_state=42
    )

    print("\nTraining TRA Regressor...")
    regressor = OptimizedTRA(
        task_type="regression",
        n_tracks=4,
        random_state=42,
        n_estimators=50,
    )

    t0 = time.time()
    regressor.fit(X_train, y_train)
    elapsed = time.time() - t0

    y_pred = regressor.predict(X_test)

    # Standard regression metrics.
    mse = mean_squared_error(y_test, y_pred)
    rmse = np.sqrt(mse)
    r2 = r2_score(y_test, y_pred)

    print("\nResults:")
    print(f"Training time: {elapsed:.2f} seconds")
    print(f"Mean Squared Error: {mse:.4f}")
    print(f"Root Mean Squared Error: {rmse:.4f}")
    print(f"R² Score: {r2:.4f}")

    # Surface TRA-specific internals for the reader.
    stats = regressor.get_track_statistics()
    print("\nTRA Statistics:")
    print(f"Number of tracks: {stats['n_tracks']}")
    print(f"Number of signals: {stats['n_signals']}")

    return regressor, X_test, y_test
|
|
186
|
+
|
|
187
|
+
|
|
188
|
+
def real_world_classification_example():
    """Compare TRA against Random Forest on the Wine dataset.

    Loads the sklearn Wine dataset, trains an ``OptimizedTRA`` classifier
    with its advanced configuration flags enabled, trains a
    ``RandomForestClassifier`` as a baseline, and prints an accuracy /
    training-time comparison plus TRA's own performance report.

    Returns:
        tuple: (fitted TRA classifier, fitted RandomForest classifier,
        X_test, y_test)
    """
    banner = "=" * 60
    print("\n" + banner)
    print("REAL-WORLD CLASSIFICATION EXAMPLE (Wine Dataset)")
    print(banner)

    wine_data = load_wine()
    X, y = wine_data.data, wine_data.target

    print(f"Dataset: {wine_data.DESCR.split('.')[0]}")
    print(f"Shape: {X.shape}")
    print(f"Classes: {wine_data.target_names}")
    print(f"Features: {len(wine_data.feature_names)}")

    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.3, random_state=42, stratify=y
    )

    # TRA with the full set of optional behaviors switched on.
    print("\nTraining Advanced TRA Classifier...")
    tra_model = OptimizedTRA(
        task_type="classification",
        n_tracks=5,
        signal_threshold=0.1,
        random_state=42,
        n_estimators=100,
        max_depth=8,
        feature_selection=True,
        handle_imbalanced=True,
        parallel_signals=True,
        enable_track_pruning=True,
    )

    t0 = time.time()
    tra_model.fit(X_train, y_train)
    tra_elapsed = time.time() - t0

    # Baseline with matching ensemble size and depth.
    print("Training Random Forest for comparison...")
    forest = RandomForestClassifier(
        n_estimators=100,
        max_depth=8,
        random_state=42,
    )

    t0 = time.time()
    forest.fit(X_train, y_train)
    rf_elapsed = time.time() - t0

    tra_pred = tra_model.predict(X_test)
    rf_pred = forest.predict(X_test)

    tra_acc = accuracy_score(y_test, tra_pred)
    rf_acc = accuracy_score(y_test, rf_pred)

    print("\nComparison Results:")
    print(f"TRA Accuracy: {tra_acc:.4f} (Training time: {tra_elapsed:.2f}s)")
    print(f"Random Forest Accuracy: {rf_acc:.4f} (Training time: {rf_elapsed:.2f}s)")

    print("\nTRA Performance Report:")
    print(tra_model.get_performance_report())

    return tra_model, forest, X_test, y_test
|
|
264
|
+
|
|
265
|
+
|
|
266
|
+
def real_world_regression_example():
    """Compare TRA against Random Forest on California Housing data.

    Attempts to fetch the California Housing dataset (falling back to a
    synthetic regression problem if the download fails), trains a
    lightweight ``OptimizedTRA`` regressor, tunes it via
    ``optimize_parameters`` on a validation split, and compares it to a
    ``RandomForestRegressor`` baseline on the test split.

    Returns:
        tuple: (fitted TRA regressor, fitted RandomForest regressor,
        X_test, y_test)
    """
    banner = "=" * 60
    print("\n" + banner)
    print("REAL-WORLD REGRESSION EXAMPLE (California Housing)")
    print(banner)

    # Fetching requires network access on first use; fall back gracefully.
    try:
        housing_data = fetch_california_housing()
        X, y = housing_data.data, housing_data.target
        # Reduce dataset size for speed
        X, y = X[:2000], y[:2000]
    except Exception as e:
        print(f"Could not load California Housing dataset: {e}")
        print("Using synthetic dataset instead...")
        X, y = make_regression(
            n_samples=1000,
            n_features=8,
            n_informative=6,
            noise=0.1,
            random_state=42,
        )

    print(f"Dataset shape: {X.shape}")
    print(f"Target statistics: mean={y.mean():.2f}, std={y.std():.2f}")

    # 60/20/20 train/validation/test split.
    X_train, X_temp, y_train, y_temp = train_test_split(
        X, y, test_size=0.4, random_state=42
    )
    X_val, X_test, y_val, y_test = train_test_split(
        X_temp, y_temp, test_size=0.5, random_state=42
    )

    print(f"Data split: Train={X_train.shape[0]}, Val={X_val.shape[0]}, Test={X_test.shape[0]}")

    # Deliberately small config so the example runs quickly.
    print("\nTraining TRA Regressor with optimization...")
    tra_model = OptimizedTRA(
        task_type="regression",
        n_tracks=2,
        signal_threshold=0.15,
        random_state=42,
        n_estimators=10,
        feature_selection=True,
        parallel_signals=True,
        enable_track_pruning=True,
    )

    t0 = time.time()
    tra_model.fit(X_train, y_train)
    tra_elapsed = time.time() - t0

    # Threshold tuning happens on the validation split only.
    print("Optimizing parameters...")
    opt_info = tra_model.optimize_parameters(X_val, y_val)

    print("Training Random Forest for comparison...")
    forest = RandomForestRegressor(
        n_estimators=10,
        random_state=42,
    )

    t0 = time.time()
    forest.fit(X_train, y_train)
    rf_elapsed = time.time() - t0

    tra_pred = tra_model.predict(X_test)
    rf_pred = forest.predict(X_test)

    tra_mse = mean_squared_error(y_test, tra_pred)
    tra_r2 = r2_score(y_test, tra_pred)
    rf_mse = mean_squared_error(y_test, rf_pred)
    rf_r2 = r2_score(y_test, rf_pred)

    print("\nComparison Results:")
    print(f"TRA - MSE: {tra_mse:.4f}, R²: {tra_r2:.4f} (Training: {tra_elapsed:.2f}s)")
    print(f"Random Forest - MSE: {rf_mse:.4f}, R²: {rf_r2:.4f} (Training: {rf_elapsed:.2f}s)")

    print("\nParameter Optimization Results:")
    print(f"Original threshold: {opt_info['original_threshold']}")
    print(f"Optimized threshold: {opt_info['optimized_threshold']}")
    print(f"Performance improvement: {opt_info['improvement']:.4f}")

    return tra_model, forest, X_test, y_test
|
|
361
|
+
|
|
362
|
+
|
|
363
|
+
def parameter_tuning_example():
    """Sweep a handful of TRA configurations and report the best one.

    Trains an ``OptimizedTRA`` classifier once per entry in a fixed list
    of parameter combinations (varying ``n_tracks``, ``signal_threshold``
    and ``n_estimators``), records test accuracy and training time for
    each, and prints the best-performing configuration.

    Returns:
        list[dict]: one record per configuration with keys
        ``params``, ``accuracy`` and ``training_time``.
    """
    banner = "=" * 60
    print("\n" + banner)
    print("PARAMETER TUNING EXAMPLE")
    print(banner)

    # Binary problem with mostly informative features.
    X, y = make_classification(
        n_samples=800,
        n_features=12,
        n_informative=10,
        n_classes=2,
        random_state=42,
    )

    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.3, random_state=42, stratify=y
    )

    print(f"Dataset shape: {X.shape}")
    print("Testing different parameter combinations...")

    # One-at-a-time variations around a baseline of
    # n_tracks=4 / threshold=0.1 / n_estimators=50.
    param_grid = [
        {'n_tracks': 3, 'signal_threshold': 0.1, 'n_estimators': 50},
        {'n_tracks': 4, 'signal_threshold': 0.1, 'n_estimators': 50},
        {'n_tracks': 5, 'signal_threshold': 0.1, 'n_estimators': 50},
        {'n_tracks': 4, 'signal_threshold': 0.05, 'n_estimators': 50},
        {'n_tracks': 4, 'signal_threshold': 0.15, 'n_estimators': 50},
        {'n_tracks': 4, 'signal_threshold': 0.1, 'n_estimators': 30},
        {'n_tracks': 4, 'signal_threshold': 0.1, 'n_estimators': 70},
    ]

    results = []

    for idx, params in enumerate(param_grid, start=1):
        print(f"\nTesting configuration {idx}: {params}")

        model = OptimizedTRA(
            task_type="classification",
            random_state=42,
            **params,
        )

        t0 = time.time()
        model.fit(X_train, y_train)
        elapsed = time.time() - t0

        y_pred = model.predict(X_test)
        acc = accuracy_score(y_test, y_pred)

        results.append({
            'params': params,
            'accuracy': acc,
            'training_time': elapsed,
        })

        print(f"Accuracy: {acc:.4f}, Training time: {elapsed:.2f}s")

    # Rank purely by accuracy; ties resolved by first occurrence.
    winner = max(results, key=lambda record: record['accuracy'])

    print("\nBest Configuration:")
    print(f"Parameters: {winner['params']}")
    print(f"Accuracy: {winner['accuracy']:.4f}")
    print(f"Training time: {winner['training_time']:.2f}s")

    return results
|
|
440
|
+
|
|
441
|
+
|
|
442
|
+
def model_comparison_example():
    """Benchmark TRA against several sklearn classifiers on one dataset.

    Builds a deliberately challenging 3-class dataset, then trains and
    evaluates TRA, Random Forest, and — when their sklearn modules import
    successfully — Gradient Boosting and Decision Tree models, printing a
    per-model report and a final comparison table.

    Returns:
        dict: model name -> dict with keys ``accuracy``, ``f1_score``,
        ``training_time`` and ``prediction_time``.
    """
    banner = "=" * 60
    print("\n" + banner)
    print("MODEL COMPARISON EXAMPLE")
    print(banner)

    # Overlapping clusters (class_sep=0.8) make this harder than the
    # basic example.
    X, y = make_classification(
        n_samples=1500,
        n_features=15,
        n_informative=12,
        n_redundant=3,
        n_classes=3,
        n_clusters_per_class=2,
        class_sep=0.8,
        random_state=42,
    )

    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.3, random_state=42, stratify=y
    )

    print(f"Dataset shape: {X.shape}")
    print(f"Classes: {len(np.unique(y))}")

    # Core contenders; extra models are added below when importable.
    models = {
        'TRA': OptimizedTRA(
            task_type="classification",
            n_tracks=5,
            random_state=42,
            n_estimators=80,
            feature_selection=True,
            parallel_signals=True,
        ),
        'Random Forest': RandomForestClassifier(
            n_estimators=80,
            random_state=42,
        ),
    }

    # Optional models: only skip when the import itself fails. The
    # original bare `except:` also hid real errors (even KeyboardInterrupt);
    # ImportError is the only failure we intend to tolerate here.
    try:
        from sklearn.ensemble import GradientBoostingClassifier
        models['Gradient Boosting'] = GradientBoostingClassifier(
            n_estimators=80,
            random_state=42,
        )
    except ImportError:
        pass

    try:
        from sklearn.tree import DecisionTreeClassifier
        models['Decision Tree'] = DecisionTreeClassifier(
            max_depth=10,
            random_state=42,
        )
    except ImportError:
        pass

    results = {}

    for name, model in models.items():
        print(f"\nTraining {name}...")

        # Time the fit and the predict phases separately.
        start_time = time.time()
        model.fit(X_train, y_train)
        training_time = time.time() - start_time

        start_time = time.time()
        y_pred = model.predict(X_test)
        prediction_time = time.time() - start_time

        accuracy = accuracy_score(y_test, y_pred)
        f1 = f1_score(y_test, y_pred, average='weighted')

        results[name] = {
            'accuracy': accuracy,
            'f1_score': f1,
            'training_time': training_time,
            'prediction_time': prediction_time,
        }

        print(f"Accuracy: {accuracy:.4f}")
        print(f"F1-score: {f1:.4f}")
        print(f"Training time: {training_time:.2f}s")
        print(f"Prediction time: {prediction_time:.4f}s")

    # Final side-by-side table.
    print(f"\n{'Model':<20} {'Accuracy':<10} {'F1-Score':<10} {'Train Time':<12} {'Pred Time':<12}")
    print("-" * 70)

    for name, metrics in results.items():
        print(f"{name:<20} {metrics['accuracy']:<10.4f} {metrics['f1_score']:<10.4f} "
              f"{metrics['training_time']:<12.2f} {metrics['prediction_time']:<12.4f}")

    return results
|
|
547
|
+
|
|
548
|
+
|
|
549
|
+
def _track_bar_subplot(position, labels, values, title, ylabel):
    """Render one bar chart of a per-track metric into 2x2 subplot *position*."""
    plt.subplot(2, 2, position)
    plt.bar(labels, values)
    plt.title(title)
    plt.xlabel('Track')
    plt.ylabel(ylabel)
    plt.xticks(rotation=45)


def visualization_example():
    """Demonstrate TRA visualization capabilities.

    Trains a small TRA classifier, runs predictions to populate its usage
    counters, saves the model's own structure visualization, and renders a
    2x2 grid of per-track metric bar charts (usage, performance, signal
    confidence, prediction time). All plotting is best-effort: failures are
    reported rather than raised, since the visualization dependencies
    (matplotlib, networkx) may be missing.

    Returns:
        OptimizedTRA: the fitted classifier.
    """
    banner = "=" * 60
    print("\n" + banner)
    print("VISUALIZATION EXAMPLE")
    print(banner)

    X, y = make_classification(
        n_samples=800,
        n_features=8,
        n_classes=2,
        random_state=42,
    )

    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.3, random_state=42
    )

    print("Training TRA model for visualization...")
    tra_clf = OptimizedTRA(
        task_type="classification",
        n_tracks=4,
        random_state=42,
    )
    tra_clf.fit(X_train, y_train)

    # Predictions populate the usage/timing counters plotted below.
    _ = tra_clf.predict(X_test)

    try:
        print("Creating TRA structure visualization...")
        tra_clf.visualize("tra_structure_example.png")
        print("Visualization saved as 'tra_structure_example.png'")

        stats = tra_clf.get_track_statistics()
        details = stats['track_details']
        track_names = list(details.keys())

        # The four panels are identical apart from the metric, title and
        # y-label, so they share the _track_bar_subplot helper.
        plt.figure(figsize=(12, 8))
        _track_bar_subplot(
            1, track_names,
            [details[name]['usage_count'] for name in track_names],
            'Track Usage Distribution', 'Usage Count')
        _track_bar_subplot(
            2, track_names,
            [details[name]['performance_score'] for name in track_names],
            'Track Performance Scores', 'Performance Score')
        _track_bar_subplot(
            3, track_names,
            [details[name]['avg_signal_confidence'] for name in track_names],
            'Average Signal Confidence', 'Confidence')
        _track_bar_subplot(
            4, track_names,
            # seconds -> milliseconds for a readable axis
            [details[name]['avg_prediction_time'] * 1000 for name in track_names],
            'Average Prediction Time', 'Time (ms)')

        plt.tight_layout()
        plt.savefig('tra_performance_metrics.png', dpi=300, bbox_inches='tight')
        plt.show()

        print("Performance metrics plot saved as 'tra_performance_metrics.png'")

    except Exception as e:
        # Deliberate best-effort: report and continue rather than crash.
        print(f"Visualization failed: {e}")
        print("This might be due to missing dependencies (matplotlib, networkx)")

    return tra_clf
|
|
650
|
+
|
|
651
|
+
|
|
652
|
+
def custom_dataset_example():
    """Walk through using TRA on a hand-built customer-churn dataset.

    Simulates a churn table (three numeric columns plus two
    integer-encoded categorical columns), derives a binary churn label
    from a linear score, scales only the numeric columns, trains an
    ``OptimizedTRA`` classifier with imbalance handling enabled, and
    prints evaluation metrics plus per-track TRA insights.

    Returns:
        tuple: (fitted TRA classifier, scaled X_test, y_test)
    """
    banner = "=" * 60
    print("\n" + banner)
    print("CUSTOM DATASET EXAMPLE")
    print(banner)

    print("Creating sample custom dataset...")

    # NOTE: the draw order below is part of the example's reproducibility
    # contract — reordering these calls would change the dataset.
    np.random.seed(42)
    n_samples = 1000

    # Numeric columns.
    age = np.random.normal(35, 12, n_samples)
    income = np.random.lognormal(10, 0.5, n_samples)
    tenure = np.random.exponential(2, n_samples)

    # Categorical columns, pre-encoded as small integers.
    region = np.random.choice([0, 1, 2, 3], n_samples)  # 4 regions
    plan_type = np.random.choice([0, 1, 2], n_samples)  # 3 plan types

    # Linear churn score with noise; thresholding at the median yields a
    # perfectly balanced binary target.
    churn_prob = (
        -0.02 * age +
        -0.00001 * income +
        -0.1 * tenure +
        0.1 * region +
        0.05 * plan_type +
        np.random.normal(0, 0.1, n_samples)
    )
    churn = (churn_prob > np.median(churn_prob)).astype(int)

    X = np.column_stack([age, income, tenure, region, plan_type])
    y = churn

    feature_names = ['age', 'income', 'tenure', 'region', 'plan_type']

    print(f"Custom dataset created:")
    print(f"Shape: {X.shape}")
    print(f"Features: {feature_names}")
    print(f"Churn rate: {y.mean():.3f}")

    print("\nPreprocessing data...")

    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.3, random_state=42, stratify=y
    )

    # Standardize only the numeric columns (first three); the categorical
    # codes are left untouched.
    scaler = StandardScaler()
    X_train_scaled = X_train.copy()
    X_test_scaled = X_test.copy()

    X_train_scaled[:, :3] = scaler.fit_transform(X_train[:, :3])
    X_test_scaled[:, :3] = scaler.transform(X_test[:, :3])

    print("Numerical features scaled")

    print("\nTraining TRA on custom dataset...")
    churn_model = OptimizedTRA(
        task_type="classification",
        n_tracks=4,
        signal_threshold=0.12,
        random_state=42,
        handle_imbalanced=True,  # Important for imbalanced datasets
        feature_selection=True,
    )

    churn_model.fit(X_train_scaled, y_train)

    y_pred = churn_model.predict(X_test_scaled)
    y_proba = churn_model.predict_proba(X_test_scaled)

    acc = accuracy_score(y_test, y_pred)
    f1 = f1_score(y_test, y_pred)

    print("\nResults on custom dataset:")
    print(f"Accuracy: {acc:.4f}")
    print(f"F1-score: {f1:.4f}")

    print("\nClassification Report:")
    print(classification_report(y_test, y_pred, target_names=['No Churn', 'Churn']))

    # TRA-specific view: which tracks actually fired, and how well.
    print("\nTRA Model Insights:")
    stats = churn_model.get_track_statistics()
    print(f"Active tracks: {stats['n_tracks']}")
    print(f"Total signals: {stats['n_signals']}")

    for track_name, details in stats['track_details'].items():
        if details['usage_count'] > 0:
            print(f"{track_name}: {details['usage_percentage']:.1f}% usage, "
                  f"performance: {details['performance_score']:.3f}")

    return churn_model, X_test_scaled, y_test
|
|
763
|
+
|
|
764
|
+
|
|
765
|
+
def save_load_example():
    """Demonstrate TRA model persistence.

    Trains a small TRA classifier, round-trips it through
    ``save_model`` / ``load_model``, and verifies that the reloaded model
    produces bit-identical predictions on the test split. The temporary
    model file is removed afterwards on a best-effort basis.

    Returns:
        tuple: (original fitted model, reloaded model)
    """
    print("\n" + "=" * 60)
    print("MODEL SAVE/LOAD EXAMPLE")
    print("=" * 60)

    # Small dataset keeps the round trip fast.
    X, y = make_classification(n_samples=500, n_features=8, random_state=42)
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

    print("Training original TRA model...")
    original_tra = OptimizedTRA(
        task_type="classification",
        n_tracks=3,
        random_state=42,
    )
    original_tra.fit(X_train, y_train)

    original_pred = original_tra.predict(X_test)
    original_accuracy = accuracy_score(y_test, original_pred)

    print(f"Original model accuracy: {original_accuracy:.4f}")

    # Round trip: save to disk, then reload into a fresh object.
    model_filename = "tra_model_example.joblib"
    print(f"\nSaving model to {model_filename}...")
    original_tra.save_model(model_filename)

    print(f"Loading model from {model_filename}...")
    loaded_tra = OptimizedTRA.load_model(model_filename)

    loaded_pred = loaded_tra.predict(X_test)
    loaded_accuracy = accuracy_score(y_test, loaded_pred)

    print(f"Loaded model accuracy: {loaded_accuracy:.4f}")

    # Consistency check: the reloaded model must predict identically.
    predictions_match = np.array_equal(original_pred, loaded_pred)
    print(f"Predictions match: {predictions_match}")

    if predictions_match:
        print("✓ Model save/load successful!")
    else:
        print("✗ Model save/load failed - predictions don't match")

    # Best-effort cleanup of the temp file. The original bare `except:`
    # swallowed every exception (even KeyboardInterrupt); OSError covers
    # the expected removal failures.
    import os
    try:
        os.remove(model_filename)
        print(f"Cleaned up {model_filename}")
    except OSError:
        pass

    return original_tra, loaded_tra
|
|
830
|
+
|
|
831
|
+
|
|
832
|
+
def run_all_examples():
    """Execute every example function in sequence.

    Each example runs inside its own try/except so a failure in one
    does not prevent the rest from running; errors are printed and
    execution continues.

    Returns:
        list[tuple[str, callable]]: the (title, function) pairs that
        were attempted, in order.
    """
    divider = "*" * 80
    print(divider)
    print("RUNNING ALL TRA ALGORITHM EXAMPLES")
    print(divider)

    examples = [
        ("Basic Classification Example", basic_classification_example),
        ("Basic Regression Example", basic_regression_example),
        ("Real-World Classification Example", real_world_classification_example),
        ("Real-World Regression Example", real_world_regression_example),
        ("Parameter Tuning Example", parameter_tuning_example),
        ("Model Comparison Example", model_comparison_example),
        ("Visualization Example", visualization_example),
        ("Custom Dataset Example", custom_dataset_example),
        ("Model Save/Load Example", save_load_example),
    ]

    for name, example_func in examples:
        print(f"\nRunning: {name}")
        try:
            example_func()
        except Exception as e:
            # Isolate failures so remaining examples still run.
            print(f"Error in {name}: {e}")
        else:
            print(f"{name} completed successfully!")

    print("\n" + divider)
    print("ALL EXAMPLES COMPLETED!")
    print(divider)
    return examples
|
|
865
|
+
|
|
866
|
+
def main():
    """Entry point: run every TRA example, then print a quick-start recap.

    Intended to be invoked when the module is executed as a script.
    """
    print("TRA Algorithm Examples")
    print("======================")

    run_all_examples()

    # Minimal recipe for readers who only skim the output.
    quick_start = [
        "1. Import: from tra_algorithm import OptimizedTRA",
        "2. Create: tra = OptimizedTRA(task_type='classification')",
        "3. Train: tra.fit(X_train, y_train)",
        "4. Predict: y_pred = tra.predict(X_test)",
        "5. Evaluate: tra.get_performance_report()",
    ]
    print("\nQuick Start Guide:")
    for step in quick_start:
        print(step)
|
|
881
|
+
# Script entry point: run all examples when executed directly
# (no side effects on plain import).
if __name__ == "__main__":
    main()
|