tra-algorithm 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,882 @@
1
+ """
2
+ TRA Algorithm Examples
3
+ =====================
4
+
5
+ This module contains comprehensive examples demonstrating how to use the
6
+ Track/Rail Algorithm (TRA) for various machine learning tasks.
7
+
8
+ Examples include:
9
+ - Basic classification and regression
10
+ - Advanced configuration options
11
+ - Performance optimization
12
+ - Custom datasets
13
+ - Model evaluation and visualization
14
+ - Real-world use cases
15
+
16
+ Author: TRA Development Team
17
+ License: MIT
18
+ """
19
+
20
+ import numpy as np
21
+ import pandas as pd
22
+ import matplotlib.pyplot as plt
23
+ import seaborn as sns
24
+ from sklearn.datasets import (
25
+ make_classification, make_regression, load_iris, load_wine,
26
+ load_diabetes, fetch_california_housing
27
+ )
28
+ from sklearn.model_selection import train_test_split, GridSearchCV
29
+ from sklearn.metrics import (
30
+ classification_report, confusion_matrix, mean_squared_error,
31
+ r2_score, accuracy_score, f1_score
32
+ )
33
+ from sklearn.preprocessing import StandardScaler, LabelEncoder
34
+ from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor
35
+ import warnings
36
+ import time
37
+ import logging
38
+
39
+ # Import TRA algorithm (assuming it's in the same package)
40
+ try:
41
+ from core import OptimizedTRA
42
+ except ImportError:
43
+ from tra_algorithm.core import OptimizedTRA
44
+
45
+ # Configure logging for examples
46
+ logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
47
+ logger = logging.getLogger(__name__)
48
+
49
+ # Suppress warnings for cleaner output
50
+ warnings.filterwarnings('ignore')
51
+
52
def basic_classification_example():
    """Basic classification example using the TRA algorithm.

    Builds a synthetic 3-class dataset, trains an ``OptimizedTRA``
    classifier, and prints accuracy/F1 metrics plus track statistics.

    Returns:
        tuple: (fitted classifier, X_test, y_test) for further use.
    """
    print("=" * 60)
    print("BASIC CLASSIFICATION EXAMPLE")
    print("=" * 60)

    # Create a synthetic dataset with mostly informative features.
    X, y = make_classification(
        n_samples=1000,
        n_features=10,
        n_informative=8,
        n_redundant=2,
        n_classes=3,
        random_state=42,
    )

    print(f"Dataset shape: {X.shape}")
    print(f"Number of classes: {len(np.unique(y))}")
    print(f"Class distribution: {np.bincount(y)}")

    # Stratified split keeps the class proportions in both halves.
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.3, random_state=42, stratify=y
    )

    print("\nTraining TRA Classifier...")
    tra_clf = OptimizedTRA(
        task_type="classification",
        n_tracks=4,
        random_state=42,
        n_estimators=50,
    )

    # perf_counter is monotonic and higher-resolution than time.time()
    # for measuring elapsed durations.
    start_time = time.perf_counter()
    tra_clf.fit(X_train, y_train)
    training_time = time.perf_counter() - start_time

    # Predict labels; predict_proba is called only to demonstrate the API.
    y_pred = tra_clf.predict(X_test)
    _ = tra_clf.predict_proba(X_test)

    # Evaluate performance.
    accuracy = accuracy_score(y_test, y_pred)
    f1 = f1_score(y_test, y_pred, average='weighted')

    # Plain strings below: these literals contain no placeholders,
    # so the f-prefix was unnecessary.
    print("\nResults:")
    print(f"Training time: {training_time:.2f} seconds")
    print(f"Accuracy: {accuracy:.4f}")
    print(f"F1-score: {f1:.4f}")

    print("\nClassification Report:")
    print(classification_report(y_test, y_pred))

    # Display track statistics reported by the fitted model.
    stats = tra_clf.get_track_statistics()
    print("\nTRA Statistics:")
    print(f"Number of tracks: {stats['n_tracks']}")
    print(f"Number of signals: {stats['n_signals']}")

    return tra_clf, X_test, y_test
120
+
121
+
122
def basic_regression_example():
    """Basic regression example using the TRA algorithm.

    Builds a synthetic regression dataset, trains an ``OptimizedTRA``
    regressor, and prints MSE/RMSE/R² metrics plus track statistics.

    Returns:
        tuple: (fitted regressor, X_test, y_test).
    """
    print("\n" + "=" * 60)
    print("BASIC REGRESSION EXAMPLE")
    print("=" * 60)

    # Create a synthetic dataset with light observation noise.
    X, y = make_regression(
        n_samples=1000,
        n_features=10,
        n_informative=8,
        noise=0.1,
        random_state=42,
    )

    print(f"Dataset shape: {X.shape}")
    print(f"Target range: [{y.min():.2f}, {y.max():.2f}]")

    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.3, random_state=42
    )

    print("\nTraining TRA Regressor...")
    tra_reg = OptimizedTRA(
        task_type="regression",
        n_tracks=4,
        random_state=42,
        n_estimators=50,
    )

    # perf_counter: monotonic clock suited to elapsed-time measurement.
    start_time = time.perf_counter()
    tra_reg.fit(X_train, y_train)
    training_time = time.perf_counter() - start_time

    y_pred = tra_reg.predict(X_test)

    # Evaluate performance.
    mse = mean_squared_error(y_test, y_pred)
    rmse = np.sqrt(mse)
    r2 = r2_score(y_test, y_pred)

    print("\nResults:")
    print(f"Training time: {training_time:.2f} seconds")
    print(f"Mean Squared Error: {mse:.4f}")
    print(f"Root Mean Squared Error: {rmse:.4f}")
    print(f"R² Score: {r2:.4f}")

    # Display track statistics reported by the fitted model.
    stats = tra_reg.get_track_statistics()
    print("\nTRA Statistics:")
    print(f"Number of tracks: {stats['n_tracks']}")
    print(f"Number of signals: {stats['n_signals']}")

    return tra_reg, X_test, y_test
186
+
187
+
188
def real_world_classification_example():
    """Real-world classification example using the Wine dataset.

    Trains an advanced TRA configuration on the scikit-learn Wine data
    and compares accuracy and training time against a Random Forest of
    comparable capacity.

    Returns:
        tuple: (tra_clf, rf_clf, X_test, y_test).
    """
    print("\n" + "=" * 60)
    print("REAL-WORLD CLASSIFICATION EXAMPLE (Wine Dataset)")
    print("=" * 60)

    # Load wine dataset.
    wine_data = load_wine()
    X, y = wine_data.data, wine_data.target

    # First sentence of the bundled dataset description.
    print(f"Dataset: {wine_data.DESCR.split('.')[0]}")
    print(f"Shape: {X.shape}")
    print(f"Classes: {wine_data.target_names}")
    print(f"Features: {len(wine_data.feature_names)}")

    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.3, random_state=42, stratify=y
    )

    # TRA with the advanced knobs enabled for demonstration.
    print("\nTraining Advanced TRA Classifier...")
    tra_clf = OptimizedTRA(
        task_type="classification",
        n_tracks=5,
        signal_threshold=0.1,
        random_state=42,
        n_estimators=100,
        max_depth=8,
        feature_selection=True,
        handle_imbalanced=True,
        parallel_signals=True,
        enable_track_pruning=True,
    )

    start_time = time.perf_counter()
    tra_clf.fit(X_train, y_train)
    tra_training_time = time.perf_counter() - start_time

    # Baseline: Random Forest with matching n_estimators/max_depth.
    print("Training Random Forest for comparison...")
    rf_clf = RandomForestClassifier(
        n_estimators=100,
        max_depth=8,
        random_state=42,
    )

    start_time = time.perf_counter()
    rf_clf.fit(X_train, y_train)
    rf_training_time = time.perf_counter() - start_time

    tra_pred = tra_clf.predict(X_test)
    rf_pred = rf_clf.predict(X_test)

    tra_accuracy = accuracy_score(y_test, tra_pred)
    rf_accuracy = accuracy_score(y_test, rf_pred)

    print("\nComparison Results:")
    print(f"TRA Accuracy: {tra_accuracy:.4f} (Training time: {tra_training_time:.2f}s)")
    print(f"Random Forest Accuracy: {rf_accuracy:.4f} (Training time: {rf_training_time:.2f}s)")

    print("\nTRA Performance Report:")
    print(tra_clf.get_performance_report())

    return tra_clf, rf_clf, X_test, y_test
264
+
265
+
266
def real_world_regression_example():
    """Real-world regression example using California Housing data.

    Falls back to a synthetic dataset when the download fails, trains a
    fast TRA configuration, tunes its signal threshold on a validation
    split, and compares against a Random Forest baseline.

    Returns:
        tuple: (tra_reg, rf_reg, X_test, y_test).
    """
    print("\n" + "=" * 60)
    print("REAL-WORLD REGRESSION EXAMPLE (California Housing)")
    print("=" * 60)

    # fetch_california_housing may download on first use. The broad
    # except is deliberate: network/cache failures vary widely and the
    # example should degrade gracefully to synthetic data.
    try:
        housing_data = fetch_california_housing()
        X, y = housing_data.data, housing_data.target
        # Reduce dataset size so the example stays fast.
        X, y = X[:2000], y[:2000]
    except Exception as e:
        print(f"Could not load California Housing dataset: {e}")
        print("Using synthetic dataset instead...")
        X, y = make_regression(
            n_samples=1000,
            n_features=8,
            n_informative=6,
            noise=0.1,
            random_state=42,
        )

    print(f"Dataset shape: {X.shape}")
    print(f"Target statistics: mean={y.mean():.2f}, std={y.std():.2f}")

    # 60/20/20 train/validation/test split.
    X_train, X_temp, y_train, y_temp = train_test_split(
        X, y, test_size=0.4, random_state=42
    )
    X_val, X_test, y_val, y_test = train_test_split(
        X_temp, y_temp, test_size=0.5, random_state=42
    )

    print(f"Data split: Train={X_train.shape[0]}, Val={X_val.shape[0]}, Test={X_test.shape[0]}")

    # Deliberately small/fast configuration for this larger dataset.
    print("\nTraining TRA Regressor with optimization...")
    tra_reg = OptimizedTRA(
        task_type="regression",
        n_tracks=2,
        signal_threshold=0.15,
        random_state=42,
        n_estimators=10,
        feature_selection=True,
        parallel_signals=True,
        enable_track_pruning=True,
    )

    start_time = time.perf_counter()
    tra_reg.fit(X_train, y_train)
    training_time = time.perf_counter() - start_time

    # Tune the signal threshold on held-out validation data.
    print("Optimizing parameters...")
    optimization_results = tra_reg.optimize_parameters(X_val, y_val)

    print("Training Random Forest for comparison...")
    rf_reg = RandomForestRegressor(
        n_estimators=10,
        random_state=42,
    )

    start_time = time.perf_counter()
    rf_reg.fit(X_train, y_train)
    rf_training_time = time.perf_counter() - start_time

    tra_pred = tra_reg.predict(X_test)
    rf_pred = rf_reg.predict(X_test)

    tra_mse = mean_squared_error(y_test, tra_pred)
    tra_r2 = r2_score(y_test, tra_pred)
    rf_mse = mean_squared_error(y_test, rf_pred)
    rf_r2 = r2_score(y_test, rf_pred)

    print("\nComparison Results:")
    print(f"TRA - MSE: {tra_mse:.4f}, R²: {tra_r2:.4f} (Training: {training_time:.2f}s)")
    print(f"Random Forest - MSE: {rf_mse:.4f}, R²: {rf_r2:.4f} (Training: {rf_training_time:.2f}s)")

    print("\nParameter Optimization Results:")
    print(f"Original threshold: {optimization_results['original_threshold']}")
    print(f"Optimized threshold: {optimization_results['optimized_threshold']}")
    print(f"Performance improvement: {optimization_results['improvement']:.4f}")

    return tra_reg, rf_reg, X_test, y_test
361
+
362
+
363
def parameter_tuning_example():
    """Demonstrate manual parameter tuning for the TRA algorithm.

    Trains one TRA classifier per candidate configuration on a shared
    train/test split and reports the configuration with the best
    test-set accuracy.

    Returns:
        list[dict]: one record per configuration with keys
        'params', 'accuracy' and 'training_time'.
    """
    print("\n" + "=" * 60)
    print("PARAMETER TUNING EXAMPLE")
    print("=" * 60)

    X, y = make_classification(
        n_samples=800,
        n_features=12,
        n_informative=10,
        n_classes=2,
        random_state=42,
    )

    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.3, random_state=42, stratify=y
    )

    print(f"Dataset shape: {X.shape}")
    print("Testing different parameter combinations...")

    # Vary one knob at a time around a base configuration
    # (n_tracks=4, signal_threshold=0.1, n_estimators=50).
    param_combinations = [
        {'n_tracks': 3, 'signal_threshold': 0.1, 'n_estimators': 50},
        {'n_tracks': 4, 'signal_threshold': 0.1, 'n_estimators': 50},
        {'n_tracks': 5, 'signal_threshold': 0.1, 'n_estimators': 50},
        {'n_tracks': 4, 'signal_threshold': 0.05, 'n_estimators': 50},
        {'n_tracks': 4, 'signal_threshold': 0.15, 'n_estimators': 50},
        {'n_tracks': 4, 'signal_threshold': 0.1, 'n_estimators': 30},
        {'n_tracks': 4, 'signal_threshold': 0.1, 'n_estimators': 70},
    ]

    results = []

    for i, params in enumerate(param_combinations):
        print(f"\nTesting configuration {i+1}: {params}")

        tra_clf = OptimizedTRA(
            task_type="classification",
            random_state=42,
            **params,
        )

        start_time = time.perf_counter()
        tra_clf.fit(X_train, y_train)
        training_time = time.perf_counter() - start_time

        y_pred = tra_clf.predict(X_test)
        accuracy = accuracy_score(y_test, y_pred)

        results.append({
            'params': params,
            'accuracy': accuracy,
            'training_time': training_time,
        })

        print(f"Accuracy: {accuracy:.4f}, Training time: {training_time:.2f}s")

    # Pick the configuration with the highest test accuracy.
    best_result = max(results, key=lambda r: r['accuracy'])

    print("\nBest Configuration:")
    print(f"Parameters: {best_result['params']}")
    print(f"Accuracy: {best_result['accuracy']:.4f}")
    print(f"Training time: {best_result['training_time']:.2f}s")

    return results
440
+
441
+
442
def model_comparison_example():
    """Compare TRA against other classifiers on a shared dataset.

    Trains TRA, Random Forest and (when importable) Gradient Boosting
    and Decision Tree models, then prints a summary table of accuracy,
    F1-score, and training/prediction times.

    Returns:
        dict: per-model metrics keyed by model name.
    """
    print("\n" + "=" * 60)
    print("MODEL COMPARISON EXAMPLE")
    print("=" * 60)

    # A moderately hard dataset: overlapping clusters and redundant
    # features make the comparison meaningful.
    X, y = make_classification(
        n_samples=1500,
        n_features=15,
        n_informative=12,
        n_redundant=3,
        n_classes=3,
        n_clusters_per_class=2,
        class_sep=0.8,
        random_state=42,
    )

    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.3, random_state=42, stratify=y
    )

    print(f"Dataset shape: {X.shape}")
    print(f"Classes: {len(np.unique(y))}")

    models = {
        'TRA': OptimizedTRA(
            task_type="classification",
            n_tracks=5,
            random_state=42,
            n_estimators=80,
            feature_selection=True,
            parallel_signals=True,
        ),
        'Random Forest': RandomForestClassifier(
            n_estimators=80,
            random_state=42,
        ),
    }

    # Optional baselines: only a missing import should be tolerated, so
    # catch ImportError specifically rather than using a bare except
    # that would hide unrelated bugs.
    try:
        from sklearn.ensemble import GradientBoostingClassifier
        models['Gradient Boosting'] = GradientBoostingClassifier(
            n_estimators=80,
            random_state=42,
        )
    except ImportError:
        pass

    try:
        from sklearn.tree import DecisionTreeClassifier
        models['Decision Tree'] = DecisionTreeClassifier(
            max_depth=10,
            random_state=42,
        )
    except ImportError:
        pass

    # Train and evaluate all models.
    results = {}

    for name, model in models.items():
        print(f"\nTraining {name}...")

        start_time = time.perf_counter()
        model.fit(X_train, y_train)
        training_time = time.perf_counter() - start_time

        start_time = time.perf_counter()
        y_pred = model.predict(X_test)
        prediction_time = time.perf_counter() - start_time

        accuracy = accuracy_score(y_test, y_pred)
        f1 = f1_score(y_test, y_pred, average='weighted')

        results[name] = {
            'accuracy': accuracy,
            'f1_score': f1,
            'training_time': training_time,
            'prediction_time': prediction_time,
        }

        print(f"Accuracy: {accuracy:.4f}")
        print(f"F1-score: {f1:.4f}")
        print(f"Training time: {training_time:.2f}s")
        print(f"Prediction time: {prediction_time:.4f}s")

    # Aligned summary table.
    print(f"\n{'Model':<20} {'Accuracy':<10} {'F1-Score':<10} {'Train Time':<12} {'Pred Time':<12}")
    print("-" * 70)

    for name, metrics in results.items():
        print(f"{name:<20} {metrics['accuracy']:<10.4f} {metrics['f1_score']:<10.4f} "
              f"{metrics['training_time']:<12.2f} {metrics['prediction_time']:<12.4f}")

    return results
547
+
548
+
549
def visualization_example():
    """Demonstrate TRA visualization capabilities.

    Trains a small classifier, saves a track-structure diagram, and
    plots per-track usage/performance/confidence/latency bar charts.
    Visualization failures (e.g. missing matplotlib/networkx) are
    reported rather than raised so the example run continues.

    Returns:
        OptimizedTRA: the fitted classifier.
    """
    print("\n" + "=" * 60)
    print("VISUALIZATION EXAMPLE")
    print("=" * 60)

    X, y = make_classification(
        n_samples=800,
        n_features=8,
        n_classes=2,
        random_state=42,
    )

    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.3, random_state=42
    )

    print("Training TRA model for visualization...")
    tra_clf = OptimizedTRA(
        task_type="classification",
        n_tracks=4,
        random_state=42,
    )
    tra_clf.fit(X_train, y_train)

    # Run predictions so the tracks accumulate usage statistics to plot.
    _ = tra_clf.predict(X_test)

    try:
        # Visualize TRA structure.
        print("Creating TRA structure visualization...")
        tra_clf.visualize("tra_structure_example.png")
        print("Visualization saved as 'tra_structure_example.png'")

        stats = tra_clf.get_track_statistics()
        details = stats['track_details']
        track_names = list(details.keys())

        def _track_bar(position, values, title, ylabel):
            # One bar chart per metric in a 2x2 grid; factored out to
            # avoid four copies of the same subplot boilerplate.
            plt.subplot(2, 2, position)
            plt.bar(track_names, values)
            plt.title(title)
            plt.xlabel('Track')
            plt.ylabel(ylabel)
            plt.xticks(rotation=45)

        plt.figure(figsize=(12, 8))

        _track_bar(1, [details[n]['usage_count'] for n in track_names],
                   'Track Usage Distribution', 'Usage Count')
        _track_bar(2, [details[n]['performance_score'] for n in track_names],
                   'Track Performance Scores', 'Performance Score')
        _track_bar(3, [details[n]['avg_signal_confidence'] for n in track_names],
                   'Average Signal Confidence', 'Confidence')
        # Seconds -> milliseconds for readability.
        _track_bar(4, [details[n]['avg_prediction_time'] * 1000 for n in track_names],
                   'Average Prediction Time', 'Time (ms)')

        plt.tight_layout()
        plt.savefig('tra_performance_metrics.png', dpi=300, bbox_inches='tight')
        plt.show()

        print("Performance metrics plot saved as 'tra_performance_metrics.png'")

    except Exception as e:
        # Broad on purpose: plotting is optional and must not abort the
        # example run (dependencies may be absent or headless).
        print(f"Visualization failed: {e}")
        print("This might be due to missing dependencies (matplotlib, networkx)")

    return tra_clf
650
+
651
+
652
def custom_dataset_example():
    """Use TRA with a custom (simulated) customer-churn dataset.

    Demonstrates feature construction, scaling of numerical columns,
    training with imbalance handling, and per-track model insights.

    Returns:
        tuple: (fitted classifier, scaled X_test, y_test).
    """
    print("\n" + "=" * 60)
    print("CUSTOM DATASET EXAMPLE")
    print("=" * 60)

    print("Creating sample custom dataset...")

    # Simulate a customer churn dataset.
    np.random.seed(42)
    n_samples = 1000

    # Numerical features.
    age = np.random.normal(35, 12, n_samples)
    income = np.random.lognormal(10, 0.5, n_samples)
    tenure = np.random.exponential(2, n_samples)

    # Categorical features (already integer-encoded for simplicity).
    region = np.random.choice([0, 1, 2, 3], n_samples)  # 4 regions
    plan_type = np.random.choice([0, 1, 2], n_samples)  # 3 plan types

    # Target: churn propensity as a noisy linear combination of the
    # features, thresholded at its median so the classes are balanced
    # by construction.
    churn_prob = (
        -0.02 * age +
        -0.00001 * income +
        -0.1 * tenure +
        0.1 * region +
        0.05 * plan_type +
        np.random.normal(0, 0.1, n_samples)
    )
    churn = (churn_prob > np.median(churn_prob)).astype(int)

    # Combine features into a single design matrix.
    X = np.column_stack([age, income, tenure, region, plan_type])
    y = churn

    feature_names = ['age', 'income', 'tenure', 'region', 'plan_type']

    print("Custom dataset created:")
    print(f"Shape: {X.shape}")
    print(f"Features: {feature_names}")
    print(f"Churn rate: {y.mean():.3f}")

    print("\nPreprocessing data...")

    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.3, random_state=42, stratify=y
    )

    # Scale only the numerical columns (first three); the categorical
    # codes are left untouched. The scaler is fit on training data only
    # to avoid leaking test statistics.
    scaler = StandardScaler()
    X_train_scaled = X_train.copy()
    X_test_scaled = X_test.copy()

    X_train_scaled[:, :3] = scaler.fit_transform(X_train[:, :3])
    X_test_scaled[:, :3] = scaler.transform(X_test[:, :3])

    print("Numerical features scaled")

    print("\nTraining TRA on custom dataset...")
    tra_clf = OptimizedTRA(
        task_type="classification",
        n_tracks=4,
        signal_threshold=0.12,
        random_state=42,
        handle_imbalanced=True,  # Important for imbalanced datasets
        feature_selection=True,
    )

    tra_clf.fit(X_train_scaled, y_train)

    # Predict labels; predict_proba is invoked only to demonstrate the API.
    y_pred = tra_clf.predict(X_test_scaled)
    _ = tra_clf.predict_proba(X_test_scaled)

    # Evaluate performance.
    accuracy = accuracy_score(y_test, y_pred)
    f1 = f1_score(y_test, y_pred)

    print("\nResults on custom dataset:")
    print(f"Accuracy: {accuracy:.4f}")
    print(f"F1-score: {f1:.4f}")

    print("\nClassification Report:")
    print(classification_report(y_test, y_pred, target_names=['No Churn', 'Churn']))

    # TRA-specific insights from the fitted model.
    print("\nTRA Model Insights:")
    stats = tra_clf.get_track_statistics()
    print(f"Active tracks: {stats['n_tracks']}")
    print(f"Total signals: {stats['n_signals']}")

    # Report only tracks that were actually used for predictions.
    for track_name, details in stats['track_details'].items():
        if details['usage_count'] > 0:
            print(f"{track_name}: {details['usage_percentage']:.1f}% usage, "
                  f"performance: {details['performance_score']:.3f}")

    return tra_clf, X_test_scaled, y_test
763
+
764
+
765
def save_load_example():
    """Demonstrate model persistence: save, reload, verify predictions.

    Trains a small TRA classifier, writes it to disk, loads it back,
    and checks that the reloaded model reproduces the original
    predictions exactly. The temporary file is removed afterwards.

    Returns:
        tuple: (original model, reloaded model).
    """
    print("\n" + "=" * 60)
    print("MODEL SAVE/LOAD EXAMPLE")
    print("=" * 60)

    # Create and train model.
    X, y = make_classification(n_samples=500, n_features=8, random_state=42)
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

    print("Training original TRA model...")
    original_tra = OptimizedTRA(
        task_type="classification",
        n_tracks=3,
        random_state=42,
    )
    original_tra.fit(X_train, y_train)

    original_pred = original_tra.predict(X_test)
    original_accuracy = accuracy_score(y_test, original_pred)

    print(f"Original model accuracy: {original_accuracy:.4f}")

    # Round-trip the model through disk.
    model_filename = "tra_model_example.joblib"
    print(f"\nSaving model to {model_filename}...")
    original_tra.save_model(model_filename)

    print(f"Loading model from {model_filename}...")
    loaded_tra = OptimizedTRA.load_model(model_filename)

    loaded_pred = loaded_tra.predict(X_test)
    loaded_accuracy = accuracy_score(y_test, loaded_pred)

    print(f"Loaded model accuracy: {loaded_accuracy:.4f}")

    # Consistency check: the reloaded model must reproduce the original
    # predictions element-for-element.
    predictions_match = np.array_equal(original_pred, loaded_pred)
    print(f"Predictions match: {predictions_match}")

    if predictions_match:
        print("✓ Model save/load successful!")
    else:
        print("✗ Model save/load failed - predictions don't match")

    # Clean up the temporary model file. Only file-system errors are
    # expected here, so catch OSError instead of a bare except that
    # would also swallow KeyboardInterrupt/SystemExit.
    import os
    try:
        os.remove(model_filename)
        print(f"Cleaned up {model_filename}")
    except OSError:
        pass

    return original_tra, loaded_tra
830
+
831
+
832
def run_all_examples():
    """Execute every demo function in order and report progress.

    Each example runs inside its own try/except so that one failure
    does not stop the remaining demos.

    Returns:
        list: the (title, callable) pairs that were scheduled to run.
    """
    banner = "*" * 80
    print(banner)
    print("RUNNING ALL TRA ALGORITHM EXAMPLES")
    print(banner)

    examples = [
        ("Basic Classification Example", basic_classification_example),
        ("Basic Regression Example", basic_regression_example),
        ("Real-World Classification Example", real_world_classification_example),
        ("Real-World Regression Example", real_world_regression_example),
        ("Parameter Tuning Example", parameter_tuning_example),
        ("Model Comparison Example", model_comparison_example),
        ("Visualization Example", visualization_example),
        ("Custom Dataset Example", custom_dataset_example),
        ("Model Save/Load Example", save_load_example)
    ]

    for title, demo in examples:
        print(f"\nRunning: {title}")
        try:
            demo()
        except Exception as err:
            print(f"Error in {title}: {err}")
        else:
            print(f"{title} completed successfully!")

    print("\n" + banner)
    print("ALL EXAMPLES COMPLETED!")
    print(banner)
    return examples
865
+
866
def main():
    """Entry point: run every example, then print a quick-start recap."""
    print("TRA Algorithm Examples")
    print("======================")
    run_all_examples()

    quick_start = (
        "1. Import: from tra_algorithm import OptimizedTRA",
        "2. Create: tra = OptimizedTRA(task_type='classification')",
        "3. Train: tra.fit(X_train, y_train)",
        "4. Predict: y_pred = tra.predict(X_test)",
        "5. Evaluate: tra.get_performance_report()",
    )
    print("\nQuick Start Guide:")
    for step in quick_start:
        print(step)


if __name__ == "__main__":
    main()