validmind 1.7.0__py3-none-any.whl → 1.8.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- validmind/__init__.py +8 -1
- validmind/{client.pyx → client.py} +48 -41
- validmind/data_validation/{threshold_tests.pyx → threshold_tests.py} +1 -2
- validmind/datasets/__init__.py +0 -0
- validmind/datasets/classification/{customer_churn.pyx → customer_churn.py} +1 -1
- validmind/datasets/classification/datasets/bank_customer_churn.csv +8001 -0
- validmind/datasets/classification/datasets/taiwan_credit.csv +30001 -0
- validmind/datasets/classification/{taiwan_credit.pyx → taiwan_credit.py} +1 -1
- validmind/datasets/regression/__init__.py +55 -1
- validmind/datasets/regression/datasets/fred_loan_rates.csv +3552 -0
- validmind/datasets/regression/datasets/fred_loan_rates_test_1.csv +126 -0
- validmind/datasets/regression/datasets/fred_loan_rates_test_2.csv +126 -0
- validmind/datasets/regression/datasets/fred_loan_rates_test_3.csv +126 -0
- validmind/datasets/regression/datasets/fred_loan_rates_test_4.csv +126 -0
- validmind/datasets/regression/datasets/fred_loan_rates_test_5.csv +126 -0
- validmind/datasets/regression/datasets/lending_club_loan_rates.csv +138 -0
- validmind/datasets/regression/fred.py +132 -0
- validmind/datasets/regression/lending_club.py +70 -0
- validmind/datasets/regression/models/fred_loan_rates_model_1.pkl +0 -0
- validmind/datasets/regression/models/fred_loan_rates_model_2.pkl +0 -0
- validmind/datasets/regression/models/fred_loan_rates_model_3.pkl +0 -0
- validmind/datasets/regression/models/fred_loan_rates_model_4.pkl +0 -0
- validmind/datasets/regression/models/fred_loan_rates_model_5.pkl +0 -0
- validmind/model_validation/sklearn/{threshold_tests.pyx → threshold_tests.py} +9 -9
- validmind/model_validation/statsmodels/{metrics.pyx → metrics.py} +123 -138
- validmind/test_plans/__init__.py +0 -4
- validmind/test_plans/{binary_classifier.pyx → binary_classifier.py} +0 -15
- validmind/test_plans/{statsmodels_timeseries.pyx → statsmodels_timeseries.py} +2 -2
- validmind/test_plans/{tabular_datasets.pyx → tabular_datasets.py} +0 -13
- validmind/test_plans/{time_series.pyx → time_series.py} +3 -3
- validmind/test_suites/__init__.py +73 -0
- validmind/test_suites/test_suites.py +48 -0
- validmind/vm_models/__init__.py +2 -0
- validmind/vm_models/{dataset.pyx → dataset.py} +17 -8
- validmind/vm_models/test_suite.py +57 -0
- {validmind-1.7.0.dist-info → validmind-1.8.1.dist-info}/METADATA +1 -3
- validmind-1.8.1.dist-info/RECORD +63 -0
- validmind/api_client.c +0 -9481
- validmind/api_client.cpython-310-x86_64-linux-gnu.so +0 -0
- validmind/client.c +0 -7198
- validmind/client.cpython-310-x86_64-linux-gnu.so +0 -0
- validmind/datasets/regression/fred.pyx +0 -7
- validmind/datasets/regression/lending_club.pyx +0 -7
- validmind/model_utils.c +0 -9281
- validmind/model_utils.cpython-310-x86_64-linux-gnu.so +0 -0
- validmind/utils.c +0 -10284
- validmind/utils.cpython-310-x86_64-linux-gnu.so +0 -0
- validmind-1.7.0.dist-info/RECORD +0 -53
- /validmind/{api_client.pyx → api_client.py} +0 -0
- /validmind/data_validation/{metrics.pyx → metrics.py} +0 -0
- /validmind/{model_utils.pyx → model_utils.py} +0 -0
- /validmind/model_validation/{model_metadata.pyx → model_metadata.py} +0 -0
- /validmind/model_validation/sklearn/{metrics.pyx → metrics.py} +0 -0
- /validmind/model_validation/statsmodels/{threshold_tests.pyx → threshold_tests.py} +0 -0
- /validmind/model_validation/{utils.pyx → utils.py} +0 -0
- /validmind/{utils.pyx → utils.py} +0 -0
- /validmind/vm_models/{dataset_utils.pyx → dataset_utils.py} +0 -0
- /validmind/vm_models/{figure.pyx → figure.py} +0 -0
- /validmind/vm_models/{metric.pyx → metric.py} +0 -0
- /validmind/vm_models/{metric_result.pyx → metric_result.py} +0 -0
- /validmind/vm_models/{model.pyx → model.py} +0 -0
- /validmind/vm_models/{plot_utils.pyx → plot_utils.py} +0 -0
- /validmind/vm_models/{result_summary.pyx → result_summary.py} +0 -0
- /validmind/vm_models/{test_context.pyx → test_context.py} +0 -0
- /validmind/vm_models/{test_plan.pyx → test_plan.py} +0 -0
- /validmind/vm_models/{test_plan_result.pyx → test_plan_result.py} +0 -0
- /validmind/vm_models/{test_result.pyx → test_result.py} +0 -0
- /validmind/vm_models/{threshold_test.pyx → threshold_test.py} +0 -0
- {validmind-1.7.0.dist-info → validmind-1.8.1.dist-info}/WHEEL +0 -0
validmind/model_validation/statsmodels/{metrics.pyx → metrics.py}
CHANGED
@@ -575,133 +575,6 @@ class AutoARIMA(Metric):
         return self.cache_results(results)
 
 
-class ModelPredictionOLS(Metric):
-    """
-    Calculates and plots the model predictions for each of the models
-    """
-
-    type = "dataset"
-    key = "model_prediction_ols"
-    default_params = {"plot_start_date": None, "plot_end_date": None}
-
-    def serialize_time_series_df(self, df):
-        # Convert the DateTimeIndex to strings without specifying a date format
-        df.index = df.index.astype(str)
-
-        # Reset the index and rename the index column to 'Date'
-        df = df.reset_index().rename(columns={"index": "Date"})
-
-        # Convert the DataFrame into a list of dictionaries
-        return df.to_dict("records")
-
-    def get_model_prediction(self, model_list, df_test):
-        # Extract the training target variable from the first model fit
-
-        first_model_fit = model_list[0].model
-
-        train_data = pd.Series(
-            first_model_fit.model.endog, index=first_model_fit.model.data.row_labels
-        )
-        train_data = train_data.to_frame()
-        target_var_name = first_model_fit.model.endog_names
-        train_data.columns = [f"{target_var_name}_train"]
-
-        # Initialize an empty DataFrame to store the predictions
-        prediction_df = pd.DataFrame(index=df_test.index)
-        prediction_df[f"{target_var_name}_test"] = np.nan
-
-        # Concatenate the train_data and prediction_df
-        combined_df = pd.concat([train_data, prediction_df], axis=0)
-
-        # Loop through each model fit
-        for i, model_fit in enumerate(model_list):
-            model_name = f"model_{i+1}"
-
-            # Prepare the test dataset
-            exog_names = model_fit.model.model.exog_names
-            X_test = df_test.copy()
-
-            # Add the constant if it's missing
-            if "const" in exog_names and "const" not in X_test.columns:
-                X_test["const"] = 1.0
-
-            # Select the necessary columns
-            X_test = X_test[exog_names]
-
-            # Generate the predictions
-            predictions = model_fit.model.predict(X_test)
-
-            # Add the predictions to the DataFrame
-            combined_df[model_name] = np.nan
-            combined_df[model_name].iloc[len(train_data) :] = predictions
-
-        # Add the test data to the '<target_variable>_test' column
-        combined_df[f"{target_var_name}_test"].iloc[len(train_data) :] = df_test[
-            target_var_name
-        ]
-
-        return combined_df
-
-    def plot_predictions(self, prediction_df, start_date=None, end_date=None):
-        if start_date and end_date:
-            prediction_df = prediction_df.loc[start_date:end_date]
-
-        n_models = prediction_df.shape[1] - 2
-        fig, axes = plt.subplots(n_models, 1, sharex=True)
-
-        for i in range(n_models):
-            axes[i].plot(
-                prediction_df.index,
-                prediction_df.iloc[:, 0],
-                label=prediction_df.columns[0],
-                color="grey",
-            )
-            axes[i].plot(
-                prediction_df.index,
-                prediction_df.iloc[:, 1],
-                label=prediction_df.columns[1],
-                color="lightgrey",
-            )
-            axes[i].plot(
-                prediction_df.index,
-                prediction_df.iloc[:, i + 2],
-                label=prediction_df.columns[i + 2],
-                linestyle="-",
-            )
-            axes[i].set_ylabel("Target Variable")
-            axes[i].set_title(f"Test Data vs. {prediction_df.columns[i + 2]}")
-            axes[i].legend()
-            axes[i].grid(True)
-        plt.xlabel("Date")
-        plt.tight_layout()
-
-    def run(self):
-        model_list = self.models
-
-        df_test = self.test_ds.df
-
-        plot_start_date = self.params["plot_start_date"]
-        plot_end_date = self.params["plot_end_date"]
-
-        print(plot_start_date)
-
-        prediction_df = self.get_model_prediction(model_list, df_test)
-        results = self.serialize_time_series_df(prediction_df)
-
-        figures = []
-        self.plot_predictions(
-            prediction_df, start_date=plot_start_date, end_date=plot_end_date
-        )
-
-        # Assuming the plot is the only figure we want to store
-        fig = plt.gcf()
-        figures.append(Figure(key=self.key, figure=fig, metadata={}))
-        plt.close("all")
-
-        # Assuming we do not need to cache any results, just the figure
-        return self.cache_results(results, figures=figures)
-
-
 @dataclass
 class RegressionModelSummary(Metric):
     """
@@ -769,12 +642,17 @@ class RegressionModelInsampleComparison(Metric):
         if not self.models:
             raise ValueError("List of models must be provided in the models parameter")
         all_models = []
-
+        if self.model is not None:
+            all_models.append(self.model)
+
+        if self.models is not None:
+            all_models.extend(self.models)
+
+        for model in all_models:
             if model.model.__class__.__name__ != "RegressionResultsWrapper":
                 raise ValueError(
                     "Only RegressionResultsWrapper models of statsmodels library supported"
                 )
-            all_models.append(model.model)
 
         results = self._in_sample_performance_ols(all_models)
         return self.cache_results(results)
@@ -799,15 +677,13 @@ class RegressionModelInsampleComparison(Metric):
         evaluation_results = []
 
         for i, model in enumerate(models):
-
-            X_columns = model.model.exog_names
-
+            X_columns = model.model.model.exog_names
             # Extract R-squared and Adjusted R-squared
-            r2 = model.rsquared
-            adj_r2 = model.rsquared_adj
+            r2 = model.model.rsquared
+            adj_r2 = model.model.rsquared_adj
 
             # Calculate the Mean Squared Error (MSE) and Root Mean Squared Error (RMSE)
-            mse = model.mse_resid
+            mse = model.model.mse_resid
             rmse = mse**0.5
 
             # Append the results to the evaluation_results list
@@ -850,12 +726,21 @@ class RegressionModelOutsampleComparison(Metric):
         if not self.models:
             raise ValueError("List of models must be provided in the models parameter")
         all_models = []
-
+        if self.model is not None:
+            all_models.append(self.model)
+
+        if self.models is not None:
+            all_models.extend(self.models)
+
+        for model in all_models:
             if model.model.__class__.__name__ != "RegressionResultsWrapper":
                 raise ValueError(
                     "Only RegressionResultsWrapper models of statsmodels library supported"
                 )
-
+            if model.test_ds is None:
+                raise ValueError(
+                    "Test dataset is missing in the ValidMind Model object"
+                )
 
         results = self._out_sample_performance_ols(
             all_models,
@@ -888,7 +773,7 @@ class RegressionModelOutsampleComparison(Metric):
             y_test = fitted_model.test_ds.y
 
             # Predict the test data
-            y_pred = fitted_model.predict(X_test)
+            y_pred = fitted_model.model.predict(X_test)
 
             # Calculate the residuals
             residuals = y_test - y_pred
@@ -905,3 +790,103 @@ class RegressionModelOutsampleComparison(Metric):
         results_df = pd.DataFrame(results, columns=["Model", "MSE", "RMSE"])
 
         return results_df
+
+
+@dataclass
+class RegressionModelForecastPlot(Metric):
+    """
+    This metric creates a plot of forecast vs observed for each model in the list.
+    """
+
+    category = "model_forecast"
+    scope = "test"
+    key = "regression_forecast_plot"
+    default_params = {"start_date": None, "end_date": None}
+
+    def description(self):
+        return """
+        This section shows plots of training and test datasets vs forecast trainining and forecast test.
+        """
+
+    def run(self):
+        print(self.params)
+
+        start_date = self.params["start_date"]
+        end_date = self.params["end_date"]
+
+        print(self.params)
+
+        # Check models list is not empty
+        if not self.models:
+            raise ValueError("List of models must be provided in the models parameter")
+        all_models = []
+        for model in self.models:
+            if model.model.__class__.__name__ != "RegressionResultsWrapper":
+                raise ValueError(
+                    "Only RegressionResultsWrapper models of statsmodels library supported"
+                )
+            all_models.append(model)
+
+        figures = self._plot_forecast(all_models, start_date, end_date)
+
+        return self.cache_results(figures=figures)
+
+    def _plot_forecast(self, model_list, start_date=None, end_date=None):
+        # Convert start_date and end_date to pandas Timestamp for comparison
+        start_date = pd.Timestamp(start_date)
+        end_date = pd.Timestamp(end_date)
+
+        # Initialize a list to store figures
+        figures = []
+
+        for fitted_model in model_list:
+            train_ds = fitted_model.train_ds
+            test_ds = fitted_model.test_ds
+
+            # Check that start_date and end_date are within the data range
+            all_dates = pd.concat([pd.Series(train_ds.index), pd.Series(test_ds.index)])
+            print(all_dates)
+            if start_date < all_dates.min() or end_date > all_dates.max():
+                raise ValueError(
+                    "start_date and end_date must be within the range of dates in the data"
+                )
+
+            fig, ax = plt.subplots()
+            sns.lineplot(
+                x=train_ds.index,
+                y=train_ds.y,
+                ax=ax,
+                label="Train Forecast",
+            )
+            sns.lineplot(
+                x=test_ds.index,
+                y=test_ds.y,
+                ax=ax,
+                label="Test Forecast",
+            )
+            sns.lineplot(
+                x=train_ds.index,
+                y=fitted_model.y_train_predict.loc[train_ds.index],
+                ax=ax,
+                label="Train Dataset",
+                color="grey",
+            )
+            sns.lineplot(
+                x=test_ds.index,
+                y=fitted_model.y_test_predict.loc[test_ds.index],
+                ax=ax,
+                label="Test Dataset",
+                color="black",
+            )
+            plt.title(
+                f"Forecast vs Observed for {fitted_model.model.__class__.__name__}"
+            )
+
+            # Set the x-axis limits to zoom in/out
+            plt.xlim(start_date, end_date)
+
+            plt.legend()
+            figures.append(Figure(key=self.key, figure=fig, metadata={}))
+            plt.close("all")
+
+        return figures
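
A note on the attribute chain these updated metrics rely on: the ValidMind model object stores the fitted statsmodels result on its `.model` attribute, so `model.model.rsquared` reaches the fitted `RegressionResultsWrapper` and `model.model.model.exog_names` reaches the underlying design-matrix metadata. A minimal, self-contained sketch of that nesting, assuming a hypothetical `VMModelStub` wrapper in place of the real ValidMind model class:

```python
import numpy as np
import pandas as pd
import statsmodels.api as sm

# Fit a small OLS model; the fitted object is a RegressionResultsWrapper.
rng = np.random.default_rng(0)
X = pd.DataFrame({"x1": rng.normal(size=100), "x2": rng.normal(size=100)})
y = 1.5 * X["x1"] - 0.5 * X["x2"] + rng.normal(size=100)
fitted = sm.OLS(y, sm.add_constant(X)).fit()


class VMModelStub:
    """Hypothetical stand-in for the ValidMind model wrapper used by the metrics above."""

    def __init__(self, fitted_result):
        self.model = fitted_result  # the metrics read the fitted statsmodels result here


vm_model = VMModelStub(fitted)

# The same attributes the updated _in_sample_performance_ols reads:
print(vm_model.model.model.exog_names)  # design-matrix column names, e.g. ['const', 'x1', 'x2']
print(vm_model.model.rsquared)          # R-squared
print(vm_model.model.rsquared_adj)      # adjusted R-squared
print(vm_model.model.mse_resid)         # residual mean squared error
```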
validmind/test_plans/__init__.py
CHANGED
@@ -8,12 +8,10 @@ import tabulate
 from ..vm_models import TestPlan
 from .binary_classifier import (
     BinaryClassifierMetrics,
-    BinaryClassifier,
     BinaryClassifierPerformance,
     BinaryClassifierDiagnosis,
 )
 from .tabular_datasets import (
-    TabularDataset,
     TabularDataQuality,
     TabularDatasetDescription,
     TimeSeriesDataQuality,
@@ -39,8 +37,6 @@ core_test_plans = {
     "binary_classifier_metrics": BinaryClassifierMetrics,
     "binary_classifier_validation": BinaryClassifierPerformance,
     "binary_classifier_model_diagnosis": BinaryClassifierDiagnosis,
-    "binary_classifier": BinaryClassifier,
-    "tabular_dataset": TabularDataset,
     "tabular_dataset_description": TabularDatasetDescription,
     "tabular_data_quality": TabularDataQuality,
     "normality_test_plan": NormalityTestPlan,
validmind/test_plans/{binary_classifier.pyx → binary_classifier.py}
CHANGED
@@ -81,18 +81,3 @@ class BinaryClassifierDiagnosis(TestPlan):
     name = "binary_classifier_model_diagnosis"
     required_context = ["model"]
     tests = [OverfitDiagnosis, WeakspotsDiagnosis, RobustnessDiagnosis]
-
-
-class BinaryClassifier(TestPlan):
-    """
-    Test plan for sklearn classifier models that includes
-    both metrics and validation tests
-    """
-
-    name = "binary_classifier"
-    required_context = ["model"]
-    test_plans = [
-        BinaryClassifierMetrics,
-        BinaryClassifierPerformance,
-        BinaryClassifierDiagnosis,
-    ]
validmind/test_plans/{statsmodels_timeseries.pyx → statsmodels_timeseries.py}
CHANGED
@@ -116,5 +116,5 @@ class RegressionModelsComparison(TestPlan):
     """
 
     name = "regression_models_comparison"
-    required_context = ["models"]
-    tests = [
+    required_context = ["models", "model"]
+    tests = [RegressionModelInsampleComparison, RegressionModelOutsampleComparison]
validmind/test_plans/{tabular_datasets.pyx → tabular_datasets.py}
CHANGED
@@ -72,19 +72,6 @@ class TimeSeriesDataQuality(TestPlan):
     tests = [TimeSeriesOutliers, TimeSeriesMissingValues, TimeSeriesFrequency]
 
 
-class TabularDataset(TestPlan):
-    """
-    Test plan for generic tabular datasets
-    """
-
-    name = "tabular_dataset"
-    required_context = ["dataset"]
-    test_plans = [
-        TabularDatasetDescription,
-        TabularDataQuality,
-    ]
-
-
 class TimeSeriesDataset(TestPlan):
     """
     Test plan for time series datasets
validmind/test_plans/{time_series.pyx → time_series.py}
CHANGED
@@ -18,7 +18,7 @@ from ..data_validation.metrics import (
     SpreadPlot,
 )
 
-from ..model_validation.statsmodels.metrics import
+from ..model_validation.statsmodels.metrics import RegressionModelForecastPlot
 
 
 class TimeSeriesUnivariate(TestPlan):
@@ -135,8 +135,8 @@ class TimeSeriesForecast(TestPlan):
     """
 
     name = "time_series_forecast"
-    required_context = ["models"
-    tests = [
+    required_context = ["models"]
+    tests = [RegressionModelForecastPlot]
 
     def description(self):
         return """
validmind/test_suites/__init__.py
ADDED
@@ -0,0 +1,73 @@
+"""
+Entrypoint for test suites.
+"""
+import tabulate
+
+from .test_suites import (
+    BinaryClassifierFullSuite,
+    BinaryClassifierModelValidation,
+    TabularDataset,
+)
+from ..vm_models import TestSuite
+
+core_test_suites = {
+    "binary_classifier_full_suite": BinaryClassifierFullSuite,
+    "binary_classifier_model_validation": BinaryClassifierModelValidation,
+    "tabular_dataset": TabularDataset,
+}
+
+# These test suites can be added by the user
+custom_test_suites = {}
+
+
+def _get_all_test_suites():
+    """
+    Returns a dictionary of all test suites.
+
+    Merge the core and custom test suites, with the custom suites
+    taking precedence, i.e. allowing overriding of core test suites
+    """
+    return {**core_test_suites, **custom_test_suites}
+
+
+def get_by_name(name: str):
+    """
+    Returns the test suite by name
+    """
+    all_test_suites = _get_all_test_suites()
+    if name in all_test_suites:
+        return all_test_suites[name]
+
+    raise ValueError(f"Test suite with name: '{name}' not found")
+
+
+def list_suites(pretty: bool = True):
+    """
+    Returns a list of all available test suites
+    """
+
+    all_test_suites = _get_all_test_suites()
+
+    if not pretty:
+        return list(all_test_suites.keys())
+
+    table = []
+    for name, test_suite in all_test_suites.items():
+        table.append(
+            {
+                "ID": name,
+                "Name": test_suite.__name__,
+                "Description": test_suite.__doc__.strip(),
+                "Test Plans": ", ".join(test_suite.test_plans),
+            }
+        )
+
+    return tabulate.tabulate(table, headers="keys", tablefmt="html")
+
+
+def register_test_suite(suite_id: str, suite: TestSuite):
+    """
+    Registers a custom test suite
+    """
+    custom_test_suites[suite_id] = suite
+    print(f"Registered test suite: {suite_id}")
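
Based on the registry functions in the new `validmind/test_suites/__init__.py` above, registering and retrieving a custom suite should look roughly like the sketch below. The `MyCreditRiskSuite` class, its suite ID, and its plan selection are illustrative; only `TestSuite`, `register_test_suite`, `get_by_name`, `list_suites`, and the plan IDs come from this diff.

```python
from validmind.test_suites import get_by_name, list_suites, register_test_suite
from validmind.vm_models import TestSuite


class MyCreditRiskSuite(TestSuite):
    """
    Illustrative custom suite combining data-quality and classifier plans.
    """

    required_context = ["dataset", "model"]
    test_plans = [
        "tabular_data_quality",
        "binary_classifier_metrics",
    ]


# Custom suites are stored alongside the core ones and override them on ID collisions.
register_test_suite("my_credit_risk_suite", MyCreditRiskSuite)

suite_class = get_by_name("my_credit_risk_suite")  # returns the suite class, not an instance
print(list_suites(pretty=False))                   # plain list of available suite IDs
```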
validmind/test_suites/test_suites.py
ADDED
@@ -0,0 +1,48 @@
+"""
+Default test suites provided by the developer framework.
+"""
+
+from ..vm_models import TestSuite
+
+
+class TabularDataset(TestSuite):
+    """
+    Test suite for tabular datasets.
+    """
+
+    required_context = ["dataset"]
+
+    test_plans = [
+        "tabular_dataset_description",
+        "tabular_data_quality",
+    ]
+
+
+class BinaryClassifierModelValidation(TestSuite):
+    """
+    Test suite for binary classification models.
+    """
+
+    required_context = ["model"]
+
+    test_plans = [
+        "binary_classifier_metrics",
+        "binary_classifier_validation",
+        "binary_classifier_model_diagnosis",
+    ]
+
+
+class BinaryClassifierFullSuite(TestSuite):
+    """
+    Full test suite for binary classification models.
+    """
+
+    required_context = ["dataset", "model"]
+
+    test_plans = [
+        "tabular_dataset_description",
+        "tabular_data_quality",
+        "binary_classifier_metrics",
+        "binary_classifier_validation",
+        "binary_classifier_model_diagnosis",
+    ]
validmind/vm_models/__init__.py
CHANGED
@@ -17,6 +17,7 @@ from .test_plan_result import (
     TestPlanTestResult,
 )
 from .test_result import TestResult, TestResults
+from .test_suite import TestSuite
 from .threshold_test import ThresholdTest
 
 __all__ = [
@@ -40,5 +41,6 @@ __all__ = [
     "TestPlanTestResult",
     "TestResult",
     "TestResults",
+    "TestSuite",
     "ThresholdTest",
 ]
validmind/vm_models/{dataset.pyx → dataset.py}
CHANGED
@@ -1,7 +1,7 @@
 """
 Dataset class wrapper
 """
-from dataclasses import dataclass,
+from dataclasses import dataclass, fields
 
 from dython.nominal import associations
 
@@ -47,13 +47,15 @@ class Dataset:
     target_column: str = ""
     class_labels: dict = None
 
-
-
+    _feature_lookup: dict = None
+    _transformed_df: object = None
 
     def __post_init__(self):
         """
         Set target_column and class_labels from DatasetTargets
         """
+        self._feature_lookup = {}
+
         if self.targets:
             self.target_column = self.targets.target_column
             self.class_labels = self.targets.class_labels
@@ -79,6 +81,13 @@ class Dataset:
         """
         return self.raw_dataset[self.target_column]
 
+    @property
+    def index(self):
+        """
+        Returns the dataset's index.
+        """
+        return self.raw_dataset.index
+
     def get_feature_by_id(self, feature_id):
         """
         Returns the feature with the given id. We also build a lazy
@@ -93,14 +102,14 @@ class Dataset:
         Returns:
             dict: The feature with the given id
         """
-        if feature_id not in self.
+        if feature_id not in self._feature_lookup:
             for feature in self.fields:
                 if feature["id"] == feature_id:
-                    self.
+                    self._feature_lookup[feature_id] = feature
                     return feature
             raise ValueError(f"Feature with id {feature_id} does not exist")
 
-        return self.
+        return self._feature_lookup[feature_id]
 
     def get_feature_type(self, feature_id):
         """
@@ -230,8 +239,8 @@ class Dataset:
         Returns:
             pd.DataFrame: The transformed dataset
         """
-        if self.
-            return self.
+        if self._transformed_df is not None and force_refresh is False:
+            return self._transformed_df
 
         # Get the list of features that are of type Dummy
         dataset_options = self.options
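
The two private fields added to `Dataset` implement a simple lazy-cache pattern: `_feature_lookup` memoizes per-feature lookups and `_transformed_df` caches the transformed frame until a `force_refresh`. A standalone sketch of the same pattern (the `LazyDataset` class and its field names are illustrative, not the real `Dataset` schema):

```python
class LazyDataset:
    """Illustrative lazy-cache pattern mirroring the Dataset changes above."""

    def __init__(self, raw_dataset, fields):
        self.raw_dataset = raw_dataset
        self.fields = fields          # list of {"id": ...} feature descriptors
        self._feature_lookup = {}     # filled one feature at a time, on demand
        self._transformed_df = None   # computed once, reused until force_refresh=True

    def get_feature_by_id(self, feature_id):
        if feature_id not in self._feature_lookup:
            for feature in self.fields:
                if feature["id"] == feature_id:
                    self._feature_lookup[feature_id] = feature
                    return feature
            raise ValueError(f"Feature with id {feature_id} does not exist")
        return self._feature_lookup[feature_id]

    def transformed_dataset(self, force_refresh=False):
        if self._transformed_df is not None and force_refresh is False:
            return self._transformed_df
        self._transformed_df = dict(self.raw_dataset)  # stand-in for the real transformation
        return self._transformed_df


ds = LazyDataset({"age": [25, 40]}, fields=[{"id": "age", "type": "Numeric"}])
print(ds.get_feature_by_id("age"))                           # linear scan once, cached afterwards
print(ds.transformed_dataset() is ds.transformed_dataset())  # True: cached object is reused
```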
validmind/vm_models/test_suite.py
ADDED
@@ -0,0 +1,57 @@
+"""
+A TestSuite is a collection of TestPlans. It is a helpful way to organize
+TestPlans that are related to each other. For example, a TestSuite could be
+created for a specific use case or model methodology, to run a colllection
+of plans for data validation and model validation with a single function call.
+"""
+
+from dataclasses import dataclass
+from typing import ClassVar, List
+
+from .test_context import TestContext
+from .test_plan import TestPlan
+
+
+@dataclass
+class TestSuite(TestPlan):
+    """
+    Base class for test suites. Test suites are used to define any
+    arbitrary grouping of test plans that will be run on a dataset and/or model.
+    """
+
+    test_plans: ClassVar[List[str]] = []
+    # Stores a reference to the child test plan instances
+    # so we can access their results after running the test suite
+    _test_plan_instances: List[object] = None
+
+    def run(self, send=True):
+        """
+        Runs the test suite.
+        """
+        # Avoid circular import
+        from ..test_plans import get_by_name
+
+        self._test_plan_instances = []
+
+        if self.test_context is None:
+            self.test_context = TestContext(
+                dataset=self.dataset,
+                model=self.model,
+                models=self.models,
+            )
+
+        for test_plan_id in self.test_plans:
+            test_plan = get_by_name(test_plan_id)
+            test_plan_instance = test_plan(
+                config=self.config,
+                test_context=self.test_context,
+            )
+            test_plan_instance.run(send=send)
+            self._test_plan_instances.append(test_plan_instance)
+
+    @property
+    def results(self):
+        """
+        Returns the results of the test suite.
+        """
+        return [test_plan.results for test_plan in self._test_plan_instances]
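
One design detail in the new class worth calling out: `test_plans` is a `ClassVar`, so each `TestSuite` subclass sets it as class-level configuration and it never becomes an `__init__` field of the dataclass, while `_test_plan_instances` remains per-instance state. A small self-contained illustration of that behaviour, using simplified stand-in classes rather than the real validmind ones:

```python
from dataclasses import dataclass, fields
from typing import ClassVar, List


@dataclass
class Plan:
    dataset: object = None                    # regular dataclass field -> shows up in __init__


@dataclass
class Suite(Plan):
    test_plans: ClassVar[List[str]] = []      # ClassVar -> class-level config, not an __init__ arg
    _instances: List[object] = None           # regular field -> per-instance state


class TabularSuite(Suite):
    test_plans = ["tabular_data_quality"]     # subclasses simply override the class attribute


print([f.name for f in fields(TabularSuite)])  # ['dataset', '_instances'], no 'test_plans'
print(TabularSuite(dataset="df").test_plans)   # ['tabular_data_quality']
```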
{validmind-1.7.0.dist-info → validmind-1.8.1.dist-info}/METADATA
CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: validmind
-Version: 1.7.0
+Version: 1.8.1
 Summary: ValidMind Developer Framework
 Author: Andres Rodriguez
 Author-email: andres@validmind.ai
@@ -12,7 +12,6 @@ Classifier: Programming Language :: Python :: 3.10
 Provides-Extra: r-support
 Requires-Dist: arch (>=5.4.0,<6.0.0)
 Requires-Dist: click (>=8.0.4,<9.0.0)
-Requires-Dist: cython (>=0.29.34,<0.30.0)
 Requires-Dist: dython (>=0.7.1,<0.8.0)
 Requires-Dist: ipython (>=8.11.0,<9.0.0)
 Requires-Dist: myst-parser (>=1.0.0,<2.0.0)
@@ -33,5 +32,4 @@ Requires-Dist: sphinx-rtd-theme (>=1.2.0,<2.0.0)
 Requires-Dist: statsmodels (>=0.13.5,<0.14.0)
 Requires-Dist: tabulate (>=0.8.9,<0.9.0)
 Requires-Dist: tqdm (>=4.64.0,<5.0.0)
-Requires-Dist: twine (>=4.0.2,<5.0.0)
 Requires-Dist: xgboost (>=1.5.2,<2.0.0)