validmind 2.3.3__py3-none-any.whl → 2.3.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- validmind/__version__.py +1 -1
- validmind/datasets/regression/fred_timeseries.py +272 -0
- validmind/tests/__types__.py +10 -0
- validmind/tests/data_validation/SeasonalDecompose.py +68 -40
- validmind/tests/data_validation/TimeSeriesDescription.py +74 -0
- validmind/tests/data_validation/TimeSeriesDescriptiveStatistics.py +76 -0
- validmind/tests/data_validation/TimeSeriesHistogram.py +29 -45
- validmind/tests/data_validation/TimeSeriesOutliers.py +30 -41
- validmind/tests/model_validation/ModelMetadataComparison.py +59 -0
- validmind/tests/model_validation/ModelPredictionResiduals.py +103 -0
- validmind/tests/model_validation/TimeSeriesPredictionWithCI.py +131 -0
- validmind/tests/model_validation/TimeSeriesPredictionsPlot.py +76 -0
- validmind/tests/model_validation/TimeSeriesR2SquareBySegments.py +103 -0
- validmind/tests/model_validation/sklearn/FeatureImportanceComparison.py +83 -0
- validmind/tests/model_validation/sklearn/PermutationFeatureImportance.py +1 -1
- validmind/tests/model_validation/sklearn/RegressionErrorsComparison.py +76 -0
- validmind/tests/model_validation/sklearn/RegressionR2SquareComparison.py +63 -0
- {validmind-2.3.3.dist-info → validmind-2.3.5.dist-info}/METADATA +70 -36
- {validmind-2.3.3.dist-info → validmind-2.3.5.dist-info}/RECORD +23 -12
- /validmind/datasets/regression/datasets/{lending_club_loan_rates.csv → leanding_club_loan_rates.csv} +0 -0
- {validmind-2.3.3.dist-info → validmind-2.3.5.dist-info}/LICENSE +0 -0
- {validmind-2.3.3.dist-info → validmind-2.3.5.dist-info}/WHEEL +0 -0
- {validmind-2.3.3.dist-info → validmind-2.3.5.dist-info}/entry_points.txt +0 -0
@@ -0,0 +1,103 @@
|
|
1
|
+
# Copyright © 2023-2024 ValidMind Inc. All rights reserved.
|
2
|
+
# See the LICENSE file in the root of this repository for details.
|
3
|
+
# SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
|
4
|
+
|
5
|
+
|
6
|
+
import pandas as pd
|
7
|
+
import plotly.express as px
|
8
|
+
from sklearn import metrics
|
9
|
+
|
10
|
+
from validmind import tags, tasks
|
11
|
+
|
12
|
+
|
13
|
+
@tags("model_performance", "sklearn")
@tasks("regression", "time_series_forecasting")
def TimeSeriesR2SquareBySegments(datasets, models, segments=None):
    """
    Plot R-Squared values for each model over specified time segments and generate a bar chart
    with the results.

    **Purpose**: The purpose of this function is to plot the R-Squared values for different models applied to various segments of the time series data.

    **Parameters**:
    - datasets: List of datasets to evaluate.
    - models: List of models to evaluate. Datasets and models are paired positionally.
    - segments: Dictionary with 'start_date' and 'end_date' keys containing lists of start and end dates for each segment. If None, each dataset's own index is split into two halves.

    **Returns**: A tuple `(fig, results_df)` with the grouped bar chart and the table of per-segment R-Squared values.

    **Raises**: `ValueError` if no segment contains any observation for any dataset-model pair.

    **Test Mechanism**: The function iterates through each dataset-model pair, calculates the R-Squared values for specified time segments, and generates a bar chart with these results.

    **Signs of High Risk**:
    - If the R-Squared values are significantly low for certain segments, it could indicate that the model is not explaining much of the variability in the dataset for those segments.

    **Strengths**:
    - Provides a visual representation of model performance across different time segments.
    - Allows for identification of segments where models perform poorly.

    **Limitations**:
    - Assumes that the dataset is provided as a DataFrameDataset object with `y`, `y_pred`, and `df` attributes.
    - Requires that `dataset.y_pred(model)` returns the predicted values for the model.
    - Assumes that `dataset.df` has an index that can be compared against the segment boundaries (typically a datetime index).
    - The default two-halves split is positional, so it presumably expects the index to be sorted chronologically.
    """
    results_list = []

    for dataset, model in zip(datasets, models):
        dataset_name = dataset.input_id
        model_name = model.input_id

        y_true = dataset.y
        y_pred = dataset.y_pred(model)

        index = dataset.df.index

        # Ensure y_true and y_pred are pandas Series sharing the dataset index
        if not isinstance(y_true, pd.Series):
            y_true = pd.Series(y_true, index=index)
        if not isinstance(y_pred, pd.Series):
            y_pred = pd.Series(y_pred, index=index)

        # Resolve the segments per dataset. The `segments` argument itself is
        # never reassigned: otherwise the default halves computed for the first
        # dataset would leak into every following dataset.
        if segments is None:
            mid_point = len(index) // 2
            dataset_segments = {
                "start_date": [index.min(), index[mid_point]],
                "end_date": [index[mid_point - 1], index.max()],
            }
        else:
            dataset_segments = segments

        segment_bounds = zip(
            dataset_segments["start_date"], dataset_segments["end_date"]
        )
        for segment_index, (start_date, end_date) in enumerate(segment_bounds):
            mask = (index >= start_date) & (index <= end_date)
            y_true_segment = y_true.loc[mask]
            y_pred_segment = y_pred.loc[mask]

            # Segments without observations are skipped rather than scored
            if len(y_true_segment) == 0 or len(y_pred_segment) == 0:
                continue

            r2s = metrics.r2_score(y_true_segment, y_pred_segment)
            results_list.append(
                {
                    "Model": model_name,
                    "Dataset": dataset_name,
                    "Segments": f"Segment {segment_index + 1}",
                    "Start Date": start_date,
                    "End Date": end_date,
                    "R-Squared": r2s,
                }
            )

    if not results_list:
        # Without rows the DataFrame has no columns and the plot call would
        # fail with an opaque "not the name of a column" error.
        raise ValueError(
            "No R-Squared values could be computed: none of the segments "
            "contain observations for the provided datasets."
        )

    # Convert results list to a DataFrame
    results_df = pd.DataFrame(results_list)

    # Plotting
    fig = px.bar(
        results_df,
        x="Segments",
        y="R-Squared",
        color="Model",
        barmode="group",
        title="R-Squared Comparison by Segment and Model",
        labels={
            "R-Squared": "R-Squared Value",
            # The key must match the plotted column name ("Segments")
            "Segments": "Time Segment",
            "Model": "Model",
        },
    )

    return fig, results_df
|
@@ -0,0 +1,83 @@
|
|
1
|
+
# Copyright © 2023-2024 ValidMind Inc. All rights reserved.
|
2
|
+
# See the LICENSE file in the root of this repository for details.
|
3
|
+
# SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
|
4
|
+
|
5
|
+
import pandas as pd
|
6
|
+
from sklearn.inspection import permutation_importance
|
7
|
+
|
8
|
+
from validmind import tags, tasks
|
9
|
+
|
10
|
+
|
11
|
+
@tags("model_explainability", "sklearn")
@tasks("regression", "time_series_forecasting")
def FeatureImportanceComparison(datasets, models, num_features=3):
    """
    Compare feature importance scores for each model and generate a summary table
    with the top important features.

    **Purpose**: Compare the permutation feature importance of different models applied to various datasets.

    **Test Mechanism**: Datasets and models are paired positionally. For each pair, permutation feature importance (PFI) is computed, the features are ranked by mean importance in descending order, and the top `num_features` are written to one table row as `[feature; score]` strings. Ranks beyond the number of available features are filled with `None`.

    **Signs of High Risk**:
    - If key features expected to be important are ranked low, it could indicate potential issues with model training or data quality.
    - High variance in feature importance scores across different models may suggest instability in feature selection.

    **Strengths**:
    - Provides a clear comparison of the most important features for each model.
    - Uses permutation importance, which is a model-agnostic method and can be applied to any estimator.

    **Limitations**:
    - Assumes that the dataset is provided as a DataFrameDataset object with `x_df` and `y_df` methods to access feature and target data.
    - Requires that `model.model` is compatible with `sklearn.inspection.permutation_importance`.
    - The function's output is dependent on the number of features specified by `num_features`, which defaults to 3 but can be adjusted.
    """
    rows = []

    for dataset, model in zip(datasets, models):
        features = dataset.x_df()
        target = dataset.y_df()

        importances = permutation_importance(
            model.model,
            features,
            target,
            random_state=0,
            n_jobs=-2,
        )
        mean_scores = importances["importances_mean"]

        # Map every feature name to its mean permutation importance
        score_by_feature = dict(zip(features.columns, mean_scores))

        # Rank features from most to least important and keep the leaders
        ranked = sorted(
            score_by_feature.items(), key=lambda pair: pair[1], reverse=True
        )
        leaders = ranked[:num_features]

        row = {
            "Model": model.input_id,
            "Dataset": dataset.input_id,
        }

        # One column per requested rank; pad with None when features run out
        for rank in range(1, num_features + 1):
            column = f"Feature {rank}"
            if rank > len(leaders):
                row[column] = None
                continue
            feature_name, feature_score = leaders[rank - 1]
            row[column] = f"[{feature_name}; {feature_score:.4f}]"

        rows.append(row)

    return pd.DataFrame(rows)
|
@@ -0,0 +1,76 @@
|
|
1
|
+
# Copyright © 2023-2024 ValidMind Inc. All rights reserved.
|
2
|
+
# See the LICENSE file in the root of this repository for details.
|
3
|
+
# SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
|
4
|
+
|
5
|
+
import numpy as np
|
6
|
+
import pandas as pd
|
7
|
+
from sklearn import metrics
|
8
|
+
|
9
|
+
from validmind import tags, tasks
|
10
|
+
from validmind.logging import get_logger
|
11
|
+
|
12
|
+
logger = get_logger(__name__)
|
13
|
+
|
14
|
+
|
15
|
+
@tags("model_performance", "sklearn")
@tasks("regression", "time_series_forecasting")
def RegressionErrorsComparison(datasets, models):
    """
    Compare regression error metrics for each model and generate a summary table
    with the results.

    **Purpose**: Compare the regression errors of different models applied to various datasets.

    **Test Mechanism**: Datasets and models are paired positionally. For each pair the function computes MAE, MSE, MAPE and MBD (mean of predicted minus true values) and collects them into one table row. MAPE is reported as `None` when the true values contain a zero.

    **Signs of High Risk**:
    - High Mean Absolute Error (MAE) or Mean Squared Error (MSE) indicates poor model performance.
    - High Mean Absolute Percentage Error (MAPE) suggests large percentage errors, especially problematic if the true values are small.
    - Mean Bias Deviation (MBD) significantly different from zero indicates systematic overestimation or underestimation by the model.

    **Strengths**:
    - Provides multiple error metrics to assess model performance from different perspectives.
    - Includes a check to avoid division by zero when calculating MAPE.

    **Limitations**:
    - Assumes that the dataset is provided as a DataFrameDataset object with `y` and `y_pred` attributes.
    - The function relies on the `logger` from `validmind.logging` to warn about zero values in `y_true`, which should be correctly implemented and imported.
    - Requires that `dataset.y_pred(model)` returns the predicted values for the model.
    """
    rows = []

    for dataset, model in zip(datasets, models):
        actual = dataset.y
        predicted = dataset.y_pred(model)
        # Align the dtype of the targets with the predictions before comparing
        actual = actual.astype(predicted.dtype)

        mae = metrics.mean_absolute_error(actual, predicted)
        mse = metrics.mean_squared_error(actual, predicted)

        # MAPE divides by the true values, so it is undefined if any is zero
        if not np.any(actual == 0):
            mape = np.mean(np.abs((actual - predicted) / actual)) * 100
        else:
            logger.warning(
                "y_true contains zero values. Skipping MAPE calculation to avoid division by zero."
            )
            mape = None

        # Positive bias means the model over-predicts on average
        mbd = np.mean(predicted - actual)

        rows.append(
            {
                "Model": model.input_id,
                "Dataset": dataset.input_id,
                "Mean Absolute Error (MAE)": mae,
                "Mean Squared Error (MSE)": mse,
                "Mean Absolute Percentage Error (MAPE)": mape,
                "Mean Bias Deviation (MBD)": mbd,
            }
        )

    return pd.DataFrame(rows)
|
@@ -0,0 +1,63 @@
|
|
1
|
+
# Copyright © 2023-2024 ValidMind Inc. All rights reserved.
|
2
|
+
# See the LICENSE file in the root of this repository for details.
|
3
|
+
# SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
|
4
|
+
|
5
|
+
import pandas as pd
|
6
|
+
from sklearn import metrics
|
7
|
+
|
8
|
+
from validmind import tags, tasks
|
9
|
+
from validmind.tests.model_validation.statsmodels.statsutils import adj_r2_score
|
10
|
+
|
11
|
+
|
12
|
+
@tags("model_performance", "sklearn")
@tasks("regression", "time_series_forecasting")
def RegressionR2SquareComparison(datasets, models):
    """
    Compare R-Squared and Adjusted R-Squared values for each model and generate a summary table
    with the results.

    **Purpose**: Compare the R-Squared and Adjusted R-Squared values of different models applied to various datasets.

    **Test Mechanism**: Datasets and models are paired positionally. For each pair the function computes R-Squared and Adjusted R-Squared (using the number of observations and the number of feature columns) and collects them into one table row.

    **Signs of High Risk**:
    - If the R-Squared values are significantly low, it could indicate that the model is not explaining much of the variability in the dataset.
    - A significant difference between R-Squared and Adjusted R-Squared values might indicate that the model includes irrelevant features.

    **Strengths**:
    - Provides a quantitative measure of model performance in terms of variance explained.
    - Adjusted R-Squared accounts for the number of predictors, making it a more reliable measure when comparing models with different numbers of features.

    **Limitations**:
    - Assumes that the dataset is provided as a DataFrameDataset object with `y`, `y_pred`, and `feature_columns` attributes.
    - The function relies on `adj_r2_score` from the `statsmodels.statsutils` module, which should be correctly implemented and imported.
    - Requires that `dataset.y_pred(model)` returns the predicted values for the model.
    """
    rows = []

    for dataset, model in zip(datasets, models):
        actual = dataset.y
        predicted = dataset.y_pred(model)
        # Align the dtype of the targets with the predictions before scoring
        actual = actual.astype(predicted.dtype)

        r_squared = metrics.r2_score(actual, predicted)

        # Adjusted R-Squared needs the sample size and the predictor count
        n_observations = len(actual)
        n_predictors = len(dataset.feature_columns)
        adjusted_r_squared = adj_r2_score(
            actual, predicted, n_observations, n_predictors
        )

        rows.append(
            {
                "Model": model.input_id,
                "Dataset": dataset.input_id,
                "R-Squared": r_squared,
                "Adjusted R-Squared": adjusted_r_squared,
            }
        )

    return pd.DataFrame(rows)
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: validmind
|
3
|
-
Version: 2.3.3
|
3
|
+
Version: 2.3.5
|
4
4
|
Summary: ValidMind Developer Framework
|
5
5
|
License: Commercial License
|
6
6
|
Author: Andres Rodriguez
|
@@ -12,58 +12,50 @@ Classifier: Programming Language :: Python :: 3.9
|
|
12
12
|
Classifier: Programming Language :: Python :: 3.10
|
13
13
|
Classifier: Programming Language :: Python :: 3.11
|
14
14
|
Provides-Extra: all
|
15
|
+
Provides-Extra: huggingface
|
15
16
|
Provides-Extra: llm
|
16
17
|
Provides-Extra: pytorch
|
17
18
|
Provides-Extra: r-support
|
18
|
-
|
19
|
-
Requires-Dist:
|
20
|
-
Requires-Dist:
|
21
|
-
Requires-Dist:
|
22
|
-
Requires-Dist:
|
23
|
-
Requires-Dist:
|
24
|
-
Requires-Dist:
|
25
|
-
Requires-Dist:
|
26
|
-
Requires-Dist:
|
27
|
-
Requires-Dist:
|
28
|
-
Requires-Dist: latex2mathml (>=3.77.0,<4.0.0)
|
29
|
-
Requires-Dist: levenshtein (>=0.21.1,<0.22.0) ; extra == "all" or extra == "llm"
|
30
|
-
Requires-Dist: llvmlite (>=0.42.0) ; python_version >= "3.12"
|
19
|
+
Requires-Dist: aiohttp[speedups]
|
20
|
+
Requires-Dist: arch
|
21
|
+
Requires-Dist: bert-score (>=0.3.13)
|
22
|
+
Requires-Dist: catboost
|
23
|
+
Requires-Dist: evaluate
|
24
|
+
Requires-Dist: ipywidgets
|
25
|
+
Requires-Dist: kaleido (>=0.2.1,!=0.2.1.post1)
|
26
|
+
Requires-Dist: langchain-openai (>=0.1.8) ; extra == "all" or extra == "llm"
|
27
|
+
Requires-Dist: langdetect
|
28
|
+
Requires-Dist: latex2mathml (>=3.77.0)
|
31
29
|
Requires-Dist: llvmlite ; python_version >= "3.8" and python_full_version <= "3.11.0"
|
32
|
-
Requires-Dist: matplotlib
|
30
|
+
Requires-Dist: matplotlib
|
33
31
|
Requires-Dist: mistune (>=3.0.2,<4.0.0)
|
34
32
|
Requires-Dist: nltk (>=3.8.1,<4.0.0)
|
35
33
|
Requires-Dist: numba (<0.59.0)
|
36
|
-
Requires-Dist: numpy
|
37
|
-
Requires-Dist:
|
38
|
-
Requires-Dist: openai (>=1.3.7,<2.0.0) ; extra == "all" or extra == "llm"
|
34
|
+
Requires-Dist: numpy
|
35
|
+
Requires-Dist: openai (>=1) ; extra == "all" or extra == "llm"
|
39
36
|
Requires-Dist: pandas (>=1.1,<2)
|
40
|
-
Requires-Dist: plotly
|
41
|
-
Requires-Dist: plotly-express
|
42
|
-
Requires-Dist: polars
|
37
|
+
Requires-Dist: plotly
|
38
|
+
Requires-Dist: plotly-express
|
39
|
+
Requires-Dist: polars
|
43
40
|
Requires-Dist: pycocoevalcap (>=1.2,<2.0) ; extra == "all" or extra == "llm"
|
44
|
-
Requires-Dist:
|
45
|
-
Requires-Dist:
|
46
|
-
Requires-Dist:
|
47
|
-
Requires-Dist: rouge (>=1.0.1,<2.0.0)
|
41
|
+
Requires-Dist: python-dotenv
|
42
|
+
Requires-Dist: ragas (>=0.1.7) ; extra == "all" or extra == "llm"
|
43
|
+
Requires-Dist: rouge (>=1)
|
48
44
|
Requires-Dist: rpy2 (>=3.5.10,<4.0.0) ; extra == "all" or extra == "r-support"
|
49
|
-
Requires-Dist: scikit-learn
|
50
|
-
Requires-Dist: scipy
|
51
|
-
Requires-Dist: scipy ; python_version >= "3.8" and python_full_version <= "3.11.0"
|
45
|
+
Requires-Dist: scikit-learn
|
46
|
+
Requires-Dist: scipy
|
52
47
|
Requires-Dist: scorecardpy (>=0.1.9.6,<0.2.0.0)
|
53
|
-
Requires-Dist: seaborn
|
54
|
-
Requires-Dist: selfcheckgpt (>=0.1.7,<0.2.0)
|
48
|
+
Requires-Dist: seaborn
|
55
49
|
Requires-Dist: sentry-sdk (>=1.24.0,<2.0.0)
|
56
50
|
Requires-Dist: shap (>=0.42.0,<0.43.0)
|
57
|
-
Requires-Dist: statsmodels
|
51
|
+
Requires-Dist: statsmodels
|
58
52
|
Requires-Dist: tabulate (>=0.8.9,<0.9.0)
|
59
53
|
Requires-Dist: textblob (>=0.18.0.post0,<0.19.0)
|
60
|
-
Requires-Dist: textstat (>=0.7.3,<0.8.0)
|
61
54
|
Requires-Dist: torch (>=1.10.0) ; extra == "all" or extra == "llm" or extra == "pytorch"
|
62
|
-
Requires-Dist:
|
63
|
-
Requires-Dist:
|
64
|
-
Requires-Dist: transformers (>=4.32.0,<5.0.0) ; extra == "all" or extra == "llm" or extra == "transformers"
|
55
|
+
Requires-Dist: tqdm
|
56
|
+
Requires-Dist: transformers (>=4.32.0,<5.0.0) ; extra == "all" or extra == "llm" or extra == "huggingface"
|
65
57
|
Requires-Dist: xgboost (>=1.5.2,<3)
|
66
|
-
Requires-Dist: ydata-profiling
|
58
|
+
Requires-Dist: ydata-profiling
|
67
59
|
Description-Content-Type: text/markdown
|
68
60
|
|
69
61
|
# ValidMind Developer Framework
|
@@ -79,3 +71,45 @@ descriptions of your dataset to testing your models for weak spots and overfit a
|
|
79
71
|
Framework helps you automate the generation of model documentation by feeding the ValidMind platform with
|
80
72
|
documentation artifacts and test results.
|
81
73
|
|
74
|
+
## Installation
|
75
|
+
|
76
|
+
To install the ValidMind Developer Framework and all optional dependencies, run:
|
77
|
+
|
78
|
+
```bash
|
79
|
+
pip install validmind[all]
|
80
|
+
```
|
81
|
+
|
82
|
+
To install the Developer Framework without optional dependencies (core functionality only), run:
|
83
|
+
|
84
|
+
```bash
|
85
|
+
pip install validmind
|
86
|
+
```
|
87
|
+
|
88
|
+
### Extra dependencies
|
89
|
+
|
90
|
+
The Developer Framework has optional dependencies that can be installed separately to support additional model types and tests.
|
91
|
+
|
92
|
+
- **LLM Support**: To be able to run tests for Large Language Models (LLMs), install the `llm` extra:
|
93
|
+
|
94
|
+
```bash
|
95
|
+
pip install validmind[llm]
|
96
|
+
```
|
97
|
+
|
98
|
+
- **PyTorch Models**: To use PyTorch models with the Developer Framework, install the `pytorch` extra:
|
99
|
+
|
100
|
+
```bash
|
101
|
+
pip install validmind[pytorch]
|
102
|
+
```
|
103
|
+
|
104
|
+
- **Hugging Face Transformers**: To use Hugging Face Transformers models with the Developer Framework, install the `huggingface` extra:
|
105
|
+
|
106
|
+
```bash
|
107
|
+
pip install validmind[huggingface]
|
108
|
+
```
|
109
|
+
|
110
|
+
- **R Models**: To use R models with the Developer Framework, install the `r-support` extra:
|
111
|
+
|
112
|
+
```bash
|
113
|
+
pip install validmind[r-support]
|
114
|
+
```
|
115
|
+
|
@@ -1,5 +1,5 @@
|
|
1
1
|
validmind/__init__.py,sha256=UfmzPwUCdUWbWq3zPqqmq4jw0_kfl3hX4U72p_seE4I,3700
|
2
|
-
validmind/__version__.py,sha256=
|
2
|
+
validmind/__version__.py,sha256=AMoApoKvsYqyoF1DuOQX5QmSAGaw0FpF6uuxBuaZrp8,22
|
3
3
|
validmind/ai/test_descriptions.py,sha256=QBV8i13nKeaQPXqnnra0L_BGc6pZzVWejATUTcgKMek,9287
|
4
4
|
validmind/ai/utils.py,sha256=DtlpgcJcYS1FvdZPw5moUmYnv_guGKsxVbIRzFQ7pcg,3380
|
5
5
|
validmind/api_client.py,sha256=0IR8MpH_GxBykOs4Egz7oEKZLoOEwoli81X1oFL0DD8,16893
|
@@ -48,8 +48,9 @@ validmind/datasets/regression/datasets/fred_loan_rates_test_2.csv,sha256=arukRZY
|
|
48
48
|
validmind/datasets/regression/datasets/fred_loan_rates_test_3.csv,sha256=pCznzePHsQFfAv9r6NMQqfZ9f2sAFquuqMiKIrry0TU,2736
|
49
49
|
validmind/datasets/regression/datasets/fred_loan_rates_test_4.csv,sha256=FBxkMcc-sauImJ2RKL1VDa5EqU501OoKU4zSwL2A1e0,3355
|
50
50
|
validmind/datasets/regression/datasets/fred_loan_rates_test_5.csv,sha256=qPFYcPRQgKYrsOEWjumrY-27n4E0r7IQIiAY8CtD8yc,3866
|
51
|
-
validmind/datasets/regression/datasets/
|
51
|
+
validmind/datasets/regression/datasets/leanding_club_loan_rates.csv,sha256=1mePKtdNXg8ZG-VVSPLtOlCJm_3qSqoK5qP0_klxdF8,11624
|
52
52
|
validmind/datasets/regression/fred.py,sha256=NpydiYfBPfClE8C5ZO_FisGBS09CulAIn1-yrz_LP4k,5707
|
53
|
+
validmind/datasets/regression/fred_timeseries.py,sha256=d8OM-FyI2hyu_azdZvOVcd8hO1nNWaPdzM4PNWqv1-4,8857
|
53
54
|
validmind/datasets/regression/lending_club.py,sha256=QM8RTuy0ijRfbHm9Ye8_-vQY_X61sGRwG0HUDpn-oSQ,2536
|
54
55
|
validmind/datasets/regression/models/fred_loan_rates_model_1.pkl,sha256=RUpaUJC7WCqc5jwzV4vPujtQlNpVbcJhJ4N5F9Qk59s,40067
|
55
56
|
validmind/datasets/regression/models/fred_loan_rates_model_2.pkl,sha256=J1ukMdeFoxRlC1vAm7YV39aANncAU1VQVAFSyjlDPUk,48314
|
@@ -85,7 +86,7 @@ validmind/test_suites/tabular_datasets.py,sha256=WE4eLzRCfiqAxRqXnZFRR3Lo_u-TI6K
|
|
85
86
|
validmind/test_suites/text_data.py,sha256=YGVGBB05356jN9Gzcy5CHShRzo1fm5mKsZY7YBq0cYU,739
|
86
87
|
validmind/test_suites/time_series.py,sha256=msUyYySAe5VHJJp6z0k0cNt2ekMB8-XkxGER75Zs1hs,6724
|
87
88
|
validmind/tests/__init__.py,sha256=9-SR070X6SUZIURkh7M1jUMiqaDS0SVUmzZ8gNtm-10,15904
|
88
|
-
validmind/tests/__types__.py,sha256=
|
89
|
+
validmind/tests/__types__.py,sha256=AmSEzm1rwtkTQnbAFlgjrcyyg1xMThglC6r7W4jdMaM,9902
|
89
90
|
validmind/tests/data_validation/ACFandPACFPlot.py,sha256=BMXcVZxrZ09xzw0TZtUM81Mss6q2eQg6md5uEfpGu_8,4960
|
90
91
|
validmind/tests/data_validation/ADF.py,sha256=WEFDUdDJlkvDPcLTFAa9RXwMSv_JD-Y-CN-F3xRGBtc,5177
|
91
92
|
validmind/tests/data_validation/ANOVAOneWayTable.py,sha256=udizp4rxW4VlMaXK2RrkPK5tAUgO0C-A3MIifPjBZMw,6019
|
@@ -120,7 +121,7 @@ validmind/tests/data_validation/PearsonCorrelationMatrix.py,sha256=dM6M8jjGHNL9u
|
|
120
121
|
validmind/tests/data_validation/PhillipsPerronArch.py,sha256=ft4ZbeKsM_8WAvdWHou0AZGXFUc4RWiMdOvn1pMXmr4,5075
|
121
122
|
validmind/tests/data_validation/RollingStatsPlot.py,sha256=MqKooEL1cIRandoSN7sWhKgXDhdbIbCcBTVvc-FIp5k,5901
|
122
123
|
validmind/tests/data_validation/ScatterPlot.py,sha256=5mCr37aD92DUSn82BR7AWdx6-RdJqhjWZPhPcpIexGU,4346
|
123
|
-
validmind/tests/data_validation/SeasonalDecompose.py,sha256=
|
124
|
+
validmind/tests/data_validation/SeasonalDecompose.py,sha256=7aBjifXMr7py0LdUjvHMCwTDjO-K9mI38ThHX4Yp5D8,9562
|
124
125
|
validmind/tests/data_validation/Skewness.py,sha256=lygUUQomckvdX5__JGOn6Rx0kJRfyw-0gZOjqqN9Phk,4935
|
125
126
|
validmind/tests/data_validation/SpreadPlot.py,sha256=3FMhokxIexGzRoIlRElkkgpQRRdvnlyx0-tt8aK-wDY,4591
|
126
127
|
validmind/tests/data_validation/TabularCategoricalBarPlots.py,sha256=EM1m1v9V5N6bpaed_QYoqEFl4ipYcDEh7TbUL1B2stE,4241
|
@@ -128,11 +129,13 @@ validmind/tests/data_validation/TabularDateTimeHistograms.py,sha256=Dzrw77U8mbDY
|
|
128
129
|
validmind/tests/data_validation/TabularDescriptionTables.py,sha256=Hd78V0CsRR0zbA97GFHV4DuffaT-85CI3wyF3ptdXLk,9281
|
129
130
|
validmind/tests/data_validation/TabularNumericalHistograms.py,sha256=CSdQJxDht6QJRMGXoedP_1MVoem-whlcwxGGBaP3inc,4170
|
130
131
|
validmind/tests/data_validation/TargetRateBarPlots.py,sha256=7BghG2XtWw2ptmNgT-wEWb6gWwUgWIlp-LV5HtQENbM,5737
|
132
|
+
validmind/tests/data_validation/TimeSeriesDescription.py,sha256=YIfet30KZSAOdYAkTQadKLNuY_SAM3a5Fn2z2AqQz7I,3130
|
133
|
+
validmind/tests/data_validation/TimeSeriesDescriptiveStatistics.py,sha256=GZlo4TV6NnG92T-2XQqNs8o3vKRv4jlJJM3sVX9aO10,3196
|
131
134
|
validmind/tests/data_validation/TimeSeriesFrequency.py,sha256=b6lfIzGjiMUho1dhBFfgWZf6EM8AhYdFojWVbgEE4F4,7243
|
132
|
-
validmind/tests/data_validation/TimeSeriesHistogram.py,sha256=
|
135
|
+
validmind/tests/data_validation/TimeSeriesHistogram.py,sha256=ILSdldXLGn--40rRFapUBAizaTgWxXcWlaC9BvKykAc,3676
|
133
136
|
validmind/tests/data_validation/TimeSeriesLinePlot.py,sha256=_HQfgfY_ZmT1S2SSF7gJ7RmNoKjGKQ9_dDaxVHESHtI,4173
|
134
137
|
validmind/tests/data_validation/TimeSeriesMissingValues.py,sha256=4-b55iIMbhDETeRp-lgJjr7p7A5nmuPsYXwILiJ_Jtw,7351
|
135
|
-
validmind/tests/data_validation/TimeSeriesOutliers.py,sha256=
|
138
|
+
validmind/tests/data_validation/TimeSeriesOutliers.py,sha256=JaGrK6vaPpC0a-dcaR76X10_b66Mk0Lt5Pujm0_aJTo,9165
|
136
139
|
validmind/tests/data_validation/TooManyZeroValues.py,sha256=lnW0De4o2q56j1LJvoW_4CQbz4OPvPP5K3e0exCnxqc,5875
|
137
140
|
validmind/tests/data_validation/UniqueRows.py,sha256=zyZ6icTq4kRcNUT_ID95j6Ae8OpGHjrlHkR9j4_3aB8,4516
|
138
141
|
validmind/tests/data_validation/WOEBinPlots.py,sha256=C3bNTRzbz3wXWoHUdoHZVmzIgMzJTAgwKtgWK2fPbSc,6946
|
@@ -158,9 +161,14 @@ validmind/tests/model_validation/ContextualRecall.py,sha256=wzLjaliEG441qXvaonch
|
|
158
161
|
validmind/tests/model_validation/FeaturesAUC.py,sha256=RKh3oQIyFSaU0rG4trtuPZDrC4-sIky8cVXnB2z5PYA,4733
|
159
162
|
validmind/tests/model_validation/MeteorScore.py,sha256=3YtSjdzxraFYmam03HtOhjayXScFdS5QR_9V4gD-lLI,5010
|
160
163
|
validmind/tests/model_validation/ModelMetadata.py,sha256=F9ctmlIxngkHgOlggRl0WFLilh46SlM3vYfY9zkhtYk,3733
|
164
|
+
validmind/tests/model_validation/ModelMetadataComparison.py,sha256=gcBYgbnqh_2qotc2lLHXQZ4WyAPc5ieZyoL2nvHsTmo,2485
|
165
|
+
validmind/tests/model_validation/ModelPredictionResiduals.py,sha256=PYeqdNuaeF0bTPzQZOi0ESx1LnBA99KHF5_ZItm8Pyg,3784
|
161
166
|
validmind/tests/model_validation/RegardScore.py,sha256=EuR1pAgVcn99m5eWagxGgdOCHDBkB2NIzyGE9ly73z4,5206
|
162
167
|
validmind/tests/model_validation/RegressionResidualsPlot.py,sha256=EQzJn9wH_1pztHr2JI26Um3E9KzHmu6o76o4ffbXZj4,5025
|
163
168
|
validmind/tests/model_validation/RougeScore.py,sha256=1yr09JH1kGJKFL29lmXvEm1Dp482Sjxn0lK6UJfCQ0s,5576
|
169
|
+
validmind/tests/model_validation/TimeSeriesPredictionWithCI.py,sha256=zNTjM9SOGs49NLghlGMnY2EuZjkIZgRKWTavxSNjPls,4616
|
170
|
+
validmind/tests/model_validation/TimeSeriesPredictionsPlot.py,sha256=gvtiaAw0tndhOhxdzZOS073-1vp-Rxt81vF67cG1acg,2887
|
171
|
+
validmind/tests/model_validation/TimeSeriesR2SquareBySegments.py,sha256=ec90bxP9cWFe7b7xMfVhWj1lUgVheWeRFUSH79HKPqI,4017
|
164
172
|
validmind/tests/model_validation/TokenDisparity.py,sha256=EZlpFQH6qRWedjTQT5o4u-OIdgj1iKK-JB8GEQQlxoA,4394
|
165
173
|
validmind/tests/model_validation/ToxicityScore.py,sha256=nFDHU1Z8mGpJrdKE6sWxo9nOqqzne1JsYIiNFyn_gYA,5299
|
166
174
|
validmind/tests/model_validation/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
@@ -197,6 +205,7 @@ validmind/tests/model_validation/sklearn/ClusterPerformance.py,sha256=kDGdMfxyf6
|
|
197
205
|
validmind/tests/model_validation/sklearn/ClusterPerformanceMetrics.py,sha256=-9Euc3ZCAFoMSsJuz9zrrQqk3GTXQHYKa3i8lYsJilI,8600
|
198
206
|
validmind/tests/model_validation/sklearn/CompletenessScore.py,sha256=Uj_hTTTqRLHDJ-pjajfuun_2Anq7W0GQpwPsAhdWq24,2559
|
199
207
|
validmind/tests/model_validation/sklearn/ConfusionMatrix.py,sha256=Bm9fsw9nD1KurbBbXf0Jph0MN7_-7GRiydiTTfDgiU4,5776
|
208
|
+
validmind/tests/model_validation/sklearn/FeatureImportanceComparison.py,sha256=zUXNy9SookZQQfLvwvI2W0yG6BlRBdWIK0IHgYZKNpg,3250
|
200
209
|
validmind/tests/model_validation/sklearn/FowlkesMallowsScore.py,sha256=hL7hfvdeZ_jR9ktxMH9NI-hwvLsl478iQDajOXUuocM,3049
|
201
210
|
validmind/tests/model_validation/sklearn/HomogeneityScore.py,sha256=ykBkNCNm1HRyMoU2x5hK20tuKbU0vYrwj4huc4x7v6w,2753
|
202
211
|
validmind/tests/model_validation/sklearn/HyperParametersTuning.py,sha256=YGJ38Px-RRIkWVHavLr_FNm6sod_k_t6U32BLmIFMTg,4660
|
@@ -206,13 +215,15 @@ validmind/tests/model_validation/sklearn/MinimumF1Score.py,sha256=5QLwdsFkuT-k2Q
|
|
206
215
|
validmind/tests/model_validation/sklearn/MinimumROCAUCScore.py,sha256=0KMdAHZOnY_PpoWSNZxmudClqQ469JV_V_vTM3FXAC8,4891
|
207
216
|
validmind/tests/model_validation/sklearn/ModelsPerformanceComparison.py,sha256=ITimCZ0dPxomj6bSI_0g_I5ft_fWc5QDvCTKukciaRU,6196
|
208
217
|
validmind/tests/model_validation/sklearn/OverfitDiagnosis.py,sha256=BLyDWAHd7dw17QLuwy9JrvsBNPXhM8yhXWu9EeSIVgg,14075
|
209
|
-
validmind/tests/model_validation/sklearn/PermutationFeatureImportance.py,sha256=
|
218
|
+
validmind/tests/model_validation/sklearn/PermutationFeatureImportance.py,sha256=E7ynDX0IVnenUqovwW5GXtxheGci5bCo7Y534WoU-tY,4990
|
210
219
|
validmind/tests/model_validation/sklearn/PopulationStabilityIndex.py,sha256=5cp3E78C1OjFomqVmtYOovdoNniLVVg-jmRb9HXQ3XQ,10132
|
211
220
|
validmind/tests/model_validation/sklearn/PrecisionRecallCurve.py,sha256=Ay0Z3NDpP0w3Tz3nPSSUhA5WZGW4EZyNmCIJga2kixQ,4436
|
212
221
|
validmind/tests/model_validation/sklearn/ROCCurve.py,sha256=gXeUoJ8Gxd4sZ_VRDICEznk8iaNyZmDpgZk2M03lVdo,5822
|
213
222
|
validmind/tests/model_validation/sklearn/RegressionErrors.py,sha256=ozczSJX5jwEXVj-kb6BlLzoUVzNXNkFqeaoYmKfTAdM,5976
|
223
|
+
validmind/tests/model_validation/sklearn/RegressionErrorsComparison.py,sha256=CHfdcRx6ZqvfkfZVy7HNgOUjZp-KepPS5rs2al19OyQ,3160
|
214
224
|
validmind/tests/model_validation/sklearn/RegressionModelsPerformanceComparison.py,sha256=1e0Sv-pfI4sUeMDl-62X97Ai8kezcI_3gUnfZWzq3fA,5789
|
215
225
|
validmind/tests/model_validation/sklearn/RegressionR2Square.py,sha256=MbVfgxiloCSd32xhlO4_QiDyo3ZTJB4Orc-G3yMltwM,4958
|
226
|
+
validmind/tests/model_validation/sklearn/RegressionR2SquareComparison.py,sha256=tGJKpfeTvU2xBxsYbQSC5GPDcCS2_j0FcT3uceXZduI,2761
|
216
227
|
validmind/tests/model_validation/sklearn/RobustnessDiagnosis.py,sha256=762ckUxewgv87Aix48gJQ532v7UEdwIUD_l5iMaQoGU,13738
|
217
228
|
validmind/tests/model_validation/sklearn/SHAPGlobalImportance.py,sha256=FwY2n65uDBz4D4fFy-Ur7G2lb9W_LcOr-HPevmwTxZk,8951
|
218
229
|
validmind/tests/model_validation/sklearn/SilhouettePlot.py,sha256=TznxbLhwybNbht6hUg4MSKxX3TI7zJp75tQH0svWon0,6237
|
@@ -291,8 +302,8 @@ validmind/vm_models/test_suite/runner.py,sha256=wgjyqx2CU4bjX3fZKmzJP7gb5GFooGvs
|
|
291
302
|
validmind/vm_models/test_suite/summary.py,sha256=co-xJJMUYGb7cOiVmw0i8vpZlfiMqrWjaCOmHKMAbcE,4686
|
292
303
|
validmind/vm_models/test_suite/test.py,sha256=_GfbK36l98SjzgVcucmp0OKBJKqMW3neO7SqJ3EWeps,5049
|
293
304
|
validmind/vm_models/test_suite/test_suite.py,sha256=Cns2wL54v0T5Mv5_HJb3kMeaa4rtycdqT8KxK9_rWEU,6279
|
294
|
-
validmind-2.3.
|
295
|
-
validmind-2.3.
|
296
|
-
validmind-2.3.
|
297
|
-
validmind-2.3.
|
298
|
-
validmind-2.3.
|
305
|
+
validmind-2.3.5.dist-info/LICENSE,sha256=XonPUfwjvrC5Ombl3y-ko0Wubb1xdG_7nzvIbkZRKHw,35772
|
306
|
+
validmind-2.3.5.dist-info/METADATA,sha256=Jnq-YN0eBrHfWFytv-dKo8akuB0iYBGaJwEcQ0a9rIg,4133
|
307
|
+
validmind-2.3.5.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
|
308
|
+
validmind-2.3.5.dist-info/entry_points.txt,sha256=HuW7YyOv9u_OEWpViQXtv0nfoI67uieJHawKWA4Hv9A,76
|
309
|
+
validmind-2.3.5.dist-info/RECORD,,
|
/validmind/datasets/regression/datasets/{lending_club_loan_rates.csv → leanding_club_loan_rates.csv}
RENAMED
File without changes
|
File without changes
|
File without changes
|
File without changes
|