validmind 2.0.1__py3-none-any.whl → 2.1.0__py3-none-any.whl
This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the packages as they appear in their respective public registries.
- validmind/__init__.py +6 -3
- validmind/__version__.py +1 -1
- validmind/ai.py +193 -0
- validmind/api_client.py +45 -31
- validmind/client.py +33 -6
- validmind/datasets/classification/customer_churn.py +2 -2
- validmind/datasets/credit_risk/__init__.py +11 -0
- validmind/datasets/credit_risk/datasets/lending_club_loan_data_2007_2014_clean.csv.gz +0 -0
- validmind/datasets/credit_risk/lending_club.py +394 -0
- validmind/datasets/nlp/__init__.py +5 -0
- validmind/datasets/nlp/cnn_dailymail.py +98 -0
- validmind/datasets/nlp/datasets/cnn_dailymail_100_with_predictions.csv +255 -0
- validmind/datasets/nlp/datasets/cnn_dailymail_500_with_predictions.csv +1277 -0
- validmind/datasets/nlp/datasets/sentiments_with_predictions.csv +4847 -0
- validmind/errors.py +11 -1
- validmind/logging.py +9 -2
- validmind/models/huggingface.py +2 -2
- validmind/models/pytorch.py +3 -3
- validmind/models/sklearn.py +4 -4
- validmind/template.py +2 -2
- validmind/test_suites/__init__.py +4 -2
- validmind/tests/__init__.py +130 -45
- validmind/tests/data_validation/DatasetDescription.py +0 -1
- validmind/tests/data_validation/FeatureTargetCorrelationPlot.py +3 -1
- validmind/tests/data_validation/PiTCreditScoresHistogram.py +1 -1
- validmind/tests/data_validation/ScatterPlot.py +8 -2
- validmind/tests/data_validation/nlp/StopWords.py +1 -6
- validmind/tests/data_validation/nlp/TextDescription.py +20 -9
- validmind/tests/decorator.py +313 -0
- validmind/tests/model_validation/BertScore.py +1 -1
- validmind/tests/model_validation/BertScoreAggregate.py +1 -1
- validmind/tests/model_validation/BleuScore.py +1 -1
- validmind/tests/model_validation/ClusterSizeDistribution.py +1 -1
- validmind/tests/model_validation/ContextualRecall.py +1 -1
- validmind/tests/model_validation/FeaturesAUC.py +110 -0
- validmind/tests/model_validation/MeteorScore.py +92 -0
- validmind/tests/model_validation/RegardHistogram.py +6 -7
- validmind/tests/model_validation/RegardScore.py +4 -6
- validmind/tests/model_validation/RegressionResidualsPlot.py +127 -0
- validmind/tests/model_validation/RougeMetrics.py +7 -5
- validmind/tests/model_validation/RougeMetricsAggregate.py +1 -1
- validmind/tests/model_validation/SelfCheckNLIScore.py +112 -0
- validmind/tests/model_validation/TokenDisparity.py +1 -1
- validmind/tests/model_validation/ToxicityHistogram.py +1 -1
- validmind/tests/model_validation/ToxicityScore.py +1 -1
- validmind/tests/model_validation/embeddings/ClusterDistribution.py +1 -1
- validmind/tests/model_validation/embeddings/CosineSimilarityDistribution.py +1 -3
- validmind/tests/model_validation/embeddings/DescriptiveAnalytics.py +17 -22
- validmind/tests/model_validation/embeddings/EmbeddingsVisualization2D.py +1 -1
- validmind/tests/model_validation/sklearn/ClassifierPerformance.py +16 -17
- validmind/tests/model_validation/sklearn/ClusterCosineSimilarity.py +1 -1
- validmind/tests/model_validation/sklearn/ClusterPerformance.py +2 -2
- validmind/tests/model_validation/sklearn/ConfusionMatrix.py +21 -3
- validmind/tests/model_validation/sklearn/MinimumAccuracy.py +1 -1
- validmind/tests/model_validation/sklearn/MinimumF1Score.py +1 -1
- validmind/tests/model_validation/sklearn/MinimumROCAUCScore.py +1 -1
- validmind/tests/model_validation/sklearn/ModelsPerformanceComparison.py +5 -4
- validmind/tests/model_validation/sklearn/OverfitDiagnosis.py +2 -2
- validmind/tests/model_validation/sklearn/ROCCurve.py +6 -12
- validmind/tests/model_validation/sklearn/RegressionErrors.py +2 -2
- validmind/tests/model_validation/sklearn/RegressionModelsPerformanceComparison.py +6 -4
- validmind/tests/model_validation/sklearn/RegressionR2Square.py +2 -2
- validmind/tests/model_validation/sklearn/SHAPGlobalImportance.py +55 -5
- validmind/tests/model_validation/sklearn/SilhouettePlot.py +1 -1
- validmind/tests/model_validation/sklearn/TrainingTestDegradation.py +11 -5
- validmind/tests/model_validation/sklearn/WeakspotsDiagnosis.py +2 -2
- validmind/tests/model_validation/statsmodels/CumulativePredictionProbabilities.py +140 -0
- validmind/tests/model_validation/statsmodels/GINITable.py +22 -45
- validmind/tests/model_validation/statsmodels/{LogisticRegPredictionHistogram.py → PredictionProbabilitiesHistogram.py} +67 -92
- validmind/tests/model_validation/statsmodels/RegressionModelForecastPlot.py +2 -2
- validmind/tests/model_validation/statsmodels/RegressionModelForecastPlotLevels.py +2 -2
- validmind/tests/model_validation/statsmodels/RegressionModelInsampleComparison.py +1 -1
- validmind/tests/model_validation/statsmodels/RegressionModelOutsampleComparison.py +1 -1
- validmind/tests/model_validation/statsmodels/RegressionModelSummary.py +1 -1
- validmind/tests/model_validation/statsmodels/RegressionModelsPerformance.py +2 -2
- validmind/tests/model_validation/statsmodels/RegressionPermutationFeatureImportance.py +128 -0
- validmind/tests/model_validation/statsmodels/ScorecardHistogram.py +70 -103
- validmind/tests/prompt_validation/ai_powered_test.py +2 -0
- validmind/tests/test_providers.py +14 -124
- validmind/unit_metrics/__init__.py +75 -70
- validmind/unit_metrics/classification/sklearn/Accuracy.py +14 -0
- validmind/unit_metrics/classification/sklearn/F1.py +13 -0
- validmind/unit_metrics/classification/sklearn/Precision.py +13 -0
- validmind/unit_metrics/classification/sklearn/ROC_AUC.py +13 -0
- validmind/unit_metrics/classification/sklearn/Recall.py +13 -0
- validmind/unit_metrics/composite.py +228 -0
- validmind/unit_metrics/regression/GiniCoefficient.py +33 -0
- validmind/unit_metrics/regression/HuberLoss.py +23 -0
- validmind/unit_metrics/regression/KolmogorovSmirnovStatistic.py +30 -0
- validmind/unit_metrics/regression/MeanAbsolutePercentageError.py +16 -0
- validmind/unit_metrics/regression/MeanBiasDeviation.py +13 -0
- validmind/unit_metrics/regression/QuantileLoss.py +15 -0
- validmind/unit_metrics/regression/sklearn/AdjustedRSquaredScore.py +21 -0
- validmind/unit_metrics/regression/sklearn/MeanAbsoluteError.py +13 -0
- validmind/unit_metrics/regression/sklearn/MeanSquaredError.py +13 -0
- validmind/unit_metrics/regression/sklearn/RSquaredScore.py +13 -0
- validmind/unit_metrics/regression/sklearn/RootMeanSquaredError.py +20 -0
- validmind/utils.py +20 -31
- validmind/vm_models/__init__.py +0 -2
- validmind/vm_models/dataset.py +623 -29
- validmind/vm_models/figure.py +52 -17
- validmind/vm_models/test/metric.py +33 -31
- validmind/vm_models/test/output_template.py +0 -27
- validmind/vm_models/test/result_wrapper.py +68 -36
- validmind/vm_models/test/test.py +4 -2
- validmind/vm_models/test/threshold_test.py +24 -14
- validmind/vm_models/test_context.py +7 -0
- validmind/vm_models/test_suite/runner.py +1 -1
- validmind/vm_models/test_suite/summary.py +3 -3
- validmind/vm_models/test_suite/test.py +1 -1
- validmind/vm_models/test_suite/test_suite.py +2 -1
- {validmind-2.0.1.dist-info → validmind-2.1.0.dist-info}/METADATA +18 -18
- {validmind-2.0.1.dist-info → validmind-2.1.0.dist-info}/RECORD +116 -94
- validmind-2.1.0.dist-info/entry_points.txt +3 -0
- validmind/tests/__types__.py +0 -62
- validmind/tests/model_validation/statsmodels/LogRegressionConfusionMatrix.py +0 -128
- validmind/tests/model_validation/statsmodels/LogisticRegCumulativeProb.py +0 -172
- validmind/tests/model_validation/statsmodels/ScorecardBucketHistogram.py +0 -181
- validmind/tests/model_validation/statsmodels/ScorecardProbabilitiesHistogram.py +0 -175
- validmind/unit_metrics/sklearn/classification/Accuracy.py +0 -20
- validmind/unit_metrics/sklearn/classification/F1.py +0 -22
- validmind/unit_metrics/sklearn/classification/Precision.py +0 -22
- validmind/unit_metrics/sklearn/classification/ROC_AUC.py +0 -20
- validmind/unit_metrics/sklearn/classification/Recall.py +0 -20
- validmind/vm_models/test/unit_metric.py +0 -88
- {validmind-2.0.1.dist-info → validmind-2.1.0.dist-info}/LICENSE +0 -0
- {validmind-2.0.1.dist-info → validmind-2.1.0.dist-info}/WHEEL +0 -0
validmind/unit_metrics/regression/sklearn/RootMeanSquaredError.py
ADDED
@@ -0,0 +1,20 @@
+# Copyright © 2023-2024 ValidMind Inc. All rights reserved.
+# See the LICENSE file in the root of this repository for details.
+# SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
+
+import numpy as np
+from sklearn.metrics import mean_squared_error
+
+from validmind import tags, tasks
+
+
+@tags("regression", "sklearn", "unit_metric")
+@tasks("regression")
+def RootMeanSquaredError(model, dataset, **kwargs):
+    return np.sqrt(
+        mean_squared_error(
+            dataset.y,
+            dataset.y_pred(model),
+            **kwargs,
+        )
+    )
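The new unit metric is a plain function registered via the `@tags` / `@tasks` decorators rather than a `UnitMetric` subclass (that class and its export are removed in this release, as shown in the `vm_models/__init__.py` diff below). As a rough, standalone illustration of what the function computes, the sketch below mirrors the wrapped `sklearn` call, with toy arrays standing in for `dataset.y` and `dataset.y_pred(model)`; the arrays and printed value are illustrative only, not taken from the package.

```python
# Standalone sketch (not part of the package): reproduces the computation the
# new RootMeanSquaredError unit metric wraps, with toy arrays standing in for
# dataset.y and dataset.y_pred(model).
import numpy as np
from sklearn.metrics import mean_squared_error

y_true = np.array([3.0, -0.5, 2.0, 7.0])  # stand-in for dataset.y
y_pred = np.array([2.5, 0.0, 2.0, 8.0])   # stand-in for dataset.y_pred(model)

rmse = np.sqrt(mean_squared_error(y_true, y_pred))
print(f"RMSE: {rmse:.4f}")  # prints RMSE: 0.6124
```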
validmind/utils.py
CHANGED
@@ -85,6 +85,8 @@ def nan_to_none(obj):
 
 class NumpyEncoder(json.JSONEncoder):
     def default(self, obj):
+        if isinstance(obj, pd.Interval):
+            return f"[{obj.left}, {obj.right}]"
         if isinstance(obj, np.integer):
             return int(obj)
         if isinstance(obj, np.floating):
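This lets the JSON serialization in `validmind.utils.NumpyEncoder` handle pandas interval bins (e.g. from `pd.cut`) instead of raising. A self-contained sketch of the same branches, outside the package; the `IntervalAwareEncoder` name, the toy data, and the `float(obj)` return (whose body falls outside the hunk shown) are illustrative assumptions:

```python
# Standalone sketch mirroring the branches shown in the hunk above.
import json

import numpy as np
import pandas as pd


class IntervalAwareEncoder(json.JSONEncoder):  # hypothetical name for illustration
    def default(self, obj):
        if isinstance(obj, pd.Interval):
            return f"[{obj.left}, {obj.right}]"
        if isinstance(obj, np.integer):
            return int(obj)
        if isinstance(obj, np.floating):
            return float(obj)  # assumed body; not visible in the hunk
        return super().default(obj)


bins = pd.cut(pd.Series([1, 5, 9]), bins=2)
print(json.dumps({"bin": bins.iloc[0], "count": np.int64(2)}, cls=IntervalAwareEncoder))
# e.g. {"bin": "[0.992, 5.0]", "count": 2}
```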
@@ -236,26 +238,6 @@ def summarize_data_quality_results(results):
     )
 
 
-def clean_docstring(docstring: str) -> str:
-    """
-    Clean up docstrings by removing leading and trailing whitespace and
-    replacing newlines with spaces.
-    """
-    description = (docstring or "").strip()
-    paragraphs = description.split("\n\n")  # Split into paragraphs
-    paragraphs = [
-        " ".join([line.strip() for line in paragraph.split("\n")])
-        for paragraph in paragraphs
-    ]
-    paragraphs = [
-        paragraph.replace(" - ", "\n- ") for paragraph in paragraphs
-    ]  # Add newline before list items
-    # Join paragraphs with double newlines for markdown
-    description = "\n\n".join(paragraphs)
-
-    return description
-
-
 def format_number(number):
     """
     Format a number for display purposes. If the number is a float, round it
@@ -338,20 +320,27 @@ def fuzzy_match(string: str, search_string: str, threshold=0.7):
     return score >= threshold
 
 
-def test_id_to_name(test_id: str):
-    """Convert a test ID to a human-readable name
-    # Extract the last part of the ID string
-    last_part = test_id.split(".")[-1]
+def test_id_to_name(test_id: str) -> str:
+    """Convert a test ID to a human-readable name.
 
-
-
+    Args:
+        test_id (str): The test identifier, typically in CamelCase or snake_case.
 
-
-
-
-    )
+    Returns:
+        str: A human-readable name derived from the test ID.
+    """
+    last_part = test_id.split(".")[-1]
+    words = []
+
+    # Split on underscores and apply regex to each part to handle CamelCase and acronyms
+    for part in last_part.split("_"):
+        # Regex pattern to match uppercase acronyms, mixed-case words, or alphanumeric combinations
+        words.extend(
+            re.findall(r"[A-Z]+(?:_[A-Z]+)*(?=_|$|[A-Z][a-z])|[A-Z]?[a-z0-9]+", part)
+        )
 
-
+    # Join the words with spaces, capitalize non-acronym words
+    return " ".join(word.capitalize() if not word.isupper() else word for word in words)
 
 
 def get_model_info(model):
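The rewrite also gives the helper a Google-style docstring and a regex that keeps acronyms and digit suffixes intact. A quick standalone check of that regex; the dotted test IDs below are plausible examples assembled from files in this release, not values taken from the diff:

```python
# Illustrative re-run of the regex used by the rewritten test_id_to_name;
# the sample IDs are assumptions.
import re


def test_id_to_name(test_id: str) -> str:
    last_part = test_id.split(".")[-1]
    words = []
    for part in last_part.split("_"):
        words.extend(
            re.findall(r"[A-Z]+(?:_[A-Z]+)*(?=_|$|[A-Z][a-z])|[A-Z]?[a-z0-9]+", part)
        )
    return " ".join(word.capitalize() if not word.isupper() else word for word in words)


print(test_id_to_name("validmind.model_validation.sklearn.ROCCurve"))
# -> ROC Curve
print(test_id_to_name("validmind.model_validation.sklearn.MinimumF1Score"))
# -> Minimum F1 Score
```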
validmind/vm_models/__init__.py
CHANGED
@@ -15,7 +15,6 @@ from .test.result_summary import ResultSummary, ResultTable, ResultTableMetadata
 from .test.test import Test
 from .test.threshold_test import ThresholdTest
 from .test.threshold_test_result import ThresholdTestResult, ThresholdTestResults
-from .test.unit_metric import UnitMetric
 from .test_context import TestContext, TestInput
 from .test_suite.runner import TestSuiteRunner
 from .test_suite.test_suite import TestSuite
@@ -30,7 +29,6 @@ __all__ = [
     "ResultTable",
     "ResultTableMetadata",
     "Test",
-    "UnitMetric",
     "Metric",
     "MetricResult",
     "ThresholdTest",