validmind 2.3.3__py3-none-any.whl → 2.4.0__py3-none-any.whl
This diff compares the contents of two publicly released versions of the package as they appear in their public registry. It is provided for informational purposes only and reflects only the changes between those published versions.
- validmind/__version__.py +1 -1
- validmind/ai/test_descriptions.py +8 -1
- validmind/ai/utils.py +2 -1
- validmind/client.py +1 -0
- validmind/datasets/regression/fred_timeseries.py +272 -0
- validmind/tests/__init__.py +14 -468
- validmind/tests/__types__.py +10 -0
- validmind/tests/_store.py +102 -0
- validmind/tests/data_validation/ACFandPACFPlot.py +7 -9
- validmind/tests/data_validation/ADF.py +8 -10
- validmind/tests/data_validation/ANOVAOneWayTable.py +8 -10
- validmind/tests/data_validation/AutoAR.py +2 -4
- validmind/tests/data_validation/AutoMA.py +2 -4
- validmind/tests/data_validation/AutoSeasonality.py +8 -10
- validmind/tests/data_validation/AutoStationarity.py +8 -10
- validmind/tests/data_validation/BivariateFeaturesBarPlots.py +8 -10
- validmind/tests/data_validation/BivariateHistograms.py +8 -10
- validmind/tests/data_validation/BivariateScatterPlots.py +8 -10
- validmind/tests/data_validation/ChiSquaredFeaturesTable.py +8 -10
- validmind/tests/data_validation/ClassImbalance.py +2 -4
- validmind/tests/data_validation/DFGLSArch.py +2 -4
- validmind/tests/data_validation/DatasetDescription.py +7 -9
- validmind/tests/data_validation/DatasetSplit.py +8 -9
- validmind/tests/data_validation/DescriptiveStatistics.py +2 -4
- validmind/tests/data_validation/Duplicates.py +2 -4
- validmind/tests/data_validation/EngleGrangerCoint.py +2 -4
- validmind/tests/data_validation/FeatureTargetCorrelationPlot.py +2 -4
- validmind/tests/data_validation/HeatmapFeatureCorrelations.py +2 -4
- validmind/tests/data_validation/HighCardinality.py +2 -4
- validmind/tests/data_validation/HighPearsonCorrelation.py +2 -4
- validmind/tests/data_validation/IQROutliersBarPlot.py +2 -4
- validmind/tests/data_validation/IQROutliersTable.py +2 -4
- validmind/tests/data_validation/IsolationForestOutliers.py +2 -4
- validmind/tests/data_validation/KPSS.py +8 -10
- validmind/tests/data_validation/LaggedCorrelationHeatmap.py +2 -4
- validmind/tests/data_validation/MissingValues.py +2 -4
- validmind/tests/data_validation/MissingValuesBarPlot.py +2 -4
- validmind/tests/data_validation/MissingValuesRisk.py +2 -4
- validmind/tests/data_validation/PearsonCorrelationMatrix.py +2 -4
- validmind/tests/data_validation/PhillipsPerronArch.py +7 -9
- validmind/tests/data_validation/RollingStatsPlot.py +2 -4
- validmind/tests/data_validation/ScatterPlot.py +2 -4
- validmind/tests/data_validation/SeasonalDecompose.py +70 -44
- validmind/tests/data_validation/Skewness.py +2 -4
- validmind/tests/data_validation/SpreadPlot.py +2 -4
- validmind/tests/data_validation/TabularCategoricalBarPlots.py +2 -4
- validmind/tests/data_validation/TabularDateTimeHistograms.py +2 -4
- validmind/tests/data_validation/TabularDescriptionTables.py +2 -4
- validmind/tests/data_validation/TabularNumericalHistograms.py +2 -4
- validmind/tests/data_validation/TargetRateBarPlots.py +2 -4
- validmind/tests/data_validation/TimeSeriesDescription.py +74 -0
- validmind/tests/data_validation/TimeSeriesDescriptiveStatistics.py +76 -0
- validmind/tests/data_validation/TimeSeriesFrequency.py +2 -4
- validmind/tests/data_validation/TimeSeriesHistogram.py +29 -45
- validmind/tests/data_validation/TimeSeriesLinePlot.py +2 -4
- validmind/tests/data_validation/TimeSeriesMissingValues.py +2 -4
- validmind/tests/data_validation/TimeSeriesOutliers.py +32 -45
- validmind/tests/data_validation/TooManyZeroValues.py +2 -4
- validmind/tests/data_validation/UniqueRows.py +2 -4
- validmind/tests/data_validation/WOEBinPlots.py +2 -4
- validmind/tests/data_validation/WOEBinTable.py +2 -4
- validmind/tests/data_validation/ZivotAndrewsArch.py +2 -4
- validmind/tests/data_validation/nlp/CommonWords.py +2 -4
- validmind/tests/data_validation/nlp/Hashtags.py +2 -4
- validmind/tests/data_validation/nlp/Mentions.py +2 -4
- validmind/tests/data_validation/nlp/Punctuations.py +2 -4
- validmind/tests/data_validation/nlp/StopWords.py +2 -4
- validmind/tests/data_validation/nlp/TextDescription.py +2 -4
- validmind/tests/decorator.py +10 -8
- validmind/tests/load.py +264 -0
- validmind/tests/metadata.py +59 -0
- validmind/tests/model_validation/ClusterSizeDistribution.py +5 -7
- validmind/tests/model_validation/FeaturesAUC.py +6 -8
- validmind/tests/model_validation/ModelMetadata.py +8 -9
- validmind/tests/model_validation/ModelMetadataComparison.py +59 -0
- validmind/tests/model_validation/ModelPredictionResiduals.py +103 -0
- validmind/tests/model_validation/RegressionResidualsPlot.py +2 -6
- validmind/tests/model_validation/TimeSeriesPredictionWithCI.py +131 -0
- validmind/tests/model_validation/TimeSeriesPredictionsPlot.py +76 -0
- validmind/tests/model_validation/TimeSeriesR2SquareBySegments.py +103 -0
- validmind/tests/model_validation/embeddings/ClusterDistribution.py +2 -4
- validmind/tests/model_validation/embeddings/CosineSimilarityDistribution.py +2 -4
- validmind/tests/model_validation/embeddings/DescriptiveAnalytics.py +2 -4
- validmind/tests/model_validation/embeddings/EmbeddingsVisualization2D.py +2 -4
- validmind/tests/model_validation/embeddings/StabilityAnalysis.py +2 -4
- validmind/tests/model_validation/sklearn/AdjustedMutualInformation.py +5 -7
- validmind/tests/model_validation/sklearn/AdjustedRandIndex.py +5 -7
- validmind/tests/model_validation/sklearn/ClassifierPerformance.py +7 -9
- validmind/tests/model_validation/sklearn/ClusterCosineSimilarity.py +5 -7
- validmind/tests/model_validation/sklearn/ClusterPerformance.py +5 -7
- validmind/tests/model_validation/sklearn/ClusterPerformanceMetrics.py +2 -7
- validmind/tests/model_validation/sklearn/CompletenessScore.py +5 -7
- validmind/tests/model_validation/sklearn/ConfusionMatrix.py +19 -10
- validmind/tests/model_validation/sklearn/FeatureImportanceComparison.py +83 -0
- validmind/tests/model_validation/sklearn/FowlkesMallowsScore.py +5 -7
- validmind/tests/model_validation/sklearn/HomogeneityScore.py +5 -7
- validmind/tests/model_validation/sklearn/HyperParametersTuning.py +2 -7
- validmind/tests/model_validation/sklearn/KMeansClustersOptimization.py +4 -7
- validmind/tests/model_validation/sklearn/MinimumAccuracy.py +7 -9
- validmind/tests/model_validation/sklearn/MinimumF1Score.py +7 -9
- validmind/tests/model_validation/sklearn/MinimumROCAUCScore.py +7 -9
- validmind/tests/model_validation/sklearn/ModelsPerformanceComparison.py +8 -10
- validmind/tests/model_validation/sklearn/OverfitDiagnosis.py +7 -9
- validmind/tests/model_validation/sklearn/PermutationFeatureImportance.py +9 -11
- validmind/tests/model_validation/sklearn/PopulationStabilityIndex.py +7 -9
- validmind/tests/model_validation/sklearn/PrecisionRecallCurve.py +8 -10
- validmind/tests/model_validation/sklearn/ROCCurve.py +10 -11
- validmind/tests/model_validation/sklearn/RegressionErrors.py +5 -7
- validmind/tests/model_validation/sklearn/RegressionErrorsComparison.py +76 -0
- validmind/tests/model_validation/sklearn/RegressionModelsPerformanceComparison.py +5 -7
- validmind/tests/model_validation/sklearn/RegressionR2Square.py +5 -7
- validmind/tests/model_validation/sklearn/RegressionR2SquareComparison.py +63 -0
- validmind/tests/model_validation/sklearn/RobustnessDiagnosis.py +10 -14
- validmind/tests/model_validation/sklearn/SHAPGlobalImportance.py +8 -10
- validmind/tests/model_validation/sklearn/SilhouettePlot.py +5 -7
- validmind/tests/model_validation/sklearn/TrainingTestDegradation.py +8 -10
- validmind/tests/model_validation/sklearn/VMeasure.py +5 -7
- validmind/tests/model_validation/sklearn/WeakspotsDiagnosis.py +8 -10
- validmind/tests/model_validation/statsmodels/AutoARIMA.py +2 -4
- validmind/tests/model_validation/statsmodels/BoxPierce.py +2 -4
- validmind/tests/model_validation/statsmodels/CumulativePredictionProbabilities.py +3 -4
- validmind/tests/model_validation/statsmodels/DurbinWatsonTest.py +2 -4
- validmind/tests/model_validation/statsmodels/GINITable.py +2 -4
- validmind/tests/model_validation/statsmodels/JarqueBera.py +7 -9
- validmind/tests/model_validation/statsmodels/KolmogorovSmirnov.py +7 -9
- validmind/tests/model_validation/statsmodels/LJungBox.py +2 -4
- validmind/tests/model_validation/statsmodels/Lilliefors.py +7 -9
- validmind/tests/model_validation/statsmodels/PredictionProbabilitiesHistogram.py +2 -4
- validmind/tests/model_validation/statsmodels/RegressionCoeffsPlot.py +2 -4
- validmind/tests/model_validation/statsmodels/RegressionFeatureSignificance.py +7 -9
- validmind/tests/model_validation/statsmodels/RegressionModelForecastPlot.py +2 -4
- validmind/tests/model_validation/statsmodels/RegressionModelForecastPlotLevels.py +2 -4
- validmind/tests/model_validation/statsmodels/RegressionModelSensitivityPlot.py +2 -4
- validmind/tests/model_validation/statsmodels/RegressionModelSummary.py +2 -4
- validmind/tests/model_validation/statsmodels/RegressionModelsCoeffs.py +2 -4
- validmind/tests/model_validation/statsmodels/RegressionPermutationFeatureImportance.py +6 -8
- validmind/tests/model_validation/statsmodels/RunsTest.py +2 -4
- validmind/tests/model_validation/statsmodels/ScorecardHistogram.py +3 -4
- validmind/tests/model_validation/statsmodels/ShapiroWilk.py +2 -4
- validmind/tests/prompt_validation/Bias.py +2 -4
- validmind/tests/prompt_validation/Clarity.py +2 -4
- validmind/tests/prompt_validation/Conciseness.py +2 -4
- validmind/tests/prompt_validation/Delimitation.py +2 -4
- validmind/tests/prompt_validation/NegativeInstruction.py +2 -4
- validmind/tests/prompt_validation/Robustness.py +2 -4
- validmind/tests/prompt_validation/Specificity.py +2 -4
- validmind/tests/run.py +394 -0
- validmind/tests/test_providers.py +12 -0
- validmind/tests/utils.py +16 -0
- validmind/unit_metrics/__init__.py +12 -4
- validmind/unit_metrics/composite.py +3 -0
- validmind/vm_models/test/metric.py +8 -5
- validmind/vm_models/test/result_wrapper.py +2 -1
- validmind/vm_models/test/test.py +14 -11
- validmind/vm_models/test/threshold_test.py +1 -0
- validmind/vm_models/test_suite/runner.py +1 -0
- {validmind-2.3.3.dist-info → validmind-2.4.0.dist-info}/METADATA +70 -36
- {validmind-2.3.3.dist-info → validmind-2.4.0.dist-info}/RECORD +162 -146
- /validmind/datasets/regression/datasets/{lending_club_loan_rates.csv → leanding_club_loan_rates.csv} +0 -0
- {validmind-2.3.3.dist-info → validmind-2.4.0.dist-info}/LICENSE +0 -0
- {validmind-2.3.3.dist-info → validmind-2.4.0.dist-info}/WHEEL +0 -0
- {validmind-2.3.3.dist-info → validmind-2.4.0.dist-info}/entry_points.txt +0 -0
validmind/tests/__init__.py
CHANGED
@@ -2,38 +2,15 @@
 # See the LICENSE file in the root of this repository for details.
 # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial

-"""
-
-import importlib
-import inspect
-import json
-import sys
-from pathlib import Path
-from pprint import pformat
-from typing import Dict
-from uuid import uuid4
-
-import pandas as pd
-from ipywidgets import HTML, Accordion
+"""ValidMind Tests Module"""

 from ..errors import LoadTestError
-from ..html_templates.content_blocks import test_content_block_html
 from ..logging import get_logger
-from
-from
-from
-
-
-    format_dataframe,
-    fuzzy_match,
-    md_to_html,
-    test_id_to_name,
-)
-from ..vm_models import TestContext, TestInput
-from .__types__ import TestID
-from .decorator import tags, tasks
-from .decorator import test as test_decorator
-from .test_providers import LocalTestProvider, TestProvider
+from .decorator import metric, tags, tasks, test
+from .load import describe_test, list_tests, load_test
+from .metadata import list_tags, list_tasks, list_tasks_and_tags
+from .run import run_test
+from .test_providers import LocalTestProvider, TestProvider, register_test_provider

 logger = get_logger(__name__)

@@ -45,449 +22,18 @@ __all__ = [
     "list_tests",
     "load_test",
     "describe_test",
+    "run_test",
     "register_test_provider",
     "LoadTestError",
     "LocalTestProvider",
+    "TestProvider",
+    # Metadata
+    "list_tags",
+    "list_tasks",
+    "list_tasks_and_tags",
     # Decorators for functional metrics
-    "
+    "test",
+    "metric",  # DEPRECATED
     "tags",
     "tasks",
 ]
-
-__tests = None
-__test_classes = None
-
-__test_providers: Dict[str, TestProvider] = {}
-__custom_tests: Dict[str, object] = {}
-
-
-def _test_description(test_class, truncate=True):
-    description = inspect.getdoc(test_class).strip()
-
-    if truncate and len(description.split("\n")) > 5:
-        return description.strip().split("\n")[0] + "..."
-
-    return description
-
-
-def _load_tests(test_ids):
-    global __test_classes
-
-    if __test_classes is None:
-        __test_classes = {}
-        for test_id in test_ids:
-            __test_classes[test_id] = load_test(test_id)
-
-
-def _pretty_list_tests(tests, truncate=True):
-    _load_tests(tests)
-
-    table = [
-        {
-            "ID": test_id,
-            "Name": test_id_to_name(test_id),
-            "Description": _test_description(__test_classes[test_id], truncate),
-            "Required Inputs": __test_classes[test_id].required_inputs,
-            "Params": __test_classes[test_id].default_params or {},
-        }
-        for test_id in tests
-    ]
-
-    return format_dataframe(pd.DataFrame(table))
-
-
-def _initialize_test_classes():
-    """
-    Initialize and populate the __test_classes global variable.
-    """
-    global __test_classes
-
-    if __test_classes is None:
-        __test_classes = {}
-        for path in Path(__file__).parent.glob("**/*.py"):
-            if path.name.startswith("__") or not path.name[0].isupper():
-                continue  # skip special files and non-class files
-            test_id = path.stem  # or any other way to define test_id
-            __test_classes[test_id] = load_test(
-                test_id
-            )  # Assuming a function load_test exists
-
-
-def list_tags():
-    """
-    List unique tags from all test classes.
-    """
-    _initialize_test_classes()
-
-    unique_tags = set()
-
-    for test_class in __test_classes.values():
-        if hasattr(test_class, "metadata") and "tags" in test_class.metadata:
-            for tag in test_class.metadata["tags"]:
-                unique_tags.add(tag)
-
-    return list(unique_tags)
-
-
-def list_tasks_and_tags():
-    """
-    List all task types and their associated tags, with one row per task type and
-    all tags for a task type in one row.
-
-    Returns:
-        pandas.DataFrame: A DataFrame with 'Task Type' and concatenated 'Tags'.
-    """
-    _initialize_test_classes()
-    task_tags_dict = {}
-
-    for test_class in __test_classes.values():
-        if hasattr(test_class, "metadata"):
-            task_types = test_class.metadata.get("task_types", [])
-            tags = test_class.metadata.get("tags", [])
-
-            for task_type in task_types:
-                if task_type not in task_tags_dict:
-                    task_tags_dict[task_type] = set()
-                task_tags_dict[task_type].update(tags)
-
-    # Convert the dictionary into a DataFrame
-    task_tags_data = [
-        {"Task Type": task_type, "Tags": ", ".join(tags)}
-        for task_type, tags in task_tags_dict.items()
-    ]
-    return format_dataframe(pd.DataFrame(task_tags_data))
-
-
-def list_task_types():
-    """
-    List unique task types from all test classes.
-    """
-    _initialize_test_classes()
-
-    unique_task_types = set()
-
-    for test_class in __test_classes.values():
-        if hasattr(test_class, "metadata") and "task_types" in test_class.metadata:
-            for task_type in test_class.metadata["task_types"]:
-                unique_task_types.add(task_type)
-
-    return list(unique_task_types)
-
-
-def list_tests(filter=None, task=None, tags=None, pretty=True, truncate=True):
-    """List all tests in the tests directory.
-
-    Args:
-        filter (str, optional): Find tests where the ID, task_type or tags match the
-            filter string. Defaults to None.
-        task (str, optional): Find tests that match the task type. Can be used to
-            narrow down matches from the filter string. Defaults to None.
-        tags (list, optional): Find tests that match list of tags. Can be used to
-            narrow down matches from the filter string. Defaults to None.
-        pretty (bool, optional): If True, returns a pandas DataFrame with a
-            formatted table. Defaults to True.
-        truncate (bool, optional): If True, truncates the test description to the first
-            line. Defaults to True. (only used if pretty=True)
-
-    Returns:
-        list or pandas.DataFrame: A list of all tests or a formatted table.
-    """
-    global __tests
-
-    if __tests is None:
-        __tests = []
-
-        directories = [p.name for p in Path(__file__).parent.iterdir() if p.is_dir()]
-
-        for d in directories:
-            for path in Path(__file__).parent.joinpath(d).glob("**/**/*.py"):
-                if path.name.startswith("__") or not path.name[0].isupper():
-                    continue  # skip __init__.py and other special files as well as non Test files
-
-                test_id = (
-                    f"validmind.{d}.{path.parent.stem}.{path.stem}"
-                    if path.parent.parent.stem == d
-                    else f"validmind.{d}.{path.stem}"
-                )
-                __tests.append(test_id)
-
-    tests = __tests
-
-    # first filter by the filter string since it's the most general search
-    if filter is not None:
-        _load_tests(tests)
-
-        matched_by_id = [
-            test_id for test_id in tests if filter.lower() in test_id.lower()
-        ]
-        matched_by_task = [
-            test_id
-            for test_id in tests
-            if hasattr(__test_classes[test_id], "metadata")
-            and any(
-                filter.lower() in task.lower()
-                for task in __test_classes[test_id].metadata["task_types"]
-            )
-        ]
-        matched_by_tags = [
-            test_id
-            for test_id in tests
-            if hasattr(__test_classes[test_id], "metadata")
-            and any(
-                fuzzy_match(tag, filter.lower())
-                for tag in __test_classes[test_id].metadata["tags"]
-            )
-        ]
-
-        tests = list(set(matched_by_id + matched_by_task + matched_by_tags))
-
-    # then filter by task type and tags since they are more specific
-    if task is not None:
-        _load_tests(tests)
-
-        tests = [
-            test_id
-            for test_id in tests
-            if hasattr(__test_classes[test_id], "metadata")
-            and task in __test_classes[test_id].metadata["task_types"]
-        ]
-
-    if tags is not None:
-        _load_tests(tests)
-
-        tests = [
-            test_id
-            for test_id in tests
-            if hasattr(__test_classes[test_id], "metadata")
-            and all(tag in __test_classes[test_id].metadata["tags"] for tag in tags)
-        ]
-
-    if pretty:
-        return _pretty_list_tests(tests, truncate=truncate)
-
-    return tests
-
-
-def _load_validmind_test(test_id, reload=False):
-    parts = test_id.split(":")[0].split(".")
-
-    test_module = ".".join(parts[1:-1])
-    test_class = parts[-1]
-
-    error = None
-    test = None
-
-    try:
-        full_path = f"validmind.tests.{test_module}.{test_class}"
-
-        if reload and full_path in sys.modules:
-            module = importlib.reload(sys.modules[full_path])
-        else:
-            module = importlib.import_module(full_path)
-
-        test = getattr(module, test_class)
-    except ModuleNotFoundError as e:
-        error = f"Unable to load test {test_id}. {e}"
-    except AttributeError:
-        error = f"Unable to load test {test_id}. Test not in module: {test_class}"
-
-    return error, test
-
-
-def load_test(test_id: str, reload=False):
-    """Load a test by test ID
-
-    Test IDs are in the format `namespace.path_to_module.TestClassOrFuncName[:result_id]`.
-    The result ID is optional and is used to distinguish between multiple results from the
-    running the same test.
-
-    Args:
-        test_id (str): The test ID in the format `namespace.path_to_module.TestName[:result_id]`
-        reload (bool, optional): Whether to reload the test module. Defaults to False.
-    """
-    # TODO: we should use a dedicated class for test IDs to handle this consistently
-    test_id, result_id = test_id.split(":", 1) if ":" in test_id else (test_id, None)
-
-    error = None
-    namespace = test_id.split(".", 1)[0]
-
-    # TODO: lets implement an extensible loading system instead of this ugly if/else
-    if test_id in __custom_tests:
-        test = __custom_tests[test_id]
-
-    elif test_id.startswith("validmind.composite_metric"):
-        error, test = load_composite_metric(test_id)
-
-    elif namespace == "validmind":
-        error, test = _load_validmind_test(test_id, reload=reload)
-
-    elif namespace in __test_providers:
-        try:
-            test = __test_providers[namespace].load_test(test_id.split(".", 1)[1])
-        except Exception as e:
-            error = (
-                f"Unable to load test {test_id} from test provider: "
-                f"{__test_providers[namespace]}\n Got Exception: {e}"
-            )
-
-    else:
-        error = f"Unable to load test {test_id}. No test provider found."
-
-    if error:
-        logger.error(error)
-        raise LoadTestError(error)
-
-    if inspect.isfunction(test):
-        # if its a function, we decorate it and then load the class
-        # TODO: simplify this as we move towards all functional metrics
-        # "_" is used here so it doesn't conflict with other test ids
-        test_decorator("_")(test)
-        test = __custom_tests["_"]
-
-    test.test_id = f"{test_id}:{result_id}" if result_id else test_id
-
-    return test
-
-
-def describe_test(test_id: TestID = None, raw: bool = False, show: bool = True):
-    """Get or show details about the test
-
-    This function can be used to see test details including the test name, description,
-    required inputs and default params. It can also be used to get a dictionary of the
-    above information for programmatic use.
-
-    Args:
-        test_id (str, optional): The test ID. Defaults to None.
-        raw (bool, optional): If True, returns a dictionary with the test details.
-            Defaults to False.
-    """
-    test = load_test(test_id)
-
-    details = {
-        "ID": test_id,
-        "Name": test_id_to_name(test_id),
-        "Required Inputs": test.required_inputs,
-        "Params": test.default_params or {},
-        "Description": inspect.getdoc(test).strip() or "",
-    }
-
-    if raw:
-        return details
-
-    html = test_content_block_html.format(
-        test_id=test_id,
-        uuid=str(uuid4()),
-        title=f'{details["Name"]}',
-        description=md_to_html(details["Description"].strip()),
-        required_inputs=", ".join(details["Required Inputs"] or ["None"]),
-        params_table="\n".join(
-            [
-                f"<tr><td>{param}</td><td>{pformat(value, indent=4)}</td></tr>"
-                for param, value in details["Params"].items()
-            ]
-        ),
-        table_display="table" if details["Params"] else "none",
-        example_inputs=json.dumps(
-            {name: f"my_vm_{name}" for name in (details["Required Inputs"] or [])},
-            indent=4,
-        ),
-        example_params=json.dumps(details["Params"] or {}, indent=4, cls=NumpyEncoder),
-        instructions_display="block" if show else "none",
-    )
-
-    if not show:
-        return html
-
-    display(
-        Accordion(
-            children=[HTML(html)],
-            titles=[f"Test Description: {details['Name']} ('{test_id}')"],
-        )
-    )
-
-
-def run_test(
-    test_id: TestID = None,
-    name: str = None,
-    unit_metrics: list = None,
-    params: dict = None,
-    inputs=None,
-    output_template=None,
-    show=True,
-    **kwargs,
-):
-    """Run a test by test ID
-
-    Args:
-        test_id (str, option): The test ID to run - required when running a single test
-            i.e. when not running multiple unit metrics
-        name (str, optional): The name of the test (used to create a composite metric
-            out of multiple unit metrics) - required when running multiple unit metrics
-        unit_metrics (list, optional): A list of unit metric IDs to run as a composite
-            metric - required when running multiple unit metrics
-        params (dict, optional): A dictionary of params to override the default params
-        inputs: A dictionary of test inputs to pass to the Test
-        output_template (str, optional): A template to use for customizing the output
-        show (bool, optional): Whether to display the results. Defaults to True.
-        **kwargs: Any extra arguments will be passed in via the TestInput object. i.e.:
-            - dataset: A validmind Dataset object or a Pandas DataFrame
-            - model: A model to use for the test
-            - models: A list of models to use for the test
-            other inputs can be accessed inside the test via `self.inputs["input_name"]`
-    """
-    if not test_id and not name and not unit_metrics:
-        raise ValueError(
-            "`test_id` or `name` and `unit_metrics` must be provided to run a test"
-        )
-
-    if (unit_metrics and not name) or (name and not unit_metrics):
-        raise ValueError("`name` and `unit_metrics` must be provided together")
-
-    if test_id and test_id.startswith("validmind.unit_metrics"):
-        # TODO: as we move towards a more unified approach to metrics
-        # we will want to make everything functional and remove the
-        # separation between unit metrics and "normal" metrics
-        return run_metric(test_id, inputs=inputs, params=params, show=show)
-
-    if unit_metrics:
-        metric_id_name = "".join(word[0].upper() + word[1:] for word in name.split())
-        test_id = f"validmind.composite_test.{metric_id_name}"
-
-        error, TestClass = load_composite_metric(
-            unit_metrics=unit_metrics, metric_name=metric_id_name
-        )
-
-        if error:
-            raise LoadTestError(error)
-
-    else:
-        TestClass = load_test(test_id, reload=True)
-
-    test = TestClass(
-        test_id=test_id,
-        context=TestContext(),
-        inputs=TestInput({**kwargs, **(inputs or {})}),
-        output_template=output_template,
-        params=params,
-    )
-
-    test.run()
-
-    if show:
-        test.result.show()
-
-    return test.result
-
-
-def register_test_provider(namespace: str, test_provider: TestProvider) -> None:
-    """Register an external test provider
-
-    Args:
-        namespace (str): The namespace of the test provider
-        test_provider (TestProvider): The test provider
-    """
-    __test_providers[namespace] = test_provider
-
-
-def _register_custom_test(test_id: str, test_class: object):
-    __custom_tests[test_id] = test_class
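With this refactor, `validmind/tests/__init__.py` becomes a thin facade: the loading, metadata, and execution logic removed above now lives in the new `load.py`, `metadata.py`, and `run.py` modules, and the package simply re-exports `list_tests`, `load_test`, `describe_test`, `run_test`, the metadata helpers, and the `test`/`tags`/`tasks` decorators (with `metric` kept as a deprecated alias). A minimal usage sketch of that re-exported surface follows; it assumes the new implementations keep the argument names of the removed functions shown above, and the inline DataFrame is only a placeholder input.

```python
# Minimal sketch of the re-exported API surface; argument names are assumed to
# match the removed implementations shown in the diff above.
import pandas as pd
import validmind.tests as vm_tests

# Per the removed run_test docstring, `dataset` may be a VM dataset or a plain
# pandas DataFrame; whether the new run.py keeps that behavior is assumed here.
df = pd.DataFrame(
    {"ds": pd.date_range("2021-01-01", periods=30, freq="D"), "y": range(30)}
).set_index("ds")

vm_tests.list_tests(filter="TimeSeries")  # browse the built-in tests
vm_tests.describe_test("validmind.data_validation.TimeSeriesDescription")

result = vm_tests.run_test(
    "validmind.data_validation.TimeSeriesDescription",
    dataset=df,
)
```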
validmind/tests/__types__.py
CHANGED
@@ -18,9 +18,12 @@ TestID = Literal[
     "validmind.prompt_validation.NegativeInstruction",
     "validmind.prompt_validation.Conciseness",
     "validmind.prompt_validation.Delimitation",
+    "validmind.model_validation.ModelPredictionResiduals",
     "validmind.model_validation.BertScore",
+    "validmind.model_validation.TimeSeriesPredictionsPlot",
     "validmind.model_validation.RegardScore",
     "validmind.model_validation.BleuScore",
+    "validmind.model_validation.TimeSeriesPredictionWithCI",
     "validmind.model_validation.RegressionResidualsPlot",
     "validmind.model_validation.FeaturesAUC",
     "validmind.model_validation.ContextualRecall",
@@ -30,6 +33,8 @@ TestID = Literal[
     "validmind.model_validation.ClusterSizeDistribution",
     "validmind.model_validation.TokenDisparity",
     "validmind.model_validation.ToxicityScore",
+    "validmind.model_validation.ModelMetadataComparison",
+    "validmind.model_validation.TimeSeriesR2SquareBySegments",
     "validmind.model_validation.embeddings.CosineSimilarityComparison",
     "validmind.model_validation.embeddings.EmbeddingsVisualization2D",
     "validmind.model_validation.embeddings.StabilityAnalysisRandomNoise",
@@ -77,11 +82,14 @@ TestID = Literal[
     "validmind.model_validation.sklearn.RegressionR2Square",
     "validmind.model_validation.sklearn.RegressionErrors",
     "validmind.model_validation.sklearn.ClusterPerformance",
+    "validmind.model_validation.sklearn.FeatureImportanceComparison",
     "validmind.model_validation.sklearn.TrainingTestDegradation",
+    "validmind.model_validation.sklearn.RegressionErrorsComparison",
     "validmind.model_validation.sklearn.HyperParametersTuning",
     "validmind.model_validation.sklearn.KMeansClustersOptimization",
     "validmind.model_validation.sklearn.ModelsPerformanceComparison",
     "validmind.model_validation.sklearn.WeakspotsDiagnosis",
+    "validmind.model_validation.sklearn.RegressionR2SquareComparison",
     "validmind.model_validation.sklearn.PopulationStabilityIndex",
     "validmind.model_validation.sklearn.MinimumAccuracy",
     "validmind.model_validation.statsmodels.RegressionModelsCoeffs",
@@ -118,6 +126,7 @@ TestID = Literal[
     "validmind.data_validation.TabularCategoricalBarPlots",
     "validmind.data_validation.AutoStationarity",
     "validmind.data_validation.DescriptiveStatistics",
+    "validmind.data_validation.TimeSeriesDescription",
     "validmind.data_validation.ANOVAOneWayTable",
     "validmind.data_validation.TargetRateBarPlots",
     "validmind.data_validation.PearsonCorrelationMatrix",
@@ -154,6 +163,7 @@ TestID = Literal[
     "validmind.data_validation.ClassImbalance",
     "validmind.data_validation.IQROutliersBarPlot",
     "validmind.data_validation.DFGLSArch",
+    "validmind.data_validation.TimeSeriesDescriptiveStatistics",
     "validmind.data_validation.AutoAR",
     "validmind.data_validation.TabularDateTimeHistograms",
     "validmind.data_validation.ADF",
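The `__types__.py` change only grows the `TestID` `Literal` with the IDs of the newly added tests. The value of keeping the IDs in a `Literal` alias is editor autocompletion and static checking wherever a test ID is accepted. A reduced sketch of that pattern, using only two IDs taken from the diff above:

```python
# Reduced sketch of the TestID pattern: a typing.Literal alias gives
# autocompletion and static checking for functions that accept a test ID.
# Only two of the real IDs are listed here.
from typing import Literal, get_args

TestID = Literal[
    "validmind.data_validation.TimeSeriesDescription",
    "validmind.model_validation.ModelMetadataComparison",
]

def describe(test_id: TestID) -> None:
    # A type checker flags any string not listed in the Literal.
    print(f"describing {test_id}")

print(get_args(TestID))  # recover the full tuple of allowed IDs at runtime
```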
validmind/tests/_store.py
ADDED
@@ -0,0 +1,102 @@
+# Copyright © 2023-2024 ValidMind Inc. All rights reserved.
+# See the LICENSE file in the root of this repository for details.
+# SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
+
+"""Module for storing loaded tests and test providers"""
+
+
+def singleton(cls):
+    """Decorator to make a class a singleton"""
+    instances = {}
+
+    def get_instance(*args, **kwargs):
+        if cls not in instances:
+            instances[cls] = cls(*args, **kwargs)
+        return instances[cls]
+
+    return get_instance
+
+
+@singleton
+class TestProviderStore:
+    """Singleton class for storing test providers"""
+
+    def __init__(self):
+        self.test_providers = {}
+
+    def has_test_provider(self, namespace: str) -> bool:
+        """Check if a test provider exists by namespace
+
+        Args:
+            namespace (str): The namespace of the test provider
+
+        Returns:
+            bool: True if the test provider exists
+        """
+        return namespace in self.test_providers
+
+    def get_test_provider(self, namespace: str):
+        """Get a test provider by namespace
+
+        Args:
+            namespace (str): The namespace of the test provider
+
+        Returns:
+            TestProvider: The test provider
+        """
+        return self.test_providers.get(namespace)
+
+    def register_test_provider(self, namespace: str, test_provider) -> None:
+        """Register an external test provider
+
+        Args:
+            namespace (str): The namespace of the test provider
+            test_provider (TestProvider): The test provider
+        """
+        self.test_providers[namespace] = test_provider
+
+
+class TestStore:
+    """Singleton class for storing loaded tests"""
+
+    def __init__(self):
+        self.tests = {}
+        self.custom_tests = {}
+
+    def get_test(self, test_id: str):
+        """Get a test by test ID
+
+        Args:
+            test_id (str): The test ID
+
+        Returns:
+            object: The test class or function
+        """
+        return self.tests.get(test_id)
+
+    def get_custom_test(self, test_id: str):
+        """Get a custom test by test ID
+
+        Args:
+            test_id (str): The test ID
+
+        Returns:
+            object: The test class or function
+        """
+        return self.custom_tests.get(test_id)
+
+    def get_test_ids(self) -> list:
+        """Get all registered test IDs"""
+        return list(self.tests.keys())
+
+    def register_test(self, test_id: str, test: object = None):
+        """Register a test"""
+        self.tests[test_id] = test
+
+    def register_custom_test(self, test_id: str, test: object):
+        """Register a single one-off custom test"""
+        self.custom_tests[test_id] = test
+
+
+test_store = TestStore()
+test_provider_store = TestProviderStore()
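The new `_store.py` centralizes the registries that the removed `__init__.py` kept in module-level dictionaries (`__test_providers`, `__custom_tests`). Because `TestProviderStore` is wrapped by the `singleton` decorator, every caller that constructs it receives the same cached instance, so the new `load.py` and `test_providers.py` can share state without reaching into each other's globals. A small illustration of that behavior, using the classes exactly as shown above; `MyProvider` is a hypothetical stand-in for an object exposing the provider's `load_test(test_id)` method:

```python
# Illustration of the singleton store added in _store.py. `MyProvider` is a
# hypothetical stand-in for a test provider exposing load_test(test_id).
from validmind.tests._store import TestProviderStore, test_provider_store


class MyProvider:
    def load_test(self, test_id):
        raise NotImplementedError(f"would load {test_id} here")


test_provider_store.register_test_provider("my_tests", MyProvider())
print(test_provider_store.has_test_provider("my_tests"))  # True

# The @singleton decorator caches the first instance, so "constructing"
# another TestProviderStore returns the exact same object.
assert TestProviderStore() is test_provider_store
```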
validmind/tests/data_validation/ACFandPACFPlot.py
CHANGED
@@ -50,15 +50,13 @@ class ACFandPACFPlot(Metric):

     name = "acf_pacf_plot"
     required_inputs = ["dataset"]
-
-
-        "
-
-
-
-
-        ],
-    }
+    tasks = ["regression"]
+    tags = [
+        "time_series_data",
+        "forecasting",
+        "statistical_test",
+        "visualization",
+    ]

     def run(self):
         # Check if index is datetime