validmind 2.5.8__py3-none-any.whl → 2.5.15__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- validmind/__version__.py +1 -1
- validmind/ai/test_descriptions.py +26 -7
- validmind/api_client.py +89 -43
- validmind/client.py +2 -2
- validmind/client_config.py +11 -14
- validmind/datasets/regression/fred_timeseries.py +67 -138
- validmind/template.py +1 -0
- validmind/test_suites/__init__.py +0 -2
- validmind/test_suites/statsmodels_timeseries.py +1 -1
- validmind/test_suites/summarization.py +0 -1
- validmind/test_suites/time_series.py +0 -43
- validmind/tests/__types__.py +3 -13
- validmind/tests/data_validation/ACFandPACFPlot.py +15 -13
- validmind/tests/data_validation/ADF.py +31 -24
- validmind/tests/data_validation/AutoAR.py +9 -9
- validmind/tests/data_validation/AutoMA.py +23 -16
- validmind/tests/data_validation/AutoSeasonality.py +18 -16
- validmind/tests/data_validation/AutoStationarity.py +21 -16
- validmind/tests/data_validation/BivariateScatterPlots.py +67 -96
- validmind/tests/data_validation/ChiSquaredFeaturesTable.py +82 -124
- validmind/tests/data_validation/ClassImbalance.py +15 -12
- validmind/tests/data_validation/DFGLSArch.py +19 -13
- validmind/tests/data_validation/DatasetDescription.py +17 -11
- validmind/tests/data_validation/DatasetSplit.py +7 -5
- validmind/tests/data_validation/DescriptiveStatistics.py +28 -21
- validmind/tests/data_validation/Duplicates.py +33 -25
- validmind/tests/data_validation/EngleGrangerCoint.py +35 -33
- validmind/tests/data_validation/FeatureTargetCorrelationPlot.py +59 -71
- validmind/tests/data_validation/HighCardinality.py +19 -12
- validmind/tests/data_validation/HighPearsonCorrelation.py +27 -22
- validmind/tests/data_validation/IQROutliersBarPlot.py +13 -10
- validmind/tests/data_validation/IQROutliersTable.py +40 -36
- validmind/tests/data_validation/IsolationForestOutliers.py +21 -14
- validmind/tests/data_validation/KPSS.py +34 -29
- validmind/tests/data_validation/LaggedCorrelationHeatmap.py +22 -15
- validmind/tests/data_validation/MissingValues.py +32 -27
- validmind/tests/data_validation/MissingValuesBarPlot.py +25 -21
- validmind/tests/data_validation/PearsonCorrelationMatrix.py +71 -84
- validmind/tests/data_validation/PhillipsPerronArch.py +37 -30
- validmind/tests/data_validation/RollingStatsPlot.py +31 -23
- validmind/tests/data_validation/ScatterPlot.py +63 -78
- validmind/tests/data_validation/SeasonalDecompose.py +38 -34
- validmind/tests/data_validation/Skewness.py +35 -37
- validmind/tests/data_validation/SpreadPlot.py +35 -35
- validmind/tests/data_validation/TabularCategoricalBarPlots.py +23 -17
- validmind/tests/data_validation/TabularDateTimeHistograms.py +21 -13
- validmind/tests/data_validation/TabularDescriptionTables.py +51 -16
- validmind/tests/data_validation/TabularNumericalHistograms.py +25 -22
- validmind/tests/data_validation/TargetRateBarPlots.py +21 -14
- validmind/tests/data_validation/TimeSeriesDescription.py +25 -18
- validmind/tests/data_validation/TimeSeriesDescriptiveStatistics.py +23 -17
- validmind/tests/data_validation/TimeSeriesFrequency.py +24 -17
- validmind/tests/data_validation/TimeSeriesHistogram.py +33 -32
- validmind/tests/data_validation/TimeSeriesLinePlot.py +17 -10
- validmind/tests/data_validation/TimeSeriesMissingValues.py +15 -10
- validmind/tests/data_validation/TimeSeriesOutliers.py +37 -33
- validmind/tests/data_validation/TooManyZeroValues.py +16 -11
- validmind/tests/data_validation/UniqueRows.py +11 -6
- validmind/tests/data_validation/WOEBinPlots.py +23 -16
- validmind/tests/data_validation/WOEBinTable.py +35 -30
- validmind/tests/data_validation/ZivotAndrewsArch.py +34 -28
- validmind/tests/data_validation/nlp/CommonWords.py +21 -14
- validmind/tests/data_validation/nlp/Hashtags.py +27 -20
- validmind/tests/data_validation/nlp/LanguageDetection.py +33 -14
- validmind/tests/data_validation/nlp/Mentions.py +21 -15
- validmind/tests/data_validation/nlp/PolarityAndSubjectivity.py +32 -9
- validmind/tests/data_validation/nlp/Punctuations.py +24 -20
- validmind/tests/data_validation/nlp/Sentiment.py +27 -8
- validmind/tests/data_validation/nlp/StopWords.py +26 -19
- validmind/tests/data_validation/nlp/TextDescription.py +36 -35
- validmind/tests/data_validation/nlp/Toxicity.py +32 -9
- validmind/tests/decorator.py +81 -42
- validmind/tests/model_validation/BertScore.py +36 -27
- validmind/tests/model_validation/BleuScore.py +25 -19
- validmind/tests/model_validation/ClusterSizeDistribution.py +38 -34
- validmind/tests/model_validation/ContextualRecall.py +35 -13
- validmind/tests/model_validation/FeaturesAUC.py +32 -13
- validmind/tests/model_validation/MeteorScore.py +46 -33
- validmind/tests/model_validation/ModelMetadata.py +32 -64
- validmind/tests/model_validation/ModelPredictionResiduals.py +75 -73
- validmind/tests/model_validation/RegardScore.py +30 -14
- validmind/tests/model_validation/RegressionResidualsPlot.py +10 -5
- validmind/tests/model_validation/RougeScore.py +36 -30
- validmind/tests/model_validation/TimeSeriesPredictionWithCI.py +30 -14
- validmind/tests/model_validation/TimeSeriesPredictionsPlot.py +27 -30
- validmind/tests/model_validation/TimeSeriesR2SquareBySegments.py +68 -63
- validmind/tests/model_validation/TokenDisparity.py +31 -23
- validmind/tests/model_validation/ToxicityScore.py +26 -17
- validmind/tests/model_validation/embeddings/ClusterDistribution.py +24 -20
- validmind/tests/model_validation/embeddings/CosineSimilarityComparison.py +30 -27
- validmind/tests/model_validation/embeddings/CosineSimilarityDistribution.py +7 -5
- validmind/tests/model_validation/embeddings/CosineSimilarityHeatmap.py +32 -23
- validmind/tests/model_validation/embeddings/DescriptiveAnalytics.py +7 -5
- validmind/tests/model_validation/embeddings/EmbeddingsVisualization2D.py +15 -11
- validmind/tests/model_validation/embeddings/EuclideanDistanceComparison.py +29 -29
- validmind/tests/model_validation/embeddings/EuclideanDistanceHeatmap.py +34 -25
- validmind/tests/model_validation/embeddings/PCAComponentsPairwisePlots.py +38 -26
- validmind/tests/model_validation/embeddings/StabilityAnalysis.py +40 -1
- validmind/tests/model_validation/embeddings/StabilityAnalysisKeyword.py +18 -17
- validmind/tests/model_validation/embeddings/StabilityAnalysisRandomNoise.py +40 -45
- validmind/tests/model_validation/embeddings/StabilityAnalysisSynonyms.py +17 -19
- validmind/tests/model_validation/embeddings/StabilityAnalysisTranslation.py +29 -25
- validmind/tests/model_validation/embeddings/TSNEComponentsPairwisePlots.py +38 -28
- validmind/tests/model_validation/ragas/AnswerCorrectness.py +5 -4
- validmind/tests/model_validation/ragas/AnswerRelevance.py +5 -4
- validmind/tests/model_validation/ragas/AnswerSimilarity.py +5 -4
- validmind/tests/model_validation/ragas/AspectCritique.py +7 -0
- validmind/tests/model_validation/ragas/ContextEntityRecall.py +9 -8
- validmind/tests/model_validation/ragas/ContextPrecision.py +5 -4
- validmind/tests/model_validation/ragas/ContextRecall.py +5 -4
- validmind/tests/model_validation/ragas/Faithfulness.py +5 -4
- validmind/tests/model_validation/ragas/utils.py +6 -0
- validmind/tests/model_validation/sklearn/AdjustedMutualInformation.py +19 -12
- validmind/tests/model_validation/sklearn/AdjustedRandIndex.py +22 -17
- validmind/tests/model_validation/sklearn/ClassifierPerformance.py +27 -25
- validmind/tests/model_validation/sklearn/ClusterCosineSimilarity.py +7 -5
- validmind/tests/model_validation/sklearn/ClusterPerformance.py +40 -78
- validmind/tests/model_validation/sklearn/ClusterPerformanceMetrics.py +15 -17
- validmind/tests/model_validation/sklearn/CompletenessScore.py +17 -11
- validmind/tests/model_validation/sklearn/ConfusionMatrix.py +22 -15
- validmind/tests/model_validation/sklearn/FeatureImportance.py +95 -0
- validmind/tests/model_validation/sklearn/FowlkesMallowsScore.py +7 -7
- validmind/tests/model_validation/sklearn/HomogeneityScore.py +19 -12
- validmind/tests/model_validation/sklearn/HyperParametersTuning.py +35 -30
- validmind/tests/model_validation/sklearn/KMeansClustersOptimization.py +10 -5
- validmind/tests/model_validation/sklearn/MinimumAccuracy.py +32 -32
- validmind/tests/model_validation/sklearn/MinimumF1Score.py +23 -23
- validmind/tests/model_validation/sklearn/MinimumROCAUCScore.py +15 -10
- validmind/tests/model_validation/sklearn/ModelsPerformanceComparison.py +26 -19
- validmind/tests/model_validation/sklearn/OverfitDiagnosis.py +38 -18
- validmind/tests/model_validation/sklearn/PermutationFeatureImportance.py +31 -25
- validmind/tests/model_validation/sklearn/PopulationStabilityIndex.py +8 -6
- validmind/tests/model_validation/sklearn/PrecisionRecallCurve.py +24 -17
- validmind/tests/model_validation/sklearn/ROCCurve.py +12 -7
- validmind/tests/model_validation/sklearn/RegressionErrors.py +74 -130
- validmind/tests/model_validation/sklearn/RegressionErrorsComparison.py +27 -12
- validmind/tests/model_validation/sklearn/{RegressionModelsPerformanceComparison.py → RegressionPerformance.py} +18 -20
- validmind/tests/model_validation/sklearn/RegressionR2Square.py +55 -93
- validmind/tests/model_validation/sklearn/RegressionR2SquareComparison.py +32 -13
- validmind/tests/model_validation/sklearn/RobustnessDiagnosis.py +36 -32
- validmind/tests/model_validation/sklearn/SHAPGlobalImportance.py +7 -5
- validmind/tests/model_validation/sklearn/SilhouettePlot.py +27 -19
- validmind/tests/model_validation/sklearn/TrainingTestDegradation.py +25 -18
- validmind/tests/model_validation/sklearn/VMeasure.py +14 -13
- validmind/tests/model_validation/sklearn/WeakspotsDiagnosis.py +7 -5
- validmind/tests/model_validation/statsmodels/AutoARIMA.py +24 -18
- validmind/tests/model_validation/statsmodels/BoxPierce.py +14 -10
- validmind/tests/model_validation/statsmodels/CumulativePredictionProbabilities.py +73 -104
- validmind/tests/model_validation/statsmodels/DurbinWatsonTest.py +19 -12
- validmind/tests/model_validation/statsmodels/GINITable.py +44 -77
- validmind/tests/model_validation/statsmodels/JarqueBera.py +27 -22
- validmind/tests/model_validation/statsmodels/KolmogorovSmirnov.py +33 -34
- validmind/tests/model_validation/statsmodels/LJungBox.py +32 -28
- validmind/tests/model_validation/statsmodels/Lilliefors.py +27 -24
- validmind/tests/model_validation/statsmodels/PredictionProbabilitiesHistogram.py +87 -119
- validmind/tests/model_validation/statsmodels/RegressionCoeffs.py +100 -0
- validmind/tests/model_validation/statsmodels/RegressionFeatureSignificance.py +14 -9
- validmind/tests/model_validation/statsmodels/RegressionModelForecastPlot.py +17 -13
- validmind/tests/model_validation/statsmodels/RegressionModelForecastPlotLevels.py +46 -43
- validmind/tests/model_validation/statsmodels/RegressionModelSensitivityPlot.py +38 -36
- validmind/tests/model_validation/statsmodels/RegressionModelSummary.py +30 -28
- validmind/tests/model_validation/statsmodels/RegressionPermutationFeatureImportance.py +18 -11
- validmind/tests/model_validation/statsmodels/RunsTest.py +32 -28
- validmind/tests/model_validation/statsmodels/ScorecardHistogram.py +75 -107
- validmind/tests/model_validation/statsmodels/ShapiroWilk.py +15 -8
- validmind/tests/ongoing_monitoring/FeatureDrift.py +10 -6
- validmind/tests/ongoing_monitoring/PredictionAcrossEachFeature.py +31 -25
- validmind/tests/ongoing_monitoring/PredictionCorrelation.py +29 -21
- validmind/tests/ongoing_monitoring/TargetPredictionDistributionPlot.py +31 -23
- validmind/tests/prompt_validation/Bias.py +14 -11
- validmind/tests/prompt_validation/Clarity.py +16 -14
- validmind/tests/prompt_validation/Conciseness.py +7 -5
- validmind/tests/prompt_validation/Delimitation.py +23 -22
- validmind/tests/prompt_validation/NegativeInstruction.py +7 -5
- validmind/tests/prompt_validation/Robustness.py +12 -10
- validmind/tests/prompt_validation/Specificity.py +13 -11
- validmind/tests/prompt_validation/ai_powered_test.py +6 -0
- validmind/tests/run.py +68 -23
- validmind/unit_metrics/__init__.py +81 -144
- validmind/unit_metrics/classification/{sklearn/Accuracy.py → Accuracy.py} +1 -1
- validmind/unit_metrics/classification/{sklearn/F1.py → F1.py} +1 -1
- validmind/unit_metrics/classification/{sklearn/Precision.py → Precision.py} +1 -1
- validmind/unit_metrics/classification/{sklearn/ROC_AUC.py → ROC_AUC.py} +1 -2
- validmind/unit_metrics/classification/{sklearn/Recall.py → Recall.py} +1 -1
- validmind/unit_metrics/regression/{sklearn/AdjustedRSquaredScore.py → AdjustedRSquaredScore.py} +1 -1
- validmind/unit_metrics/regression/GiniCoefficient.py +1 -1
- validmind/unit_metrics/regression/HuberLoss.py +1 -1
- validmind/unit_metrics/regression/KolmogorovSmirnovStatistic.py +1 -1
- validmind/unit_metrics/regression/{sklearn/MeanAbsoluteError.py → MeanAbsoluteError.py} +1 -1
- validmind/unit_metrics/regression/MeanAbsolutePercentageError.py +1 -1
- validmind/unit_metrics/regression/MeanBiasDeviation.py +1 -1
- validmind/unit_metrics/regression/{sklearn/MeanSquaredError.py → MeanSquaredError.py} +1 -1
- validmind/unit_metrics/regression/QuantileLoss.py +1 -1
- validmind/unit_metrics/regression/{sklearn/RSquaredScore.py → RSquaredScore.py} +1 -1
- validmind/unit_metrics/regression/{sklearn/RootMeanSquaredError.py → RootMeanSquaredError.py} +1 -1
- validmind/vm_models/dataset/dataset.py +2 -0
- validmind/vm_models/figure.py +5 -0
- validmind/vm_models/test/result_wrapper.py +93 -132
- {validmind-2.5.8.dist-info → validmind-2.5.15.dist-info}/METADATA +1 -1
- {validmind-2.5.8.dist-info → validmind-2.5.15.dist-info}/RECORD +203 -210
- validmind/tests/data_validation/ANOVAOneWayTable.py +0 -138
- validmind/tests/data_validation/BivariateFeaturesBarPlots.py +0 -142
- validmind/tests/data_validation/BivariateHistograms.py +0 -117
- validmind/tests/data_validation/HeatmapFeatureCorrelations.py +0 -124
- validmind/tests/data_validation/MissingValuesRisk.py +0 -88
- validmind/tests/model_validation/ModelMetadataComparison.py +0 -59
- validmind/tests/model_validation/sklearn/FeatureImportanceComparison.py +0 -83
- validmind/tests/model_validation/statsmodels/RegressionCoeffsPlot.py +0 -135
- validmind/tests/model_validation/statsmodels/RegressionModelsCoeffs.py +0 -103
- {validmind-2.5.8.dist-info → validmind-2.5.15.dist-info}/LICENSE +0 -0
- {validmind-2.5.8.dist-info → validmind-2.5.15.dist-info}/WHEEL +0 -0
- {validmind-2.5.8.dist-info → validmind-2.5.15.dist-info}/entry_points.txt +0 -0
validmind/__version__.py
CHANGED
@@ -1 +1 @@
|
|
1
|
-
__version__ = "2.5.
|
1
|
+
__version__ = "2.5.15"
|
@@ -4,9 +4,11 @@
|
|
4
4
|
|
5
5
|
import os
|
6
6
|
from concurrent.futures import ThreadPoolExecutor
|
7
|
+
from typing import Union
|
7
8
|
|
8
9
|
from validmind.utils import md_to_html
|
9
10
|
|
11
|
+
from ..client_config import client_config
|
10
12
|
from ..logging import get_logger
|
11
13
|
|
12
14
|
__executor = ThreadPoolExecutor()
|
@@ -110,10 +112,11 @@ def generate_description(
|
|
110
112
|
test_id: str,
|
111
113
|
test_description: str,
|
112
114
|
test_summary: str,
|
115
|
+
metric: Union[float, int] = None,
|
113
116
|
figures: list = None,
|
114
117
|
):
|
115
118
|
"""Generate the description for the test results"""
|
116
|
-
if not test_summary and not figures:
|
119
|
+
if not test_summary and not figures and not metric:
|
117
120
|
raise ValueError("No summary or figures provided - cannot generate description")
|
118
121
|
|
119
122
|
# TODO: fix circular import
|
@@ -130,6 +133,13 @@ def generate_description(
|
|
130
133
|
else test_description
|
131
134
|
)
|
132
135
|
|
136
|
+
if metric:
|
137
|
+
metric_summary = f"**Metric Value**: {metric}"
|
138
|
+
if test_summary:
|
139
|
+
test_summary = metric_summary + "\n" + test_summary
|
140
|
+
else:
|
141
|
+
test_summary = metric_summary
|
142
|
+
|
133
143
|
if test_summary:
|
134
144
|
logger.debug(
|
135
145
|
f"Generating description for test {test_name} with stringified summary"
|
@@ -198,11 +208,16 @@ def background_generate_description(
|
|
198
208
|
test_description: str,
|
199
209
|
test_summary: str,
|
200
210
|
figures: list = None,
|
211
|
+
metric: Union[int, float] = None,
|
201
212
|
):
|
202
213
|
def wrapped():
|
203
214
|
try:
|
204
215
|
return generate_description(
|
205
|
-
test_id,
|
216
|
+
test_id=test_id,
|
217
|
+
test_description=test_description,
|
218
|
+
test_summary=test_summary,
|
219
|
+
figures=figures,
|
220
|
+
metric=metric,
|
206
221
|
)
|
207
222
|
except Exception as e:
|
208
223
|
logger.error(f"Failed to generate description: {e}")
|
@@ -217,6 +232,7 @@ def get_description_metadata(
|
|
217
232
|
default_description,
|
218
233
|
summary=None,
|
219
234
|
figures=None,
|
235
|
+
metric=None,
|
220
236
|
prefix="metric_description",
|
221
237
|
should_generate=True,
|
222
238
|
):
|
@@ -238,16 +254,18 @@ def get_description_metadata(
|
|
238
254
|
default_description (str): The default description for the test
|
239
255
|
summary (Any): The test summary or results to interpret
|
240
256
|
figures (List[Figure]): The figures to attach to the test suite result
|
257
|
+
metric (Union[int, float]): Unit metrics attached to the test result
|
241
258
|
prefix (str): The prefix to use for the content ID (Default: "metric_description")
|
242
259
|
should_generate (bool): Whether to generate the description or not (Default: True)
|
243
260
|
|
244
261
|
Returns:
|
245
262
|
dict: The metadata object to be logged with the test results
|
246
263
|
"""
|
247
|
-
|
248
|
-
|
249
|
-
|
250
|
-
|
264
|
+
# Check the feature flag first, then the environment variable
|
265
|
+
llm_descriptions_enabled = (
|
266
|
+
client_config.can_generate_llm_test_descriptions()
|
267
|
+
and os.getenv("VALIDMIND_LLM_DESCRIPTIONS_ENABLED", "1") not in ["0", "false"]
|
268
|
+
)
|
251
269
|
|
252
270
|
# TODO: fix circular import
|
253
271
|
from validmind.ai.utils import is_configured
|
@@ -255,7 +273,7 @@ def get_description_metadata(
|
|
255
273
|
if (
|
256
274
|
should_generate
|
257
275
|
and (summary or figures)
|
258
|
-
and
|
276
|
+
and llm_descriptions_enabled
|
259
277
|
and is_configured()
|
260
278
|
):
|
261
279
|
revision_name = AI_REVISION_NAME
|
@@ -267,6 +285,7 @@ def get_description_metadata(
|
|
267
285
|
test_description=default_description,
|
268
286
|
test_summary=summary,
|
269
287
|
figures=figures,
|
288
|
+
metric=metric,
|
270
289
|
)
|
271
290
|
|
272
291
|
else:
|
validmind/api_client.py
CHANGED
@@ -186,12 +186,24 @@ def __ping() -> Dict[str, Any]:
|
|
186
186
|
client_config.project = client_info["project"]
|
187
187
|
client_config.documentation_template = client_info.get("documentation_template", {})
|
188
188
|
client_config.feature_flags = client_info.get("feature_flags", {})
|
189
|
+
client_config.model = client_info.get("model", {})
|
190
|
+
client_config.document_type = client_info.get(
|
191
|
+
"document_type", "model_documentation"
|
192
|
+
)
|
189
193
|
|
190
194
|
if ack_connected:
|
191
|
-
|
192
|
-
|
193
|
-
|
194
|
-
|
195
|
+
if client_config.model:
|
196
|
+
logger.info(
|
197
|
+
f"🎉 Connected to ValidMind!\n"
|
198
|
+
f"📊 Model: {client_config.model.get('name', 'N/A')} "
|
199
|
+
f"(ID: {client_config.model.get('cuid', 'N/A')})\n"
|
200
|
+
f"📁 Document Type: {client_config.document_type}"
|
201
|
+
)
|
202
|
+
else:
|
203
|
+
logger.info(
|
204
|
+
f"Connected to ValidMind... Current Model: {client_config.project['name']}"
|
205
|
+
f" ({client_config.project['cuid']})"
|
206
|
+
)
|
195
207
|
|
196
208
|
|
197
209
|
def reload():
|
@@ -331,32 +343,6 @@ async def log_figures(figures: List[Figure]) -> Dict[str, Any]:
|
|
331
343
|
Returns:
|
332
344
|
dict: The response from the API
|
333
345
|
"""
|
334
|
-
# this actually slows things down - better to log them in parallel
|
335
|
-
# if client_config.can_log_figures(): # check if the backend supports batch logging
|
336
|
-
# try:
|
337
|
-
# data = {}
|
338
|
-
# files = {}
|
339
|
-
# for figure in figures:
|
340
|
-
# data.update(
|
341
|
-
# {f"{k}-{figure.key}": v for k, v in figure.serialize().items()}
|
342
|
-
# )
|
343
|
-
# files.update(
|
344
|
-
# {
|
345
|
-
# f"{k}-{figure.key}": v
|
346
|
-
# for k, v in figure.serialize_files().items()
|
347
|
-
# }
|
348
|
-
# )
|
349
|
-
|
350
|
-
# return await _post(
|
351
|
-
# "log_figures",
|
352
|
-
# data=data,
|
353
|
-
# files=files,
|
354
|
-
# )
|
355
|
-
# except Exception as e:
|
356
|
-
# logger.error("Error logging figures to ValidMind API")
|
357
|
-
# raise e
|
358
|
-
|
359
|
-
# else:
|
360
346
|
return await asyncio.gather(*[log_figure(figure) for figure in figures])
|
361
347
|
|
362
348
|
|
@@ -416,11 +402,11 @@ async def log_metrics(
|
|
416
402
|
Returns:
|
417
403
|
dict: The response from the API
|
418
404
|
"""
|
419
|
-
|
405
|
+
request_params = {}
|
420
406
|
if section_id:
|
421
|
-
|
407
|
+
request_params["section_id"] = section_id
|
422
408
|
if position is not None:
|
423
|
-
|
409
|
+
request_params["position"] = position
|
424
410
|
|
425
411
|
data = []
|
426
412
|
|
@@ -430,7 +416,7 @@ async def log_metrics(
|
|
430
416
|
"inputs": inputs,
|
431
417
|
}
|
432
418
|
|
433
|
-
if output_template
|
419
|
+
if output_template:
|
434
420
|
metric_data["output_template"] = output_template
|
435
421
|
|
436
422
|
data.append(metric_data)
|
@@ -438,7 +424,7 @@ async def log_metrics(
|
|
438
424
|
try:
|
439
425
|
return await _post(
|
440
426
|
"log_metrics",
|
441
|
-
params=
|
427
|
+
params=request_params,
|
442
428
|
data=json.dumps(data, cls=NumpyEncoder, allow_nan=False),
|
443
429
|
)
|
444
430
|
except Exception as e:
|
@@ -469,16 +455,16 @@ async def log_test_result(
|
|
469
455
|
Returns:
|
470
456
|
dict: The response from the API
|
471
457
|
"""
|
472
|
-
|
458
|
+
request_params = {}
|
473
459
|
if section_id:
|
474
|
-
|
460
|
+
request_params["section_id"] = section_id
|
475
461
|
if position is not None:
|
476
|
-
|
462
|
+
request_params["position"] = position
|
477
463
|
|
478
464
|
try:
|
479
465
|
return await _post(
|
480
466
|
"log_test_results",
|
481
|
-
params=
|
467
|
+
params=request_params,
|
482
468
|
data=json.dumps(
|
483
469
|
{
|
484
470
|
**result.serialize(),
|
@@ -503,7 +489,7 @@ def log_test_results(
|
|
503
489
|
|
504
490
|
Args:
|
505
491
|
results (list): A list of ThresholdTestResults objects
|
506
|
-
inputs (list): A list of input
|
492
|
+
inputs (list): A list of input IDs that were used to run the test
|
507
493
|
|
508
494
|
Raises:
|
509
495
|
Exception: If the API call fails
|
@@ -522,11 +508,11 @@ def log_test_results(
|
|
522
508
|
return responses
|
523
509
|
|
524
510
|
|
525
|
-
def log_input(
|
511
|
+
def log_input(input_id: str, type: str, metadata: Dict[str, Any]) -> Dict[str, Any]:
|
526
512
|
"""Logs input information - internal use for now (don't expose via public API)
|
527
513
|
|
528
514
|
Args:
|
529
|
-
|
515
|
+
input_id (str): The input_id of the input
|
530
516
|
type (str): The type of the input
|
531
517
|
metadata (dict): The metadata of the input
|
532
518
|
|
@@ -542,7 +528,7 @@ def log_input(name: str, type: str, metadata: Dict[str, Any]) -> Dict[str, Any]:
|
|
542
528
|
"log_input",
|
543
529
|
data=json.dumps(
|
544
530
|
{
|
545
|
-
"name":
|
531
|
+
"name": input_id,
|
546
532
|
"type": type,
|
547
533
|
"metadata": metadata,
|
548
534
|
},
|
@@ -555,6 +541,66 @@ def log_input(name: str, type: str, metadata: Dict[str, Any]) -> Dict[str, Any]:
|
|
555
541
|
raise e
|
556
542
|
|
557
543
|
|
544
|
+
async def alog_metric(
|
545
|
+
key: str,
|
546
|
+
value: float,
|
547
|
+
inputs: Optional[List[str]] = None,
|
548
|
+
params: Optional[Dict[str, Any]] = None,
|
549
|
+
recorded_at: Optional[str] = None,
|
550
|
+
) -> None:
|
551
|
+
"""See log_metric for details"""
|
552
|
+
if not key or not isinstance(key, str):
|
553
|
+
raise ValueError("`key` must be a non-empty string")
|
554
|
+
|
555
|
+
if not value or not isinstance(value, (int, float)):
|
556
|
+
raise ValueError("`value` must be a scalar (int or float)")
|
557
|
+
|
558
|
+
try:
|
559
|
+
return await _post(
|
560
|
+
"log_unit_metric",
|
561
|
+
data=json.dumps(
|
562
|
+
{
|
563
|
+
"key": key,
|
564
|
+
"value": value,
|
565
|
+
"inputs": inputs or [],
|
566
|
+
"params": params or {},
|
567
|
+
"recorded_at": recorded_at,
|
568
|
+
},
|
569
|
+
cls=NumpyEncoder,
|
570
|
+
allow_nan=False,
|
571
|
+
),
|
572
|
+
)
|
573
|
+
except Exception as e:
|
574
|
+
logger.error("Error logging metric to ValidMind API")
|
575
|
+
raise e
|
576
|
+
|
577
|
+
|
578
|
+
def log_metric(
|
579
|
+
key: str,
|
580
|
+
value: float,
|
581
|
+
inputs: Optional[List[str]] = None,
|
582
|
+
params: Optional[Dict[str, Any]] = None,
|
583
|
+
recorded_at: Optional[str] = None,
|
584
|
+
) -> None:
|
585
|
+
"""Logs a unit metric
|
586
|
+
|
587
|
+
Unit metrics are key-value pairs where the key is the metric name and the value is
|
588
|
+
a scalar (int or float). These key-value pairs are associated with the currently
|
589
|
+
selected model (inventory model in the ValidMind Platform) and keys can be logged
|
590
|
+
to over time to create a history of the metric. On the platform, these metrics
|
591
|
+
will be used to create plots/visualizations for documentation and dashboards etc.
|
592
|
+
|
593
|
+
Args:
|
594
|
+
key (str): The metric key
|
595
|
+
value (float): The metric value
|
596
|
+
inputs (list, optional): A list of input IDs that were used to compute the metric.
|
597
|
+
params (dict, optional): Dictionary of parameters used to compute the metric.
|
598
|
+
recorded_at (str, optional): The timestamp of the metric. Server will use
|
599
|
+
current time if not provided.
|
600
|
+
"""
|
601
|
+
run_async(alog_metric, key, value, inputs, params, recorded_at)
|
602
|
+
|
603
|
+
|
558
604
|
def start_run() -> str:
|
559
605
|
"""Starts a new test run
|
560
606
|
|
validmind/client.py
CHANGED
@@ -164,7 +164,7 @@ def init_dataset(
|
|
164
164
|
|
165
165
|
if __log:
|
166
166
|
log_input(
|
167
|
-
|
167
|
+
input_id=input_id,
|
168
168
|
type="dataset",
|
169
169
|
metadata=get_dataset_info(vm_dataset),
|
170
170
|
)
|
@@ -265,7 +265,7 @@ def init_model(
|
|
265
265
|
|
266
266
|
if __log:
|
267
267
|
log_input(
|
268
|
-
|
268
|
+
input_id=input_id,
|
269
269
|
type="model",
|
270
270
|
metadata=metadata,
|
271
271
|
)
|
validmind/client_config.py
CHANGED
@@ -18,7 +18,9 @@ class ClientConfig:
|
|
18
18
|
"""
|
19
19
|
|
20
20
|
project: object
|
21
|
+
model: object
|
21
22
|
feature_flags: dict
|
23
|
+
document_type: str
|
22
24
|
documentation_template: object
|
23
25
|
running_on_colab: bool = False
|
24
26
|
|
@@ -34,21 +36,16 @@ class ClientConfig:
|
|
34
36
|
except ImportError:
|
35
37
|
self.running_on_colab = False
|
36
38
|
|
37
|
-
def
|
38
|
-
"""
|
39
|
-
|
40
|
-
"""
|
41
|
-
return self.feature_flags.get("generate_json_plots", False)
|
42
|
-
|
43
|
-
def can_log_figures(self):
|
44
|
-
"""Returns True if the client can log figures to the API"""
|
45
|
-
return self.feature_flags.get("log_figures", False)
|
46
|
-
|
47
|
-
def can_log_output_template(self):
|
48
|
-
"""Returns True if the client can log output templates to the API"""
|
49
|
-
return self.feature_flags.get("output_templates", False)
|
39
|
+
def can_generate_llm_test_descriptions(self):
|
40
|
+
"""Returns True if the client can generate LLM based test descriptions"""
|
41
|
+
return self.feature_flags.get("llm_test_descriptions", True)
|
50
42
|
|
51
43
|
|
52
44
|
client_config = ClientConfig(
|
53
|
-
project=None,
|
45
|
+
project=None,
|
46
|
+
model=None,
|
47
|
+
feature_flags={},
|
48
|
+
document_type="model_documentation",
|
49
|
+
documentation_template=None,
|
50
|
+
running_on_colab=False,
|
54
51
|
)
|
@@ -68,65 +68,56 @@ def convert_to_levels(diff_df, original_df, target_column):
|
|
68
68
|
return levels_df
|
69
69
|
|
70
70
|
|
71
|
-
def get_demo_test_config(
|
71
|
+
def get_demo_test_config():
|
72
72
|
|
73
73
|
default_config = {}
|
74
74
|
|
75
75
|
default_config["validmind.data_validation.TimeSeriesDescription"] = {
|
76
|
-
"
|
77
|
-
"dataset":
|
76
|
+
"input_grid": {
|
77
|
+
"dataset": [
|
78
|
+
"raw_ds",
|
79
|
+
"train_diff_ds",
|
80
|
+
"test_diff_ds",
|
81
|
+
"train_ds",
|
82
|
+
"test_ds",
|
83
|
+
],
|
78
84
|
}
|
79
85
|
}
|
80
86
|
default_config["validmind.data_validation.TimeSeriesLinePlot"] = {
|
81
|
-
"
|
82
|
-
"dataset": "raw_ds",
|
87
|
+
"input_grid": {
|
88
|
+
"dataset": ["raw_ds"],
|
83
89
|
}
|
84
90
|
}
|
85
91
|
default_config["validmind.data_validation.TimeSeriesMissingValues"] = {
|
86
|
-
"
|
87
|
-
"dataset":
|
92
|
+
"input_grid": {
|
93
|
+
"dataset": [
|
94
|
+
"raw_ds",
|
95
|
+
"train_diff_ds",
|
96
|
+
"test_diff_ds",
|
97
|
+
"train_ds",
|
98
|
+
"test_ds",
|
99
|
+
],
|
88
100
|
}
|
89
101
|
}
|
90
102
|
default_config["validmind.data_validation.SeasonalDecompose"] = {
|
91
|
-
"
|
92
|
-
"dataset": "raw_ds",
|
103
|
+
"input_grid": {
|
104
|
+
"dataset": ["raw_ds"],
|
93
105
|
}
|
94
106
|
}
|
95
|
-
default_config[
|
96
|
-
"
|
97
|
-
|
98
|
-
"inputs": {
|
99
|
-
"dataset": "train_diff_ds",
|
107
|
+
default_config["validmind.data_validation.TimeSeriesDescriptiveStatistics"] = {
|
108
|
+
"input_grid": {
|
109
|
+
"dataset": ["train_diff_ds", "test_diff_ds"],
|
100
110
|
}
|
101
111
|
}
|
102
|
-
default_config[
|
103
|
-
"
|
104
|
-
|
105
|
-
"inputs": {
|
106
|
-
"dataset": "test_diff_ds",
|
107
|
-
}
|
108
|
-
}
|
109
|
-
default_config["validmind.data_validation.TimeSeriesOutliers:train_diff_data"] = {
|
110
|
-
"inputs": {
|
111
|
-
"dataset": "train_diff_ds",
|
112
|
+
default_config["validmind.data_validation.TimeSeriesOutliers"] = {
|
113
|
+
"input_grid": {
|
114
|
+
"dataset": ["train_diff_ds", "test_diff_ds"],
|
112
115
|
},
|
113
116
|
"params": {"zscore_threshold": 4},
|
114
117
|
}
|
115
|
-
default_config["validmind.data_validation.
|
116
|
-
"
|
117
|
-
"dataset": "test_diff_ds",
|
118
|
-
},
|
119
|
-
"params": {"zscore_threshold": 4},
|
120
|
-
}
|
121
|
-
default_config["validmind.data_validation.TimeSeriesHistogram:train_diff_data"] = {
|
122
|
-
"inputs": {
|
123
|
-
"dataset": "train_diff_ds",
|
124
|
-
},
|
125
|
-
"params": {"nbins": 100},
|
126
|
-
}
|
127
|
-
default_config["validmind.data_validation.TimeSeriesHistogram:test_diff_data"] = {
|
128
|
-
"inputs": {
|
129
|
-
"dataset": "test_diff_ds",
|
118
|
+
default_config["validmind.data_validation.TimeSeriesHistogram"] = {
|
119
|
+
"input_grid": {
|
120
|
+
"dataset": ["train_diff_ds", "test_diff_ds"],
|
130
121
|
},
|
131
122
|
"params": {"nbins": 100},
|
132
123
|
}
|
@@ -135,57 +126,37 @@ def get_demo_test_config(test_suite=None):
|
|
135
126
|
"datasets": ["train_diff_ds", "test_diff_ds"],
|
136
127
|
}
|
137
128
|
}
|
138
|
-
default_config["validmind.model_validation.
|
139
|
-
"
|
140
|
-
"
|
141
|
-
}
|
142
|
-
}
|
143
|
-
default_config[
|
144
|
-
"validmind.model_validation.sklearn.RegressionErrorsComparison:train_data"
|
145
|
-
] = {
|
146
|
-
"inputs": {
|
147
|
-
"datasets": ["train_ds", "train_ds"],
|
148
|
-
"models": ["random_forests_model", "gradient_boosting_model"],
|
129
|
+
default_config["validmind.model_validation.ModelMetadata"] = {
|
130
|
+
"input_grid": {
|
131
|
+
"model": ["random_forests_model", "gradient_boosting_model"],
|
149
132
|
}
|
150
133
|
}
|
151
|
-
default_config[
|
152
|
-
"
|
153
|
-
|
154
|
-
|
155
|
-
"datasets": ["test_ds", "test_ds"],
|
156
|
-
"models": ["random_forests_model", "gradient_boosting_model"],
|
134
|
+
default_config["validmind.model_validation.sklearn.RegressionErrors"] = {
|
135
|
+
"input_grid": {
|
136
|
+
"dataset": ["train_ds", "test_ds"],
|
137
|
+
"model": ["random_forests_model", "gradient_boosting_model"],
|
157
138
|
}
|
158
139
|
}
|
159
|
-
default_config[
|
160
|
-
"
|
161
|
-
|
162
|
-
|
163
|
-
"datasets": ["train_ds", "train_ds"],
|
164
|
-
"models": ["random_forests_model", "gradient_boosting_model"],
|
165
|
-
}
|
166
|
-
}
|
167
|
-
default_config[
|
168
|
-
"validmind.model_validation.sklearn.RegressionR2SquareComparison:test_data"
|
169
|
-
] = {
|
170
|
-
"inputs": {
|
171
|
-
"datasets": ["test_ds", "test_ds"],
|
172
|
-
"models": ["random_forests_model", "gradient_boosting_model"],
|
140
|
+
default_config["validmind.model_validation.sklearn.RegressionR2Square"] = {
|
141
|
+
"input_grid": {
|
142
|
+
"dataset": ["train_ds", "test_ds"],
|
143
|
+
"model": ["random_forests_model", "gradient_boosting_model"],
|
173
144
|
}
|
174
145
|
}
|
175
146
|
default_config[
|
176
147
|
"validmind.model_validation.TimeSeriesR2SquareBySegments:train_data"
|
177
148
|
] = {
|
178
|
-
"
|
179
|
-
"
|
180
|
-
"
|
149
|
+
"input_grid": {
|
150
|
+
"dataset": ["train_ds"],
|
151
|
+
"model": ["random_forests_model", "gradient_boosting_model"],
|
181
152
|
}
|
182
153
|
}
|
183
154
|
default_config[
|
184
155
|
"validmind.model_validation.TimeSeriesR2SquareBySegments:test_data"
|
185
156
|
] = {
|
186
|
-
"
|
187
|
-
"
|
188
|
-
"
|
157
|
+
"input_grid": {
|
158
|
+
"dataset": ["test_ds"],
|
159
|
+
"model": ["random_forests_model", "gradient_boosting_model"],
|
189
160
|
},
|
190
161
|
"params": {
|
191
162
|
"segments": {
|
@@ -194,78 +165,36 @@ def get_demo_test_config(test_suite=None):
|
|
194
165
|
}
|
195
166
|
},
|
196
167
|
}
|
197
|
-
default_config[
|
198
|
-
"
|
199
|
-
|
200
|
-
|
201
|
-
"datasets": ["train_ds", "train_ds"],
|
202
|
-
"models": ["random_forests_model", "gradient_boosting_model"],
|
203
|
-
}
|
204
|
-
}
|
205
|
-
default_config["validmind.model_validation.TimeSeriesPredictionsPlot:test_data"] = {
|
206
|
-
"inputs": {
|
207
|
-
"datasets": ["test_ds", "test_ds"],
|
208
|
-
"models": ["random_forests_model", "gradient_boosting_model"],
|
209
|
-
}
|
210
|
-
}
|
211
|
-
default_config[
|
212
|
-
"validmind.model_validation.TimeSeriesPredictionWithCI:random_forests_model"
|
213
|
-
] = {
|
214
|
-
"inputs": {
|
215
|
-
"dataset": "test_ds",
|
216
|
-
"model": "random_forests_model",
|
217
|
-
}
|
218
|
-
}
|
219
|
-
default_config[
|
220
|
-
"validmind.model_validation.TimeSeriesPredictionWithCI:gradient_boosting_model"
|
221
|
-
] = {
|
222
|
-
"inputs": {
|
223
|
-
"dataset": "test_ds",
|
224
|
-
"model": "gradient_boosting_model",
|
225
|
-
}
|
226
|
-
}
|
227
|
-
default_config["validmind.model_validation.ModelPredictionResiduals:train_data"] = {
|
228
|
-
"inputs": {
|
229
|
-
"datasets": ["train_ds", "train_ds"],
|
230
|
-
"models": ["random_forests_model", "gradient_boosting_model"],
|
168
|
+
default_config["validmind.model_validation.TimeSeriesPredictionsPlot"] = {
|
169
|
+
"input_grid": {
|
170
|
+
"dataset": ["train_ds", "test_ds"],
|
171
|
+
"model": ["random_forests_model", "gradient_boosting_model"],
|
231
172
|
}
|
232
173
|
}
|
233
|
-
default_config["validmind.model_validation.
|
234
|
-
"
|
235
|
-
"
|
236
|
-
"
|
237
|
-
}
|
238
|
-
}
|
239
|
-
default_config[
|
240
|
-
"validmind.model_validation.sklearn.FeatureImportanceComparison:train_data"
|
241
|
-
] = {
|
242
|
-
"inputs": {
|
243
|
-
"datasets": ["train_ds", "train_ds"],
|
244
|
-
"models": ["random_forests_model", "gradient_boosting_model"],
|
174
|
+
default_config["validmind.model_validation.TimeSeriesPredictionWithCI"] = {
|
175
|
+
"input_grid": {
|
176
|
+
"dataset": ["train_ds", "test_ds"],
|
177
|
+
"model": ["random_forests_model", "gradient_boosting_model"],
|
245
178
|
}
|
246
179
|
}
|
247
|
-
default_config[
|
248
|
-
"
|
249
|
-
|
250
|
-
|
251
|
-
"datasets": ["test_ds", "test_ds"],
|
252
|
-
"models": ["random_forests_model", "gradient_boosting_model"],
|
180
|
+
default_config["validmind.model_validation.ModelPredictionResiduals"] = {
|
181
|
+
"input_grid": {
|
182
|
+
"dataset": ["train_ds", "test_ds"],
|
183
|
+
"model": ["random_forests_model", "gradient_boosting_model"],
|
253
184
|
}
|
254
185
|
}
|
255
|
-
default_config[
|
256
|
-
"
|
257
|
-
|
258
|
-
|
259
|
-
"dataset": "test_ds",
|
260
|
-
"model": "random_forests_model",
|
186
|
+
default_config["validmind.model_validation.sklearn.FeatureImportance"] = {
|
187
|
+
"input_grid": {
|
188
|
+
"dataset": ["train_ds", "test_ds"],
|
189
|
+
"model": ["random_forests_model", "gradient_boosting_model"],
|
261
190
|
}
|
262
191
|
}
|
263
192
|
default_config[
|
264
|
-
"validmind.model_validation.sklearn.PermutationFeatureImportance
|
193
|
+
"validmind.model_validation.sklearn.PermutationFeatureImportance"
|
265
194
|
] = {
|
266
|
-
"
|
267
|
-
"dataset": "test_ds",
|
268
|
-
"model": "gradient_boosting_model",
|
195
|
+
"input_grid": {
|
196
|
+
"dataset": ["train_ds", "test_ds"],
|
197
|
+
"model": ["random_forests_model", "gradient_boosting_model"],
|
269
198
|
}
|
270
199
|
}
|
271
200
|
|
validmind/template.py
CHANGED
@@ -42,7 +42,6 @@ from .time_series import (
|
|
42
42
|
TimeSeriesDataset,
|
43
43
|
TimeSeriesModelValidation,
|
44
44
|
TimeSeriesMultivariate,
|
45
|
-
TimeSeriesSensitivity,
|
46
45
|
TimeSeriesUnivariate,
|
47
46
|
)
|
48
47
|
|
@@ -78,7 +77,6 @@ core_test_suites = {
|
|
78
77
|
TimeSeriesDataset.suite_id: TimeSeriesDataset,
|
79
78
|
TimeSeriesModelValidation.suite_id: TimeSeriesModelValidation,
|
80
79
|
TimeSeriesMultivariate.suite_id: TimeSeriesMultivariate,
|
81
|
-
TimeSeriesSensitivity.suite_id: TimeSeriesSensitivity,
|
82
80
|
TimeSeriesUnivariate.suite_id: TimeSeriesUnivariate,
|
83
81
|
}
|
84
82
|
|