validmind 2.4.10__tar.gz → 2.5.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {validmind-2.4.10 → validmind-2.5.1}/PKG-INFO +1 -1
- {validmind-2.4.10 → validmind-2.5.1}/pyproject.toml +1 -1
- validmind-2.5.1/validmind/__version__.py +1 -0
- {validmind-2.4.10 → validmind-2.5.1}/validmind/api_client.py +1 -0
- {validmind-2.4.10 → validmind-2.5.1}/validmind/client.py +0 -2
- {validmind-2.4.10 → validmind-2.5.1}/validmind/input_registry.py +8 -0
- {validmind-2.4.10 → validmind-2.5.1}/validmind/tests/__types__.py +4 -0
- {validmind-2.4.10 → validmind-2.5.1}/validmind/tests/data_validation/DatasetDescription.py +1 -0
- {validmind-2.4.10 → validmind-2.5.1}/validmind/tests/model_validation/sklearn/ClassifierPerformance.py +15 -6
- {validmind-2.4.10 → validmind-2.5.1}/validmind/tests/model_validation/sklearn/ClusterPerformance.py +2 -2
- {validmind-2.4.10 → validmind-2.5.1}/validmind/tests/model_validation/sklearn/MinimumROCAUCScore.py +10 -3
- validmind-2.5.1/validmind/tests/model_validation/sklearn/OverfitDiagnosis.py +395 -0
- {validmind-2.4.10 → validmind-2.5.1}/validmind/tests/model_validation/sklearn/PrecisionRecallCurve.py +1 -1
- {validmind-2.4.10 → validmind-2.5.1}/validmind/tests/model_validation/sklearn/RobustnessDiagnosis.py +36 -37
- validmind-2.5.1/validmind/tests/ongoing_monitoring/FeatureDrift.py +182 -0
- validmind-2.5.1/validmind/tests/ongoing_monitoring/PredictionAcrossEachFeature.py +76 -0
- validmind-2.5.1/validmind/tests/ongoing_monitoring/PredictionCorrelation.py +91 -0
- validmind-2.5.1/validmind/tests/ongoing_monitoring/TargetPredictionDistributionPlot.py +57 -0
- {validmind-2.4.10 → validmind-2.5.1}/validmind/tests/run.py +35 -19
- {validmind-2.4.10 → validmind-2.5.1}/validmind/unit_metrics/__init__.py +1 -1
- validmind-2.5.1/validmind/unit_metrics/classification/sklearn/ROC_AUC.py +34 -0
- {validmind-2.4.10 → validmind-2.5.1}/validmind/utils.py +1 -1
- {validmind-2.4.10 → validmind-2.5.1}/validmind/vm_models/__init__.py +2 -0
- {validmind-2.4.10 → validmind-2.5.1}/validmind/vm_models/dataset/dataset.py +55 -14
- validmind-2.5.1/validmind/vm_models/input.py +31 -0
- {validmind-2.4.10 → validmind-2.5.1}/validmind/vm_models/model.py +4 -2
- {validmind-2.4.10 → validmind-2.5.1}/validmind/vm_models/test_context.py +9 -2
- validmind-2.4.10/validmind/__version__.py +0 -1
- validmind-2.4.10/validmind/tests/model_validation/sklearn/OverfitDiagnosis.py +0 -337
- validmind-2.4.10/validmind/unit_metrics/classification/sklearn/ROC_AUC.py +0 -13
- {validmind-2.4.10 → validmind-2.5.1}/LICENSE +0 -0
- {validmind-2.4.10 → validmind-2.5.1}/README.pypi.md +0 -0
- {validmind-2.4.10 → validmind-2.5.1}/validmind/__init__.py +0 -0
- {validmind-2.4.10 → validmind-2.5.1}/validmind/ai/test_descriptions.py +0 -0
- {validmind-2.4.10 → validmind-2.5.1}/validmind/ai/utils.py +0 -0
- {validmind-2.4.10 → validmind-2.5.1}/validmind/client_config.py +0 -0
- {validmind-2.4.10 → validmind-2.5.1}/validmind/datasets/__init__.py +0 -0
- {validmind-2.4.10 → validmind-2.5.1}/validmind/datasets/classification/__init__.py +0 -0
- {validmind-2.4.10 → validmind-2.5.1}/validmind/datasets/classification/customer_churn.py +0 -0
- {validmind-2.4.10 → validmind-2.5.1}/validmind/datasets/classification/datasets/bank_customer_churn.csv +0 -0
- {validmind-2.4.10 → validmind-2.5.1}/validmind/datasets/classification/datasets/taiwan_credit.csv +0 -0
- {validmind-2.4.10 → validmind-2.5.1}/validmind/datasets/classification/taiwan_credit.py +0 -0
- {validmind-2.4.10 → validmind-2.5.1}/validmind/datasets/cluster/digits.py +0 -0
- {validmind-2.4.10 → validmind-2.5.1}/validmind/datasets/credit_risk/__init__.py +0 -0
- {validmind-2.4.10 → validmind-2.5.1}/validmind/datasets/credit_risk/datasets/lending_club_loan_data_2007_2014_clean.csv.gz +0 -0
- {validmind-2.4.10 → validmind-2.5.1}/validmind/datasets/credit_risk/lending_club.py +0 -0
- {validmind-2.4.10 → validmind-2.5.1}/validmind/datasets/llm/rag/__init__.py +0 -0
- {validmind-2.4.10 → validmind-2.5.1}/validmind/datasets/llm/rag/datasets/rfp_existing_questions_client_1.csv +0 -0
- {validmind-2.4.10 → validmind-2.5.1}/validmind/datasets/llm/rag/datasets/rfp_existing_questions_client_2.csv +0 -0
- {validmind-2.4.10 → validmind-2.5.1}/validmind/datasets/llm/rag/datasets/rfp_existing_questions_client_3.csv +0 -0
- {validmind-2.4.10 → validmind-2.5.1}/validmind/datasets/llm/rag/datasets/rfp_existing_questions_client_4.csv +0 -0
- {validmind-2.4.10 → validmind-2.5.1}/validmind/datasets/llm/rag/datasets/rfp_existing_questions_client_5.csv +0 -0
- {validmind-2.4.10 → validmind-2.5.1}/validmind/datasets/llm/rag/rfp.py +0 -0
- {validmind-2.4.10 → validmind-2.5.1}/validmind/datasets/nlp/__init__.py +0 -0
- {validmind-2.4.10 → validmind-2.5.1}/validmind/datasets/nlp/cnn_dailymail.py +0 -0
- {validmind-2.4.10 → validmind-2.5.1}/validmind/datasets/nlp/datasets/Covid_19.csv +0 -0
- {validmind-2.4.10 → validmind-2.5.1}/validmind/datasets/nlp/datasets/cnn_dailymail_100_with_predictions.csv +0 -0
- {validmind-2.4.10 → validmind-2.5.1}/validmind/datasets/nlp/datasets/cnn_dailymail_500_with_predictions.csv +0 -0
- {validmind-2.4.10 → validmind-2.5.1}/validmind/datasets/nlp/datasets/sentiments_with_predictions.csv +0 -0
- {validmind-2.4.10 → validmind-2.5.1}/validmind/datasets/nlp/twitter_covid_19.py +0 -0
- {validmind-2.4.10 → validmind-2.5.1}/validmind/datasets/regression/__init__.py +0 -0
- {validmind-2.4.10 → validmind-2.5.1}/validmind/datasets/regression/california_housing.py +0 -0
- {validmind-2.4.10 → validmind-2.5.1}/validmind/datasets/regression/datasets/fred/CPIAUCSL.csv +0 -0
- {validmind-2.4.10 → validmind-2.5.1}/validmind/datasets/regression/datasets/fred/CSUSHPISA.csv +0 -0
- {validmind-2.4.10 → validmind-2.5.1}/validmind/datasets/regression/datasets/fred/DRSFRMACBS.csv +0 -0
- {validmind-2.4.10 → validmind-2.5.1}/validmind/datasets/regression/datasets/fred/FEDFUNDS.csv +0 -0
- {validmind-2.4.10 → validmind-2.5.1}/validmind/datasets/regression/datasets/fred/GDP.csv +0 -0
- {validmind-2.4.10 → validmind-2.5.1}/validmind/datasets/regression/datasets/fred/GDPC1.csv +0 -0
- {validmind-2.4.10 → validmind-2.5.1}/validmind/datasets/regression/datasets/fred/GS10.csv +0 -0
- {validmind-2.4.10 → validmind-2.5.1}/validmind/datasets/regression/datasets/fred/GS3.csv +0 -0
- {validmind-2.4.10 → validmind-2.5.1}/validmind/datasets/regression/datasets/fred/GS5.csv +0 -0
- {validmind-2.4.10 → validmind-2.5.1}/validmind/datasets/regression/datasets/fred/MORTGAGE30US.csv +0 -0
- {validmind-2.4.10 → validmind-2.5.1}/validmind/datasets/regression/datasets/fred/UNRATE.csv +0 -0
- {validmind-2.4.10 → validmind-2.5.1}/validmind/datasets/regression/datasets/fred_loan_rates.csv +0 -0
- {validmind-2.4.10 → validmind-2.5.1}/validmind/datasets/regression/datasets/fred_loan_rates_test_1.csv +0 -0
- {validmind-2.4.10 → validmind-2.5.1}/validmind/datasets/regression/datasets/fred_loan_rates_test_2.csv +0 -0
- {validmind-2.4.10 → validmind-2.5.1}/validmind/datasets/regression/datasets/fred_loan_rates_test_3.csv +0 -0
- {validmind-2.4.10 → validmind-2.5.1}/validmind/datasets/regression/datasets/fred_loan_rates_test_4.csv +0 -0
- {validmind-2.4.10 → validmind-2.5.1}/validmind/datasets/regression/datasets/fred_loan_rates_test_5.csv +0 -0
- {validmind-2.4.10 → validmind-2.5.1}/validmind/datasets/regression/datasets/leanding_club_loan_rates.csv +0 -0
- {validmind-2.4.10 → validmind-2.5.1}/validmind/datasets/regression/fred.py +0 -0
- {validmind-2.4.10 → validmind-2.5.1}/validmind/datasets/regression/fred_timeseries.py +0 -0
- {validmind-2.4.10 → validmind-2.5.1}/validmind/datasets/regression/lending_club.py +0 -0
- {validmind-2.4.10 → validmind-2.5.1}/validmind/datasets/regression/models/fred_loan_rates_model_1.pkl +0 -0
- {validmind-2.4.10 → validmind-2.5.1}/validmind/datasets/regression/models/fred_loan_rates_model_2.pkl +0 -0
- {validmind-2.4.10 → validmind-2.5.1}/validmind/datasets/regression/models/fred_loan_rates_model_3.pkl +0 -0
- {validmind-2.4.10 → validmind-2.5.1}/validmind/datasets/regression/models/fred_loan_rates_model_4.pkl +0 -0
- {validmind-2.4.10 → validmind-2.5.1}/validmind/datasets/regression/models/fred_loan_rates_model_5.pkl +0 -0
- {validmind-2.4.10 → validmind-2.5.1}/validmind/errors.py +0 -0
- {validmind-2.4.10 → validmind-2.5.1}/validmind/html_templates/__init__.py +0 -0
- {validmind-2.4.10 → validmind-2.5.1}/validmind/html_templates/content_blocks.py +0 -0
- {validmind-2.4.10 → validmind-2.5.1}/validmind/logging.py +0 -0
- {validmind-2.4.10 → validmind-2.5.1}/validmind/models/__init__.py +0 -0
- {validmind-2.4.10 → validmind-2.5.1}/validmind/models/foundation.py +0 -0
- {validmind-2.4.10 → validmind-2.5.1}/validmind/models/function.py +0 -0
- {validmind-2.4.10 → validmind-2.5.1}/validmind/models/huggingface.py +0 -0
- {validmind-2.4.10 → validmind-2.5.1}/validmind/models/metadata.py +0 -0
- {validmind-2.4.10 → validmind-2.5.1}/validmind/models/pipeline.py +0 -0
- {validmind-2.4.10 → validmind-2.5.1}/validmind/models/pytorch.py +0 -0
- {validmind-2.4.10 → validmind-2.5.1}/validmind/models/r_model.py +0 -0
- {validmind-2.4.10 → validmind-2.5.1}/validmind/models/sklearn.py +0 -0
- {validmind-2.4.10 → validmind-2.5.1}/validmind/template.py +0 -0
- {validmind-2.4.10 → validmind-2.5.1}/validmind/test_suites/__init__.py +0 -0
- {validmind-2.4.10 → validmind-2.5.1}/validmind/test_suites/classifier.py +0 -0
- {validmind-2.4.10 → validmind-2.5.1}/validmind/test_suites/cluster.py +0 -0
- {validmind-2.4.10 → validmind-2.5.1}/validmind/test_suites/embeddings.py +0 -0
- {validmind-2.4.10 → validmind-2.5.1}/validmind/test_suites/llm.py +0 -0
- {validmind-2.4.10 → validmind-2.5.1}/validmind/test_suites/nlp.py +0 -0
- {validmind-2.4.10 → validmind-2.5.1}/validmind/test_suites/parameters_optimization.py +0 -0
- {validmind-2.4.10 → validmind-2.5.1}/validmind/test_suites/regression.py +0 -0
- {validmind-2.4.10 → validmind-2.5.1}/validmind/test_suites/statsmodels_timeseries.py +0 -0
- {validmind-2.4.10 → validmind-2.5.1}/validmind/test_suites/summarization.py +0 -0
- {validmind-2.4.10 → validmind-2.5.1}/validmind/test_suites/tabular_datasets.py +0 -0
- {validmind-2.4.10 → validmind-2.5.1}/validmind/test_suites/text_data.py +0 -0
- {validmind-2.4.10 → validmind-2.5.1}/validmind/test_suites/time_series.py +0 -0
- {validmind-2.4.10 → validmind-2.5.1}/validmind/tests/__init__.py +0 -0
- {validmind-2.4.10 → validmind-2.5.1}/validmind/tests/_store.py +0 -0
- {validmind-2.4.10 → validmind-2.5.1}/validmind/tests/data_validation/ACFandPACFPlot.py +0 -0
- {validmind-2.4.10 → validmind-2.5.1}/validmind/tests/data_validation/ADF.py +0 -0
- {validmind-2.4.10 → validmind-2.5.1}/validmind/tests/data_validation/ANOVAOneWayTable.py +0 -0
- {validmind-2.4.10 → validmind-2.5.1}/validmind/tests/data_validation/AutoAR.py +0 -0
- {validmind-2.4.10 → validmind-2.5.1}/validmind/tests/data_validation/AutoMA.py +0 -0
- {validmind-2.4.10 → validmind-2.5.1}/validmind/tests/data_validation/AutoSeasonality.py +0 -0
- {validmind-2.4.10 → validmind-2.5.1}/validmind/tests/data_validation/AutoStationarity.py +0 -0
- {validmind-2.4.10 → validmind-2.5.1}/validmind/tests/data_validation/BivariateFeaturesBarPlots.py +0 -0
- {validmind-2.4.10 → validmind-2.5.1}/validmind/tests/data_validation/BivariateHistograms.py +0 -0
- {validmind-2.4.10 → validmind-2.5.1}/validmind/tests/data_validation/BivariateScatterPlots.py +0 -0
- {validmind-2.4.10 → validmind-2.5.1}/validmind/tests/data_validation/ChiSquaredFeaturesTable.py +0 -0
- {validmind-2.4.10 → validmind-2.5.1}/validmind/tests/data_validation/ClassImbalance.py +0 -0
- {validmind-2.4.10 → validmind-2.5.1}/validmind/tests/data_validation/DFGLSArch.py +0 -0
- {validmind-2.4.10 → validmind-2.5.1}/validmind/tests/data_validation/DatasetSplit.py +0 -0
- {validmind-2.4.10 → validmind-2.5.1}/validmind/tests/data_validation/DescriptiveStatistics.py +0 -0
- {validmind-2.4.10 → validmind-2.5.1}/validmind/tests/data_validation/Duplicates.py +0 -0
- {validmind-2.4.10 → validmind-2.5.1}/validmind/tests/data_validation/EngleGrangerCoint.py +0 -0
- {validmind-2.4.10 → validmind-2.5.1}/validmind/tests/data_validation/FeatureTargetCorrelationPlot.py +0 -0
- {validmind-2.4.10 → validmind-2.5.1}/validmind/tests/data_validation/HeatmapFeatureCorrelations.py +0 -0
- {validmind-2.4.10 → validmind-2.5.1}/validmind/tests/data_validation/HighCardinality.py +0 -0
- {validmind-2.4.10 → validmind-2.5.1}/validmind/tests/data_validation/HighPearsonCorrelation.py +0 -0
- {validmind-2.4.10 → validmind-2.5.1}/validmind/tests/data_validation/IQROutliersBarPlot.py +0 -0
- {validmind-2.4.10 → validmind-2.5.1}/validmind/tests/data_validation/IQROutliersTable.py +0 -0
- {validmind-2.4.10 → validmind-2.5.1}/validmind/tests/data_validation/IsolationForestOutliers.py +0 -0
- {validmind-2.4.10 → validmind-2.5.1}/validmind/tests/data_validation/KPSS.py +0 -0
- {validmind-2.4.10 → validmind-2.5.1}/validmind/tests/data_validation/LaggedCorrelationHeatmap.py +0 -0
- {validmind-2.4.10 → validmind-2.5.1}/validmind/tests/data_validation/MissingValues.py +0 -0
- {validmind-2.4.10 → validmind-2.5.1}/validmind/tests/data_validation/MissingValuesBarPlot.py +0 -0
- {validmind-2.4.10 → validmind-2.5.1}/validmind/tests/data_validation/MissingValuesRisk.py +0 -0
- {validmind-2.4.10 → validmind-2.5.1}/validmind/tests/data_validation/PearsonCorrelationMatrix.py +0 -0
- {validmind-2.4.10 → validmind-2.5.1}/validmind/tests/data_validation/PhillipsPerronArch.py +0 -0
- {validmind-2.4.10 → validmind-2.5.1}/validmind/tests/data_validation/RollingStatsPlot.py +0 -0
- {validmind-2.4.10 → validmind-2.5.1}/validmind/tests/data_validation/ScatterPlot.py +0 -0
- {validmind-2.4.10 → validmind-2.5.1}/validmind/tests/data_validation/SeasonalDecompose.py +0 -0
- {validmind-2.4.10 → validmind-2.5.1}/validmind/tests/data_validation/Skewness.py +0 -0
- {validmind-2.4.10 → validmind-2.5.1}/validmind/tests/data_validation/SpreadPlot.py +0 -0
- {validmind-2.4.10 → validmind-2.5.1}/validmind/tests/data_validation/TabularCategoricalBarPlots.py +0 -0
- {validmind-2.4.10 → validmind-2.5.1}/validmind/tests/data_validation/TabularDateTimeHistograms.py +0 -0
- {validmind-2.4.10 → validmind-2.5.1}/validmind/tests/data_validation/TabularDescriptionTables.py +0 -0
- {validmind-2.4.10 → validmind-2.5.1}/validmind/tests/data_validation/TabularNumericalHistograms.py +0 -0
- {validmind-2.4.10 → validmind-2.5.1}/validmind/tests/data_validation/TargetRateBarPlots.py +0 -0
- {validmind-2.4.10 → validmind-2.5.1}/validmind/tests/data_validation/TimeSeriesDescription.py +0 -0
- {validmind-2.4.10 → validmind-2.5.1}/validmind/tests/data_validation/TimeSeriesDescriptiveStatistics.py +0 -0
- {validmind-2.4.10 → validmind-2.5.1}/validmind/tests/data_validation/TimeSeriesFrequency.py +0 -0
- {validmind-2.4.10 → validmind-2.5.1}/validmind/tests/data_validation/TimeSeriesHistogram.py +0 -0
- {validmind-2.4.10 → validmind-2.5.1}/validmind/tests/data_validation/TimeSeriesLinePlot.py +0 -0
- {validmind-2.4.10 → validmind-2.5.1}/validmind/tests/data_validation/TimeSeriesMissingValues.py +0 -0
- {validmind-2.4.10 → validmind-2.5.1}/validmind/tests/data_validation/TimeSeriesOutliers.py +0 -0
- {validmind-2.4.10 → validmind-2.5.1}/validmind/tests/data_validation/TooManyZeroValues.py +0 -0
- {validmind-2.4.10 → validmind-2.5.1}/validmind/tests/data_validation/UniqueRows.py +0 -0
- {validmind-2.4.10 → validmind-2.5.1}/validmind/tests/data_validation/WOEBinPlots.py +0 -0
- {validmind-2.4.10 → validmind-2.5.1}/validmind/tests/data_validation/WOEBinTable.py +0 -0
- {validmind-2.4.10 → validmind-2.5.1}/validmind/tests/data_validation/ZivotAndrewsArch.py +0 -0
- {validmind-2.4.10 → validmind-2.5.1}/validmind/tests/data_validation/__init__.py +0 -0
- {validmind-2.4.10 → validmind-2.5.1}/validmind/tests/data_validation/nlp/CommonWords.py +0 -0
- {validmind-2.4.10 → validmind-2.5.1}/validmind/tests/data_validation/nlp/Hashtags.py +0 -0
- {validmind-2.4.10 → validmind-2.5.1}/validmind/tests/data_validation/nlp/LanguageDetection.py +0 -0
- {validmind-2.4.10 → validmind-2.5.1}/validmind/tests/data_validation/nlp/Mentions.py +0 -0
- {validmind-2.4.10 → validmind-2.5.1}/validmind/tests/data_validation/nlp/PolarityAndSubjectivity.py +0 -0
- {validmind-2.4.10 → validmind-2.5.1}/validmind/tests/data_validation/nlp/Punctuations.py +0 -0
- {validmind-2.4.10 → validmind-2.5.1}/validmind/tests/data_validation/nlp/Sentiment.py +0 -0
- {validmind-2.4.10 → validmind-2.5.1}/validmind/tests/data_validation/nlp/StopWords.py +0 -0
- {validmind-2.4.10 → validmind-2.5.1}/validmind/tests/data_validation/nlp/TextDescription.py +0 -0
- {validmind-2.4.10 → validmind-2.5.1}/validmind/tests/data_validation/nlp/Toxicity.py +0 -0
- {validmind-2.4.10 → validmind-2.5.1}/validmind/tests/data_validation/nlp/__init__.py +0 -0
- {validmind-2.4.10 → validmind-2.5.1}/validmind/tests/decorator.py +0 -0
- {validmind-2.4.10 → validmind-2.5.1}/validmind/tests/load.py +0 -0
- {validmind-2.4.10 → validmind-2.5.1}/validmind/tests/metadata.py +0 -0
- {validmind-2.4.10 → validmind-2.5.1}/validmind/tests/model_validation/BertScore.py +0 -0
- {validmind-2.4.10 → validmind-2.5.1}/validmind/tests/model_validation/BleuScore.py +0 -0
- {validmind-2.4.10 → validmind-2.5.1}/validmind/tests/model_validation/ClusterSizeDistribution.py +0 -0
- {validmind-2.4.10 → validmind-2.5.1}/validmind/tests/model_validation/ContextualRecall.py +0 -0
- {validmind-2.4.10 → validmind-2.5.1}/validmind/tests/model_validation/FeaturesAUC.py +0 -0
- {validmind-2.4.10 → validmind-2.5.1}/validmind/tests/model_validation/MeteorScore.py +0 -0
- {validmind-2.4.10 → validmind-2.5.1}/validmind/tests/model_validation/ModelMetadata.py +0 -0
- {validmind-2.4.10 → validmind-2.5.1}/validmind/tests/model_validation/ModelMetadataComparison.py +0 -0
- {validmind-2.4.10 → validmind-2.5.1}/validmind/tests/model_validation/ModelPredictionResiduals.py +0 -0
- {validmind-2.4.10 → validmind-2.5.1}/validmind/tests/model_validation/RegardScore.py +0 -0
- {validmind-2.4.10 → validmind-2.5.1}/validmind/tests/model_validation/RegressionResidualsPlot.py +0 -0
- {validmind-2.4.10 → validmind-2.5.1}/validmind/tests/model_validation/RougeScore.py +0 -0
- {validmind-2.4.10 → validmind-2.5.1}/validmind/tests/model_validation/TimeSeriesPredictionWithCI.py +0 -0
- {validmind-2.4.10 → validmind-2.5.1}/validmind/tests/model_validation/TimeSeriesPredictionsPlot.py +0 -0
- {validmind-2.4.10 → validmind-2.5.1}/validmind/tests/model_validation/TimeSeriesR2SquareBySegments.py +0 -0
- {validmind-2.4.10 → validmind-2.5.1}/validmind/tests/model_validation/TokenDisparity.py +0 -0
- {validmind-2.4.10 → validmind-2.5.1}/validmind/tests/model_validation/ToxicityScore.py +0 -0
- {validmind-2.4.10 → validmind-2.5.1}/validmind/tests/model_validation/__init__.py +0 -0
- {validmind-2.4.10 → validmind-2.5.1}/validmind/tests/model_validation/embeddings/ClusterDistribution.py +0 -0
- {validmind-2.4.10 → validmind-2.5.1}/validmind/tests/model_validation/embeddings/CosineSimilarityComparison.py +0 -0
- {validmind-2.4.10 → validmind-2.5.1}/validmind/tests/model_validation/embeddings/CosineSimilarityDistribution.py +0 -0
- {validmind-2.4.10 → validmind-2.5.1}/validmind/tests/model_validation/embeddings/CosineSimilarityHeatmap.py +0 -0
- {validmind-2.4.10 → validmind-2.5.1}/validmind/tests/model_validation/embeddings/DescriptiveAnalytics.py +0 -0
- {validmind-2.4.10 → validmind-2.5.1}/validmind/tests/model_validation/embeddings/EmbeddingsVisualization2D.py +0 -0
- {validmind-2.4.10 → validmind-2.5.1}/validmind/tests/model_validation/embeddings/EuclideanDistanceComparison.py +0 -0
- {validmind-2.4.10 → validmind-2.5.1}/validmind/tests/model_validation/embeddings/EuclideanDistanceHeatmap.py +0 -0
- {validmind-2.4.10 → validmind-2.5.1}/validmind/tests/model_validation/embeddings/PCAComponentsPairwisePlots.py +0 -0
- {validmind-2.4.10 → validmind-2.5.1}/validmind/tests/model_validation/embeddings/StabilityAnalysis.py +0 -0
- {validmind-2.4.10 → validmind-2.5.1}/validmind/tests/model_validation/embeddings/StabilityAnalysisKeyword.py +0 -0
- {validmind-2.4.10 → validmind-2.5.1}/validmind/tests/model_validation/embeddings/StabilityAnalysisRandomNoise.py +0 -0
- {validmind-2.4.10 → validmind-2.5.1}/validmind/tests/model_validation/embeddings/StabilityAnalysisSynonyms.py +0 -0
- {validmind-2.4.10 → validmind-2.5.1}/validmind/tests/model_validation/embeddings/StabilityAnalysisTranslation.py +0 -0
- {validmind-2.4.10 → validmind-2.5.1}/validmind/tests/model_validation/embeddings/TSNEComponentsPairwisePlots.py +0 -0
- {validmind-2.4.10 → validmind-2.5.1}/validmind/tests/model_validation/ragas/AnswerCorrectness.py +0 -0
- {validmind-2.4.10 → validmind-2.5.1}/validmind/tests/model_validation/ragas/AnswerRelevance.py +0 -0
- {validmind-2.4.10 → validmind-2.5.1}/validmind/tests/model_validation/ragas/AnswerSimilarity.py +0 -0
- {validmind-2.4.10 → validmind-2.5.1}/validmind/tests/model_validation/ragas/AspectCritique.py +0 -0
- {validmind-2.4.10 → validmind-2.5.1}/validmind/tests/model_validation/ragas/ContextEntityRecall.py +0 -0
- {validmind-2.4.10 → validmind-2.5.1}/validmind/tests/model_validation/ragas/ContextPrecision.py +0 -0
- {validmind-2.4.10 → validmind-2.5.1}/validmind/tests/model_validation/ragas/ContextRecall.py +0 -0
- {validmind-2.4.10 → validmind-2.5.1}/validmind/tests/model_validation/ragas/ContextRelevancy.py +0 -0
- {validmind-2.4.10 → validmind-2.5.1}/validmind/tests/model_validation/ragas/Faithfulness.py +0 -0
- {validmind-2.4.10 → validmind-2.5.1}/validmind/tests/model_validation/ragas/utils.py +0 -0
- {validmind-2.4.10 → validmind-2.5.1}/validmind/tests/model_validation/sklearn/AdjustedMutualInformation.py +0 -0
- {validmind-2.4.10 → validmind-2.5.1}/validmind/tests/model_validation/sklearn/AdjustedRandIndex.py +0 -0
- {validmind-2.4.10 → validmind-2.5.1}/validmind/tests/model_validation/sklearn/ClusterCosineSimilarity.py +0 -0
- {validmind-2.4.10 → validmind-2.5.1}/validmind/tests/model_validation/sklearn/ClusterPerformanceMetrics.py +0 -0
- {validmind-2.4.10 → validmind-2.5.1}/validmind/tests/model_validation/sklearn/CompletenessScore.py +0 -0
- {validmind-2.4.10 → validmind-2.5.1}/validmind/tests/model_validation/sklearn/ConfusionMatrix.py +0 -0
- {validmind-2.4.10 → validmind-2.5.1}/validmind/tests/model_validation/sklearn/FeatureImportanceComparison.py +0 -0
- {validmind-2.4.10 → validmind-2.5.1}/validmind/tests/model_validation/sklearn/FowlkesMallowsScore.py +0 -0
- {validmind-2.4.10 → validmind-2.5.1}/validmind/tests/model_validation/sklearn/HomogeneityScore.py +0 -0
- {validmind-2.4.10 → validmind-2.5.1}/validmind/tests/model_validation/sklearn/HyperParametersTuning.py +0 -0
- {validmind-2.4.10 → validmind-2.5.1}/validmind/tests/model_validation/sklearn/KMeansClustersOptimization.py +0 -0
- {validmind-2.4.10 → validmind-2.5.1}/validmind/tests/model_validation/sklearn/MinimumAccuracy.py +0 -0
- {validmind-2.4.10 → validmind-2.5.1}/validmind/tests/model_validation/sklearn/MinimumF1Score.py +0 -0
- {validmind-2.4.10 → validmind-2.5.1}/validmind/tests/model_validation/sklearn/ModelsPerformanceComparison.py +0 -0
- {validmind-2.4.10 → validmind-2.5.1}/validmind/tests/model_validation/sklearn/PermutationFeatureImportance.py +0 -0
- {validmind-2.4.10 → validmind-2.5.1}/validmind/tests/model_validation/sklearn/PopulationStabilityIndex.py +0 -0
- {validmind-2.4.10 → validmind-2.5.1}/validmind/tests/model_validation/sklearn/ROCCurve.py +0 -0
- {validmind-2.4.10 → validmind-2.5.1}/validmind/tests/model_validation/sklearn/RegressionErrors.py +0 -0
- {validmind-2.4.10 → validmind-2.5.1}/validmind/tests/model_validation/sklearn/RegressionErrorsComparison.py +0 -0
- {validmind-2.4.10 → validmind-2.5.1}/validmind/tests/model_validation/sklearn/RegressionModelsPerformanceComparison.py +0 -0
- {validmind-2.4.10 → validmind-2.5.1}/validmind/tests/model_validation/sklearn/RegressionR2Square.py +0 -0
- {validmind-2.4.10 → validmind-2.5.1}/validmind/tests/model_validation/sklearn/RegressionR2SquareComparison.py +0 -0
- {validmind-2.4.10 → validmind-2.5.1}/validmind/tests/model_validation/sklearn/SHAPGlobalImportance.py +0 -0
- {validmind-2.4.10 → validmind-2.5.1}/validmind/tests/model_validation/sklearn/SilhouettePlot.py +0 -0
- {validmind-2.4.10 → validmind-2.5.1}/validmind/tests/model_validation/sklearn/TrainingTestDegradation.py +0 -0
- {validmind-2.4.10 → validmind-2.5.1}/validmind/tests/model_validation/sklearn/VMeasure.py +0 -0
- {validmind-2.4.10 → validmind-2.5.1}/validmind/tests/model_validation/sklearn/WeakspotsDiagnosis.py +0 -0
- {validmind-2.4.10 → validmind-2.5.1}/validmind/tests/model_validation/sklearn/__init__.py +0 -0
- {validmind-2.4.10 → validmind-2.5.1}/validmind/tests/model_validation/statsmodels/AutoARIMA.py +0 -0
- {validmind-2.4.10 → validmind-2.5.1}/validmind/tests/model_validation/statsmodels/BoxPierce.py +0 -0
- {validmind-2.4.10 → validmind-2.5.1}/validmind/tests/model_validation/statsmodels/CumulativePredictionProbabilities.py +0 -0
- {validmind-2.4.10 → validmind-2.5.1}/validmind/tests/model_validation/statsmodels/DurbinWatsonTest.py +0 -0
- {validmind-2.4.10 → validmind-2.5.1}/validmind/tests/model_validation/statsmodels/GINITable.py +0 -0
- {validmind-2.4.10 → validmind-2.5.1}/validmind/tests/model_validation/statsmodels/JarqueBera.py +0 -0
- {validmind-2.4.10 → validmind-2.5.1}/validmind/tests/model_validation/statsmodels/KolmogorovSmirnov.py +0 -0
- {validmind-2.4.10 → validmind-2.5.1}/validmind/tests/model_validation/statsmodels/LJungBox.py +0 -0
- {validmind-2.4.10 → validmind-2.5.1}/validmind/tests/model_validation/statsmodels/Lilliefors.py +0 -0
- {validmind-2.4.10 → validmind-2.5.1}/validmind/tests/model_validation/statsmodels/PredictionProbabilitiesHistogram.py +0 -0
- {validmind-2.4.10 → validmind-2.5.1}/validmind/tests/model_validation/statsmodels/RegressionCoeffsPlot.py +0 -0
- {validmind-2.4.10 → validmind-2.5.1}/validmind/tests/model_validation/statsmodels/RegressionFeatureSignificance.py +0 -0
- {validmind-2.4.10 → validmind-2.5.1}/validmind/tests/model_validation/statsmodels/RegressionModelForecastPlot.py +0 -0
- {validmind-2.4.10 → validmind-2.5.1}/validmind/tests/model_validation/statsmodels/RegressionModelForecastPlotLevels.py +0 -0
- {validmind-2.4.10 → validmind-2.5.1}/validmind/tests/model_validation/statsmodels/RegressionModelSensitivityPlot.py +0 -0
- {validmind-2.4.10 → validmind-2.5.1}/validmind/tests/model_validation/statsmodels/RegressionModelSummary.py +0 -0
- {validmind-2.4.10 → validmind-2.5.1}/validmind/tests/model_validation/statsmodels/RegressionModelsCoeffs.py +0 -0
- {validmind-2.4.10 → validmind-2.5.1}/validmind/tests/model_validation/statsmodels/RegressionPermutationFeatureImportance.py +0 -0
- {validmind-2.4.10 → validmind-2.5.1}/validmind/tests/model_validation/statsmodels/RunsTest.py +0 -0
- {validmind-2.4.10 → validmind-2.5.1}/validmind/tests/model_validation/statsmodels/ScorecardHistogram.py +0 -0
- {validmind-2.4.10 → validmind-2.5.1}/validmind/tests/model_validation/statsmodels/ShapiroWilk.py +0 -0
- {validmind-2.4.10 → validmind-2.5.1}/validmind/tests/model_validation/statsmodels/__init__.py +0 -0
- {validmind-2.4.10 → validmind-2.5.1}/validmind/tests/model_validation/statsmodels/statsutils.py +0 -0
- {validmind-2.4.10 → validmind-2.5.1}/validmind/tests/prompt_validation/Bias.py +0 -0
- {validmind-2.4.10 → validmind-2.5.1}/validmind/tests/prompt_validation/Clarity.py +0 -0
- {validmind-2.4.10 → validmind-2.5.1}/validmind/tests/prompt_validation/Conciseness.py +0 -0
- {validmind-2.4.10 → validmind-2.5.1}/validmind/tests/prompt_validation/Delimitation.py +0 -0
- {validmind-2.4.10 → validmind-2.5.1}/validmind/tests/prompt_validation/NegativeInstruction.py +0 -0
- {validmind-2.4.10 → validmind-2.5.1}/validmind/tests/prompt_validation/Robustness.py +0 -0
- {validmind-2.4.10 → validmind-2.5.1}/validmind/tests/prompt_validation/Specificity.py +0 -0
- {validmind-2.4.10 → validmind-2.5.1}/validmind/tests/prompt_validation/__init__.py +0 -0
- {validmind-2.4.10 → validmind-2.5.1}/validmind/tests/prompt_validation/ai_powered_test.py +0 -0
- {validmind-2.4.10 → validmind-2.5.1}/validmind/tests/test_providers.py +0 -0
- {validmind-2.4.10 → validmind-2.5.1}/validmind/tests/utils.py +0 -0
- {validmind-2.4.10 → validmind-2.5.1}/validmind/unit_metrics/classification/sklearn/Accuracy.py +0 -0
- {validmind-2.4.10 → validmind-2.5.1}/validmind/unit_metrics/classification/sklearn/F1.py +0 -0
- {validmind-2.4.10 → validmind-2.5.1}/validmind/unit_metrics/classification/sklearn/Precision.py +0 -0
- {validmind-2.4.10 → validmind-2.5.1}/validmind/unit_metrics/classification/sklearn/Recall.py +0 -0
- {validmind-2.4.10 → validmind-2.5.1}/validmind/unit_metrics/composite.py +0 -0
- {validmind-2.4.10 → validmind-2.5.1}/validmind/unit_metrics/regression/GiniCoefficient.py +0 -0
- {validmind-2.4.10 → validmind-2.5.1}/validmind/unit_metrics/regression/HuberLoss.py +0 -0
- {validmind-2.4.10 → validmind-2.5.1}/validmind/unit_metrics/regression/KolmogorovSmirnovStatistic.py +0 -0
- {validmind-2.4.10 → validmind-2.5.1}/validmind/unit_metrics/regression/MeanAbsolutePercentageError.py +0 -0
- {validmind-2.4.10 → validmind-2.5.1}/validmind/unit_metrics/regression/MeanBiasDeviation.py +0 -0
- {validmind-2.4.10 → validmind-2.5.1}/validmind/unit_metrics/regression/QuantileLoss.py +0 -0
- {validmind-2.4.10 → validmind-2.5.1}/validmind/unit_metrics/regression/sklearn/AdjustedRSquaredScore.py +0 -0
- {validmind-2.4.10 → validmind-2.5.1}/validmind/unit_metrics/regression/sklearn/MeanAbsoluteError.py +0 -0
- {validmind-2.4.10 → validmind-2.5.1}/validmind/unit_metrics/regression/sklearn/MeanSquaredError.py +0 -0
- {validmind-2.4.10 → validmind-2.5.1}/validmind/unit_metrics/regression/sklearn/RSquaredScore.py +0 -0
- {validmind-2.4.10 → validmind-2.5.1}/validmind/unit_metrics/regression/sklearn/RootMeanSquaredError.py +0 -0
- {validmind-2.4.10 → validmind-2.5.1}/validmind/vm_models/dataset/__init__.py +0 -0
- {validmind-2.4.10 → validmind-2.5.1}/validmind/vm_models/dataset/utils.py +0 -0
- {validmind-2.4.10 → validmind-2.5.1}/validmind/vm_models/figure.py +0 -0
- {validmind-2.4.10 → validmind-2.5.1}/validmind/vm_models/test/metric.py +0 -0
- {validmind-2.4.10 → validmind-2.5.1}/validmind/vm_models/test/metric_result.py +0 -0
- {validmind-2.4.10 → validmind-2.5.1}/validmind/vm_models/test/output_template.py +0 -0
- {validmind-2.4.10 → validmind-2.5.1}/validmind/vm_models/test/result_summary.py +0 -0
- {validmind-2.4.10 → validmind-2.5.1}/validmind/vm_models/test/result_wrapper.py +0 -0
- {validmind-2.4.10 → validmind-2.5.1}/validmind/vm_models/test/test.py +0 -0
- {validmind-2.4.10 → validmind-2.5.1}/validmind/vm_models/test/threshold_test.py +0 -0
- {validmind-2.4.10 → validmind-2.5.1}/validmind/vm_models/test/threshold_test_result.py +0 -0
- {validmind-2.4.10 → validmind-2.5.1}/validmind/vm_models/test_suite/runner.py +0 -0
- {validmind-2.4.10 → validmind-2.5.1}/validmind/vm_models/test_suite/summary.py +0 -0
- {validmind-2.4.10 → validmind-2.5.1}/validmind/vm_models/test_suite/test.py +0 -0
- {validmind-2.4.10 → validmind-2.5.1}/validmind/vm_models/test_suite/test_suite.py +0 -0
@@ -0,0 +1 @@
|
|
1
|
+
__version__ = "2.5.1"
|
@@ -48,7 +48,6 @@ def init_dataset(
|
|
48
48
|
index_name: str = None,
|
49
49
|
date_time_index: bool = False,
|
50
50
|
columns: list = None,
|
51
|
-
options: dict = None,
|
52
51
|
text_column: str = None,
|
53
52
|
target_column: str = None,
|
54
53
|
feature_columns: list = None,
|
@@ -72,7 +71,6 @@ def init_dataset(
|
|
72
71
|
Args:
|
73
72
|
dataset : dataset from various python libraries
|
74
73
|
model (VMModel): ValidMind model object
|
75
|
-
options (dict): A dictionary of options for the dataset
|
76
74
|
targets (vm.vm.DatasetTargets): A list of target variables
|
77
75
|
target_column (str): The name of the target column in the dataset
|
78
76
|
feature_columns (list): A list of names of feature columns in the dataset
|
@@ -6,6 +6,8 @@
|
|
6
6
|
Central class to register inputs
|
7
7
|
"""
|
8
8
|
|
9
|
+
from validmind.vm_models.input import VMInput
|
10
|
+
|
9
11
|
from .errors import InvalidInputError
|
10
12
|
|
11
13
|
|
@@ -14,6 +16,12 @@ class InputRegistry:
|
|
14
16
|
self.registry = {}
|
15
17
|
|
16
18
|
def add(self, key, obj):
    """Store ``obj`` in the registry under ``key``.

    Only ``VMInput`` instances are accepted; anything else is rejected with
    an ``InvalidInputError`` that names the offending type.
    """
    if isinstance(obj, VMInput):
        self.registry[key] = obj
        return

    raise InvalidInputError(
        f"Input object must be an instance of VMInput. Got {type(obj)} instead."
    )
|
18
26
|
|
19
27
|
def get(self, key):
|
@@ -113,6 +113,10 @@ TestID = Literal[
|
|
113
113
|
"validmind.model_validation.statsmodels.GINITable",
|
114
114
|
"validmind.model_validation.statsmodels.RegressionModelForecastPlot",
|
115
115
|
"validmind.model_validation.statsmodels.DurbinWatsonTest",
|
116
|
+
"validmind.ongoing_monitoring.PredictionCorrelation",
|
117
|
+
"validmind.ongoing_monitoring.PredictionAcrossEachFeature",
|
118
|
+
"validmind.ongoing_monitoring.FeatureDrift",
|
119
|
+
"validmind.ongoing_monitoring.TargetPredictionDistributionPlot",
|
116
120
|
"validmind.data_validation.MissingValuesRisk",
|
117
121
|
"validmind.data_validation.IQROutliersTable",
|
118
122
|
"validmind.data_validation.BivariateFeaturesBarPlots",
|
@@ -4,7 +4,7 @@
|
|
4
4
|
|
5
5
|
from dataclasses import dataclass
|
6
6
|
|
7
|
-
|
7
|
+
import numpy as np
|
8
8
|
from sklearn.metrics import classification_report, roc_auc_score
|
9
9
|
from sklearn.preprocessing import LabelBinarizer
|
10
10
|
|
@@ -71,7 +71,7 @@ class ClassifierPerformance(Metric):
|
|
71
71
|
When building a multi-class summary we need to calculate weighted average,
|
72
72
|
macro average and per class metrics.
|
73
73
|
"""
|
74
|
-
classes = {str(i) for i in unique(self.inputs.dataset.y)}
|
74
|
+
classes = {str(i) for i in np.unique(self.inputs.dataset.y)}
|
75
75
|
pr_f1_table = [
|
76
76
|
{
|
77
77
|
"Class": class_name,
|
@@ -126,9 +126,18 @@ class ClassifierPerformance(Metric):
|
|
126
126
|
output_dict=True,
|
127
127
|
zero_division=0,
|
128
128
|
)
|
129
|
-
|
130
|
-
|
131
|
-
|
132
|
-
)
|
129
|
+
|
130
|
+
y_true = self.inputs.dataset.y
|
131
|
+
|
132
|
+
if len(np.unique(y_true)) > 2:
|
133
|
+
y_pred = self.inputs.dataset.y_pred(self.inputs.model)
|
134
|
+
y_true = y_true.astype(y_pred.dtype)
|
135
|
+
roc_auc = self.multiclass_roc_auc_score(y_true, y_pred)
|
136
|
+
else:
|
137
|
+
y_prob = self.inputs.dataset.y_prob(self.inputs.model)
|
138
|
+
y_true = y_true.astype(y_prob.dtype).flatten()
|
139
|
+
roc_auc = roc_auc_score(y_true, y_prob)
|
140
|
+
|
141
|
+
report["roc_auc"] = roc_auc
|
133
142
|
|
134
143
|
return self.cache_results(report)
|
{validmind-2.4.10 → validmind-2.5.1}/validmind/tests/model_validation/sklearn/ClusterPerformance.py
RENAMED
@@ -57,7 +57,7 @@ class ClusterPerformance(Metric):
|
|
57
57
|
"model_performance",
|
58
58
|
]
|
59
59
|
|
60
|
-
def
|
60
|
+
def cluster_performance_metrics(
|
61
61
|
self, y_true_train, y_pred_train, y_true_test, y_pred_test, samples, metric_info
|
62
62
|
):
|
63
63
|
y_true_train = y_true_train.astype(y_pred_train.dtype).flatten()
|
@@ -107,7 +107,7 @@ class ClusterPerformance(Metric):
|
|
107
107
|
y_true_test = y_true_test.astype(class_pred_test.dtype)
|
108
108
|
|
109
109
|
samples = ["train", "test"]
|
110
|
-
results = self.
|
110
|
+
results = self.cluster_performance_metrics(
|
111
111
|
y_true_train,
|
112
112
|
class_pred_train,
|
113
113
|
y_true_test,
|
{validmind-2.4.10 → validmind-2.5.1}/validmind/tests/model_validation/sklearn/MinimumROCAUCScore.py
RENAMED
@@ -5,6 +5,7 @@
|
|
5
5
|
from dataclasses import dataclass
|
6
6
|
from typing import List
|
7
7
|
|
8
|
+
import numpy as np
|
8
9
|
import pandas as pd
|
9
10
|
from sklearn import metrics, preprocessing
|
10
11
|
|
@@ -99,9 +100,15 @@ class MinimumROCAUCScore(ThresholdTest):
|
|
99
100
|
|
100
101
|
def run(self):
|
101
102
|
y_true = self.inputs.dataset.y
|
102
|
-
|
103
|
-
|
104
|
-
|
103
|
+
|
104
|
+
if len(np.unique(y_true)) > 2:
|
105
|
+
class_pred = self.inputs.dataset.y_pred(self.inputs.model)
|
106
|
+
y_true = y_true.astype(class_pred.dtype)
|
107
|
+
roc_auc = self.multiclass_roc_auc_score(y_true, class_pred)
|
108
|
+
else:
|
109
|
+
y_prob = self.inputs.dataset.y_prob(self.inputs.model)
|
110
|
+
y_true = y_true.astype(y_prob.dtype).flatten()
|
111
|
+
roc_auc = metrics.roc_auc_score(y_true, y_prob)
|
105
112
|
|
106
113
|
passed = roc_auc > self.params["min_threshold"]
|
107
114
|
results = [
|
@@ -0,0 +1,395 @@
|
|
1
|
+
# Copyright © 2023-2024 ValidMind Inc. All rights reserved.
|
2
|
+
# See the LICENSE file in the root of this repository for details.
|
3
|
+
# SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
|
4
|
+
|
5
|
+
from dataclasses import dataclass
|
6
|
+
from typing import List
|
7
|
+
|
8
|
+
import matplotlib.pyplot as plt
|
9
|
+
import numpy as np
|
10
|
+
import pandas as pd
|
11
|
+
import seaborn as sns
|
12
|
+
from sklearn import metrics
|
13
|
+
|
14
|
+
from validmind.logging import get_logger
|
15
|
+
from validmind.vm_models import (
|
16
|
+
Figure,
|
17
|
+
ResultSummary,
|
18
|
+
ResultTable,
|
19
|
+
ResultTableMetadata,
|
20
|
+
ThresholdTest,
|
21
|
+
ThresholdTestResult,
|
22
|
+
VMDataset,
|
23
|
+
VMModel,
|
24
|
+
)
|
25
|
+
|
26
|
+
# Module-level logger for the informational messages emitted by this test.
logger = get_logger(__name__)

# Default value of `cut_off_threshold`: a train/test gap above this marks a slice as overfit.
DEFAULT_THRESHOLD = 0.04

# Supported metrics, keyed by the name callers pass as `metric`.
#   function          - sklearn scorer called as function(y_true, y_pred_or_prob)
#   is_classification - whether the metric applies to classification models
#   is_lower_better   - decides the sign of the train/test gap in `_prepare_results`
PERFORMANCE_METRICS = {
    name: {
        "function": function,
        "is_classification": is_classification,
        "is_lower_better": is_lower_better,
    }
    for name, function, is_classification, is_lower_better in (
        ("accuracy", metrics.accuracy_score, True, False),
        ("auc", metrics.roc_auc_score, True, False),
        ("f1", metrics.f1_score, True, False),
        ("precision", metrics.precision_score, True, False),
        ("recall", metrics.recall_score, True, False),
        ("mse", metrics.mean_squared_error, False, True),
        ("mae", metrics.mean_absolute_error, False, True),
        ("r2", metrics.r2_score, False, False),
        ("mape", metrics.mean_absolute_percentage_error, False, True),
    )
}
|
76
|
+
|
77
|
+
|
78
|
+
def _prepare_results(
    results_train: dict, results_test: dict, metric: str
) -> pd.DataFrame:
    """Combine per-slice train and test scores into one table with a gap column.

    Args:
        results_train: Column-oriented dict with keys "slice", "shape",
            "feature" and `metric`, as filled by `_compute_metrics`.
        results_test: Same layout, computed on the test data.
        metric: Key into `PERFORMANCE_METRICS`; also the score column name.

    Returns:
        A DataFrame with the training columns renamed to "training records"
        and "training <metric>", plus "test <metric>" and "gap".
    """
    train_col = f"training {metric}"
    test_col = f"test {metric}"

    combined = pd.DataFrame(results_train).rename(
        columns={"shape": "training records", metric: train_col}
    )
    combined[test_col] = pd.DataFrame(results_test)[metric]

    # The gap is oriented so that a positive value always means "worse on test":
    # test - train for error-style metrics, train - test for score-style ones.
    if PERFORMANCE_METRICS[metric]["is_lower_better"]:
        minuend, subtrahend = test_col, train_col
    else:
        minuend, subtrahend = train_col, test_col
    combined["gap"] = combined[minuend] - combined[subtrahend]

    return combined
|
97
|
+
|
98
|
+
|
99
|
+
def _compute_metrics(
|
100
|
+
results: dict,
|
101
|
+
region: str,
|
102
|
+
df_region: pd.DataFrame,
|
103
|
+
target_column: str,
|
104
|
+
prob_column: str,
|
105
|
+
pred_column: str,
|
106
|
+
feature_column: str,
|
107
|
+
metric: str,
|
108
|
+
is_classification: bool,
|
109
|
+
) -> None:
|
110
|
+
results["slice"].append(str(region))
|
111
|
+
results["shape"].append(df_region.shape[0])
|
112
|
+
results["feature"].append(feature_column)
|
113
|
+
|
114
|
+
# Check if any records
|
115
|
+
if df_region.empty:
|
116
|
+
results[metric].append(0)
|
117
|
+
return
|
118
|
+
|
119
|
+
metric_func = PERFORMANCE_METRICS[metric]["function"]
|
120
|
+
y_true = df_region[target_column].values
|
121
|
+
|
122
|
+
# AUC requires probability scores
|
123
|
+
if is_classification and metric == "auc":
|
124
|
+
# if only one class is present in the data, return 0
|
125
|
+
if len(np.unique(y_true)) == 1:
|
126
|
+
results[metric].append(0)
|
127
|
+
return
|
128
|
+
|
129
|
+
score = metric_func(y_true, df_region[prob_column].values)
|
130
|
+
|
131
|
+
# All other classification metrics
|
132
|
+
elif is_classification:
|
133
|
+
score = metric_func(y_true, df_region[pred_column].values)
|
134
|
+
|
135
|
+
# Regression metrics
|
136
|
+
else:
|
137
|
+
score = metric_func(y_true, df_region[pred_column].values)
|
138
|
+
|
139
|
+
results[metric].append(score)
|
140
|
+
|
141
|
+
|
142
|
+
def _plot_overfit_regions(
    df: pd.DataFrame, feature_column: str, threshold: float, metric: str
) -> plt.Figure:
    """Draw the per-slice train/test gap as a bar chart with the cut-off line.

    Args:
        df: Table with a "slice" column (x axis) and a "gap" column (y axis).
        feature_column: Feature name shown in the title.
        threshold: Height of the dashed red cut-off line.
        metric: Metric name, upper-cased in the y-axis label.

    Returns:
        The matplotlib figure, already closed in pyplot's registry.
    """
    fig, ax = plt.subplots()
    sns.barplot(data=df, x="slice", y="gap", ax=ax)

    # Draw threshold line; its label makes it show up in the legend
    ax.axhline(
        y=threshold,
        color="red",
        linestyle="--",
        linewidth=1,
        label=f"Cut-Off Threshold: {threshold}",
    )

    ax.tick_params(axis="x", rotation=90, labelsize=20)
    ax.tick_params(axis="y", labelsize=20)

    ax.set_ylabel(f"{metric.upper()} Gap", weight="bold", fontsize=18)
    ax.set_xlabel("Slice/Segments", weight="bold", fontsize=18)
    ax.set_title(
        f"Overfit regions in feature column: {feature_column}",
        weight="bold",
        fontsize=20,
        wrap=True,
    )

    # The labelled threshold line is already among the axes' legend handles.
    # Appending it again and trimming the handle list left handles and labels
    # with different lengths, so use the collected lists as they are.
    handles, labels = ax.get_legend_handles_labels()
    ax.legend(
        handles=handles,
        labels=labels,
        loc="upper center",
        bbox_to_anchor=(0.5, 0.1),
        ncol=max(len(handles), 1),
    )

    # Close only this figure; closing "all" would also discard figures that
    # belong to other code running in the same process.
    plt.close(fig)

    return fig
|
184
|
+
|
185
|
+
|
186
|
+
# TODO: make this a functional test instead of class-based when appropriate
|
187
|
+
# simply have to remove the class and rename this func to OverfitDiagnosis
|
188
|
+
def overfit_diagnosis(  # noqa: C901
    model: VMModel,
    datasets: List[VMDataset],
    metric: str = None,
    cut_off_threshold: float = DEFAULT_THRESHOLD,
):
    """Identify overfit regions in a model's predictions.

    This test compares the model's performance on training versus test data, grouped by
    feature columns. It calculates the difference between the training and test performance
    for each group and identifies regions where the difference exceeds a specified threshold.

    This test works for both classification and regression models and with a variety of
    performance metrics. By default, it uses the AUC metric for classification models and
    the MSE metric for regression models. The threshold for identifying overfit regions
    defaults to 0.04 but should be adjusted based on the specific use case.

    ## Inputs
    - `model` (VMModel): The ValidMind model object to evaluate.
    - `datasets` (List[VMDataset]): A list of two VMDataset objects where the first dataset
        is the training data and the second dataset is the test data.

    ## Parameters
    - `metric` (str, optional): The performance metric to use for evaluation. Choose from:
        'accuracy', 'auc', 'f1', 'precision', 'recall', 'mse', 'mae', 'r2', 'mape'.
        Defaults to 'auc' for classification models and 'mse' for regression models.
    - `cut_off_threshold` (float, optional): The threshold for identifying overfit regions.
        Defaults to 0.04.

    ## Returns
    A tuple whose first element is `{"Overfit Diagnosis": rows}`, where `rows` holds one
    dict per slice whose gap exceeds the threshold, followed by one `Figure` per feature
    column.

    ## Raises
    - `ValueError`: If `metric` is not a supported name, or if a regression metric is
        requested for a classification model (or vice versa).
    """
    train_dataset = datasets[0]

    # A model with a probability column on the dataset is treated as a classifier
    is_classification = bool(train_dataset.probability_column(model))

    # Set default metric if not provided
    if metric is None:
        metric = "auc" if is_classification else "mse"
        logger.info(
            f"Using default {'classification' if is_classification else 'regression'} metric: {metric}"
        )

    # Compare by value: object identity of floats is an implementation detail
    if cut_off_threshold == DEFAULT_THRESHOLD:
        logger.info(f"Using default cut-off threshold of {DEFAULT_THRESHOLD}")

    metric = metric.lower()
    try:
        _metric = PERFORMANCE_METRICS[metric]
    except KeyError:
        raise ValueError(
            f"Invalid metric. Choose from: {', '.join(PERFORMANCE_METRICS.keys())}"
        ) from None

    if is_classification and not _metric["is_classification"]:
        raise ValueError(f"Cannot use regression metric ({metric}) for classification.")
    elif not is_classification and _metric["is_classification"]:
        raise ValueError(f"Cannot use classification metric ({metric}) for regression.")

    test_dataset = datasets[1]

    # NOTE(review): prediction columns are written into these frames; presumably
    # `.df` hands back a copy so the datasets are not modified - confirm.
    train_df = train_dataset.df
    test_df = test_dataset.df

    pred_column = f"{train_dataset.target_column}_pred"
    prob_column = f"{train_dataset.target_column}_prob"

    train_df[pred_column] = train_dataset.y_pred(model)
    test_df[pred_column] = test_dataset.y_pred(model)

    if is_classification:
        train_df[prob_column] = train_dataset.y_prob(model)
        test_df[prob_column] = test_dataset.y_prob(model)

    test_results = []
    test_figures = []
    results_headers = ["slice", "shape", "feature", metric]

    for feature_column in train_dataset.feature_columns:
        # 10 equal-width bins by default; one bin per distinct value for categoricals.
        # NOTE(review): pd.cut needs numeric input, so categorical features are
        # presumably numerically encoded by this point - confirm.
        bins = 10
        if feature_column in train_dataset.feature_columns_categorical:
            bins = len(train_df[feature_column].unique())

        # Group on a standalone Series rather than a scratch "bin" column, so a
        # feature that happens to be called "bin" is not overwritten.
        train_bins = pd.cut(train_df[feature_column], bins=bins)

        results_train = {k: [] for k in results_headers}
        results_test = {k: [] for k in results_headers}

        # observed=False keeps empty bins in the output (scored as 0 by
        # _compute_metrics) and spells out the default pandas is deprecating.
        for region, df_region in train_df.groupby(train_bins, observed=False):
            _compute_metrics(
                results=results_train,
                region=region,
                df_region=df_region,
                feature_column=feature_column,
                target_column=train_dataset.target_column,
                prob_column=prob_column,
                pred_column=pred_column,
                metric=metric,
                is_classification=is_classification,
            )

            # Select test rows that fall into the same (left, right] interval
            df_test_region = test_df[
                (test_df[feature_column] > region.left)
                & (test_df[feature_column] <= region.right)
            ]
            _compute_metrics(
                results=results_test,
                region=region,
                df_region=df_test_region,
                feature_column=feature_column,
                target_column=test_dataset.target_column,
                prob_column=prob_column,
                pred_column=pred_column,
                metric=metric,
                is_classification=is_classification,
            )

        results = _prepare_results(results_train, results_test, metric)

        fig = _plot_overfit_regions(results, feature_column, cut_off_threshold, metric)
        test_figures.append(
            Figure(
                key=f"overfit_diagnosis:{metric}:{feature_column}",
                figure=fig,
                metadata={
                    "metric": metric,
                    "cut_off_threshold": cut_off_threshold,
                    "feature": feature_column,
                },
            )
        )

        # Report only the slices whose gap is strictly above the threshold
        for _, row in results[results["gap"] > cut_off_threshold].iterrows():
            test_results.append(
                {
                    "Feature": feature_column,
                    "Slice": row["slice"],
                    "Number of Records": row["training records"],
                    f"Training {metric.upper()}": row[f"training {metric}"],
                    f"Test {metric.upper()}": row[f"test {metric}"],
                    "Gap": row["gap"],
                }
            )

    return {"Overfit Diagnosis": test_results}, *test_figures
|
326
|
+
|
327
|
+
|
328
|
+
@dataclass
class OverfitDiagnosis(ThresholdTest):
    """Identify overfit regions in a model's predictions.

    This test compares the model's performance on training versus test data, grouped by
    feature columns. It calculates the difference between the training and test performance
    for each group and identifies regions where the difference exceeds a specified threshold.

    This test works for both classification and regression models and with a variety of
    performance metrics. By default, it uses the AUC metric for classification models and
    the MSE metric for regression models. The threshold for identifying overfit regions
    defaults to 0.04 but should be adjusted based on the specific use case.

    ## Inputs
    - `model` (VMModel): The ValidMind model object to evaluate.
    - `datasets` (List[VMDataset]): A list of two VMDataset objects where the first dataset
        is the training data and the second dataset is the test data.

    ## Parameters
    - `metric` (str, optional): The performance metric to use for evaluation. Choose from:
        'accuracy', 'auc', 'f1', 'precision', 'recall', 'mse', 'mae', 'r2', 'mape'.
        Defaults to 'auc' for classification models and 'mse' for regression models.
    - `cut_off_threshold` (float, optional): The threshold for identifying overfit regions.
        Defaults to 0.04.
    """

    required_inputs = ["model", "datasets"]
    # `metric` left as None lets overfit_diagnosis choose the default metric
    default_params = {"metric": None, "cut_off_threshold": DEFAULT_THRESHOLD}
    tasks = ["classification", "regression"]
    tags = [
        "sklearn",
        "binary_classification",
        "multiclass_classification",
        "linear_regression",
        "model_diagnosis",
    ]

    def run(self):
        """Run `overfit_diagnosis` and cache its rows as failed threshold results."""
        outcome = overfit_diagnosis(
            self.inputs.model,
            self.inputs.datasets,
            metric=self.params["metric"],
            cut_off_threshold=self.params["cut_off_threshold"],
        )
        # First element is the table payload; the remaining ones are figures
        overfit_rows = outcome[0]["Overfit Diagnosis"]

        # Every reported row is a slice above the threshold, hence passed=False.
        # NOTE(review): test_name is the raw `metric` param, which is None when
        # the default metric was used - confirm this is acceptable downstream.
        threshold_results = [
            ThresholdTestResult(
                test_name=self.params["metric"],
                column=row["Feature"],
                passed=False,
                values=dict(row),
            )
            for row in overfit_rows
        ]

        return self.cache_results(
            test_results_list=threshold_results,
            # The test passes only when no slice exceeded the threshold
            passed=(not overfit_rows),
            figures=outcome[1:],
        )

    def summary(self, results, _):
        """Build a one-table summary from the `values` of each result."""
        table = ResultTable(
            data=[result.values for result in results],
            metadata=ResultTableMetadata(title="Overfit Diagnosis"),
        )
        return ResultSummary(results=[table])
|
@@ -65,7 +65,7 @@ class PrecisionRecallCurve(Metric):
|
|
65
65
|
raise SkipTestError("Skipping PrecisionRecallCurve for Foundation models")
|
66
66
|
|
67
67
|
y_true = self.inputs.dataset.y
|
68
|
-
y_pred = self.inputs.
|
68
|
+
y_pred = self.inputs.dataset.y_prob(self.inputs.model)
|
69
69
|
|
70
70
|
# PR curve is only supported for binary classification
|
71
71
|
if len(np.unique(y_true)) > 2:
|