validmind 2.4.13__tar.gz → 2.5.2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {validmind-2.4.13 → validmind-2.5.2}/PKG-INFO +2 -2
- {validmind-2.4.13 → validmind-2.5.2}/pyproject.toml +2 -2
- validmind-2.5.2/validmind/__version__.py +1 -0
- {validmind-2.4.13 → validmind-2.5.2}/validmind/tests/__types__.py +4 -0
- {validmind-2.4.13 → validmind-2.5.2}/validmind/tests/model_validation/sklearn/ClassifierPerformance.py +15 -6
- {validmind-2.4.13 → validmind-2.5.2}/validmind/tests/model_validation/sklearn/ClusterPerformance.py +2 -2
- {validmind-2.4.13 → validmind-2.5.2}/validmind/tests/model_validation/sklearn/MinimumROCAUCScore.py +10 -3
- validmind-2.5.2/validmind/tests/model_validation/sklearn/OverfitDiagnosis.py +395 -0
- {validmind-2.4.13 → validmind-2.5.2}/validmind/tests/model_validation/sklearn/PrecisionRecallCurve.py +1 -1
- {validmind-2.4.13 → validmind-2.5.2}/validmind/tests/model_validation/sklearn/RobustnessDiagnosis.py +36 -37
- validmind-2.5.2/validmind/tests/ongoing_monitoring/FeatureDrift.py +182 -0
- validmind-2.5.2/validmind/tests/ongoing_monitoring/PredictionAcrossEachFeature.py +76 -0
- validmind-2.5.2/validmind/tests/ongoing_monitoring/PredictionCorrelation.py +91 -0
- validmind-2.5.2/validmind/tests/ongoing_monitoring/TargetPredictionDistributionPlot.py +57 -0
- validmind-2.5.2/validmind/unit_metrics/classification/sklearn/ROC_AUC.py +34 -0
- {validmind-2.4.13 → validmind-2.5.2}/validmind/utils.py +1 -1
- {validmind-2.4.13 → validmind-2.5.2}/validmind/vm_models/dataset/dataset.py +2 -1
- validmind-2.4.13/validmind/__version__.py +0 -1
- validmind-2.4.13/validmind/tests/model_validation/sklearn/OverfitDiagnosis.py +0 -337
- validmind-2.4.13/validmind/unit_metrics/classification/sklearn/ROC_AUC.py +0 -13
- {validmind-2.4.13 → validmind-2.5.2}/LICENSE +0 -0
- {validmind-2.4.13 → validmind-2.5.2}/README.pypi.md +0 -0
- {validmind-2.4.13 → validmind-2.5.2}/validmind/__init__.py +0 -0
- {validmind-2.4.13 → validmind-2.5.2}/validmind/ai/test_descriptions.py +0 -0
- {validmind-2.4.13 → validmind-2.5.2}/validmind/ai/utils.py +0 -0
- {validmind-2.4.13 → validmind-2.5.2}/validmind/api_client.py +0 -0
- {validmind-2.4.13 → validmind-2.5.2}/validmind/client.py +0 -0
- {validmind-2.4.13 → validmind-2.5.2}/validmind/client_config.py +0 -0
- {validmind-2.4.13 → validmind-2.5.2}/validmind/datasets/__init__.py +0 -0
- {validmind-2.4.13 → validmind-2.5.2}/validmind/datasets/classification/__init__.py +0 -0
- {validmind-2.4.13 → validmind-2.5.2}/validmind/datasets/classification/customer_churn.py +0 -0
- {validmind-2.4.13 → validmind-2.5.2}/validmind/datasets/classification/datasets/bank_customer_churn.csv +0 -0
- {validmind-2.4.13 → validmind-2.5.2}/validmind/datasets/classification/datasets/taiwan_credit.csv +0 -0
- {validmind-2.4.13 → validmind-2.5.2}/validmind/datasets/classification/taiwan_credit.py +0 -0
- {validmind-2.4.13 → validmind-2.5.2}/validmind/datasets/cluster/digits.py +0 -0
- {validmind-2.4.13 → validmind-2.5.2}/validmind/datasets/credit_risk/__init__.py +0 -0
- {validmind-2.4.13 → validmind-2.5.2}/validmind/datasets/credit_risk/datasets/lending_club_loan_data_2007_2014_clean.csv.gz +0 -0
- {validmind-2.4.13 → validmind-2.5.2}/validmind/datasets/credit_risk/lending_club.py +0 -0
- {validmind-2.4.13 → validmind-2.5.2}/validmind/datasets/llm/rag/__init__.py +0 -0
- {validmind-2.4.13 → validmind-2.5.2}/validmind/datasets/llm/rag/datasets/rfp_existing_questions_client_1.csv +0 -0
- {validmind-2.4.13 → validmind-2.5.2}/validmind/datasets/llm/rag/datasets/rfp_existing_questions_client_2.csv +0 -0
- {validmind-2.4.13 → validmind-2.5.2}/validmind/datasets/llm/rag/datasets/rfp_existing_questions_client_3.csv +0 -0
- {validmind-2.4.13 → validmind-2.5.2}/validmind/datasets/llm/rag/datasets/rfp_existing_questions_client_4.csv +0 -0
- {validmind-2.4.13 → validmind-2.5.2}/validmind/datasets/llm/rag/datasets/rfp_existing_questions_client_5.csv +0 -0
- {validmind-2.4.13 → validmind-2.5.2}/validmind/datasets/llm/rag/rfp.py +0 -0
- {validmind-2.4.13 → validmind-2.5.2}/validmind/datasets/nlp/__init__.py +0 -0
- {validmind-2.4.13 → validmind-2.5.2}/validmind/datasets/nlp/cnn_dailymail.py +0 -0
- {validmind-2.4.13 → validmind-2.5.2}/validmind/datasets/nlp/datasets/Covid_19.csv +0 -0
- {validmind-2.4.13 → validmind-2.5.2}/validmind/datasets/nlp/datasets/cnn_dailymail_100_with_predictions.csv +0 -0
- {validmind-2.4.13 → validmind-2.5.2}/validmind/datasets/nlp/datasets/cnn_dailymail_500_with_predictions.csv +0 -0
- {validmind-2.4.13 → validmind-2.5.2}/validmind/datasets/nlp/datasets/sentiments_with_predictions.csv +0 -0
- {validmind-2.4.13 → validmind-2.5.2}/validmind/datasets/nlp/twitter_covid_19.py +0 -0
- {validmind-2.4.13 → validmind-2.5.2}/validmind/datasets/regression/__init__.py +0 -0
- {validmind-2.4.13 → validmind-2.5.2}/validmind/datasets/regression/california_housing.py +0 -0
- {validmind-2.4.13 → validmind-2.5.2}/validmind/datasets/regression/datasets/fred/CPIAUCSL.csv +0 -0
- {validmind-2.4.13 → validmind-2.5.2}/validmind/datasets/regression/datasets/fred/CSUSHPISA.csv +0 -0
- {validmind-2.4.13 → validmind-2.5.2}/validmind/datasets/regression/datasets/fred/DRSFRMACBS.csv +0 -0
- {validmind-2.4.13 → validmind-2.5.2}/validmind/datasets/regression/datasets/fred/FEDFUNDS.csv +0 -0
- {validmind-2.4.13 → validmind-2.5.2}/validmind/datasets/regression/datasets/fred/GDP.csv +0 -0
- {validmind-2.4.13 → validmind-2.5.2}/validmind/datasets/regression/datasets/fred/GDPC1.csv +0 -0
- {validmind-2.4.13 → validmind-2.5.2}/validmind/datasets/regression/datasets/fred/GS10.csv +0 -0
- {validmind-2.4.13 → validmind-2.5.2}/validmind/datasets/regression/datasets/fred/GS3.csv +0 -0
- {validmind-2.4.13 → validmind-2.5.2}/validmind/datasets/regression/datasets/fred/GS5.csv +0 -0
- {validmind-2.4.13 → validmind-2.5.2}/validmind/datasets/regression/datasets/fred/MORTGAGE30US.csv +0 -0
- {validmind-2.4.13 → validmind-2.5.2}/validmind/datasets/regression/datasets/fred/UNRATE.csv +0 -0
- {validmind-2.4.13 → validmind-2.5.2}/validmind/datasets/regression/datasets/fred_loan_rates.csv +0 -0
- {validmind-2.4.13 → validmind-2.5.2}/validmind/datasets/regression/datasets/fred_loan_rates_test_1.csv +0 -0
- {validmind-2.4.13 → validmind-2.5.2}/validmind/datasets/regression/datasets/fred_loan_rates_test_2.csv +0 -0
- {validmind-2.4.13 → validmind-2.5.2}/validmind/datasets/regression/datasets/fred_loan_rates_test_3.csv +0 -0
- {validmind-2.4.13 → validmind-2.5.2}/validmind/datasets/regression/datasets/fred_loan_rates_test_4.csv +0 -0
- {validmind-2.4.13 → validmind-2.5.2}/validmind/datasets/regression/datasets/fred_loan_rates_test_5.csv +0 -0
- {validmind-2.4.13 → validmind-2.5.2}/validmind/datasets/regression/datasets/leanding_club_loan_rates.csv +0 -0
- {validmind-2.4.13 → validmind-2.5.2}/validmind/datasets/regression/fred.py +0 -0
- {validmind-2.4.13 → validmind-2.5.2}/validmind/datasets/regression/fred_timeseries.py +0 -0
- {validmind-2.4.13 → validmind-2.5.2}/validmind/datasets/regression/lending_club.py +0 -0
- {validmind-2.4.13 → validmind-2.5.2}/validmind/datasets/regression/models/fred_loan_rates_model_1.pkl +0 -0
- {validmind-2.4.13 → validmind-2.5.2}/validmind/datasets/regression/models/fred_loan_rates_model_2.pkl +0 -0
- {validmind-2.4.13 → validmind-2.5.2}/validmind/datasets/regression/models/fred_loan_rates_model_3.pkl +0 -0
- {validmind-2.4.13 → validmind-2.5.2}/validmind/datasets/regression/models/fred_loan_rates_model_4.pkl +0 -0
- {validmind-2.4.13 → validmind-2.5.2}/validmind/datasets/regression/models/fred_loan_rates_model_5.pkl +0 -0
- {validmind-2.4.13 → validmind-2.5.2}/validmind/errors.py +0 -0
- {validmind-2.4.13 → validmind-2.5.2}/validmind/html_templates/__init__.py +0 -0
- {validmind-2.4.13 → validmind-2.5.2}/validmind/html_templates/content_blocks.py +0 -0
- {validmind-2.4.13 → validmind-2.5.2}/validmind/input_registry.py +0 -0
- {validmind-2.4.13 → validmind-2.5.2}/validmind/logging.py +0 -0
- {validmind-2.4.13 → validmind-2.5.2}/validmind/models/__init__.py +0 -0
- {validmind-2.4.13 → validmind-2.5.2}/validmind/models/foundation.py +0 -0
- {validmind-2.4.13 → validmind-2.5.2}/validmind/models/function.py +0 -0
- {validmind-2.4.13 → validmind-2.5.2}/validmind/models/huggingface.py +0 -0
- {validmind-2.4.13 → validmind-2.5.2}/validmind/models/metadata.py +0 -0
- {validmind-2.4.13 → validmind-2.5.2}/validmind/models/pipeline.py +0 -0
- {validmind-2.4.13 → validmind-2.5.2}/validmind/models/pytorch.py +0 -0
- {validmind-2.4.13 → validmind-2.5.2}/validmind/models/r_model.py +0 -0
- {validmind-2.4.13 → validmind-2.5.2}/validmind/models/sklearn.py +0 -0
- {validmind-2.4.13 → validmind-2.5.2}/validmind/template.py +0 -0
- {validmind-2.4.13 → validmind-2.5.2}/validmind/test_suites/__init__.py +0 -0
- {validmind-2.4.13 → validmind-2.5.2}/validmind/test_suites/classifier.py +0 -0
- {validmind-2.4.13 → validmind-2.5.2}/validmind/test_suites/cluster.py +0 -0
- {validmind-2.4.13 → validmind-2.5.2}/validmind/test_suites/embeddings.py +0 -0
- {validmind-2.4.13 → validmind-2.5.2}/validmind/test_suites/llm.py +0 -0
- {validmind-2.4.13 → validmind-2.5.2}/validmind/test_suites/nlp.py +0 -0
- {validmind-2.4.13 → validmind-2.5.2}/validmind/test_suites/parameters_optimization.py +0 -0
- {validmind-2.4.13 → validmind-2.5.2}/validmind/test_suites/regression.py +0 -0
- {validmind-2.4.13 → validmind-2.5.2}/validmind/test_suites/statsmodels_timeseries.py +0 -0
- {validmind-2.4.13 → validmind-2.5.2}/validmind/test_suites/summarization.py +0 -0
- {validmind-2.4.13 → validmind-2.5.2}/validmind/test_suites/tabular_datasets.py +0 -0
- {validmind-2.4.13 → validmind-2.5.2}/validmind/test_suites/text_data.py +0 -0
- {validmind-2.4.13 → validmind-2.5.2}/validmind/test_suites/time_series.py +0 -0
- {validmind-2.4.13 → validmind-2.5.2}/validmind/tests/__init__.py +0 -0
- {validmind-2.4.13 → validmind-2.5.2}/validmind/tests/_store.py +0 -0
- {validmind-2.4.13 → validmind-2.5.2}/validmind/tests/data_validation/ACFandPACFPlot.py +0 -0
- {validmind-2.4.13 → validmind-2.5.2}/validmind/tests/data_validation/ADF.py +0 -0
- {validmind-2.4.13 → validmind-2.5.2}/validmind/tests/data_validation/ANOVAOneWayTable.py +0 -0
- {validmind-2.4.13 → validmind-2.5.2}/validmind/tests/data_validation/AutoAR.py +0 -0
- {validmind-2.4.13 → validmind-2.5.2}/validmind/tests/data_validation/AutoMA.py +0 -0
- {validmind-2.4.13 → validmind-2.5.2}/validmind/tests/data_validation/AutoSeasonality.py +0 -0
- {validmind-2.4.13 → validmind-2.5.2}/validmind/tests/data_validation/AutoStationarity.py +0 -0
- {validmind-2.4.13 → validmind-2.5.2}/validmind/tests/data_validation/BivariateFeaturesBarPlots.py +0 -0
- {validmind-2.4.13 → validmind-2.5.2}/validmind/tests/data_validation/BivariateHistograms.py +0 -0
- {validmind-2.4.13 → validmind-2.5.2}/validmind/tests/data_validation/BivariateScatterPlots.py +0 -0
- {validmind-2.4.13 → validmind-2.5.2}/validmind/tests/data_validation/ChiSquaredFeaturesTable.py +0 -0
- {validmind-2.4.13 → validmind-2.5.2}/validmind/tests/data_validation/ClassImbalance.py +0 -0
- {validmind-2.4.13 → validmind-2.5.2}/validmind/tests/data_validation/DFGLSArch.py +0 -0
- {validmind-2.4.13 → validmind-2.5.2}/validmind/tests/data_validation/DatasetDescription.py +0 -0
- {validmind-2.4.13 → validmind-2.5.2}/validmind/tests/data_validation/DatasetSplit.py +0 -0
- {validmind-2.4.13 → validmind-2.5.2}/validmind/tests/data_validation/DescriptiveStatistics.py +0 -0
- {validmind-2.4.13 → validmind-2.5.2}/validmind/tests/data_validation/Duplicates.py +0 -0
- {validmind-2.4.13 → validmind-2.5.2}/validmind/tests/data_validation/EngleGrangerCoint.py +0 -0
- {validmind-2.4.13 → validmind-2.5.2}/validmind/tests/data_validation/FeatureTargetCorrelationPlot.py +0 -0
- {validmind-2.4.13 → validmind-2.5.2}/validmind/tests/data_validation/HeatmapFeatureCorrelations.py +0 -0
- {validmind-2.4.13 → validmind-2.5.2}/validmind/tests/data_validation/HighCardinality.py +0 -0
- {validmind-2.4.13 → validmind-2.5.2}/validmind/tests/data_validation/HighPearsonCorrelation.py +0 -0
- {validmind-2.4.13 → validmind-2.5.2}/validmind/tests/data_validation/IQROutliersBarPlot.py +0 -0
- {validmind-2.4.13 → validmind-2.5.2}/validmind/tests/data_validation/IQROutliersTable.py +0 -0
- {validmind-2.4.13 → validmind-2.5.2}/validmind/tests/data_validation/IsolationForestOutliers.py +0 -0
- {validmind-2.4.13 → validmind-2.5.2}/validmind/tests/data_validation/KPSS.py +0 -0
- {validmind-2.4.13 → validmind-2.5.2}/validmind/tests/data_validation/LaggedCorrelationHeatmap.py +0 -0
- {validmind-2.4.13 → validmind-2.5.2}/validmind/tests/data_validation/MissingValues.py +0 -0
- {validmind-2.4.13 → validmind-2.5.2}/validmind/tests/data_validation/MissingValuesBarPlot.py +0 -0
- {validmind-2.4.13 → validmind-2.5.2}/validmind/tests/data_validation/MissingValuesRisk.py +0 -0
- {validmind-2.4.13 → validmind-2.5.2}/validmind/tests/data_validation/PearsonCorrelationMatrix.py +0 -0
- {validmind-2.4.13 → validmind-2.5.2}/validmind/tests/data_validation/PhillipsPerronArch.py +0 -0
- {validmind-2.4.13 → validmind-2.5.2}/validmind/tests/data_validation/RollingStatsPlot.py +0 -0
- {validmind-2.4.13 → validmind-2.5.2}/validmind/tests/data_validation/ScatterPlot.py +0 -0
- {validmind-2.4.13 → validmind-2.5.2}/validmind/tests/data_validation/SeasonalDecompose.py +0 -0
- {validmind-2.4.13 → validmind-2.5.2}/validmind/tests/data_validation/Skewness.py +0 -0
- {validmind-2.4.13 → validmind-2.5.2}/validmind/tests/data_validation/SpreadPlot.py +0 -0
- {validmind-2.4.13 → validmind-2.5.2}/validmind/tests/data_validation/TabularCategoricalBarPlots.py +0 -0
- {validmind-2.4.13 → validmind-2.5.2}/validmind/tests/data_validation/TabularDateTimeHistograms.py +0 -0
- {validmind-2.4.13 → validmind-2.5.2}/validmind/tests/data_validation/TabularDescriptionTables.py +0 -0
- {validmind-2.4.13 → validmind-2.5.2}/validmind/tests/data_validation/TabularNumericalHistograms.py +0 -0
- {validmind-2.4.13 → validmind-2.5.2}/validmind/tests/data_validation/TargetRateBarPlots.py +0 -0
- {validmind-2.4.13 → validmind-2.5.2}/validmind/tests/data_validation/TimeSeriesDescription.py +0 -0
- {validmind-2.4.13 → validmind-2.5.2}/validmind/tests/data_validation/TimeSeriesDescriptiveStatistics.py +0 -0
- {validmind-2.4.13 → validmind-2.5.2}/validmind/tests/data_validation/TimeSeriesFrequency.py +0 -0
- {validmind-2.4.13 → validmind-2.5.2}/validmind/tests/data_validation/TimeSeriesHistogram.py +0 -0
- {validmind-2.4.13 → validmind-2.5.2}/validmind/tests/data_validation/TimeSeriesLinePlot.py +0 -0
- {validmind-2.4.13 → validmind-2.5.2}/validmind/tests/data_validation/TimeSeriesMissingValues.py +0 -0
- {validmind-2.4.13 → validmind-2.5.2}/validmind/tests/data_validation/TimeSeriesOutliers.py +0 -0
- {validmind-2.4.13 → validmind-2.5.2}/validmind/tests/data_validation/TooManyZeroValues.py +0 -0
- {validmind-2.4.13 → validmind-2.5.2}/validmind/tests/data_validation/UniqueRows.py +0 -0
- {validmind-2.4.13 → validmind-2.5.2}/validmind/tests/data_validation/WOEBinPlots.py +0 -0
- {validmind-2.4.13 → validmind-2.5.2}/validmind/tests/data_validation/WOEBinTable.py +0 -0
- {validmind-2.4.13 → validmind-2.5.2}/validmind/tests/data_validation/ZivotAndrewsArch.py +0 -0
- {validmind-2.4.13 → validmind-2.5.2}/validmind/tests/data_validation/__init__.py +0 -0
- {validmind-2.4.13 → validmind-2.5.2}/validmind/tests/data_validation/nlp/CommonWords.py +0 -0
- {validmind-2.4.13 → validmind-2.5.2}/validmind/tests/data_validation/nlp/Hashtags.py +0 -0
- {validmind-2.4.13 → validmind-2.5.2}/validmind/tests/data_validation/nlp/LanguageDetection.py +0 -0
- {validmind-2.4.13 → validmind-2.5.2}/validmind/tests/data_validation/nlp/Mentions.py +0 -0
- {validmind-2.4.13 → validmind-2.5.2}/validmind/tests/data_validation/nlp/PolarityAndSubjectivity.py +0 -0
- {validmind-2.4.13 → validmind-2.5.2}/validmind/tests/data_validation/nlp/Punctuations.py +0 -0
- {validmind-2.4.13 → validmind-2.5.2}/validmind/tests/data_validation/nlp/Sentiment.py +0 -0
- {validmind-2.4.13 → validmind-2.5.2}/validmind/tests/data_validation/nlp/StopWords.py +0 -0
- {validmind-2.4.13 → validmind-2.5.2}/validmind/tests/data_validation/nlp/TextDescription.py +0 -0
- {validmind-2.4.13 → validmind-2.5.2}/validmind/tests/data_validation/nlp/Toxicity.py +0 -0
- {validmind-2.4.13 → validmind-2.5.2}/validmind/tests/data_validation/nlp/__init__.py +0 -0
- {validmind-2.4.13 → validmind-2.5.2}/validmind/tests/decorator.py +0 -0
- {validmind-2.4.13 → validmind-2.5.2}/validmind/tests/load.py +0 -0
- {validmind-2.4.13 → validmind-2.5.2}/validmind/tests/metadata.py +0 -0
- {validmind-2.4.13 → validmind-2.5.2}/validmind/tests/model_validation/BertScore.py +0 -0
- {validmind-2.4.13 → validmind-2.5.2}/validmind/tests/model_validation/BleuScore.py +0 -0
- {validmind-2.4.13 → validmind-2.5.2}/validmind/tests/model_validation/ClusterSizeDistribution.py +0 -0
- {validmind-2.4.13 → validmind-2.5.2}/validmind/tests/model_validation/ContextualRecall.py +0 -0
- {validmind-2.4.13 → validmind-2.5.2}/validmind/tests/model_validation/FeaturesAUC.py +0 -0
- {validmind-2.4.13 → validmind-2.5.2}/validmind/tests/model_validation/MeteorScore.py +0 -0
- {validmind-2.4.13 → validmind-2.5.2}/validmind/tests/model_validation/ModelMetadata.py +0 -0
- {validmind-2.4.13 → validmind-2.5.2}/validmind/tests/model_validation/ModelMetadataComparison.py +0 -0
- {validmind-2.4.13 → validmind-2.5.2}/validmind/tests/model_validation/ModelPredictionResiduals.py +0 -0
- {validmind-2.4.13 → validmind-2.5.2}/validmind/tests/model_validation/RegardScore.py +0 -0
- {validmind-2.4.13 → validmind-2.5.2}/validmind/tests/model_validation/RegressionResidualsPlot.py +0 -0
- {validmind-2.4.13 → validmind-2.5.2}/validmind/tests/model_validation/RougeScore.py +0 -0
- {validmind-2.4.13 → validmind-2.5.2}/validmind/tests/model_validation/TimeSeriesPredictionWithCI.py +0 -0
- {validmind-2.4.13 → validmind-2.5.2}/validmind/tests/model_validation/TimeSeriesPredictionsPlot.py +0 -0
- {validmind-2.4.13 → validmind-2.5.2}/validmind/tests/model_validation/TimeSeriesR2SquareBySegments.py +0 -0
- {validmind-2.4.13 → validmind-2.5.2}/validmind/tests/model_validation/TokenDisparity.py +0 -0
- {validmind-2.4.13 → validmind-2.5.2}/validmind/tests/model_validation/ToxicityScore.py +0 -0
- {validmind-2.4.13 → validmind-2.5.2}/validmind/tests/model_validation/__init__.py +0 -0
- {validmind-2.4.13 → validmind-2.5.2}/validmind/tests/model_validation/embeddings/ClusterDistribution.py +0 -0
- {validmind-2.4.13 → validmind-2.5.2}/validmind/tests/model_validation/embeddings/CosineSimilarityComparison.py +0 -0
- {validmind-2.4.13 → validmind-2.5.2}/validmind/tests/model_validation/embeddings/CosineSimilarityDistribution.py +0 -0
- {validmind-2.4.13 → validmind-2.5.2}/validmind/tests/model_validation/embeddings/CosineSimilarityHeatmap.py +0 -0
- {validmind-2.4.13 → validmind-2.5.2}/validmind/tests/model_validation/embeddings/DescriptiveAnalytics.py +0 -0
- {validmind-2.4.13 → validmind-2.5.2}/validmind/tests/model_validation/embeddings/EmbeddingsVisualization2D.py +0 -0
- {validmind-2.4.13 → validmind-2.5.2}/validmind/tests/model_validation/embeddings/EuclideanDistanceComparison.py +0 -0
- {validmind-2.4.13 → validmind-2.5.2}/validmind/tests/model_validation/embeddings/EuclideanDistanceHeatmap.py +0 -0
- {validmind-2.4.13 → validmind-2.5.2}/validmind/tests/model_validation/embeddings/PCAComponentsPairwisePlots.py +0 -0
- {validmind-2.4.13 → validmind-2.5.2}/validmind/tests/model_validation/embeddings/StabilityAnalysis.py +0 -0
- {validmind-2.4.13 → validmind-2.5.2}/validmind/tests/model_validation/embeddings/StabilityAnalysisKeyword.py +0 -0
- {validmind-2.4.13 → validmind-2.5.2}/validmind/tests/model_validation/embeddings/StabilityAnalysisRandomNoise.py +0 -0
- {validmind-2.4.13 → validmind-2.5.2}/validmind/tests/model_validation/embeddings/StabilityAnalysisSynonyms.py +0 -0
- {validmind-2.4.13 → validmind-2.5.2}/validmind/tests/model_validation/embeddings/StabilityAnalysisTranslation.py +0 -0
- {validmind-2.4.13 → validmind-2.5.2}/validmind/tests/model_validation/embeddings/TSNEComponentsPairwisePlots.py +0 -0
- {validmind-2.4.13 → validmind-2.5.2}/validmind/tests/model_validation/ragas/AnswerCorrectness.py +0 -0
- {validmind-2.4.13 → validmind-2.5.2}/validmind/tests/model_validation/ragas/AnswerRelevance.py +0 -0
- {validmind-2.4.13 → validmind-2.5.2}/validmind/tests/model_validation/ragas/AnswerSimilarity.py +0 -0
- {validmind-2.4.13 → validmind-2.5.2}/validmind/tests/model_validation/ragas/AspectCritique.py +0 -0
- {validmind-2.4.13 → validmind-2.5.2}/validmind/tests/model_validation/ragas/ContextEntityRecall.py +0 -0
- {validmind-2.4.13 → validmind-2.5.2}/validmind/tests/model_validation/ragas/ContextPrecision.py +0 -0
- {validmind-2.4.13 → validmind-2.5.2}/validmind/tests/model_validation/ragas/ContextRecall.py +0 -0
- {validmind-2.4.13 → validmind-2.5.2}/validmind/tests/model_validation/ragas/ContextRelevancy.py +0 -0
- {validmind-2.4.13 → validmind-2.5.2}/validmind/tests/model_validation/ragas/Faithfulness.py +0 -0
- {validmind-2.4.13 → validmind-2.5.2}/validmind/tests/model_validation/ragas/utils.py +0 -0
- {validmind-2.4.13 → validmind-2.5.2}/validmind/tests/model_validation/sklearn/AdjustedMutualInformation.py +0 -0
- {validmind-2.4.13 → validmind-2.5.2}/validmind/tests/model_validation/sklearn/AdjustedRandIndex.py +0 -0
- {validmind-2.4.13 → validmind-2.5.2}/validmind/tests/model_validation/sklearn/ClusterCosineSimilarity.py +0 -0
- {validmind-2.4.13 → validmind-2.5.2}/validmind/tests/model_validation/sklearn/ClusterPerformanceMetrics.py +0 -0
- {validmind-2.4.13 → validmind-2.5.2}/validmind/tests/model_validation/sklearn/CompletenessScore.py +0 -0
- {validmind-2.4.13 → validmind-2.5.2}/validmind/tests/model_validation/sklearn/ConfusionMatrix.py +0 -0
- {validmind-2.4.13 → validmind-2.5.2}/validmind/tests/model_validation/sklearn/FeatureImportanceComparison.py +0 -0
- {validmind-2.4.13 → validmind-2.5.2}/validmind/tests/model_validation/sklearn/FowlkesMallowsScore.py +0 -0
- {validmind-2.4.13 → validmind-2.5.2}/validmind/tests/model_validation/sklearn/HomogeneityScore.py +0 -0
- {validmind-2.4.13 → validmind-2.5.2}/validmind/tests/model_validation/sklearn/HyperParametersTuning.py +0 -0
- {validmind-2.4.13 → validmind-2.5.2}/validmind/tests/model_validation/sklearn/KMeansClustersOptimization.py +0 -0
- {validmind-2.4.13 → validmind-2.5.2}/validmind/tests/model_validation/sklearn/MinimumAccuracy.py +0 -0
- {validmind-2.4.13 → validmind-2.5.2}/validmind/tests/model_validation/sklearn/MinimumF1Score.py +0 -0
- {validmind-2.4.13 → validmind-2.5.2}/validmind/tests/model_validation/sklearn/ModelsPerformanceComparison.py +0 -0
- {validmind-2.4.13 → validmind-2.5.2}/validmind/tests/model_validation/sklearn/PermutationFeatureImportance.py +0 -0
- {validmind-2.4.13 → validmind-2.5.2}/validmind/tests/model_validation/sklearn/PopulationStabilityIndex.py +0 -0
- {validmind-2.4.13 → validmind-2.5.2}/validmind/tests/model_validation/sklearn/ROCCurve.py +0 -0
- {validmind-2.4.13 → validmind-2.5.2}/validmind/tests/model_validation/sklearn/RegressionErrors.py +0 -0
- {validmind-2.4.13 → validmind-2.5.2}/validmind/tests/model_validation/sklearn/RegressionErrorsComparison.py +0 -0
- {validmind-2.4.13 → validmind-2.5.2}/validmind/tests/model_validation/sklearn/RegressionModelsPerformanceComparison.py +0 -0
- {validmind-2.4.13 → validmind-2.5.2}/validmind/tests/model_validation/sklearn/RegressionR2Square.py +0 -0
- {validmind-2.4.13 → validmind-2.5.2}/validmind/tests/model_validation/sklearn/RegressionR2SquareComparison.py +0 -0
- {validmind-2.4.13 → validmind-2.5.2}/validmind/tests/model_validation/sklearn/SHAPGlobalImportance.py +0 -0
- {validmind-2.4.13 → validmind-2.5.2}/validmind/tests/model_validation/sklearn/SilhouettePlot.py +0 -0
- {validmind-2.4.13 → validmind-2.5.2}/validmind/tests/model_validation/sklearn/TrainingTestDegradation.py +0 -0
- {validmind-2.4.13 → validmind-2.5.2}/validmind/tests/model_validation/sklearn/VMeasure.py +0 -0
- {validmind-2.4.13 → validmind-2.5.2}/validmind/tests/model_validation/sklearn/WeakspotsDiagnosis.py +0 -0
- {validmind-2.4.13 → validmind-2.5.2}/validmind/tests/model_validation/sklearn/__init__.py +0 -0
- {validmind-2.4.13 → validmind-2.5.2}/validmind/tests/model_validation/statsmodels/AutoARIMA.py +0 -0
- {validmind-2.4.13 → validmind-2.5.2}/validmind/tests/model_validation/statsmodels/BoxPierce.py +0 -0
- {validmind-2.4.13 → validmind-2.5.2}/validmind/tests/model_validation/statsmodels/CumulativePredictionProbabilities.py +0 -0
- {validmind-2.4.13 → validmind-2.5.2}/validmind/tests/model_validation/statsmodels/DurbinWatsonTest.py +0 -0
- {validmind-2.4.13 → validmind-2.5.2}/validmind/tests/model_validation/statsmodels/GINITable.py +0 -0
- {validmind-2.4.13 → validmind-2.5.2}/validmind/tests/model_validation/statsmodels/JarqueBera.py +0 -0
- {validmind-2.4.13 → validmind-2.5.2}/validmind/tests/model_validation/statsmodels/KolmogorovSmirnov.py +0 -0
- {validmind-2.4.13 → validmind-2.5.2}/validmind/tests/model_validation/statsmodels/LJungBox.py +0 -0
- {validmind-2.4.13 → validmind-2.5.2}/validmind/tests/model_validation/statsmodels/Lilliefors.py +0 -0
- {validmind-2.4.13 → validmind-2.5.2}/validmind/tests/model_validation/statsmodels/PredictionProbabilitiesHistogram.py +0 -0
- {validmind-2.4.13 → validmind-2.5.2}/validmind/tests/model_validation/statsmodels/RegressionCoeffsPlot.py +0 -0
- {validmind-2.4.13 → validmind-2.5.2}/validmind/tests/model_validation/statsmodels/RegressionFeatureSignificance.py +0 -0
- {validmind-2.4.13 → validmind-2.5.2}/validmind/tests/model_validation/statsmodels/RegressionModelForecastPlot.py +0 -0
- {validmind-2.4.13 → validmind-2.5.2}/validmind/tests/model_validation/statsmodels/RegressionModelForecastPlotLevels.py +0 -0
- {validmind-2.4.13 → validmind-2.5.2}/validmind/tests/model_validation/statsmodels/RegressionModelSensitivityPlot.py +0 -0
- {validmind-2.4.13 → validmind-2.5.2}/validmind/tests/model_validation/statsmodels/RegressionModelSummary.py +0 -0
- {validmind-2.4.13 → validmind-2.5.2}/validmind/tests/model_validation/statsmodels/RegressionModelsCoeffs.py +0 -0
- {validmind-2.4.13 → validmind-2.5.2}/validmind/tests/model_validation/statsmodels/RegressionPermutationFeatureImportance.py +0 -0
- {validmind-2.4.13 → validmind-2.5.2}/validmind/tests/model_validation/statsmodels/RunsTest.py +0 -0
- {validmind-2.4.13 → validmind-2.5.2}/validmind/tests/model_validation/statsmodels/ScorecardHistogram.py +0 -0
- {validmind-2.4.13 → validmind-2.5.2}/validmind/tests/model_validation/statsmodels/ShapiroWilk.py +0 -0
- {validmind-2.4.13 → validmind-2.5.2}/validmind/tests/model_validation/statsmodels/__init__.py +0 -0
- {validmind-2.4.13 → validmind-2.5.2}/validmind/tests/model_validation/statsmodels/statsutils.py +0 -0
- {validmind-2.4.13 → validmind-2.5.2}/validmind/tests/prompt_validation/Bias.py +0 -0
- {validmind-2.4.13 → validmind-2.5.2}/validmind/tests/prompt_validation/Clarity.py +0 -0
- {validmind-2.4.13 → validmind-2.5.2}/validmind/tests/prompt_validation/Conciseness.py +0 -0
- {validmind-2.4.13 → validmind-2.5.2}/validmind/tests/prompt_validation/Delimitation.py +0 -0
- {validmind-2.4.13 → validmind-2.5.2}/validmind/tests/prompt_validation/NegativeInstruction.py +0 -0
- {validmind-2.4.13 → validmind-2.5.2}/validmind/tests/prompt_validation/Robustness.py +0 -0
- {validmind-2.4.13 → validmind-2.5.2}/validmind/tests/prompt_validation/Specificity.py +0 -0
- {validmind-2.4.13 → validmind-2.5.2}/validmind/tests/prompt_validation/__init__.py +0 -0
- {validmind-2.4.13 → validmind-2.5.2}/validmind/tests/prompt_validation/ai_powered_test.py +0 -0
- {validmind-2.4.13 → validmind-2.5.2}/validmind/tests/run.py +0 -0
- {validmind-2.4.13 → validmind-2.5.2}/validmind/tests/test_providers.py +0 -0
- {validmind-2.4.13 → validmind-2.5.2}/validmind/tests/utils.py +0 -0
- {validmind-2.4.13 → validmind-2.5.2}/validmind/unit_metrics/__init__.py +0 -0
- {validmind-2.4.13 → validmind-2.5.2}/validmind/unit_metrics/classification/sklearn/Accuracy.py +0 -0
- {validmind-2.4.13 → validmind-2.5.2}/validmind/unit_metrics/classification/sklearn/F1.py +0 -0
- {validmind-2.4.13 → validmind-2.5.2}/validmind/unit_metrics/classification/sklearn/Precision.py +0 -0
- {validmind-2.4.13 → validmind-2.5.2}/validmind/unit_metrics/classification/sklearn/Recall.py +0 -0
- {validmind-2.4.13 → validmind-2.5.2}/validmind/unit_metrics/composite.py +0 -0
- {validmind-2.4.13 → validmind-2.5.2}/validmind/unit_metrics/regression/GiniCoefficient.py +0 -0
- {validmind-2.4.13 → validmind-2.5.2}/validmind/unit_metrics/regression/HuberLoss.py +0 -0
- {validmind-2.4.13 → validmind-2.5.2}/validmind/unit_metrics/regression/KolmogorovSmirnovStatistic.py +0 -0
- {validmind-2.4.13 → validmind-2.5.2}/validmind/unit_metrics/regression/MeanAbsolutePercentageError.py +0 -0
- {validmind-2.4.13 → validmind-2.5.2}/validmind/unit_metrics/regression/MeanBiasDeviation.py +0 -0
- {validmind-2.4.13 → validmind-2.5.2}/validmind/unit_metrics/regression/QuantileLoss.py +0 -0
- {validmind-2.4.13 → validmind-2.5.2}/validmind/unit_metrics/regression/sklearn/AdjustedRSquaredScore.py +0 -0
- {validmind-2.4.13 → validmind-2.5.2}/validmind/unit_metrics/regression/sklearn/MeanAbsoluteError.py +0 -0
- {validmind-2.4.13 → validmind-2.5.2}/validmind/unit_metrics/regression/sklearn/MeanSquaredError.py +0 -0
- {validmind-2.4.13 → validmind-2.5.2}/validmind/unit_metrics/regression/sklearn/RSquaredScore.py +0 -0
- {validmind-2.4.13 → validmind-2.5.2}/validmind/unit_metrics/regression/sklearn/RootMeanSquaredError.py +0 -0
- {validmind-2.4.13 → validmind-2.5.2}/validmind/vm_models/__init__.py +0 -0
- {validmind-2.4.13 → validmind-2.5.2}/validmind/vm_models/dataset/__init__.py +0 -0
- {validmind-2.4.13 → validmind-2.5.2}/validmind/vm_models/dataset/utils.py +0 -0
- {validmind-2.4.13 → validmind-2.5.2}/validmind/vm_models/figure.py +0 -0
- {validmind-2.4.13 → validmind-2.5.2}/validmind/vm_models/input.py +0 -0
- {validmind-2.4.13 → validmind-2.5.2}/validmind/vm_models/model.py +0 -0
- {validmind-2.4.13 → validmind-2.5.2}/validmind/vm_models/test/metric.py +0 -0
- {validmind-2.4.13 → validmind-2.5.2}/validmind/vm_models/test/metric_result.py +0 -0
- {validmind-2.4.13 → validmind-2.5.2}/validmind/vm_models/test/output_template.py +0 -0
- {validmind-2.4.13 → validmind-2.5.2}/validmind/vm_models/test/result_summary.py +0 -0
- {validmind-2.4.13 → validmind-2.5.2}/validmind/vm_models/test/result_wrapper.py +0 -0
- {validmind-2.4.13 → validmind-2.5.2}/validmind/vm_models/test/test.py +0 -0
- {validmind-2.4.13 → validmind-2.5.2}/validmind/vm_models/test/threshold_test.py +0 -0
- {validmind-2.4.13 → validmind-2.5.2}/validmind/vm_models/test/threshold_test_result.py +0 -0
- {validmind-2.4.13 → validmind-2.5.2}/validmind/vm_models/test_context.py +0 -0
- {validmind-2.4.13 → validmind-2.5.2}/validmind/vm_models/test_suite/runner.py +0 -0
- {validmind-2.4.13 → validmind-2.5.2}/validmind/vm_models/test_suite/summary.py +0 -0
- {validmind-2.4.13 → validmind-2.5.2}/validmind/vm_models/test_suite/test.py +0 -0
- {validmind-2.4.13 → validmind-2.5.2}/validmind/vm_models/test_suite/test_suite.py +0 -0
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: validmind
|
3
|
-
Version: 2.4.13
|
3
|
+
Version: 2.5.2
|
4
4
|
Summary: ValidMind Developer Framework
|
5
5
|
License: Commercial License
|
6
6
|
Author: Andres Rodriguez
|
@@ -49,7 +49,7 @@ Requires-Dist: scorecardpy (>=0.1.9.6,<0.2.0.0)
|
|
49
49
|
Requires-Dist: seaborn
|
50
50
|
Requires-Dist: sentencepiece (>=0.2.0,<0.3.0) ; extra == "all" or extra == "huggingface" or extra == "llm"
|
51
51
|
Requires-Dist: sentry-sdk (>=1.24.0,<2.0.0)
|
52
|
-
Requires-Dist: shap (
|
52
|
+
Requires-Dist: shap (==0.44.1)
|
53
53
|
Requires-Dist: statsmodels
|
54
54
|
Requires-Dist: tabulate (>=0.8.9,<0.9.0)
|
55
55
|
Requires-Dist: textblob (>=0.18.0.post0,<0.19.0)
|
@@ -10,7 +10,7 @@ description = "ValidMind Developer Framework"
|
|
10
10
|
license = "Commercial License"
|
11
11
|
name = "validmind"
|
12
12
|
readme = "README.pypi.md"
|
13
|
-
version = "2.4.13"
|
13
|
+
version = "2.5.2"
|
14
14
|
|
15
15
|
[tool.poetry.dependencies]
|
16
16
|
python = ">=3.8.1,<3.12"
|
@@ -47,7 +47,7 @@ scorecardpy = "^0.1.9.6"
|
|
47
47
|
seaborn = "*"
|
48
48
|
sentencepiece = {version = "^0.2.0", optional = true}
|
49
49
|
sentry-sdk = "^1.24.0"
|
50
|
-
shap = "
|
50
|
+
shap = "0.44.1"
|
51
51
|
statsmodels = "*"
|
52
52
|
tabulate = "^0.8.9"
|
53
53
|
textblob = "^0.18.0.post0"
|
@@ -0,0 +1 @@
|
|
1
|
+
__version__ = "2.5.2"
|
@@ -113,6 +113,10 @@ TestID = Literal[
|
|
113
113
|
"validmind.model_validation.statsmodels.GINITable",
|
114
114
|
"validmind.model_validation.statsmodels.RegressionModelForecastPlot",
|
115
115
|
"validmind.model_validation.statsmodels.DurbinWatsonTest",
|
116
|
+
"validmind.ongoing_monitoring.PredictionCorrelation",
|
117
|
+
"validmind.ongoing_monitoring.PredictionAcrossEachFeature",
|
118
|
+
"validmind.ongoing_monitoring.FeatureDrift",
|
119
|
+
"validmind.ongoing_monitoring.TargetPredictionDistributionPlot",
|
116
120
|
"validmind.data_validation.MissingValuesRisk",
|
117
121
|
"validmind.data_validation.IQROutliersTable",
|
118
122
|
"validmind.data_validation.BivariateFeaturesBarPlots",
|
@@ -4,7 +4,7 @@
|
|
4
4
|
|
5
5
|
from dataclasses import dataclass
|
6
6
|
|
7
|
-
|
7
|
+
import numpy as np
|
8
8
|
from sklearn.metrics import classification_report, roc_auc_score
|
9
9
|
from sklearn.preprocessing import LabelBinarizer
|
10
10
|
|
@@ -71,7 +71,7 @@ class ClassifierPerformance(Metric):
|
|
71
71
|
When building a multi-class summary we need to calculate weighted average,
|
72
72
|
macro average and per class metrics.
|
73
73
|
"""
|
74
|
-
classes = {str(i) for i in unique(self.inputs.dataset.y)}
|
74
|
+
classes = {str(i) for i in np.unique(self.inputs.dataset.y)}
|
75
75
|
pr_f1_table = [
|
76
76
|
{
|
77
77
|
"Class": class_name,
|
@@ -126,9 +126,18 @@ class ClassifierPerformance(Metric):
|
|
126
126
|
output_dict=True,
|
127
127
|
zero_division=0,
|
128
128
|
)
|
129
|
-
|
130
|
-
|
131
|
-
|
132
|
-
)
|
129
|
+
|
130
|
+
y_true = self.inputs.dataset.y
|
131
|
+
|
132
|
+
if len(np.unique(y_true)) > 2:
|
133
|
+
y_pred = self.inputs.dataset.y_pred(self.inputs.model)
|
134
|
+
y_true = y_true.astype(y_pred.dtype)
|
135
|
+
roc_auc = self.multiclass_roc_auc_score(y_true, y_pred)
|
136
|
+
else:
|
137
|
+
y_prob = self.inputs.dataset.y_prob(self.inputs.model)
|
138
|
+
y_true = y_true.astype(y_prob.dtype).flatten()
|
139
|
+
roc_auc = roc_auc_score(y_true, y_prob)
|
140
|
+
|
141
|
+
report["roc_auc"] = roc_auc
|
133
142
|
|
134
143
|
return self.cache_results(report)
|
{validmind-2.4.13 → validmind-2.5.2}/validmind/tests/model_validation/sklearn/ClusterPerformance.py
RENAMED
@@ -57,7 +57,7 @@ class ClusterPerformance(Metric):
|
|
57
57
|
"model_performance",
|
58
58
|
]
|
59
59
|
|
60
|
-
def
|
60
|
+
def cluster_performance_metrics(
|
61
61
|
self, y_true_train, y_pred_train, y_true_test, y_pred_test, samples, metric_info
|
62
62
|
):
|
63
63
|
y_true_train = y_true_train.astype(y_pred_train.dtype).flatten()
|
@@ -107,7 +107,7 @@ class ClusterPerformance(Metric):
|
|
107
107
|
y_true_test = y_true_test.astype(class_pred_test.dtype)
|
108
108
|
|
109
109
|
samples = ["train", "test"]
|
110
|
-
results = self.
|
110
|
+
results = self.cluster_performance_metrics(
|
111
111
|
y_true_train,
|
112
112
|
class_pred_train,
|
113
113
|
y_true_test,
|
{validmind-2.4.13 → validmind-2.5.2}/validmind/tests/model_validation/sklearn/MinimumROCAUCScore.py
RENAMED
@@ -5,6 +5,7 @@
|
|
5
5
|
from dataclasses import dataclass
|
6
6
|
from typing import List
|
7
7
|
|
8
|
+
import numpy as np
|
8
9
|
import pandas as pd
|
9
10
|
from sklearn import metrics, preprocessing
|
10
11
|
|
@@ -99,9 +100,15 @@ class MinimumROCAUCScore(ThresholdTest):
|
|
99
100
|
|
100
101
|
def run(self):
|
101
102
|
y_true = self.inputs.dataset.y
|
102
|
-
|
103
|
-
|
104
|
-
|
103
|
+
|
104
|
+
if len(np.unique(y_true)) > 2:
|
105
|
+
class_pred = self.inputs.dataset.y_pred(self.inputs.model)
|
106
|
+
y_true = y_true.astype(class_pred.dtype)
|
107
|
+
roc_auc = self.multiclass_roc_auc_score(y_true, class_pred)
|
108
|
+
else:
|
109
|
+
y_prob = self.inputs.dataset.y_prob(self.inputs.model)
|
110
|
+
y_true = y_true.astype(y_prob.dtype).flatten()
|
111
|
+
roc_auc = metrics.roc_auc_score(y_true, y_prob)
|
105
112
|
|
106
113
|
passed = roc_auc > self.params["min_threshold"]
|
107
114
|
results = [
|
@@ -0,0 +1,395 @@
|
|
1
|
+
# Copyright © 2023-2024 ValidMind Inc. All rights reserved.
|
2
|
+
# See the LICENSE file in the root of this repository for details.
|
3
|
+
# SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
|
4
|
+
|
5
|
+
from dataclasses import dataclass
|
6
|
+
from typing import List
|
7
|
+
|
8
|
+
import matplotlib.pyplot as plt
|
9
|
+
import numpy as np
|
10
|
+
import pandas as pd
|
11
|
+
import seaborn as sns
|
12
|
+
from sklearn import metrics
|
13
|
+
|
14
|
+
from validmind.logging import get_logger
|
15
|
+
from validmind.vm_models import (
|
16
|
+
Figure,
|
17
|
+
ResultSummary,
|
18
|
+
ResultTable,
|
19
|
+
ResultTableMetadata,
|
20
|
+
ThresholdTest,
|
21
|
+
ThresholdTestResult,
|
22
|
+
VMDataset,
|
23
|
+
VMModel,
|
24
|
+
)
|
25
|
+
|
26
|
+
# Module-level logger for this test module.
logger = get_logger(__name__)

# Default cut-off: the train/test performance gap above which a slice is flagged.
DEFAULT_THRESHOLD = 0.04
# Registry of supported performance metrics, keyed by lowercase metric name.
# Each entry carries:
#   "function"          - the sklearn.metrics callable, invoked as
#                         function(y_true, y_pred_or_prob)
#   "is_classification" - True for classification metrics, False for regression
#   "is_lower_better"   - True when smaller values mean better performance
#                         (error metrics); decides the sign of the gap
PERFORMANCE_METRICS = {
    "accuracy": {
        "function": metrics.accuracy_score,
        "is_classification": True,
        "is_lower_better": False,
    },
    "auc": {
        "function": metrics.roc_auc_score,
        "is_classification": True,
        "is_lower_better": False,
    },
    "f1": {
        "function": metrics.f1_score,
        "is_classification": True,
        "is_lower_better": False,
    },
    "precision": {
        "function": metrics.precision_score,
        "is_classification": True,
        "is_lower_better": False,
    },
    "recall": {
        "function": metrics.recall_score,
        "is_classification": True,
        "is_lower_better": False,
    },
    "mse": {
        "function": metrics.mean_squared_error,
        "is_classification": False,
        "is_lower_better": True,
    },
    "mae": {
        "function": metrics.mean_absolute_error,
        "is_classification": False,
        "is_lower_better": True,
    },
    "r2": {
        "function": metrics.r2_score,
        "is_classification": False,
        "is_lower_better": False,
    },
    "mape": {
        "function": metrics.mean_absolute_percentage_error,
        "is_classification": False,
        "is_lower_better": True,
    },
}
|
76
|
+
|
77
|
+
|
78
|
+
def _prepare_results(
    results_train: dict, results_test: dict, metric: str
) -> pd.DataFrame:
    """Combine per-slice training and test scores into a single table.

    Args:
        results_train: Column-oriented dict of training results; must contain
            the keys "shape" and `metric`.
        results_test: Column-oriented dict of test results; must contain the
            key `metric`.
        metric: Name of the metric, a key of PERFORMANCE_METRICS.

    Returns:
        A DataFrame built from the training results where "shape" is renamed
        to "training records" and `metric` to "training <metric>", with two
        extra columns: "test <metric>" and "gap".
    """
    train_col = f"training {metric}"
    test_col = f"test {metric}"

    combined = pd.DataFrame(results_train).rename(
        columns={"shape": "training records", metric: train_col}
    )
    combined[test_col] = pd.DataFrame(results_test)[metric]

    # Orient the subtraction so that a positive gap always means the test
    # score is worse than the training score, whatever the metric direction.
    if PERFORMANCE_METRICS[metric]["is_lower_better"]:
        minuend, subtrahend = combined[test_col], combined[train_col]
    else:
        minuend, subtrahend = combined[train_col], combined[test_col]
    combined["gap"] = minuend - subtrahend

    return combined
|
97
|
+
|
98
|
+
|
99
|
+
def _compute_metrics(
    results: dict,
    region: str,
    df_region: pd.DataFrame,
    target_column: str,
    prob_column: str,
    pred_column: str,
    feature_column: str,
    metric: str,
    is_classification: bool,
) -> None:
    """Score one slice of data and append the outcome to `results` in place.

    Args:
        results: Accumulator with list values under the keys "slice", "shape",
            "feature" and `metric`; one entry is appended to each.
        region: Identifier of the slice; recorded as `str(region)`.
        df_region: Rows belonging to the slice.
        target_column: Column holding the true values.
        prob_column: Column holding predicted probabilities (read for AUC only).
        pred_column: Column holding predictions (read for every other metric).
        feature_column: Name of the feature the slice was derived from.
        metric: Name of the metric, a key of PERFORMANCE_METRICS.
        is_classification: Whether the model is treated as a classifier.

    A score of 0 is recorded when the slice has no rows, or when AUC is
    requested and the slice contains a single target class.
    """
    results["slice"].append(str(region))
    results["shape"].append(df_region.shape[0])
    results["feature"].append(feature_column)

    # Nothing to score in an empty slice.
    if df_region.empty:
        results[metric].append(0)
        return

    scorer = PERFORMANCE_METRICS[metric]["function"]
    y_true = df_region[target_column].values

    # AUC is the only metric scored on probabilities instead of predictions.
    wants_probabilities = is_classification and metric == "auc"

    # AUC cannot be computed when only one class is present in the slice.
    if wants_probabilities and len(np.unique(y_true)) == 1:
        results[metric].append(0)
        return

    score_column = prob_column if wants_probabilities else pred_column
    results[metric].append(scorer(y_true, df_region[score_column].values))
|
140
|
+
|
141
|
+
|
142
|
+
def _plot_overfit_regions(
    df: pd.DataFrame, feature_column: str, threshold: float, metric: str
) -> plt.Figure:
    """Draw a bar chart of the per-slice gap with the cut-off threshold line.

    Args:
        df: Table with at least the columns "slice" (x axis) and "gap" (y axis).
        feature_column: Feature name, shown in the plot title.
        threshold: Cut-off value drawn as a dashed red horizontal line.
        metric: Metric name, upper-cased for the y axis label.

    Returns:
        The matplotlib figure. It is closed before being returned so that it
        is not rendered implicitly by interactive backends.
    """
    fig, ax = plt.subplots()
    sns.barplot(data=df, x="slice", y="gap", ax=ax)

    # Draw threshold line
    ax.axhline(
        y=threshold,
        color="red",
        linestyle="--",
        linewidth=1,
        label=f"Cut-Off Threshold: {threshold}",
    )

    ax.tick_params(axis="x", rotation=90, labelsize=20)
    ax.tick_params(axis="y", labelsize=20)

    ax.set_ylabel(f"{metric.upper()} Gap", weight="bold", fontsize=18)
    ax.set_xlabel("Slice/Segments", weight="bold", fontsize=18)
    ax.set_title(
        f"Overfit regions in feature column: {feature_column}",
        weight="bold",
        fontsize=20,
        wrap=True,
    )

    # The threshold line is already a labelled artist on the axes, so it is
    # picked up here. Re-appending it and then slicing the handles (as was done
    # before) left `handles` and `labels` with different lengths.
    handles, labels = ax.get_legend_handles_labels()
    ax.legend(
        handles=handles,
        labels=labels,
        loc="upper center",
        bbox_to_anchor=(0.5, 0.1),
        ncol=max(len(handles), 1),
    )

    # Close only the figure created here; closing "all" would also discard
    # unrelated figures the caller may still have open.
    plt.close(fig)

    return fig
|
184
|
+
|
185
|
+
|
186
|
+
# TODO: make this a functional test instead of class-based when appropriate
|
187
|
+
# simply have to remove the class and rename this func to OverfitDiagnosis
|
188
|
+
def overfit_diagnosis(  # noqa: C901
    model: VMModel,
    datasets: List[VMDataset],
    metric: str = None,
    cut_off_threshold: float = DEFAULT_THRESHOLD,
):
    """Identify overfit regions in a model's predictions.

    This test compares the model's performance on training versus test data, grouped by
    feature columns. It calculates the difference between the training and test performance
    for each group and identifies regions where the difference exceeds a specified threshold.

    This test works for both classification and regression models and with a variety of
    performance metrics. By default, it uses the AUC metric for classification models and
    the MSE metric for regression models. The threshold for identifying overfit regions
    defaults to 0.04 but should be adjusted based on the specific use case.

    ## Inputs
    - `model` (VMModel): The ValidMind model object to evaluate.
    - `datasets` (List[VMDataset]): A list of two VMDataset objects where the first dataset
        is the training data and the second dataset is the test data.

    ## Parameters
    - `metric` (str, optional): The performance metric to use for evaluation. Choose from:
        'accuracy', 'auc', 'f1', 'precision', 'recall', 'mse', 'mae', 'r2', 'mape'.
        Defaults to 'auc' for classification models and 'mse' for regression models.
    - `cut_off_threshold` (float, optional): The threshold for identifying overfit regions.
        Defaults to 0.04.

    ## Returns
    A tuple whose first element is `{"Overfit Diagnosis": rows}`, where `rows` is a list
    with one dict per slice whose gap exceeds the threshold, followed by one `Figure`
    per feature column.

    ## Raises
    - `ValueError`: If `metric` is not a supported metric name, or if a regression metric
        is requested for a classification model (or vice versa).
    """

    # Determine if it's a classification or regression model
    is_classification = bool(datasets[0].probability_column(model))

    # Set default metric if not provided
    if metric is None:
        metric = "auc" if is_classification else "mse"
        logger.info(
            f"Using default {'classification' if is_classification else 'regression'} metric: {metric}"
        )

    # Compare by value: whether two equal floats share an object identity is an
    # interpreter detail, so an id()-based check is not a reliable test.
    if cut_off_threshold == DEFAULT_THRESHOLD:
        logger.info(f"Using default cut-off threshold of {DEFAULT_THRESHOLD}")

    metric = metric.lower()
    try:
        _metric = PERFORMANCE_METRICS[metric]
    except KeyError:
        # `from None` keeps the internal KeyError out of the user-facing traceback.
        raise ValueError(
            f"Invalid metric. Choose from: {', '.join(PERFORMANCE_METRICS.keys())}"
        ) from None

    if is_classification and not _metric["is_classification"]:
        raise ValueError(f"Cannot use regression metric ({metric}) for classification.")
    elif not is_classification and _metric["is_classification"]:
        raise ValueError(f"Cannot use classification metric ({metric}) for regression.")

    # NOTE(review): columns are added below to whatever objects `.df` returns;
    # confirm that `.df` yields a copy and not the dataset's own frame.
    train_df = datasets[0].df
    test_df = datasets[1].df

    pred_column = f"{datasets[0].target_column}_pred"
    prob_column = f"{datasets[0].target_column}_prob"

    train_df[pred_column] = datasets[0].y_pred(model)
    test_df[pred_column] = datasets[1].y_pred(model)

    if is_classification:
        train_df[prob_column] = datasets[0].y_prob(model)
        test_df[prob_column] = datasets[1].y_prob(model)

    test_results = []
    test_figures = []
    results_headers = ["slice", "shape", "feature", metric]

    for feature_column in datasets[0].feature_columns:
        # 10 equal-width bins by default; one bin per distinct value for
        # features flagged as categorical.
        bins = 10
        if feature_column in datasets[0].feature_columns_categorical:
            bins = len(train_df[feature_column].unique())
        train_df["bin"] = pd.cut(train_df[feature_column], bins=bins)

        results_train = {k: [] for k in results_headers}
        results_test = {k: [] for k in results_headers}

        # observed=False is spelled out so that empty bins are still visited
        # (they are scored as 0 by _compute_metrics) regardless of the pandas
        # default for categorical groupers.
        for region, df_region in train_df.groupby("bin", observed=False):
            _compute_metrics(
                results=results_train,
                region=region,
                df_region=df_region,
                feature_column=feature_column,
                target_column=datasets[0].target_column,
                prob_column=prob_column,
                pred_column=pred_column,
                metric=metric,
                is_classification=is_classification,
            )
            # Select the test rows falling in the same (left, right] interval
            # that was derived from the training data.
            df_test_region = test_df[
                (test_df[feature_column] > region.left)
                & (test_df[feature_column] <= region.right)
            ]
            _compute_metrics(
                results=results_test,
                region=region,
                df_region=df_test_region,
                feature_column=feature_column,
                target_column=datasets[1].target_column,
                prob_column=prob_column,
                pred_column=pred_column,
                metric=metric,
                is_classification=is_classification,
            )

        results = _prepare_results(results_train, results_test, metric)

        fig = _plot_overfit_regions(results, feature_column, cut_off_threshold, metric)
        test_figures.append(
            Figure(
                key=f"overfit_diagnosis:{metric}:{feature_column}",
                figure=fig,
                metadata={
                    "metric": metric,
                    "cut_off_threshold": cut_off_threshold,
                    "feature": feature_column,
                },
            )
        )

        # Only slices whose gap exceeds the threshold are reported.
        for _, row in results[results["gap"] > cut_off_threshold].iterrows():
            test_results.append(
                {
                    "Feature": feature_column,
                    "Slice": row["slice"],
                    "Number of Records": row["training records"],
                    f"Training {metric.upper()}": row[f"training {metric}"],
                    f"Test {metric.upper()}": row[f"test {metric}"],
                    "Gap": row["gap"],
                }
            )

    return {"Overfit Diagnosis": test_results}, *test_figures
|
326
|
+
|
327
|
+
|
328
|
+
@dataclass
class OverfitDiagnosis(ThresholdTest):
    """Identify overfit regions in a model's predictions.

    This test compares the model's performance on training versus test data, grouped by
    feature columns. It calculates the difference between the training and test performance
    for each group and identifies regions where the difference exceeds a specified threshold.

    This test works for both classification and regression models and with a variety of
    performance metrics. By default, it uses the AUC metric for classification models and
    the MSE metric for regression models. The threshold for identifying overfit regions
    defaults to 0.04 but should be adjusted based on the specific use case.

    ## Inputs
    - `model` (VMModel): The ValidMind model object to evaluate.
    - `datasets` (List[VMDataset]): A list of two VMDataset objects where the first dataset
        is the training data and the second dataset is the test data.

    ## Parameters
    - `metric` (str, optional): The performance metric to use for evaluation. Choose from:
        'accuracy', 'auc', 'f1', 'precision', 'recall', 'mse', 'mae', 'r2', 'mape'.
        Defaults to 'auc' for classification models and 'mse' for regression models.
    - `cut_off_threshold` (float, optional): The threshold for identifying overfit regions.
        Defaults to 0.04.
    """

    required_inputs = ["model", "datasets"]
    default_params = {"metric": None, "cut_off_threshold": DEFAULT_THRESHOLD}
    tasks = ["classification", "regression"]
    tags = [
        "sklearn",
        "binary_classification",
        "multiclass_classification",
        "linear_regression",
        "model_diagnosis",
    ]

    def run(self):
        # Delegate the computation to the functional implementation.
        outcome = overfit_diagnosis(
            self.inputs.model,
            self.inputs.datasets,
            metric=self.params["metric"],
            cut_off_threshold=self.params["cut_off_threshold"],
        )
        overfit_rows = outcome[0]["Overfit Diagnosis"]

        # Every reported row is a slice that exceeded the threshold, so each
        # one becomes a failed result.
        # NOTE(review): test_name is the raw "metric" param, which is None when
        # the default metric is used - confirm that is intended.
        failures = []
        for row in overfit_rows:
            failures.append(
                ThresholdTestResult(
                    test_name=self.params["metric"],
                    column=row["Feature"],
                    passed=False,
                    values=dict(row),
                )
            )

        # The test passes only when no slice was flagged.
        return self.cache_results(
            test_results_list=failures,
            passed=(not overfit_rows),
            figures=outcome[1:],
        )

    def summary(self, results, _):
        # A single table listing the values of every flagged slice.
        table = ResultTable(
            data=[result.values for result in results],
            metadata=ResultTableMetadata(title="Overfit Diagnosis"),
        )
        return ResultSummary(results=[table])
|
@@ -65,7 +65,7 @@ class PrecisionRecallCurve(Metric):
|
|
65
65
|
raise SkipTestError("Skipping PrecisionRecallCurve for Foundation models")
|
66
66
|
|
67
67
|
y_true = self.inputs.dataset.y
|
68
|
-
y_pred = self.inputs.
|
68
|
+
y_pred = self.inputs.dataset.y_prob(self.inputs.model)
|
69
69
|
|
70
70
|
# PR curve is only supported for binary classification
|
71
71
|
if len(np.unique(y_true)) > 2:
|