validmind 2.7.5__tar.gz → 2.7.6__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {validmind-2.7.5 → validmind-2.7.6}/PKG-INFO +2 -2
- {validmind-2.7.5 → validmind-2.7.6}/pyproject.toml +2 -2
- validmind-2.7.6/validmind/__version__.py +1 -0
- {validmind-2.7.5 → validmind-2.7.6}/validmind/datasets/credit_risk/lending_club.py +354 -88
- {validmind-2.7.5 → validmind-2.7.6}/validmind/tests/data_validation/HighPearsonCorrelation.py +12 -2
- validmind-2.7.6/validmind/tests/ongoing_monitoring/CalibrationCurveDrift.py +218 -0
- validmind-2.7.6/validmind/tests/ongoing_monitoring/ClassDiscriminationDrift.py +153 -0
- validmind-2.7.6/validmind/tests/ongoing_monitoring/ClassImbalanceDrift.py +144 -0
- validmind-2.7.6/validmind/tests/ongoing_monitoring/ClassificationAccuracyDrift.py +146 -0
- validmind-2.7.6/validmind/tests/ongoing_monitoring/ConfusionMatrixDrift.py +191 -0
- validmind-2.7.6/validmind/tests/ongoing_monitoring/CumulativePredictionProbabilitiesDrift.py +176 -0
- validmind-2.7.6/validmind/tests/ongoing_monitoring/FeatureDrift.py +185 -0
- {validmind-2.7.5 → validmind-2.7.6}/validmind/tests/ongoing_monitoring/PredictionAcrossEachFeature.py +18 -23
- validmind-2.7.6/validmind/tests/ongoing_monitoring/PredictionCorrelation.py +142 -0
- validmind-2.7.6/validmind/tests/ongoing_monitoring/PredictionProbabilitiesHistogramDrift.py +202 -0
- validmind-2.7.6/validmind/tests/ongoing_monitoring/PredictionQuantilesAcrossFeatures.py +97 -0
- validmind-2.7.6/validmind/tests/ongoing_monitoring/ROCCurveDrift.py +149 -0
- validmind-2.7.6/validmind/tests/ongoing_monitoring/ScoreBandsDrift.py +210 -0
- validmind-2.7.6/validmind/tests/ongoing_monitoring/ScorecardHistogramDrift.py +207 -0
- validmind-2.7.6/validmind/tests/ongoing_monitoring/TargetPredictionDistributionPlot.py +144 -0
- {validmind-2.7.5 → validmind-2.7.6}/validmind/vm_models/dataset/dataset.py +0 -4
- validmind-2.7.5/validmind/__version__.py +0 -1
- validmind-2.7.5/validmind/tests/ongoing_monitoring/FeatureDrift.py +0 -186
- validmind-2.7.5/validmind/tests/ongoing_monitoring/PredictionCorrelation.py +0 -101
- validmind-2.7.5/validmind/tests/ongoing_monitoring/TargetPredictionDistributionPlot.py +0 -67
- {validmind-2.7.5 → validmind-2.7.6}/LICENSE +0 -0
- {validmind-2.7.5 → validmind-2.7.6}/README.pypi.md +0 -0
- {validmind-2.7.5 → validmind-2.7.6}/validmind/__init__.py +0 -0
- {validmind-2.7.5 → validmind-2.7.6}/validmind/ai/test_descriptions.py +0 -0
- {validmind-2.7.5 → validmind-2.7.6}/validmind/ai/test_result_description/config.yaml +0 -0
- {validmind-2.7.5 → validmind-2.7.6}/validmind/ai/test_result_description/context.py +0 -0
- {validmind-2.7.5 → validmind-2.7.6}/validmind/ai/test_result_description/image_processing.py +0 -0
- {validmind-2.7.5 → validmind-2.7.6}/validmind/ai/test_result_description/system.jinja +0 -0
- {validmind-2.7.5 → validmind-2.7.6}/validmind/ai/test_result_description/user.jinja +0 -0
- {validmind-2.7.5 → validmind-2.7.6}/validmind/ai/utils.py +0 -0
- {validmind-2.7.5 → validmind-2.7.6}/validmind/api_client.py +0 -0
- {validmind-2.7.5 → validmind-2.7.6}/validmind/client.py +0 -0
- {validmind-2.7.5 → validmind-2.7.6}/validmind/client_config.py +0 -0
- {validmind-2.7.5 → validmind-2.7.6}/validmind/datasets/__init__.py +0 -0
- {validmind-2.7.5 → validmind-2.7.6}/validmind/datasets/classification/__init__.py +0 -0
- {validmind-2.7.5 → validmind-2.7.6}/validmind/datasets/classification/customer_churn.py +0 -0
- {validmind-2.7.5 → validmind-2.7.6}/validmind/datasets/classification/datasets/bank_customer_churn.csv +0 -0
- {validmind-2.7.5 → validmind-2.7.6}/validmind/datasets/classification/datasets/taiwan_credit.csv +0 -0
- {validmind-2.7.5 → validmind-2.7.6}/validmind/datasets/classification/taiwan_credit.py +0 -0
- {validmind-2.7.5 → validmind-2.7.6}/validmind/datasets/cluster/digits.py +0 -0
- {validmind-2.7.5 → validmind-2.7.6}/validmind/datasets/credit_risk/__init__.py +0 -0
- {validmind-2.7.5 → validmind-2.7.6}/validmind/datasets/credit_risk/datasets/lending_club_biased.csv.gz +0 -0
- {validmind-2.7.5 → validmind-2.7.6}/validmind/datasets/credit_risk/datasets/lending_club_loan_data_2007_2014_clean.csv.gz +0 -0
- {validmind-2.7.5 → validmind-2.7.6}/validmind/datasets/credit_risk/lending_club_bias.py +0 -0
- {validmind-2.7.5 → validmind-2.7.6}/validmind/datasets/llm/rag/__init__.py +0 -0
- {validmind-2.7.5 → validmind-2.7.6}/validmind/datasets/llm/rag/datasets/rfp_existing_questions_client_1.csv +0 -0
- {validmind-2.7.5 → validmind-2.7.6}/validmind/datasets/llm/rag/datasets/rfp_existing_questions_client_2.csv +0 -0
- {validmind-2.7.5 → validmind-2.7.6}/validmind/datasets/llm/rag/datasets/rfp_existing_questions_client_3.csv +0 -0
- {validmind-2.7.5 → validmind-2.7.6}/validmind/datasets/llm/rag/datasets/rfp_existing_questions_client_4.csv +0 -0
- {validmind-2.7.5 → validmind-2.7.6}/validmind/datasets/llm/rag/datasets/rfp_existing_questions_client_5.csv +0 -0
- {validmind-2.7.5 → validmind-2.7.6}/validmind/datasets/llm/rag/rfp.py +0 -0
- {validmind-2.7.5 → validmind-2.7.6}/validmind/datasets/nlp/__init__.py +0 -0
- {validmind-2.7.5 → validmind-2.7.6}/validmind/datasets/nlp/cnn_dailymail.py +0 -0
- {validmind-2.7.5 → validmind-2.7.6}/validmind/datasets/nlp/datasets/Covid_19.csv +0 -0
- {validmind-2.7.5 → validmind-2.7.6}/validmind/datasets/nlp/datasets/cnn_dailymail_100_with_predictions.csv +0 -0
- {validmind-2.7.5 → validmind-2.7.6}/validmind/datasets/nlp/datasets/cnn_dailymail_500_with_predictions.csv +0 -0
- {validmind-2.7.5 → validmind-2.7.6}/validmind/datasets/nlp/datasets/sentiments_with_predictions.csv +0 -0
- {validmind-2.7.5 → validmind-2.7.6}/validmind/datasets/nlp/twitter_covid_19.py +0 -0
- {validmind-2.7.5 → validmind-2.7.6}/validmind/datasets/regression/__init__.py +0 -0
- {validmind-2.7.5 → validmind-2.7.6}/validmind/datasets/regression/california_housing.py +0 -0
- {validmind-2.7.5 → validmind-2.7.6}/validmind/datasets/regression/datasets/fred/CPIAUCSL.csv +0 -0
- {validmind-2.7.5 → validmind-2.7.6}/validmind/datasets/regression/datasets/fred/CSUSHPISA.csv +0 -0
- {validmind-2.7.5 → validmind-2.7.6}/validmind/datasets/regression/datasets/fred/DRSFRMACBS.csv +0 -0
- {validmind-2.7.5 → validmind-2.7.6}/validmind/datasets/regression/datasets/fred/FEDFUNDS.csv +0 -0
- {validmind-2.7.5 → validmind-2.7.6}/validmind/datasets/regression/datasets/fred/GDP.csv +0 -0
- {validmind-2.7.5 → validmind-2.7.6}/validmind/datasets/regression/datasets/fred/GDPC1.csv +0 -0
- {validmind-2.7.5 → validmind-2.7.6}/validmind/datasets/regression/datasets/fred/GS10.csv +0 -0
- {validmind-2.7.5 → validmind-2.7.6}/validmind/datasets/regression/datasets/fred/GS3.csv +0 -0
- {validmind-2.7.5 → validmind-2.7.6}/validmind/datasets/regression/datasets/fred/GS5.csv +0 -0
- {validmind-2.7.5 → validmind-2.7.6}/validmind/datasets/regression/datasets/fred/MORTGAGE30US.csv +0 -0
- {validmind-2.7.5 → validmind-2.7.6}/validmind/datasets/regression/datasets/fred/UNRATE.csv +0 -0
- {validmind-2.7.5 → validmind-2.7.6}/validmind/datasets/regression/datasets/fred_loan_rates.csv +0 -0
- {validmind-2.7.5 → validmind-2.7.6}/validmind/datasets/regression/datasets/fred_loan_rates_test_1.csv +0 -0
- {validmind-2.7.5 → validmind-2.7.6}/validmind/datasets/regression/datasets/fred_loan_rates_test_2.csv +0 -0
- {validmind-2.7.5 → validmind-2.7.6}/validmind/datasets/regression/datasets/fred_loan_rates_test_3.csv +0 -0
- {validmind-2.7.5 → validmind-2.7.6}/validmind/datasets/regression/datasets/fred_loan_rates_test_4.csv +0 -0
- {validmind-2.7.5 → validmind-2.7.6}/validmind/datasets/regression/datasets/fred_loan_rates_test_5.csv +0 -0
- {validmind-2.7.5 → validmind-2.7.6}/validmind/datasets/regression/datasets/leanding_club_loan_rates.csv +0 -0
- {validmind-2.7.5 → validmind-2.7.6}/validmind/datasets/regression/fred.py +0 -0
- {validmind-2.7.5 → validmind-2.7.6}/validmind/datasets/regression/fred_timeseries.py +0 -0
- {validmind-2.7.5 → validmind-2.7.6}/validmind/datasets/regression/lending_club.py +0 -0
- {validmind-2.7.5 → validmind-2.7.6}/validmind/datasets/regression/models/fred_loan_rates_model_1.pkl +0 -0
- {validmind-2.7.5 → validmind-2.7.6}/validmind/datasets/regression/models/fred_loan_rates_model_2.pkl +0 -0
- {validmind-2.7.5 → validmind-2.7.6}/validmind/datasets/regression/models/fred_loan_rates_model_3.pkl +0 -0
- {validmind-2.7.5 → validmind-2.7.6}/validmind/datasets/regression/models/fred_loan_rates_model_4.pkl +0 -0
- {validmind-2.7.5 → validmind-2.7.6}/validmind/datasets/regression/models/fred_loan_rates_model_5.pkl +0 -0
- {validmind-2.7.5 → validmind-2.7.6}/validmind/errors.py +0 -0
- {validmind-2.7.5 → validmind-2.7.6}/validmind/html_templates/__init__.py +0 -0
- {validmind-2.7.5 → validmind-2.7.6}/validmind/html_templates/content_blocks.py +0 -0
- {validmind-2.7.5 → validmind-2.7.6}/validmind/input_registry.py +0 -0
- {validmind-2.7.5 → validmind-2.7.6}/validmind/logging.py +0 -0
- {validmind-2.7.5 → validmind-2.7.6}/validmind/models/__init__.py +0 -0
- {validmind-2.7.5 → validmind-2.7.6}/validmind/models/foundation.py +0 -0
- {validmind-2.7.5 → validmind-2.7.6}/validmind/models/function.py +0 -0
- {validmind-2.7.5 → validmind-2.7.6}/validmind/models/huggingface.py +0 -0
- {validmind-2.7.5 → validmind-2.7.6}/validmind/models/metadata.py +0 -0
- {validmind-2.7.5 → validmind-2.7.6}/validmind/models/pipeline.py +0 -0
- {validmind-2.7.5 → validmind-2.7.6}/validmind/models/pytorch.py +0 -0
- {validmind-2.7.5 → validmind-2.7.6}/validmind/models/r_model.py +0 -0
- {validmind-2.7.5 → validmind-2.7.6}/validmind/models/sklearn.py +0 -0
- {validmind-2.7.5 → validmind-2.7.6}/validmind/template.py +0 -0
- {validmind-2.7.5 → validmind-2.7.6}/validmind/test_suites/__init__.py +0 -0
- {validmind-2.7.5 → validmind-2.7.6}/validmind/test_suites/classifier.py +0 -0
- {validmind-2.7.5 → validmind-2.7.6}/validmind/test_suites/cluster.py +0 -0
- {validmind-2.7.5 → validmind-2.7.6}/validmind/test_suites/embeddings.py +0 -0
- {validmind-2.7.5 → validmind-2.7.6}/validmind/test_suites/llm.py +0 -0
- {validmind-2.7.5 → validmind-2.7.6}/validmind/test_suites/nlp.py +0 -0
- {validmind-2.7.5 → validmind-2.7.6}/validmind/test_suites/parameters_optimization.py +0 -0
- {validmind-2.7.5 → validmind-2.7.6}/validmind/test_suites/regression.py +0 -0
- {validmind-2.7.5 → validmind-2.7.6}/validmind/test_suites/statsmodels_timeseries.py +0 -0
- {validmind-2.7.5 → validmind-2.7.6}/validmind/test_suites/summarization.py +0 -0
- {validmind-2.7.5 → validmind-2.7.6}/validmind/test_suites/tabular_datasets.py +0 -0
- {validmind-2.7.5 → validmind-2.7.6}/validmind/test_suites/text_data.py +0 -0
- {validmind-2.7.5 → validmind-2.7.6}/validmind/test_suites/time_series.py +0 -0
- {validmind-2.7.5 → validmind-2.7.6}/validmind/tests/__init__.py +0 -0
- {validmind-2.7.5 → validmind-2.7.6}/validmind/tests/__types__.py +0 -0
- {validmind-2.7.5 → validmind-2.7.6}/validmind/tests/_store.py +0 -0
- {validmind-2.7.5 → validmind-2.7.6}/validmind/tests/comparison.py +0 -0
- {validmind-2.7.5 → validmind-2.7.6}/validmind/tests/data_validation/ACFandPACFPlot.py +0 -0
- {validmind-2.7.5 → validmind-2.7.6}/validmind/tests/data_validation/ADF.py +0 -0
- {validmind-2.7.5 → validmind-2.7.6}/validmind/tests/data_validation/AutoAR.py +0 -0
- {validmind-2.7.5 → validmind-2.7.6}/validmind/tests/data_validation/AutoMA.py +0 -0
- {validmind-2.7.5 → validmind-2.7.6}/validmind/tests/data_validation/AutoStationarity.py +0 -0
- {validmind-2.7.5 → validmind-2.7.6}/validmind/tests/data_validation/BivariateScatterPlots.py +0 -0
- {validmind-2.7.5 → validmind-2.7.6}/validmind/tests/data_validation/BoxPierce.py +0 -0
- {validmind-2.7.5 → validmind-2.7.6}/validmind/tests/data_validation/ChiSquaredFeaturesTable.py +0 -0
- {validmind-2.7.5 → validmind-2.7.6}/validmind/tests/data_validation/ClassImbalance.py +0 -0
- {validmind-2.7.5 → validmind-2.7.6}/validmind/tests/data_validation/DatasetDescription.py +0 -0
- {validmind-2.7.5 → validmind-2.7.6}/validmind/tests/data_validation/DatasetSplit.py +0 -0
- {validmind-2.7.5 → validmind-2.7.6}/validmind/tests/data_validation/DescriptiveStatistics.py +0 -0
- {validmind-2.7.5 → validmind-2.7.6}/validmind/tests/data_validation/DickeyFullerGLS.py +0 -0
- {validmind-2.7.5 → validmind-2.7.6}/validmind/tests/data_validation/Duplicates.py +0 -0
- {validmind-2.7.5 → validmind-2.7.6}/validmind/tests/data_validation/EngleGrangerCoint.py +0 -0
- {validmind-2.7.5 → validmind-2.7.6}/validmind/tests/data_validation/FeatureTargetCorrelationPlot.py +0 -0
- {validmind-2.7.5 → validmind-2.7.6}/validmind/tests/data_validation/HighCardinality.py +0 -0
- {validmind-2.7.5 → validmind-2.7.6}/validmind/tests/data_validation/IQROutliersBarPlot.py +0 -0
- {validmind-2.7.5 → validmind-2.7.6}/validmind/tests/data_validation/IQROutliersTable.py +0 -0
- {validmind-2.7.5 → validmind-2.7.6}/validmind/tests/data_validation/IsolationForestOutliers.py +0 -0
- {validmind-2.7.5 → validmind-2.7.6}/validmind/tests/data_validation/JarqueBera.py +0 -0
- {validmind-2.7.5 → validmind-2.7.6}/validmind/tests/data_validation/KPSS.py +0 -0
- {validmind-2.7.5 → validmind-2.7.6}/validmind/tests/data_validation/LJungBox.py +0 -0
- {validmind-2.7.5 → validmind-2.7.6}/validmind/tests/data_validation/LaggedCorrelationHeatmap.py +0 -0
- {validmind-2.7.5 → validmind-2.7.6}/validmind/tests/data_validation/MissingValues.py +0 -0
- {validmind-2.7.5 → validmind-2.7.6}/validmind/tests/data_validation/MissingValuesBarPlot.py +0 -0
- {validmind-2.7.5 → validmind-2.7.6}/validmind/tests/data_validation/MutualInformation.py +0 -0
- {validmind-2.7.5 → validmind-2.7.6}/validmind/tests/data_validation/PearsonCorrelationMatrix.py +0 -0
- {validmind-2.7.5 → validmind-2.7.6}/validmind/tests/data_validation/PhillipsPerronArch.py +0 -0
- {validmind-2.7.5 → validmind-2.7.6}/validmind/tests/data_validation/ProtectedClassesCombination.py +0 -0
- {validmind-2.7.5 → validmind-2.7.6}/validmind/tests/data_validation/ProtectedClassesDescription.py +0 -0
- {validmind-2.7.5 → validmind-2.7.6}/validmind/tests/data_validation/ProtectedClassesDisparity.py +0 -0
- {validmind-2.7.5 → validmind-2.7.6}/validmind/tests/data_validation/ProtectedClassesThresholdOptimizer.py +0 -0
- {validmind-2.7.5 → validmind-2.7.6}/validmind/tests/data_validation/RollingStatsPlot.py +0 -0
- {validmind-2.7.5 → validmind-2.7.6}/validmind/tests/data_validation/RunsTest.py +0 -0
- {validmind-2.7.5 → validmind-2.7.6}/validmind/tests/data_validation/ScatterPlot.py +0 -0
- {validmind-2.7.5 → validmind-2.7.6}/validmind/tests/data_validation/ScoreBandDefaultRates.py +0 -0
- {validmind-2.7.5 → validmind-2.7.6}/validmind/tests/data_validation/SeasonalDecompose.py +0 -0
- {validmind-2.7.5 → validmind-2.7.6}/validmind/tests/data_validation/ShapiroWilk.py +0 -0
- {validmind-2.7.5 → validmind-2.7.6}/validmind/tests/data_validation/Skewness.py +0 -0
- {validmind-2.7.5 → validmind-2.7.6}/validmind/tests/data_validation/SpreadPlot.py +0 -0
- {validmind-2.7.5 → validmind-2.7.6}/validmind/tests/data_validation/TabularCategoricalBarPlots.py +0 -0
- {validmind-2.7.5 → validmind-2.7.6}/validmind/tests/data_validation/TabularDateTimeHistograms.py +0 -0
- {validmind-2.7.5 → validmind-2.7.6}/validmind/tests/data_validation/TabularDescriptionTables.py +0 -0
- {validmind-2.7.5 → validmind-2.7.6}/validmind/tests/data_validation/TabularNumericalHistograms.py +0 -0
- {validmind-2.7.5 → validmind-2.7.6}/validmind/tests/data_validation/TargetRateBarPlots.py +0 -0
- {validmind-2.7.5 → validmind-2.7.6}/validmind/tests/data_validation/TimeSeriesDescription.py +0 -0
- {validmind-2.7.5 → validmind-2.7.6}/validmind/tests/data_validation/TimeSeriesDescriptiveStatistics.py +0 -0
- {validmind-2.7.5 → validmind-2.7.6}/validmind/tests/data_validation/TimeSeriesFrequency.py +0 -0
- {validmind-2.7.5 → validmind-2.7.6}/validmind/tests/data_validation/TimeSeriesHistogram.py +0 -0
- {validmind-2.7.5 → validmind-2.7.6}/validmind/tests/data_validation/TimeSeriesLinePlot.py +0 -0
- {validmind-2.7.5 → validmind-2.7.6}/validmind/tests/data_validation/TimeSeriesMissingValues.py +0 -0
- {validmind-2.7.5 → validmind-2.7.6}/validmind/tests/data_validation/TimeSeriesOutliers.py +0 -0
- {validmind-2.7.5 → validmind-2.7.6}/validmind/tests/data_validation/TooManyZeroValues.py +0 -0
- {validmind-2.7.5 → validmind-2.7.6}/validmind/tests/data_validation/UniqueRows.py +0 -0
- {validmind-2.7.5 → validmind-2.7.6}/validmind/tests/data_validation/WOEBinPlots.py +0 -0
- {validmind-2.7.5 → validmind-2.7.6}/validmind/tests/data_validation/WOEBinTable.py +0 -0
- {validmind-2.7.5 → validmind-2.7.6}/validmind/tests/data_validation/ZivotAndrewsArch.py +0 -0
- {validmind-2.7.5 → validmind-2.7.6}/validmind/tests/data_validation/__init__.py +0 -0
- {validmind-2.7.5 → validmind-2.7.6}/validmind/tests/data_validation/nlp/CommonWords.py +0 -0
- {validmind-2.7.5 → validmind-2.7.6}/validmind/tests/data_validation/nlp/Hashtags.py +0 -0
- {validmind-2.7.5 → validmind-2.7.6}/validmind/tests/data_validation/nlp/LanguageDetection.py +0 -0
- {validmind-2.7.5 → validmind-2.7.6}/validmind/tests/data_validation/nlp/Mentions.py +0 -0
- {validmind-2.7.5 → validmind-2.7.6}/validmind/tests/data_validation/nlp/PolarityAndSubjectivity.py +0 -0
- {validmind-2.7.5 → validmind-2.7.6}/validmind/tests/data_validation/nlp/Punctuations.py +0 -0
- {validmind-2.7.5 → validmind-2.7.6}/validmind/tests/data_validation/nlp/Sentiment.py +0 -0
- {validmind-2.7.5 → validmind-2.7.6}/validmind/tests/data_validation/nlp/StopWords.py +0 -0
- {validmind-2.7.5 → validmind-2.7.6}/validmind/tests/data_validation/nlp/TextDescription.py +0 -0
- {validmind-2.7.5 → validmind-2.7.6}/validmind/tests/data_validation/nlp/Toxicity.py +0 -0
- {validmind-2.7.5 → validmind-2.7.6}/validmind/tests/data_validation/nlp/__init__.py +0 -0
- {validmind-2.7.5 → validmind-2.7.6}/validmind/tests/decorator.py +0 -0
- {validmind-2.7.5 → validmind-2.7.6}/validmind/tests/load.py +0 -0
- {validmind-2.7.5 → validmind-2.7.6}/validmind/tests/model_validation/BertScore.py +0 -0
- {validmind-2.7.5 → validmind-2.7.6}/validmind/tests/model_validation/BleuScore.py +0 -0
- {validmind-2.7.5 → validmind-2.7.6}/validmind/tests/model_validation/ClusterSizeDistribution.py +0 -0
- {validmind-2.7.5 → validmind-2.7.6}/validmind/tests/model_validation/ContextualRecall.py +0 -0
- {validmind-2.7.5 → validmind-2.7.6}/validmind/tests/model_validation/FeaturesAUC.py +0 -0
- {validmind-2.7.5 → validmind-2.7.6}/validmind/tests/model_validation/MeteorScore.py +0 -0
- {validmind-2.7.5 → validmind-2.7.6}/validmind/tests/model_validation/ModelMetadata.py +0 -0
- {validmind-2.7.5 → validmind-2.7.6}/validmind/tests/model_validation/ModelPredictionResiduals.py +0 -0
- {validmind-2.7.5 → validmind-2.7.6}/validmind/tests/model_validation/RegardScore.py +0 -0
- {validmind-2.7.5 → validmind-2.7.6}/validmind/tests/model_validation/RegressionResidualsPlot.py +0 -0
- {validmind-2.7.5 → validmind-2.7.6}/validmind/tests/model_validation/RougeScore.py +0 -0
- {validmind-2.7.5 → validmind-2.7.6}/validmind/tests/model_validation/TimeSeriesPredictionWithCI.py +0 -0
- {validmind-2.7.5 → validmind-2.7.6}/validmind/tests/model_validation/TimeSeriesPredictionsPlot.py +0 -0
- {validmind-2.7.5 → validmind-2.7.6}/validmind/tests/model_validation/TimeSeriesR2SquareBySegments.py +0 -0
- {validmind-2.7.5 → validmind-2.7.6}/validmind/tests/model_validation/TokenDisparity.py +0 -0
- {validmind-2.7.5 → validmind-2.7.6}/validmind/tests/model_validation/ToxicityScore.py +0 -0
- {validmind-2.7.5 → validmind-2.7.6}/validmind/tests/model_validation/__init__.py +0 -0
- {validmind-2.7.5 → validmind-2.7.6}/validmind/tests/model_validation/embeddings/ClusterDistribution.py +0 -0
- {validmind-2.7.5 → validmind-2.7.6}/validmind/tests/model_validation/embeddings/CosineSimilarityComparison.py +0 -0
- {validmind-2.7.5 → validmind-2.7.6}/validmind/tests/model_validation/embeddings/CosineSimilarityDistribution.py +0 -0
- {validmind-2.7.5 → validmind-2.7.6}/validmind/tests/model_validation/embeddings/CosineSimilarityHeatmap.py +0 -0
- {validmind-2.7.5 → validmind-2.7.6}/validmind/tests/model_validation/embeddings/DescriptiveAnalytics.py +0 -0
- {validmind-2.7.5 → validmind-2.7.6}/validmind/tests/model_validation/embeddings/EmbeddingsVisualization2D.py +0 -0
- {validmind-2.7.5 → validmind-2.7.6}/validmind/tests/model_validation/embeddings/EuclideanDistanceComparison.py +0 -0
- {validmind-2.7.5 → validmind-2.7.6}/validmind/tests/model_validation/embeddings/EuclideanDistanceHeatmap.py +0 -0
- {validmind-2.7.5 → validmind-2.7.6}/validmind/tests/model_validation/embeddings/PCAComponentsPairwisePlots.py +0 -0
- {validmind-2.7.5 → validmind-2.7.6}/validmind/tests/model_validation/embeddings/StabilityAnalysisKeyword.py +0 -0
- {validmind-2.7.5 → validmind-2.7.6}/validmind/tests/model_validation/embeddings/StabilityAnalysisRandomNoise.py +0 -0
- {validmind-2.7.5 → validmind-2.7.6}/validmind/tests/model_validation/embeddings/StabilityAnalysisSynonyms.py +0 -0
- {validmind-2.7.5 → validmind-2.7.6}/validmind/tests/model_validation/embeddings/StabilityAnalysisTranslation.py +0 -0
- {validmind-2.7.5 → validmind-2.7.6}/validmind/tests/model_validation/embeddings/TSNEComponentsPairwisePlots.py +0 -0
- {validmind-2.7.5 → validmind-2.7.6}/validmind/tests/model_validation/embeddings/utils.py +0 -0
- {validmind-2.7.5 → validmind-2.7.6}/validmind/tests/model_validation/ragas/AnswerCorrectness.py +0 -0
- {validmind-2.7.5 → validmind-2.7.6}/validmind/tests/model_validation/ragas/AspectCritic.py +0 -0
- {validmind-2.7.5 → validmind-2.7.6}/validmind/tests/model_validation/ragas/ContextEntityRecall.py +0 -0
- {validmind-2.7.5 → validmind-2.7.6}/validmind/tests/model_validation/ragas/ContextPrecision.py +0 -0
- {validmind-2.7.5 → validmind-2.7.6}/validmind/tests/model_validation/ragas/ContextPrecisionWithoutReference.py +0 -0
- {validmind-2.7.5 → validmind-2.7.6}/validmind/tests/model_validation/ragas/ContextRecall.py +0 -0
- {validmind-2.7.5 → validmind-2.7.6}/validmind/tests/model_validation/ragas/Faithfulness.py +0 -0
- {validmind-2.7.5 → validmind-2.7.6}/validmind/tests/model_validation/ragas/NoiseSensitivity.py +0 -0
- {validmind-2.7.5 → validmind-2.7.6}/validmind/tests/model_validation/ragas/ResponseRelevancy.py +0 -0
- {validmind-2.7.5 → validmind-2.7.6}/validmind/tests/model_validation/ragas/SemanticSimilarity.py +0 -0
- {validmind-2.7.5 → validmind-2.7.6}/validmind/tests/model_validation/ragas/utils.py +0 -0
- {validmind-2.7.5 → validmind-2.7.6}/validmind/tests/model_validation/sklearn/AdjustedMutualInformation.py +0 -0
- {validmind-2.7.5 → validmind-2.7.6}/validmind/tests/model_validation/sklearn/AdjustedRandIndex.py +0 -0
- {validmind-2.7.5 → validmind-2.7.6}/validmind/tests/model_validation/sklearn/CalibrationCurve.py +0 -0
- {validmind-2.7.5 → validmind-2.7.6}/validmind/tests/model_validation/sklearn/ClassifierPerformance.py +0 -0
- {validmind-2.7.5 → validmind-2.7.6}/validmind/tests/model_validation/sklearn/ClassifierThresholdOptimization.py +0 -0
- {validmind-2.7.5 → validmind-2.7.6}/validmind/tests/model_validation/sklearn/ClusterCosineSimilarity.py +0 -0
- {validmind-2.7.5 → validmind-2.7.6}/validmind/tests/model_validation/sklearn/ClusterPerformanceMetrics.py +0 -0
- {validmind-2.7.5 → validmind-2.7.6}/validmind/tests/model_validation/sklearn/CompletenessScore.py +0 -0
- {validmind-2.7.5 → validmind-2.7.6}/validmind/tests/model_validation/sklearn/ConfusionMatrix.py +0 -0
- {validmind-2.7.5 → validmind-2.7.6}/validmind/tests/model_validation/sklearn/FeatureImportance.py +0 -0
- {validmind-2.7.5 → validmind-2.7.6}/validmind/tests/model_validation/sklearn/FowlkesMallowsScore.py +0 -0
- {validmind-2.7.5 → validmind-2.7.6}/validmind/tests/model_validation/sklearn/HomogeneityScore.py +0 -0
- {validmind-2.7.5 → validmind-2.7.6}/validmind/tests/model_validation/sklearn/HyperParametersTuning.py +0 -0
- {validmind-2.7.5 → validmind-2.7.6}/validmind/tests/model_validation/sklearn/KMeansClustersOptimization.py +0 -0
- {validmind-2.7.5 → validmind-2.7.6}/validmind/tests/model_validation/sklearn/MinimumAccuracy.py +0 -0
- {validmind-2.7.5 → validmind-2.7.6}/validmind/tests/model_validation/sklearn/MinimumF1Score.py +0 -0
- {validmind-2.7.5 → validmind-2.7.6}/validmind/tests/model_validation/sklearn/MinimumROCAUCScore.py +0 -0
- {validmind-2.7.5 → validmind-2.7.6}/validmind/tests/model_validation/sklearn/ModelParameters.py +0 -0
- {validmind-2.7.5 → validmind-2.7.6}/validmind/tests/model_validation/sklearn/ModelsPerformanceComparison.py +0 -0
- {validmind-2.7.5 → validmind-2.7.6}/validmind/tests/model_validation/sklearn/OverfitDiagnosis.py +0 -0
- {validmind-2.7.5 → validmind-2.7.6}/validmind/tests/model_validation/sklearn/PermutationFeatureImportance.py +0 -0
- {validmind-2.7.5 → validmind-2.7.6}/validmind/tests/model_validation/sklearn/PopulationStabilityIndex.py +0 -0
- {validmind-2.7.5 → validmind-2.7.6}/validmind/tests/model_validation/sklearn/PrecisionRecallCurve.py +0 -0
- {validmind-2.7.5 → validmind-2.7.6}/validmind/tests/model_validation/sklearn/ROCCurve.py +0 -0
- {validmind-2.7.5 → validmind-2.7.6}/validmind/tests/model_validation/sklearn/RegressionErrors.py +0 -0
- {validmind-2.7.5 → validmind-2.7.6}/validmind/tests/model_validation/sklearn/RegressionErrorsComparison.py +0 -0
- {validmind-2.7.5 → validmind-2.7.6}/validmind/tests/model_validation/sklearn/RegressionPerformance.py +0 -0
- {validmind-2.7.5 → validmind-2.7.6}/validmind/tests/model_validation/sklearn/RegressionR2Square.py +0 -0
- {validmind-2.7.5 → validmind-2.7.6}/validmind/tests/model_validation/sklearn/RegressionR2SquareComparison.py +0 -0
- {validmind-2.7.5 → validmind-2.7.6}/validmind/tests/model_validation/sklearn/RobustnessDiagnosis.py +0 -0
- {validmind-2.7.5 → validmind-2.7.6}/validmind/tests/model_validation/sklearn/SHAPGlobalImportance.py +0 -0
- {validmind-2.7.5 → validmind-2.7.6}/validmind/tests/model_validation/sklearn/ScoreProbabilityAlignment.py +0 -0
- {validmind-2.7.5 → validmind-2.7.6}/validmind/tests/model_validation/sklearn/SilhouettePlot.py +0 -0
- {validmind-2.7.5 → validmind-2.7.6}/validmind/tests/model_validation/sklearn/TrainingTestDegradation.py +0 -0
- {validmind-2.7.5 → validmind-2.7.6}/validmind/tests/model_validation/sklearn/VMeasure.py +0 -0
- {validmind-2.7.5 → validmind-2.7.6}/validmind/tests/model_validation/sklearn/WeakspotsDiagnosis.py +0 -0
- {validmind-2.7.5 → validmind-2.7.6}/validmind/tests/model_validation/sklearn/__init__.py +0 -0
- {validmind-2.7.5 → validmind-2.7.6}/validmind/tests/model_validation/statsmodels/AutoARIMA.py +0 -0
- {validmind-2.7.5 → validmind-2.7.6}/validmind/tests/model_validation/statsmodels/CumulativePredictionProbabilities.py +0 -0
- {validmind-2.7.5 → validmind-2.7.6}/validmind/tests/model_validation/statsmodels/DurbinWatsonTest.py +0 -0
- {validmind-2.7.5 → validmind-2.7.6}/validmind/tests/model_validation/statsmodels/GINITable.py +0 -0
- {validmind-2.7.5 → validmind-2.7.6}/validmind/tests/model_validation/statsmodels/KolmogorovSmirnov.py +0 -0
- {validmind-2.7.5 → validmind-2.7.6}/validmind/tests/model_validation/statsmodels/Lilliefors.py +0 -0
- {validmind-2.7.5 → validmind-2.7.6}/validmind/tests/model_validation/statsmodels/PredictionProbabilitiesHistogram.py +0 -0
- {validmind-2.7.5 → validmind-2.7.6}/validmind/tests/model_validation/statsmodels/RegressionCoeffs.py +0 -0
- {validmind-2.7.5 → validmind-2.7.6}/validmind/tests/model_validation/statsmodels/RegressionFeatureSignificance.py +0 -0
- {validmind-2.7.5 → validmind-2.7.6}/validmind/tests/model_validation/statsmodels/RegressionModelForecastPlot.py +0 -0
- {validmind-2.7.5 → validmind-2.7.6}/validmind/tests/model_validation/statsmodels/RegressionModelForecastPlotLevels.py +0 -0
- {validmind-2.7.5 → validmind-2.7.6}/validmind/tests/model_validation/statsmodels/RegressionModelSensitivityPlot.py +0 -0
- {validmind-2.7.5 → validmind-2.7.6}/validmind/tests/model_validation/statsmodels/RegressionModelSummary.py +0 -0
- {validmind-2.7.5 → validmind-2.7.6}/validmind/tests/model_validation/statsmodels/RegressionPermutationFeatureImportance.py +0 -0
- {validmind-2.7.5 → validmind-2.7.6}/validmind/tests/model_validation/statsmodels/ScorecardHistogram.py +0 -0
- {validmind-2.7.5 → validmind-2.7.6}/validmind/tests/model_validation/statsmodels/__init__.py +0 -0
- {validmind-2.7.5 → validmind-2.7.6}/validmind/tests/model_validation/statsmodels/statsutils.py +0 -0
- {validmind-2.7.5 → validmind-2.7.6}/validmind/tests/output.py +0 -0
- {validmind-2.7.5 → validmind-2.7.6}/validmind/tests/prompt_validation/Bias.py +0 -0
- {validmind-2.7.5 → validmind-2.7.6}/validmind/tests/prompt_validation/Clarity.py +0 -0
- {validmind-2.7.5 → validmind-2.7.6}/validmind/tests/prompt_validation/Conciseness.py +0 -0
- {validmind-2.7.5 → validmind-2.7.6}/validmind/tests/prompt_validation/Delimitation.py +0 -0
- {validmind-2.7.5 → validmind-2.7.6}/validmind/tests/prompt_validation/NegativeInstruction.py +0 -0
- {validmind-2.7.5 → validmind-2.7.6}/validmind/tests/prompt_validation/Robustness.py +0 -0
- {validmind-2.7.5 → validmind-2.7.6}/validmind/tests/prompt_validation/Specificity.py +0 -0
- {validmind-2.7.5 → validmind-2.7.6}/validmind/tests/prompt_validation/__init__.py +0 -0
- {validmind-2.7.5 → validmind-2.7.6}/validmind/tests/prompt_validation/ai_powered_test.py +0 -0
- {validmind-2.7.5 → validmind-2.7.6}/validmind/tests/run.py +0 -0
- {validmind-2.7.5 → validmind-2.7.6}/validmind/tests/test_providers.py +0 -0
- {validmind-2.7.5 → validmind-2.7.6}/validmind/tests/utils.py +0 -0
- {validmind-2.7.5 → validmind-2.7.6}/validmind/unit_metrics/__init__.py +0 -0
- {validmind-2.7.5 → validmind-2.7.6}/validmind/unit_metrics/classification/Accuracy.py +0 -0
- {validmind-2.7.5 → validmind-2.7.6}/validmind/unit_metrics/classification/F1.py +0 -0
- {validmind-2.7.5 → validmind-2.7.6}/validmind/unit_metrics/classification/Precision.py +0 -0
- {validmind-2.7.5 → validmind-2.7.6}/validmind/unit_metrics/classification/ROC_AUC.py +0 -0
- {validmind-2.7.5 → validmind-2.7.6}/validmind/unit_metrics/classification/Recall.py +0 -0
- {validmind-2.7.5 → validmind-2.7.6}/validmind/unit_metrics/regression/AdjustedRSquaredScore.py +0 -0
- {validmind-2.7.5 → validmind-2.7.6}/validmind/unit_metrics/regression/GiniCoefficient.py +0 -0
- {validmind-2.7.5 → validmind-2.7.6}/validmind/unit_metrics/regression/HuberLoss.py +0 -0
- {validmind-2.7.5 → validmind-2.7.6}/validmind/unit_metrics/regression/KolmogorovSmirnovStatistic.py +0 -0
- {validmind-2.7.5 → validmind-2.7.6}/validmind/unit_metrics/regression/MeanAbsoluteError.py +0 -0
- {validmind-2.7.5 → validmind-2.7.6}/validmind/unit_metrics/regression/MeanAbsolutePercentageError.py +0 -0
- {validmind-2.7.5 → validmind-2.7.6}/validmind/unit_metrics/regression/MeanBiasDeviation.py +0 -0
- {validmind-2.7.5 → validmind-2.7.6}/validmind/unit_metrics/regression/MeanSquaredError.py +0 -0
- {validmind-2.7.5 → validmind-2.7.6}/validmind/unit_metrics/regression/QuantileLoss.py +0 -0
- {validmind-2.7.5 → validmind-2.7.6}/validmind/unit_metrics/regression/RSquaredScore.py +0 -0
- {validmind-2.7.5 → validmind-2.7.6}/validmind/unit_metrics/regression/RootMeanSquaredError.py +0 -0
- {validmind-2.7.5 → validmind-2.7.6}/validmind/utils.py +0 -0
- {validmind-2.7.5 → validmind-2.7.6}/validmind/vm_models/__init__.py +0 -0
- {validmind-2.7.5 → validmind-2.7.6}/validmind/vm_models/dataset/__init__.py +0 -0
- {validmind-2.7.5 → validmind-2.7.6}/validmind/vm_models/dataset/utils.py +0 -0
- {validmind-2.7.5 → validmind-2.7.6}/validmind/vm_models/figure.py +0 -0
- {validmind-2.7.5 → validmind-2.7.6}/validmind/vm_models/input.py +0 -0
- {validmind-2.7.5 → validmind-2.7.6}/validmind/vm_models/model.py +0 -0
- {validmind-2.7.5 → validmind-2.7.6}/validmind/vm_models/result/__init__.py +0 -0
- {validmind-2.7.5 → validmind-2.7.6}/validmind/vm_models/result/result.jinja +0 -0
- {validmind-2.7.5 → validmind-2.7.6}/validmind/vm_models/result/result.py +0 -0
- {validmind-2.7.5 → validmind-2.7.6}/validmind/vm_models/result/utils.py +0 -0
- {validmind-2.7.5 → validmind-2.7.6}/validmind/vm_models/test_suite/runner.py +0 -0
- {validmind-2.7.5 → validmind-2.7.6}/validmind/vm_models/test_suite/summary.py +0 -0
- {validmind-2.7.5 → validmind-2.7.6}/validmind/vm_models/test_suite/test.py +0 -0
- {validmind-2.7.5 → validmind-2.7.6}/validmind/vm_models/test_suite/test_suite.py +0 -0
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.3
|
2
2
|
Name: validmind
|
3
|
-
Version: 2.7.5
|
3
|
+
Version: 2.7.6
|
4
4
|
Summary: ValidMind Library
|
5
5
|
License: Commercial License
|
6
6
|
Author: Andres Rodriguez
|
@@ -44,7 +44,7 @@ Requires-Dist: python-dotenv
|
|
44
44
|
Requires-Dist: ragas (>=0.2.3) ; extra == "all" or extra == "llm"
|
45
45
|
Requires-Dist: rouge (>=1)
|
46
46
|
Requires-Dist: rpy2 (>=3.5.10,<4.0.0) ; extra == "all" or extra == "r-support"
|
47
|
-
Requires-Dist: scikit-learn (
|
47
|
+
Requires-Dist: scikit-learn (<1.6.0)
|
48
48
|
Requires-Dist: scipy
|
49
49
|
Requires-Dist: scorecardpy (>=0.1.9.6,<0.2.0.0)
|
50
50
|
Requires-Dist: seaborn
|
@@ -10,7 +10,7 @@ description = "ValidMind Library"
|
|
10
10
|
license = "Commercial License"
|
11
11
|
name = "validmind"
|
12
12
|
readme = "README.pypi.md"
|
13
|
-
version = "2.7.5"
|
13
|
+
version = "2.7.6"
|
14
14
|
|
15
15
|
[tool.poetry.dependencies]
|
16
16
|
aiohttp = {extras = ["speedups"], version = "*"}
|
@@ -42,7 +42,7 @@ python-dotenv = "*"
|
|
42
42
|
ragas = {version = ">=0.2.3", optional = true}
|
43
43
|
rouge = ">=1"
|
44
44
|
rpy2 = {version = "^3.5.10", optional = true}
|
45
|
-
scikit-learn = "
|
45
|
+
scikit-learn = "*,<1.6.0"
|
46
46
|
scipy = "*"
|
47
47
|
scorecardpy = "^0.1.9.6"
|
48
48
|
seaborn = "*"
|
@@ -0,0 +1 @@
|
|
1
|
+
__version__ = "2.7.6"
|
@@ -3,13 +3,20 @@
|
|
3
3
|
# SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
|
4
4
|
|
5
5
|
import os
|
6
|
-
|
6
|
+
import warnings
|
7
|
+
import logging
|
7
8
|
import numpy as np
|
8
9
|
import pandas as pd
|
9
10
|
import scorecardpy as sc
|
10
11
|
import statsmodels.api as sm
|
12
|
+
|
13
|
+
import xgboost as xgb
|
14
|
+
import validmind as vm
|
15
|
+
|
16
|
+
from sklearn.ensemble import RandomForestClassifier
|
11
17
|
from sklearn.model_selection import train_test_split
|
12
18
|
|
19
|
+
|
13
20
|
current_path = os.path.dirname(os.path.abspath(__file__))
|
14
21
|
dataset_path = os.path.join(current_path, "datasets")
|
15
22
|
|
@@ -95,7 +102,7 @@ score_params = {
|
|
95
102
|
}
|
96
103
|
|
97
104
|
|
98
|
-
def load_data(source="online"):
|
105
|
+
def load_data(source="online", verbose=True):
|
99
106
|
"""
|
100
107
|
Load data from either an online source or offline files, automatically dropping specified columns for offline data.
|
101
108
|
|
@@ -104,28 +111,33 @@ def load_data(source="online"):
|
|
104
111
|
"""
|
105
112
|
|
106
113
|
if source == "online":
|
107
|
-
|
114
|
+
if verbose:
|
115
|
+
print(f"Loading data from an online source: {online_data_file}")
|
108
116
|
df = pd.read_csv(online_data_file)
|
109
|
-
df = _clean_data(df)
|
117
|
+
df = _clean_data(df, verbose=verbose)
|
110
118
|
|
111
119
|
elif source == "offline":
|
112
|
-
|
120
|
+
if verbose:
|
121
|
+
print(f"Loading data from an offline .gz file: {offline_data_file}")
|
113
122
|
# Since we know the offline_data_file path ends with '.zip', we replace it with '.csv.gz'
|
114
123
|
gzip_file_path = offline_data_file.replace(".zip", ".csv.gz")
|
115
|
-
|
124
|
+
if verbose:
|
125
|
+
print(f"Attempting to read from .gz file: {gzip_file_path}")
|
116
126
|
# Read the CSV file directly from the .gz archive
|
117
127
|
df = pd.read_csv(gzip_file_path, compression="gzip")
|
118
|
-
|
128
|
+
if verbose:
|
129
|
+
print("Data loaded successfully.")
|
119
130
|
else:
|
120
131
|
raise ValueError("Invalid source specified. Choose 'online' or 'offline'.")
|
121
132
|
|
122
|
-
|
123
|
-
|
124
|
-
|
133
|
+
if verbose:
|
134
|
+
print(
|
135
|
+
f"Rows: {df.shape[0]}, Columns: {df.shape[1]}, Missing values: {df.isnull().sum().sum()}"
|
136
|
+
)
|
125
137
|
return df
|
126
138
|
|
127
139
|
|
128
|
-
def _clean_data(df):
|
140
|
+
def _clean_data(df, verbose=True):
|
129
141
|
df = df.copy()
|
130
142
|
|
131
143
|
# Drop columns not relevant for application scorecards
|
@@ -133,41 +145,45 @@ def _clean_data(df):
|
|
133
145
|
|
134
146
|
# Drop rows with missing target values
|
135
147
|
df.dropna(subset=[target_column], inplace=True)
|
136
|
-
|
137
|
-
|
138
|
-
|
139
|
-
|
148
|
+
if verbose:
|
149
|
+
print("Dropping rows with missing target values:")
|
150
|
+
print(
|
151
|
+
f"Rows: {df.shape[0]}\nColumns: {df.shape[1]}\nMissing values: {df.isnull().sum().sum()}\n"
|
152
|
+
)
|
140
153
|
|
141
154
|
# Drop columns with more than N percent missing values
|
142
155
|
missing_values = df.isnull().mean()
|
143
156
|
df = df.loc[:, missing_values < 0.7]
|
144
|
-
|
145
|
-
|
146
|
-
|
147
|
-
|
157
|
+
if verbose:
|
158
|
+
print("Dropping columns with more than 70% missing values:")
|
159
|
+
print(
|
160
|
+
f"Rows: {df.shape[0]}\nColumns: {df.shape[1]}\nMissing values: {df.isnull().sum().sum()}\n"
|
161
|
+
)
|
148
162
|
|
149
163
|
# Drop columns with only one unique value
|
150
164
|
unique_values = df.nunique()
|
151
165
|
df = df.loc[:, unique_values > 1]
|
152
|
-
|
153
|
-
|
154
|
-
|
155
|
-
|
166
|
+
if verbose:
|
167
|
+
print("Dropping columns with only one unique value:")
|
168
|
+
print(
|
169
|
+
f"Rows: {df.shape[0]}\nColumns: {df.shape[1]}\nMissing values: {df.isnull().sum().sum()}\n"
|
170
|
+
)
|
156
171
|
|
157
172
|
# Define the target variable for the model, representing loan default status.
|
158
173
|
df[target_column] = df[target_column].map({"Fully Paid": 0, "Charged Off": 1})
|
159
174
|
|
160
175
|
# Drop rows with NaN in target_column after mapping
|
161
176
|
df.dropna(subset=[target_column], inplace=True)
|
162
|
-
|
163
|
-
|
164
|
-
|
165
|
-
|
177
|
+
if verbose:
|
178
|
+
print("Dropping rows with missing target values:")
|
179
|
+
print(
|
180
|
+
f"Rows: {df.shape[0]}\nColumns: {df.shape[1]}\nMissing values: {df.isnull().sum().sum()}\n"
|
181
|
+
)
|
166
182
|
|
167
183
|
return df
|
168
184
|
|
169
185
|
|
170
|
-
def preprocess(df):
|
186
|
+
def preprocess(df, verbose=True):
|
171
187
|
df = df.copy()
|
172
188
|
|
173
189
|
# Convert the target variable to integer type for modeling.
|
@@ -175,45 +191,51 @@ def preprocess(df):
|
|
175
191
|
|
176
192
|
# Keep rows where purpose is 'debt_consolidation' or 'credit_card'
|
177
193
|
df = df[df["purpose"].isin(["debt_consolidation", "credit_card"])]
|
178
|
-
|
179
|
-
|
180
|
-
|
181
|
-
|
194
|
+
if verbose:
|
195
|
+
print("Filtering 'purpose' to 'debt_consolidation' and 'credit_card':")
|
196
|
+
print(
|
197
|
+
f"Rows: {df.shape[0]}\nColumns: {df.shape[1]}\nMissing values: {df.isnull().sum().sum()}\n"
|
198
|
+
)
|
182
199
|
|
183
200
|
# Remove rows where grade is 'F' or 'G'
|
184
201
|
df = df[~df["grade"].isin(["F", "G"])]
|
185
|
-
|
186
|
-
|
187
|
-
|
188
|
-
|
202
|
+
if verbose:
|
203
|
+
print("Filtering out 'grade' F and G:")
|
204
|
+
print(
|
205
|
+
f"Rows: {df.shape[0]}\nColumns: {df.shape[1]}\nMissing values: {df.isnull().sum().sum()}\n"
|
206
|
+
)
|
189
207
|
|
190
208
|
# Remove rows where sub_grade starts with 'F' or 'G'
|
191
209
|
df = df[~df["sub_grade"].str.startswith(("F", "G"))]
|
192
|
-
|
193
|
-
|
194
|
-
|
195
|
-
|
210
|
+
if verbose:
|
211
|
+
print("Filtering out 'sub_grade' F and G:")
|
212
|
+
print(
|
213
|
+
f"Rows: {df.shape[0]}\nColumns: {df.shape[1]}\nMissing values: {df.isnull().sum().sum()}\n"
|
214
|
+
)
|
196
215
|
|
197
216
|
# Remove rows where home_ownership is 'OTHER', 'NONE', or 'ANY'
|
198
217
|
df = df[~df["home_ownership"].isin(["OTHER", "NONE", "ANY"])]
|
199
|
-
|
200
|
-
|
201
|
-
|
202
|
-
|
218
|
+
if verbose:
|
219
|
+
print("Filtering out 'home_ownership' OTHER, NONE, ANY:")
|
220
|
+
print(
|
221
|
+
f"Rows: {df.shape[0]}\nColumns: {df.shape[1]}\nMissing values: {df.isnull().sum().sum()}\n"
|
222
|
+
)
|
203
223
|
|
204
224
|
# Drop features that are not useful for modeling
|
205
225
|
df.drop(drop_features, axis=1, inplace=True)
|
206
|
-
|
207
|
-
|
208
|
-
|
209
|
-
|
226
|
+
if verbose:
|
227
|
+
print("Dropping specified features:")
|
228
|
+
print(
|
229
|
+
f"Rows: {df.shape[0]}\nColumns: {df.shape[1]}\nMissing values: {df.isnull().sum().sum()}\n"
|
230
|
+
)
|
210
231
|
|
211
232
|
# Drop rows with missing values
|
212
233
|
df.dropna(inplace=True)
|
213
|
-
|
214
|
-
|
215
|
-
|
216
|
-
|
234
|
+
if verbose:
|
235
|
+
print("Dropping rows with any missing values:")
|
236
|
+
print(
|
237
|
+
f"Rows: {df.shape[0]}\nColumns: {df.shape[1]}\nMissing values: {df.isnull().sum().sum()}\n"
|
238
|
+
)
|
217
239
|
|
218
240
|
# Preprocess emp_length column
|
219
241
|
df = _preprocess_emp_length(df)
|
@@ -260,34 +282,37 @@ def _preprocess_emp_length(df):
|
|
260
282
|
return df
|
261
283
|
|
262
284
|
|
263
|
-
def feature_engineering(df):
|
285
|
+
def feature_engineering(df, verbose=True):
|
264
286
|
df = df.copy()
|
265
287
|
|
266
288
|
# WoE encoding of numerical and categorical features
|
267
|
-
df = woe_encoding(df)
|
289
|
+
df = woe_encoding(df, verbose=verbose)
|
268
290
|
|
269
|
-
|
270
|
-
|
271
|
-
|
291
|
+
if verbose:
|
292
|
+
print(
|
293
|
+
f"Rows: {df.shape[0]}\nColumns: {df.shape[1]}\nMissing values: {df.isnull().sum().sum()}\n"
|
294
|
+
)
|
272
295
|
|
273
296
|
return df
|
274
297
|
|
275
298
|
|
276
|
-
def woe_encoding(df):
|
299
|
+
def woe_encoding(df, verbose=True):
|
277
300
|
df = df.copy()
|
278
301
|
|
279
|
-
woe = _woebin(df)
|
302
|
+
woe = _woebin(df, verbose=verbose)
|
280
303
|
bins = _woe_to_bins(woe)
|
281
304
|
|
282
305
|
# Make sure we don't transform the target column
|
283
306
|
if target_column in bins:
|
284
307
|
del bins[target_column]
|
285
|
-
|
308
|
+
if verbose:
|
309
|
+
print(f"Excluded {target_column} from WoE transformation.")
|
286
310
|
|
287
311
|
# Apply the WoE transformation
|
288
312
|
df = sc.woebin_ply(df, bins=bins)
|
289
313
|
|
290
|
-
|
314
|
+
if verbose:
|
315
|
+
print("Successfully converted features to WoE values.")
|
291
316
|
|
292
317
|
return df
|
293
318
|
|
@@ -326,7 +351,7 @@ def _woe_to_bins(woe):
|
|
326
351
|
return bins
|
327
352
|
|
328
353
|
|
329
|
-
def _woebin(df):
|
354
|
+
def _woebin(df, verbose=True):
|
330
355
|
"""
|
331
356
|
This function performs automatic binning using WoE.
|
332
357
|
df: A pandas dataframe
|
@@ -337,9 +362,10 @@ def _woebin(df):
|
|
337
362
|
df[non_numeric_cols] = df[non_numeric_cols].astype(str)
|
338
363
|
|
339
364
|
try:
|
340
|
-
|
341
|
-
|
342
|
-
|
365
|
+
if verbose:
|
366
|
+
print(
|
367
|
+
f"Performing binning with breaks_adj: {breaks_adj}"
|
368
|
+
) # print the breaks_adj being used
|
343
369
|
bins = sc.woebin(df, target_column, breaks_list=breaks_adj)
|
344
370
|
except Exception as e:
|
345
371
|
print("Error during binning: ")
|
@@ -355,7 +381,7 @@ def _woebin(df):
|
|
355
381
|
return bins_df
|
356
382
|
|
357
383
|
|
358
|
-
def split(df, validation_size=None, test_size=0.2, add_constant=False):
|
384
|
+
def split(df, validation_size=None, test_size=0.2, add_constant=False, verbose=True):
|
359
385
|
"""
|
360
386
|
Split dataset into train, validation (optional), and test sets.
|
361
387
|
|
@@ -384,15 +410,16 @@ def split(df, validation_size=None, test_size=0.2, add_constant=False):
|
|
384
410
|
train_val_df = sm.add_constant(train_val_df)
|
385
411
|
|
386
412
|
# Print details for two-way split
|
387
|
-
|
388
|
-
|
389
|
-
|
390
|
-
|
391
|
-
|
392
|
-
|
393
|
-
|
394
|
-
|
395
|
-
|
413
|
+
if verbose:
|
414
|
+
print("After splitting the dataset into training and test sets:")
|
415
|
+
print(
|
416
|
+
f"Training Dataset:\nRows: {train_val_df.shape[0]}\nColumns: {train_val_df.shape[1]}\n"
|
417
|
+
f"Missing values: {train_val_df.isnull().sum().sum()}\n"
|
418
|
+
)
|
419
|
+
print(
|
420
|
+
f"Test Dataset:\nRows: {test_df.shape[0]}\nColumns: {test_df.shape[1]}\n"
|
421
|
+
f"Missing values: {test_df.isnull().sum().sum()}\n"
|
422
|
+
)
|
396
423
|
|
397
424
|
return train_val_df, test_df
|
398
425
|
|
@@ -407,19 +434,20 @@ def split(df, validation_size=None, test_size=0.2, add_constant=False):
|
|
407
434
|
validation_df = sm.add_constant(validation_df)
|
408
435
|
|
409
436
|
# Print details for three-way split
|
410
|
-
|
411
|
-
|
412
|
-
|
413
|
-
|
414
|
-
|
415
|
-
|
416
|
-
|
417
|
-
|
418
|
-
|
419
|
-
|
420
|
-
|
421
|
-
|
422
|
-
|
437
|
+
if verbose:
|
438
|
+
print("After splitting the dataset into training, validation, and test sets:")
|
439
|
+
print(
|
440
|
+
f"Training Dataset:\nRows: {train_df.shape[0]}\nColumns: {train_df.shape[1]}\n"
|
441
|
+
f"Missing values: {train_df.isnull().sum().sum()}\n"
|
442
|
+
)
|
443
|
+
print(
|
444
|
+
f"Validation Dataset:\nRows: {validation_df.shape[0]}\nColumns: {validation_df.shape[1]}\n"
|
445
|
+
f"Missing values: {validation_df.isnull().sum().sum()}\n"
|
446
|
+
)
|
447
|
+
print(
|
448
|
+
f"Test Dataset:\nRows: {test_df.shape[0]}\nColumns: {test_df.shape[1]}\n"
|
449
|
+
f"Missing values: {test_df.isnull().sum().sum()}\n"
|
450
|
+
)
|
423
451
|
|
424
452
|
return train_df, validation_df, test_df
|
425
453
|
|
@@ -822,3 +850,241 @@ def get_demo_test_config(x_test=None, y_test=None):
|
|
822
850
|
}
|
823
851
|
|
824
852
|
return default_config
|
853
|
+
|
854
|
+
|
855
|
+
def load_scorecard():
|
856
|
+
|
857
|
+
warnings.filterwarnings("ignore")
|
858
|
+
logging.getLogger("scorecardpy").setLevel(logging.ERROR)
|
859
|
+
|
860
|
+
os.environ["VALIDMIND_LLM_DESCRIPTIONS_CONTEXT_ENABLED"] = "1"
|
861
|
+
|
862
|
+
context = """
|
863
|
+
FORMAT FOR THE LLM DESCRIPTIONS:
|
864
|
+
**<Test Name>** is designed to <begin with a concise overview of what the test does and its primary purpose, extracted from the test description>.
|
865
|
+
|
866
|
+
The test operates by <write a paragraph about the test mechanism, explaining how it works and what it measures. Include any relevant formulas or methodologies mentioned in the test description.>
|
867
|
+
|
868
|
+
The primary advantages of this test include <write a paragraph about the test's strengths and capabilities, highlighting what makes it particularly useful for specific scenarios.>
|
869
|
+
|
870
|
+
Users should be aware that <write a paragraph about the test's limitations and potential risks. Include both technical limitations and interpretation challenges. If the test description includes specific signs of high risk, incorporate these here.>
|
871
|
+
|
872
|
+
**Key Insights:**
|
873
|
+
|
874
|
+
The test results reveal:
|
875
|
+
|
876
|
+
- **<insight title>**: <comprehensive description of one aspect of the results>
|
877
|
+
- **<insight title>**: <comprehensive description of another aspect>
|
878
|
+
...
|
879
|
+
|
880
|
+
Based on these results, <conclude with a brief paragraph that ties together the test results with the test's purpose and provides any final recommendations or considerations.>
|
881
|
+
|
882
|
+
ADDITIONAL INSTRUCTIONS:
|
883
|
+
Present insights in order from general to specific, with each insight as a single bullet point with bold title.
|
884
|
+
|
885
|
+
For each metric in the test results, include in the test overview:
|
886
|
+
- The metric's purpose and what it measures
|
887
|
+
- Its mathematical formula
|
888
|
+
- The range of possible values
|
889
|
+
- What constitutes good/bad performance
|
890
|
+
- How to interpret different values
|
891
|
+
|
892
|
+
Each insight should progressively cover:
|
893
|
+
1. Overall scope and distribution
|
894
|
+
2. Complete breakdown of all elements with specific values
|
895
|
+
3. Natural groupings and patterns
|
896
|
+
4. Comparative analysis between datasets/categories
|
897
|
+
5. Stability and variations
|
898
|
+
6. Notable relationships or dependencies
|
899
|
+
|
900
|
+
Remember:
|
901
|
+
- Keep all insights at the same level (no sub-bullets or nested structures)
|
902
|
+
- Make each insight complete and self-contained
|
903
|
+
- Include specific numerical values and ranges
|
904
|
+
- Cover all elements in the results comprehensively
|
905
|
+
- Maintain clear, concise language
|
906
|
+
- Use only "- **Title**: Description" format for insights
|
907
|
+
- Progress naturally from general to specific observations
|
908
|
+
|
909
|
+
""".strip()
|
910
|
+
|
911
|
+
os.environ["VALIDMIND_LLM_DESCRIPTIONS_CONTEXT"] = context
|
912
|
+
|
913
|
+
# Load the data
|
914
|
+
df = load_data(source="offline", verbose=False)
|
915
|
+
preprocess_df = preprocess(df, verbose=False)
|
916
|
+
fe_df = feature_engineering(preprocess_df, verbose=False)
|
917
|
+
|
918
|
+
# Split the data
|
919
|
+
train_df, test_df = split(fe_df, test_size=0.2, verbose=False)
|
920
|
+
|
921
|
+
x_train = train_df.drop(target_column, axis=1)
|
922
|
+
y_train = train_df[target_column]
|
923
|
+
|
924
|
+
x_test = test_df.drop(target_column, axis=1)
|
925
|
+
y_test = test_df[target_column]
|
926
|
+
|
927
|
+
# Define the XGBoost model
|
928
|
+
xgb_model = xgb.XGBClassifier(
|
929
|
+
n_estimators=50, random_state=42, early_stopping_rounds=10
|
930
|
+
)
|
931
|
+
xgb_model.set_params(
|
932
|
+
eval_metric=["error", "logloss", "auc"],
|
933
|
+
)
|
934
|
+
|
935
|
+
# Fit the model
|
936
|
+
xgb_model.fit(x_train, y_train, eval_set=[(x_test, y_test)], verbose=False)
|
937
|
+
|
938
|
+
# Define the Random Forest model
|
939
|
+
rf_model = RandomForestClassifier(
|
940
|
+
n_estimators=50,
|
941
|
+
random_state=42,
|
942
|
+
)
|
943
|
+
|
944
|
+
# Fit the model
|
945
|
+
rf_model.fit(x_train, y_train)
|
946
|
+
|
947
|
+
# Compute the probabilities
|
948
|
+
train_xgb_prob = xgb_model.predict_proba(x_train)[:, 1]
|
949
|
+
test_xgb_prob = xgb_model.predict_proba(x_test)[:, 1]
|
950
|
+
|
951
|
+
train_rf_prob = rf_model.predict_proba(x_train)[:, 1]
|
952
|
+
test_rf_prob = rf_model.predict_proba(x_test)[:, 1]
|
953
|
+
|
954
|
+
# Compute binary predictions
|
955
|
+
cut_off_threshold = 0.3
|
956
|
+
|
957
|
+
train_xgb_binary_predictions = (train_xgb_prob > cut_off_threshold).astype(int)
|
958
|
+
test_xgb_binary_predictions = (test_xgb_prob > cut_off_threshold).astype(int)
|
959
|
+
|
960
|
+
train_rf_binary_predictions = (train_rf_prob > cut_off_threshold).astype(int)
|
961
|
+
test_rf_binary_predictions = (test_rf_prob > cut_off_threshold).astype(int)
|
962
|
+
|
963
|
+
# Compute credit risk scores
|
964
|
+
train_xgb_scores = compute_scores(train_xgb_prob)
|
965
|
+
test_xgb_scores = compute_scores(test_xgb_prob)
|
966
|
+
|
967
|
+
scorecard = {
|
968
|
+
"df": df,
|
969
|
+
"preprocess_df": preprocess_df,
|
970
|
+
"fe_df": fe_df,
|
971
|
+
"train_df": train_df,
|
972
|
+
"test_df": test_df,
|
973
|
+
"x_test": x_test,
|
974
|
+
"y_test": y_test,
|
975
|
+
"xgb_model": xgb_model,
|
976
|
+
"rf_model": rf_model,
|
977
|
+
"train_xgb_binary_predictions": train_xgb_binary_predictions,
|
978
|
+
"test_xgb_binary_predictions": test_xgb_binary_predictions,
|
979
|
+
"train_xgb_prob": train_xgb_prob,
|
980
|
+
"test_xgb_prob": test_xgb_prob,
|
981
|
+
"train_xgb_scores": train_xgb_scores,
|
982
|
+
"test_xgb_scores": test_xgb_scores,
|
983
|
+
"train_rf_binary_predictions": train_rf_binary_predictions,
|
984
|
+
"test_rf_binary_predictions": test_rf_binary_predictions,
|
985
|
+
"train_rf_prob": train_rf_prob,
|
986
|
+
"test_rf_prob": test_rf_prob,
|
987
|
+
}
|
988
|
+
|
989
|
+
return scorecard
|
990
|
+
|
991
|
+
|
992
|
+
def init_vm_objects(scorecard):
|
993
|
+
|
994
|
+
df = scorecard["df"]
|
995
|
+
preprocess_df = scorecard["preprocess_df"]
|
996
|
+
fe_df = scorecard["fe_df"]
|
997
|
+
train_df = scorecard["train_df"]
|
998
|
+
test_df = scorecard["test_df"]
|
999
|
+
xgb_model = scorecard["xgb_model"]
|
1000
|
+
rf_model = scorecard["rf_model"]
|
1001
|
+
train_xgb_binary_predictions = scorecard["train_xgb_binary_predictions"]
|
1002
|
+
test_xgb_binary_predictions = scorecard["test_xgb_binary_predictions"]
|
1003
|
+
train_xgb_prob = scorecard["train_xgb_prob"]
|
1004
|
+
test_xgb_prob = scorecard["test_xgb_prob"]
|
1005
|
+
train_rf_binary_predictions = scorecard["train_rf_binary_predictions"]
|
1006
|
+
test_rf_binary_predictions = scorecard["test_rf_binary_predictions"]
|
1007
|
+
train_rf_prob = scorecard["train_rf_prob"]
|
1008
|
+
test_rf_prob = scorecard["test_rf_prob"]
|
1009
|
+
train_xgb_scores = scorecard["train_xgb_scores"]
|
1010
|
+
test_xgb_scores = scorecard["test_xgb_scores"]
|
1011
|
+
|
1012
|
+
vm.init_dataset(
|
1013
|
+
dataset=df,
|
1014
|
+
input_id="raw_dataset",
|
1015
|
+
target_column=target_column,
|
1016
|
+
)
|
1017
|
+
|
1018
|
+
vm.init_dataset(
|
1019
|
+
dataset=preprocess_df,
|
1020
|
+
input_id="preprocess_dataset",
|
1021
|
+
target_column=target_column,
|
1022
|
+
)
|
1023
|
+
|
1024
|
+
vm.init_dataset(
|
1025
|
+
dataset=fe_df,
|
1026
|
+
input_id="fe_dataset",
|
1027
|
+
target_column=target_column,
|
1028
|
+
)
|
1029
|
+
|
1030
|
+
vm_train_ds = vm.init_dataset(
|
1031
|
+
dataset=train_df,
|
1032
|
+
input_id="train_dataset",
|
1033
|
+
target_column=target_column,
|
1034
|
+
)
|
1035
|
+
|
1036
|
+
vm_test_ds = vm.init_dataset(
|
1037
|
+
dataset=test_df,
|
1038
|
+
input_id="test_dataset",
|
1039
|
+
target_column=target_column,
|
1040
|
+
)
|
1041
|
+
|
1042
|
+
vm_xgb_model = vm.init_model(
|
1043
|
+
xgb_model,
|
1044
|
+
input_id="xgb_model",
|
1045
|
+
)
|
1046
|
+
|
1047
|
+
vm_rf_model = vm.init_model(
|
1048
|
+
rf_model,
|
1049
|
+
input_id="rf_model",
|
1050
|
+
)
|
1051
|
+
|
1052
|
+
# Assign predictions
|
1053
|
+
vm_train_ds.assign_predictions(
|
1054
|
+
model=vm_xgb_model,
|
1055
|
+
prediction_values=train_xgb_binary_predictions,
|
1056
|
+
prediction_probabilities=train_xgb_prob,
|
1057
|
+
)
|
1058
|
+
|
1059
|
+
vm_test_ds.assign_predictions(
|
1060
|
+
model=vm_xgb_model,
|
1061
|
+
prediction_values=test_xgb_binary_predictions,
|
1062
|
+
prediction_probabilities=test_xgb_prob,
|
1063
|
+
)
|
1064
|
+
|
1065
|
+
vm_train_ds.assign_predictions(
|
1066
|
+
model=vm_rf_model,
|
1067
|
+
prediction_values=train_rf_binary_predictions,
|
1068
|
+
prediction_probabilities=train_rf_prob,
|
1069
|
+
)
|
1070
|
+
|
1071
|
+
vm_test_ds.assign_predictions(
|
1072
|
+
model=vm_rf_model,
|
1073
|
+
prediction_values=test_rf_binary_predictions,
|
1074
|
+
prediction_probabilities=test_rf_prob,
|
1075
|
+
)
|
1076
|
+
|
1077
|
+
# Assign scores to the datasets
|
1078
|
+
vm_train_ds.add_extra_column("xgb_scores", train_xgb_scores)
|
1079
|
+
vm_test_ds.add_extra_column("xgb_scores", test_xgb_scores)
|
1080
|
+
|
1081
|
+
|
1082
|
+
def load_test_config(scorecard):
|
1083
|
+
|
1084
|
+
x_test = scorecard["x_test"]
|
1085
|
+
y_test = scorecard["y_test"]
|
1086
|
+
|
1087
|
+
# Get the test config
|
1088
|
+
test_config = get_demo_test_config(x_test, y_test)
|
1089
|
+
|
1090
|
+
return test_config
|
{validmind-2.7.5 → validmind-2.7.6}/validmind/tests/data_validation/HighPearsonCorrelation.py
RENAMED
@@ -9,7 +9,10 @@ from validmind.vm_models import VMDataset
|
|
9
9
|
@tags("tabular_data", "data_quality", "correlation")
|
10
10
|
@tasks("classification", "regression")
|
11
11
|
def HighPearsonCorrelation(
|
12
|
-
dataset: VMDataset,
|
12
|
+
dataset: VMDataset,
|
13
|
+
max_threshold: float = 0.3,
|
14
|
+
top_n_correlations: int = 10,
|
15
|
+
feature_columns: list = None,
|
13
16
|
):
|
14
17
|
"""
|
15
18
|
Identifies highly correlated feature pairs in a dataset suggesting feature redundancy or multicollinearity.
|
@@ -51,8 +54,15 @@ def HighPearsonCorrelation(
|
|
51
54
|
- Limited to identifying redundancy only within feature pairs; may fail to spot more complex relationships among
|
52
55
|
three or more variables.
|
53
56
|
"""
|
57
|
+
|
58
|
+
# Select features
|
59
|
+
if feature_columns is None:
|
60
|
+
df = dataset.df
|
61
|
+
else:
|
62
|
+
df = dataset.df[feature_columns]
|
63
|
+
|
54
64
|
# Get correlation matrix for numeric columns
|
55
|
-
corr =
|
65
|
+
corr = df.corr(numeric_only=True)
|
56
66
|
|
57
67
|
# Create table of correlation coefficients and column pairs
|
58
68
|
pairs = []
|