validmind 2.0.0__tar.gz → 2.0.7__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {validmind-2.0.0 → validmind-2.0.7}/PKG-INFO +9 -13
- {validmind-2.0.0 → validmind-2.0.7}/pyproject.toml +14 -17
- {validmind-2.0.0 → validmind-2.0.7}/validmind/__init__.py +4 -1
- validmind-2.0.7/validmind/__version__.py +1 -0
- validmind-2.0.7/validmind/ai.py +197 -0
- {validmind-2.0.0 → validmind-2.0.7}/validmind/api_client.py +16 -4
- {validmind-2.0.0 → validmind-2.0.7}/validmind/client.py +23 -3
- {validmind-2.0.0 → validmind-2.0.7}/validmind/datasets/classification/customer_churn.py +2 -2
- {validmind-2.0.0 → validmind-2.0.7}/validmind/datasets/nlp/__init__.py +5 -0
- validmind-2.0.7/validmind/datasets/nlp/cnn_dailymail.py +98 -0
- validmind-2.0.7/validmind/datasets/nlp/datasets/cnn_dailymail_100_with_predictions.csv +255 -0
- validmind-2.0.7/validmind/datasets/nlp/datasets/cnn_dailymail_500_with_predictions.csv +1277 -0
- validmind-2.0.7/validmind/datasets/nlp/datasets/sentiments_with_predictions.csv +4847 -0
- {validmind-2.0.0 → validmind-2.0.7}/validmind/errors.py +11 -1
- {validmind-2.0.0 → validmind-2.0.7}/validmind/models/huggingface.py +2 -2
- {validmind-2.0.0 → validmind-2.0.7}/validmind/models/pytorch.py +3 -3
- {validmind-2.0.0 → validmind-2.0.7}/validmind/models/sklearn.py +4 -4
- {validmind-2.0.0 → validmind-2.0.7}/validmind/tests/__init__.py +47 -9
- {validmind-2.0.0 → validmind-2.0.7}/validmind/tests/data_validation/DatasetDescription.py +0 -1
- {validmind-2.0.0 → validmind-2.0.7}/validmind/tests/data_validation/PiTCreditScoresHistogram.py +8 -3
- {validmind-2.0.0 → validmind-2.0.7}/validmind/tests/data_validation/TargetRateBarPlots.py +3 -1
- {validmind-2.0.0 → validmind-2.0.7}/validmind/tests/data_validation/nlp/StopWords.py +1 -6
- {validmind-2.0.0 → validmind-2.0.7}/validmind/tests/data_validation/nlp/TextDescription.py +20 -9
- validmind-2.0.7/validmind/tests/decorator.py +189 -0
- validmind-2.0.7/validmind/tests/model_validation/MeteorScore.py +92 -0
- {validmind-2.0.0 → validmind-2.0.7}/validmind/tests/model_validation/RegardHistogram.py +5 -6
- {validmind-2.0.0 → validmind-2.0.7}/validmind/tests/model_validation/RegardScore.py +3 -5
- {validmind-2.0.0 → validmind-2.0.7}/validmind/tests/model_validation/RougeMetrics.py +6 -4
- validmind-2.0.7/validmind/tests/model_validation/SelfCheckNLIScore.py +112 -0
- {validmind-2.0.0 → validmind-2.0.7}/validmind/tests/model_validation/embeddings/DescriptiveAnalytics.py +17 -22
- {validmind-2.0.0 → validmind-2.0.7}/validmind/tests/model_validation/sklearn/ClassifierPerformance.py +3 -1
- {validmind-2.0.0 → validmind-2.0.7}/validmind/tests/model_validation/sklearn/SHAPGlobalImportance.py +30 -4
- {validmind-2.0.0 → validmind-2.0.7}/validmind/tests/model_validation/sklearn/TrainingTestDegradation.py +9 -3
- {validmind-2.0.0 → validmind-2.0.7}/validmind/tests/model_validation/statsmodels/ADF.py +27 -1
- {validmind-2.0.0 → validmind-2.0.7}/validmind/tests/model_validation/statsmodels/RegressionModelsPerformance.py +1 -1
- {validmind-2.0.0 → validmind-2.0.7}/validmind/tests/model_validation/statsmodels/ResidualsVisualInspection.py +1 -13
- {validmind-2.0.0 → validmind-2.0.7}/validmind/tests/prompt_validation/ai_powered_test.py +2 -0
- {validmind-2.0.0 → validmind-2.0.7}/validmind/unit_metrics/__init__.py +0 -2
- validmind-2.0.7/validmind/unit_metrics/composite.py +275 -0
- validmind-2.0.7/validmind/unit_metrics/regression/GiniCoefficient.py +39 -0
- validmind-2.0.7/validmind/unit_metrics/regression/HuberLoss.py +27 -0
- validmind-2.0.7/validmind/unit_metrics/regression/KolmogorovSmirnovStatistic.py +36 -0
- validmind-2.0.7/validmind/unit_metrics/regression/MeanAbsolutePercentageError.py +22 -0
- validmind-2.0.7/validmind/unit_metrics/regression/MeanBiasDeviation.py +22 -0
- validmind-2.0.7/validmind/unit_metrics/regression/QuantileLoss.py +25 -0
- validmind-2.0.7/validmind/unit_metrics/regression/sklearn/AdjustedRSquaredScore.py +27 -0
- validmind-2.0.7/validmind/unit_metrics/regression/sklearn/MeanAbsoluteError.py +22 -0
- validmind-2.0.7/validmind/unit_metrics/regression/sklearn/MeanSquaredError.py +22 -0
- validmind-2.0.7/validmind/unit_metrics/regression/sklearn/RSquaredScore.py +22 -0
- validmind-2.0.7/validmind/unit_metrics/regression/sklearn/RootMeanSquaredError.py +23 -0
- {validmind-2.0.0 → validmind-2.0.7}/validmind/unit_metrics/sklearn/classification/Accuracy.py +2 -0
- {validmind-2.0.0 → validmind-2.0.7}/validmind/unit_metrics/sklearn/classification/F1.py +2 -0
- {validmind-2.0.0 → validmind-2.0.7}/validmind/unit_metrics/sklearn/classification/Precision.py +2 -0
- {validmind-2.0.0 → validmind-2.0.7}/validmind/unit_metrics/sklearn/classification/ROC_AUC.py +2 -0
- {validmind-2.0.0 → validmind-2.0.7}/validmind/unit_metrics/sklearn/classification/Recall.py +2 -0
- {validmind-2.0.0 → validmind-2.0.7}/validmind/utils.py +17 -1
- {validmind-2.0.0 → validmind-2.0.7}/validmind/vm_models/dataset.py +376 -21
- {validmind-2.0.0 → validmind-2.0.7}/validmind/vm_models/figure.py +52 -17
- {validmind-2.0.0 → validmind-2.0.7}/validmind/vm_models/test/metric.py +33 -30
- {validmind-2.0.0 → validmind-2.0.7}/validmind/vm_models/test/output_template.py +0 -27
- {validmind-2.0.0 → validmind-2.0.7}/validmind/vm_models/test/result_wrapper.py +57 -24
- {validmind-2.0.0 → validmind-2.0.7}/validmind/vm_models/test/test.py +2 -1
- {validmind-2.0.0 → validmind-2.0.7}/validmind/vm_models/test/threshold_test.py +24 -13
- {validmind-2.0.0 → validmind-2.0.7}/validmind/vm_models/test_context.py +7 -0
- {validmind-2.0.0 → validmind-2.0.7}/validmind/vm_models/test_suite/runner.py +1 -1
- {validmind-2.0.0 → validmind-2.0.7}/validmind/vm_models/test_suite/test.py +1 -1
- validmind-2.0.0/validmind/__version__.py +0 -1
- {validmind-2.0.0 → validmind-2.0.7}/LICENSE +0 -0
- {validmind-2.0.0 → validmind-2.0.7}/README.pypi.md +0 -0
- {validmind-2.0.0 → validmind-2.0.7}/validmind/client_config.py +0 -0
- {validmind-2.0.0 → validmind-2.0.7}/validmind/datasets/__init__.py +0 -0
- {validmind-2.0.0 → validmind-2.0.7}/validmind/datasets/classification/__init__.py +0 -0
- {validmind-2.0.0 → validmind-2.0.7}/validmind/datasets/classification/datasets/bank_customer_churn.csv +0 -0
- {validmind-2.0.0 → validmind-2.0.7}/validmind/datasets/classification/datasets/taiwan_credit.csv +0 -0
- {validmind-2.0.0 → validmind-2.0.7}/validmind/datasets/classification/taiwan_credit.py +0 -0
- {validmind-2.0.0 → validmind-2.0.7}/validmind/datasets/cluster/digits.py +0 -0
- {validmind-2.0.0 → validmind-2.0.7}/validmind/datasets/nlp/datasets/Covid_19.csv +0 -0
- {validmind-2.0.0 → validmind-2.0.7}/validmind/datasets/nlp/twitter_covid_19.py +0 -0
- {validmind-2.0.0 → validmind-2.0.7}/validmind/datasets/regression/__init__.py +0 -0
- {validmind-2.0.0 → validmind-2.0.7}/validmind/datasets/regression/california_housing.py +0 -0
- {validmind-2.0.0 → validmind-2.0.7}/validmind/datasets/regression/datasets/fred/CPIAUCSL.csv +0 -0
- {validmind-2.0.0 → validmind-2.0.7}/validmind/datasets/regression/datasets/fred/CSUSHPISA.csv +0 -0
- {validmind-2.0.0 → validmind-2.0.7}/validmind/datasets/regression/datasets/fred/DRSFRMACBS.csv +0 -0
- {validmind-2.0.0 → validmind-2.0.7}/validmind/datasets/regression/datasets/fred/FEDFUNDS.csv +0 -0
- {validmind-2.0.0 → validmind-2.0.7}/validmind/datasets/regression/datasets/fred/GDP.csv +0 -0
- {validmind-2.0.0 → validmind-2.0.7}/validmind/datasets/regression/datasets/fred/GDPC1.csv +0 -0
- {validmind-2.0.0 → validmind-2.0.7}/validmind/datasets/regression/datasets/fred/GS10.csv +0 -0
- {validmind-2.0.0 → validmind-2.0.7}/validmind/datasets/regression/datasets/fred/GS3.csv +0 -0
- {validmind-2.0.0 → validmind-2.0.7}/validmind/datasets/regression/datasets/fred/GS5.csv +0 -0
- {validmind-2.0.0 → validmind-2.0.7}/validmind/datasets/regression/datasets/fred/MORTGAGE30US.csv +0 -0
- {validmind-2.0.0 → validmind-2.0.7}/validmind/datasets/regression/datasets/fred/UNRATE.csv +0 -0
- {validmind-2.0.0 → validmind-2.0.7}/validmind/datasets/regression/datasets/fred_loan_rates.csv +0 -0
- {validmind-2.0.0 → validmind-2.0.7}/validmind/datasets/regression/datasets/fred_loan_rates_test_1.csv +0 -0
- {validmind-2.0.0 → validmind-2.0.7}/validmind/datasets/regression/datasets/fred_loan_rates_test_2.csv +0 -0
- {validmind-2.0.0 → validmind-2.0.7}/validmind/datasets/regression/datasets/fred_loan_rates_test_3.csv +0 -0
- {validmind-2.0.0 → validmind-2.0.7}/validmind/datasets/regression/datasets/fred_loan_rates_test_4.csv +0 -0
- {validmind-2.0.0 → validmind-2.0.7}/validmind/datasets/regression/datasets/fred_loan_rates_test_5.csv +0 -0
- {validmind-2.0.0 → validmind-2.0.7}/validmind/datasets/regression/datasets/lending_club_loan_rates.csv +0 -0
- {validmind-2.0.0 → validmind-2.0.7}/validmind/datasets/regression/fred.py +0 -0
- {validmind-2.0.0 → validmind-2.0.7}/validmind/datasets/regression/lending_club.py +0 -0
- {validmind-2.0.0 → validmind-2.0.7}/validmind/datasets/regression/models/fred_loan_rates_model_1.pkl +0 -0
- {validmind-2.0.0 → validmind-2.0.7}/validmind/datasets/regression/models/fred_loan_rates_model_2.pkl +0 -0
- {validmind-2.0.0 → validmind-2.0.7}/validmind/datasets/regression/models/fred_loan_rates_model_3.pkl +0 -0
- {validmind-2.0.0 → validmind-2.0.7}/validmind/datasets/regression/models/fred_loan_rates_model_4.pkl +0 -0
- {validmind-2.0.0 → validmind-2.0.7}/validmind/datasets/regression/models/fred_loan_rates_model_5.pkl +0 -0
- {validmind-2.0.0 → validmind-2.0.7}/validmind/html_templates/content_blocks.py +0 -0
- {validmind-2.0.0 → validmind-2.0.7}/validmind/input_registry.py +0 -0
- {validmind-2.0.0 → validmind-2.0.7}/validmind/logging.py +0 -0
- {validmind-2.0.0 → validmind-2.0.7}/validmind/models/__init__.py +0 -0
- {validmind-2.0.0 → validmind-2.0.7}/validmind/models/catboost.py +0 -0
- {validmind-2.0.0 → validmind-2.0.7}/validmind/models/foundation.py +0 -0
- {validmind-2.0.0 → validmind-2.0.7}/validmind/models/r_model.py +0 -0
- {validmind-2.0.0 → validmind-2.0.7}/validmind/models/statsmodels.py +0 -0
- {validmind-2.0.0 → validmind-2.0.7}/validmind/models/xgboost.py +0 -0
- {validmind-2.0.0 → validmind-2.0.7}/validmind/template.py +0 -0
- {validmind-2.0.0 → validmind-2.0.7}/validmind/test_suites/__init__.py +0 -0
- {validmind-2.0.0 → validmind-2.0.7}/validmind/test_suites/classifier.py +0 -0
- {validmind-2.0.0 → validmind-2.0.7}/validmind/test_suites/cluster.py +0 -0
- {validmind-2.0.0 → validmind-2.0.7}/validmind/test_suites/embeddings.py +0 -0
- {validmind-2.0.0 → validmind-2.0.7}/validmind/test_suites/llm.py +0 -0
- {validmind-2.0.0 → validmind-2.0.7}/validmind/test_suites/nlp.py +0 -0
- {validmind-2.0.0 → validmind-2.0.7}/validmind/test_suites/parameters_optimization.py +0 -0
- {validmind-2.0.0 → validmind-2.0.7}/validmind/test_suites/regression.py +0 -0
- {validmind-2.0.0 → validmind-2.0.7}/validmind/test_suites/statsmodels_timeseries.py +0 -0
- {validmind-2.0.0 → validmind-2.0.7}/validmind/test_suites/summarization.py +0 -0
- {validmind-2.0.0 → validmind-2.0.7}/validmind/test_suites/tabular_datasets.py +0 -0
- {validmind-2.0.0 → validmind-2.0.7}/validmind/test_suites/text_data.py +0 -0
- {validmind-2.0.0 → validmind-2.0.7}/validmind/test_suites/time_series.py +0 -0
- {validmind-2.0.0 → validmind-2.0.7}/validmind/tests/__types__.py +0 -0
- {validmind-2.0.0 → validmind-2.0.7}/validmind/tests/data_validation/ACFandPACFPlot.py +0 -0
- {validmind-2.0.0 → validmind-2.0.7}/validmind/tests/data_validation/ANOVAOneWayTable.py +0 -0
- {validmind-2.0.0 → validmind-2.0.7}/validmind/tests/data_validation/AutoAR.py +0 -0
- {validmind-2.0.0 → validmind-2.0.7}/validmind/tests/data_validation/AutoMA.py +0 -0
- {validmind-2.0.0 → validmind-2.0.7}/validmind/tests/data_validation/AutoSeasonality.py +0 -0
- {validmind-2.0.0 → validmind-2.0.7}/validmind/tests/data_validation/AutoStationarity.py +0 -0
- {validmind-2.0.0 → validmind-2.0.7}/validmind/tests/data_validation/BivariateFeaturesBarPlots.py +0 -0
- {validmind-2.0.0 → validmind-2.0.7}/validmind/tests/data_validation/BivariateHistograms.py +0 -0
- {validmind-2.0.0 → validmind-2.0.7}/validmind/tests/data_validation/BivariateScatterPlots.py +0 -0
- {validmind-2.0.0 → validmind-2.0.7}/validmind/tests/data_validation/ChiSquaredFeaturesTable.py +0 -0
- {validmind-2.0.0 → validmind-2.0.7}/validmind/tests/data_validation/ClassImbalance.py +0 -0
- {validmind-2.0.0 → validmind-2.0.7}/validmind/tests/data_validation/DatasetSplit.py +0 -0
- {validmind-2.0.0 → validmind-2.0.7}/validmind/tests/data_validation/DefaultRatesbyRiskBandPlot.py +0 -0
- {validmind-2.0.0 → validmind-2.0.7}/validmind/tests/data_validation/DescriptiveStatistics.py +0 -0
- {validmind-2.0.0 → validmind-2.0.7}/validmind/tests/data_validation/Duplicates.py +0 -0
- {validmind-2.0.0 → validmind-2.0.7}/validmind/tests/data_validation/EngleGrangerCoint.py +0 -0
- {validmind-2.0.0 → validmind-2.0.7}/validmind/tests/data_validation/FeatureTargetCorrelationPlot.py +0 -0
- {validmind-2.0.0 → validmind-2.0.7}/validmind/tests/data_validation/HeatmapFeatureCorrelations.py +0 -0
- {validmind-2.0.0 → validmind-2.0.7}/validmind/tests/data_validation/HighCardinality.py +0 -0
- {validmind-2.0.0 → validmind-2.0.7}/validmind/tests/data_validation/HighPearsonCorrelation.py +0 -0
- {validmind-2.0.0 → validmind-2.0.7}/validmind/tests/data_validation/IQROutliersBarPlot.py +0 -0
- {validmind-2.0.0 → validmind-2.0.7}/validmind/tests/data_validation/IQROutliersTable.py +0 -0
- {validmind-2.0.0 → validmind-2.0.7}/validmind/tests/data_validation/IsolationForestOutliers.py +0 -0
- {validmind-2.0.0 → validmind-2.0.7}/validmind/tests/data_validation/LaggedCorrelationHeatmap.py +0 -0
- {validmind-2.0.0 → validmind-2.0.7}/validmind/tests/data_validation/MissingValues.py +0 -0
- {validmind-2.0.0 → validmind-2.0.7}/validmind/tests/data_validation/MissingValuesBarPlot.py +0 -0
- {validmind-2.0.0 → validmind-2.0.7}/validmind/tests/data_validation/MissingValuesRisk.py +0 -0
- {validmind-2.0.0 → validmind-2.0.7}/validmind/tests/data_validation/PearsonCorrelationMatrix.py +0 -0
- {validmind-2.0.0 → validmind-2.0.7}/validmind/tests/data_validation/PiTPDHistogram.py +0 -0
- {validmind-2.0.0 → validmind-2.0.7}/validmind/tests/data_validation/RollingStatsPlot.py +0 -0
- {validmind-2.0.0 → validmind-2.0.7}/validmind/tests/data_validation/ScatterPlot.py +0 -0
- {validmind-2.0.0 → validmind-2.0.7}/validmind/tests/data_validation/SeasonalDecompose.py +0 -0
- {validmind-2.0.0 → validmind-2.0.7}/validmind/tests/data_validation/Skewness.py +0 -0
- {validmind-2.0.0 → validmind-2.0.7}/validmind/tests/data_validation/SpreadPlot.py +0 -0
- {validmind-2.0.0 → validmind-2.0.7}/validmind/tests/data_validation/TabularCategoricalBarPlots.py +0 -0
- {validmind-2.0.0 → validmind-2.0.7}/validmind/tests/data_validation/TabularDateTimeHistograms.py +0 -0
- {validmind-2.0.0 → validmind-2.0.7}/validmind/tests/data_validation/TabularDescriptionTables.py +0 -0
- {validmind-2.0.0 → validmind-2.0.7}/validmind/tests/data_validation/TabularNumericalHistograms.py +0 -0
- {validmind-2.0.0 → validmind-2.0.7}/validmind/tests/data_validation/TimeSeriesFrequency.py +0 -0
- {validmind-2.0.0 → validmind-2.0.7}/validmind/tests/data_validation/TimeSeriesHistogram.py +0 -0
- {validmind-2.0.0 → validmind-2.0.7}/validmind/tests/data_validation/TimeSeriesLinePlot.py +0 -0
- {validmind-2.0.0 → validmind-2.0.7}/validmind/tests/data_validation/TimeSeriesMissingValues.py +0 -0
- {validmind-2.0.0 → validmind-2.0.7}/validmind/tests/data_validation/TimeSeriesOutliers.py +0 -0
- {validmind-2.0.0 → validmind-2.0.7}/validmind/tests/data_validation/TooManyZeroValues.py +0 -0
- {validmind-2.0.0 → validmind-2.0.7}/validmind/tests/data_validation/UniqueRows.py +0 -0
- {validmind-2.0.0 → validmind-2.0.7}/validmind/tests/data_validation/WOEBinPlots.py +0 -0
- {validmind-2.0.0 → validmind-2.0.7}/validmind/tests/data_validation/WOEBinTable.py +0 -0
- {validmind-2.0.0 → validmind-2.0.7}/validmind/tests/data_validation/__init__.py +0 -0
- {validmind-2.0.0 → validmind-2.0.7}/validmind/tests/data_validation/nlp/CommonWords.py +0 -0
- {validmind-2.0.0 → validmind-2.0.7}/validmind/tests/data_validation/nlp/Hashtags.py +0 -0
- {validmind-2.0.0 → validmind-2.0.7}/validmind/tests/data_validation/nlp/Mentions.py +0 -0
- {validmind-2.0.0 → validmind-2.0.7}/validmind/tests/data_validation/nlp/Punctuations.py +0 -0
- {validmind-2.0.0 → validmind-2.0.7}/validmind/tests/data_validation/nlp/__init__.py +0 -0
- {validmind-2.0.0 → validmind-2.0.7}/validmind/tests/model_validation/BertScore.py +0 -0
- {validmind-2.0.0 → validmind-2.0.7}/validmind/tests/model_validation/BertScoreAggregate.py +0 -0
- {validmind-2.0.0 → validmind-2.0.7}/validmind/tests/model_validation/BleuScore.py +0 -0
- {validmind-2.0.0 → validmind-2.0.7}/validmind/tests/model_validation/ClusterSizeDistribution.py +0 -0
- {validmind-2.0.0 → validmind-2.0.7}/validmind/tests/model_validation/ContextualRecall.py +0 -0
- {validmind-2.0.0 → validmind-2.0.7}/validmind/tests/model_validation/ModelMetadata.py +0 -0
- {validmind-2.0.0 → validmind-2.0.7}/validmind/tests/model_validation/RougeMetricsAggregate.py +0 -0
- {validmind-2.0.0 → validmind-2.0.7}/validmind/tests/model_validation/TokenDisparity.py +0 -0
- {validmind-2.0.0 → validmind-2.0.7}/validmind/tests/model_validation/ToxicityHistogram.py +0 -0
- {validmind-2.0.0 → validmind-2.0.7}/validmind/tests/model_validation/ToxicityScore.py +0 -0
- {validmind-2.0.0 → validmind-2.0.7}/validmind/tests/model_validation/__init__.py +0 -0
- {validmind-2.0.0 → validmind-2.0.7}/validmind/tests/model_validation/embeddings/ClusterDistribution.py +0 -0
- {validmind-2.0.0 → validmind-2.0.7}/validmind/tests/model_validation/embeddings/CosineSimilarityDistribution.py +0 -0
- {validmind-2.0.0 → validmind-2.0.7}/validmind/tests/model_validation/embeddings/EmbeddingsVisualization2D.py +0 -0
- {validmind-2.0.0 → validmind-2.0.7}/validmind/tests/model_validation/embeddings/StabilityAnalysis.py +0 -0
- {validmind-2.0.0 → validmind-2.0.7}/validmind/tests/model_validation/embeddings/StabilityAnalysisKeyword.py +0 -0
- {validmind-2.0.0 → validmind-2.0.7}/validmind/tests/model_validation/embeddings/StabilityAnalysisRandomNoise.py +0 -0
- {validmind-2.0.0 → validmind-2.0.7}/validmind/tests/model_validation/embeddings/StabilityAnalysisSynonyms.py +0 -0
- {validmind-2.0.0 → validmind-2.0.7}/validmind/tests/model_validation/embeddings/StabilityAnalysisTranslation.py +0 -0
- {validmind-2.0.0 → validmind-2.0.7}/validmind/tests/model_validation/sklearn/AdjustedMutualInformation.py +0 -0
- {validmind-2.0.0 → validmind-2.0.7}/validmind/tests/model_validation/sklearn/AdjustedRandIndex.py +0 -0
- {validmind-2.0.0 → validmind-2.0.7}/validmind/tests/model_validation/sklearn/ClusterCosineSimilarity.py +0 -0
- {validmind-2.0.0 → validmind-2.0.7}/validmind/tests/model_validation/sklearn/ClusterPerformance.py +0 -0
- {validmind-2.0.0 → validmind-2.0.7}/validmind/tests/model_validation/sklearn/ClusterPerformanceMetrics.py +0 -0
- {validmind-2.0.0 → validmind-2.0.7}/validmind/tests/model_validation/sklearn/CompletenessScore.py +0 -0
- {validmind-2.0.0 → validmind-2.0.7}/validmind/tests/model_validation/sklearn/ConfusionMatrix.py +0 -0
- {validmind-2.0.0 → validmind-2.0.7}/validmind/tests/model_validation/sklearn/FowlkesMallowsScore.py +0 -0
- {validmind-2.0.0 → validmind-2.0.7}/validmind/tests/model_validation/sklearn/HomogeneityScore.py +0 -0
- {validmind-2.0.0 → validmind-2.0.7}/validmind/tests/model_validation/sklearn/HyperParametersTuning.py +0 -0
- {validmind-2.0.0 → validmind-2.0.7}/validmind/tests/model_validation/sklearn/KMeansClustersOptimization.py +0 -0
- {validmind-2.0.0 → validmind-2.0.7}/validmind/tests/model_validation/sklearn/MinimumAccuracy.py +0 -0
- {validmind-2.0.0 → validmind-2.0.7}/validmind/tests/model_validation/sklearn/MinimumF1Score.py +0 -0
- {validmind-2.0.0 → validmind-2.0.7}/validmind/tests/model_validation/sklearn/MinimumROCAUCScore.py +0 -0
- {validmind-2.0.0 → validmind-2.0.7}/validmind/tests/model_validation/sklearn/ModelsPerformanceComparison.py +0 -0
- {validmind-2.0.0 → validmind-2.0.7}/validmind/tests/model_validation/sklearn/OverfitDiagnosis.py +0 -0
- {validmind-2.0.0 → validmind-2.0.7}/validmind/tests/model_validation/sklearn/PermutationFeatureImportance.py +0 -0
- {validmind-2.0.0 → validmind-2.0.7}/validmind/tests/model_validation/sklearn/PopulationStabilityIndex.py +0 -0
- {validmind-2.0.0 → validmind-2.0.7}/validmind/tests/model_validation/sklearn/PrecisionRecallCurve.py +0 -0
- {validmind-2.0.0 → validmind-2.0.7}/validmind/tests/model_validation/sklearn/ROCCurve.py +0 -0
- {validmind-2.0.0 → validmind-2.0.7}/validmind/tests/model_validation/sklearn/RegressionErrors.py +0 -0
- {validmind-2.0.0 → validmind-2.0.7}/validmind/tests/model_validation/sklearn/RegressionModelsPerformanceComparison.py +0 -0
- {validmind-2.0.0 → validmind-2.0.7}/validmind/tests/model_validation/sklearn/RegressionR2Square.py +0 -0
- {validmind-2.0.0 → validmind-2.0.7}/validmind/tests/model_validation/sklearn/RobustnessDiagnosis.py +0 -0
- {validmind-2.0.0 → validmind-2.0.7}/validmind/tests/model_validation/sklearn/SilhouettePlot.py +0 -0
- {validmind-2.0.0 → validmind-2.0.7}/validmind/tests/model_validation/sklearn/VMeasure.py +0 -0
- {validmind-2.0.0 → validmind-2.0.7}/validmind/tests/model_validation/sklearn/WeakspotsDiagnosis.py +0 -0
- {validmind-2.0.0 → validmind-2.0.7}/validmind/tests/model_validation/sklearn/__init__.py +0 -0
- {validmind-2.0.0 → validmind-2.0.7}/validmind/tests/model_validation/statsmodels/ADFTest.py +0 -0
- {validmind-2.0.0 → validmind-2.0.7}/validmind/tests/model_validation/statsmodels/AutoARIMA.py +0 -0
- {validmind-2.0.0 → validmind-2.0.7}/validmind/tests/model_validation/statsmodels/BoxPierce.py +0 -0
- {validmind-2.0.0 → validmind-2.0.7}/validmind/tests/model_validation/statsmodels/DFGLSArch.py +0 -0
- {validmind-2.0.0 → validmind-2.0.7}/validmind/tests/model_validation/statsmodels/DurbinWatsonTest.py +0 -0
- {validmind-2.0.0 → validmind-2.0.7}/validmind/tests/model_validation/statsmodels/FeatureImportanceAndSignificance.py +0 -0
- {validmind-2.0.0 → validmind-2.0.7}/validmind/tests/model_validation/statsmodels/GINITable.py +0 -0
- {validmind-2.0.0 → validmind-2.0.7}/validmind/tests/model_validation/statsmodels/JarqueBera.py +0 -0
- {validmind-2.0.0 → validmind-2.0.7}/validmind/tests/model_validation/statsmodels/KPSS.py +0 -0
- {validmind-2.0.0 → validmind-2.0.7}/validmind/tests/model_validation/statsmodels/KolmogorovSmirnov.py +0 -0
- {validmind-2.0.0 → validmind-2.0.7}/validmind/tests/model_validation/statsmodels/LJungBox.py +0 -0
- {validmind-2.0.0 → validmind-2.0.7}/validmind/tests/model_validation/statsmodels/Lilliefors.py +0 -0
- {validmind-2.0.0 → validmind-2.0.7}/validmind/tests/model_validation/statsmodels/LogRegressionConfusionMatrix.py +0 -0
- {validmind-2.0.0 → validmind-2.0.7}/validmind/tests/model_validation/statsmodels/LogisticRegCumulativeProb.py +0 -0
- {validmind-2.0.0 → validmind-2.0.7}/validmind/tests/model_validation/statsmodels/LogisticRegPredictionHistogram.py +0 -0
- {validmind-2.0.0 → validmind-2.0.7}/validmind/tests/model_validation/statsmodels/PDRatingClassPlot.py +0 -0
- {validmind-2.0.0 → validmind-2.0.7}/validmind/tests/model_validation/statsmodels/PhillipsPerronArch.py +0 -0
- {validmind-2.0.0 → validmind-2.0.7}/validmind/tests/model_validation/statsmodels/RegressionCoeffsPlot.py +0 -0
- {validmind-2.0.0 → validmind-2.0.7}/validmind/tests/model_validation/statsmodels/RegressionFeatureSignificance.py +0 -0
- {validmind-2.0.0 → validmind-2.0.7}/validmind/tests/model_validation/statsmodels/RegressionModelForecastPlot.py +0 -0
- {validmind-2.0.0 → validmind-2.0.7}/validmind/tests/model_validation/statsmodels/RegressionModelForecastPlotLevels.py +0 -0
- {validmind-2.0.0 → validmind-2.0.7}/validmind/tests/model_validation/statsmodels/RegressionModelInsampleComparison.py +0 -0
- {validmind-2.0.0 → validmind-2.0.7}/validmind/tests/model_validation/statsmodels/RegressionModelOutsampleComparison.py +0 -0
- {validmind-2.0.0 → validmind-2.0.7}/validmind/tests/model_validation/statsmodels/RegressionModelSensitivityPlot.py +0 -0
- {validmind-2.0.0 → validmind-2.0.7}/validmind/tests/model_validation/statsmodels/RegressionModelSummary.py +0 -0
- {validmind-2.0.0 → validmind-2.0.7}/validmind/tests/model_validation/statsmodels/RegressionModelsCoeffs.py +0 -0
- {validmind-2.0.0 → validmind-2.0.7}/validmind/tests/model_validation/statsmodels/RunsTest.py +0 -0
- {validmind-2.0.0 → validmind-2.0.7}/validmind/tests/model_validation/statsmodels/ScorecardBucketHistogram.py +0 -0
- {validmind-2.0.0 → validmind-2.0.7}/validmind/tests/model_validation/statsmodels/ScorecardHistogram.py +0 -0
- {validmind-2.0.0 → validmind-2.0.7}/validmind/tests/model_validation/statsmodels/ScorecardProbabilitiesHistogram.py +0 -0
- {validmind-2.0.0 → validmind-2.0.7}/validmind/tests/model_validation/statsmodels/ShapiroWilk.py +0 -0
- {validmind-2.0.0 → validmind-2.0.7}/validmind/tests/model_validation/statsmodels/ZivotAndrewsArch.py +0 -0
- {validmind-2.0.0 → validmind-2.0.7}/validmind/tests/model_validation/statsmodels/__init__.py +0 -0
- {validmind-2.0.0 → validmind-2.0.7}/validmind/tests/model_validation/statsmodels/statsutils.py +0 -0
- {validmind-2.0.0 → validmind-2.0.7}/validmind/tests/prompt_validation/Bias.py +0 -0
- {validmind-2.0.0 → validmind-2.0.7}/validmind/tests/prompt_validation/Clarity.py +0 -0
- {validmind-2.0.0 → validmind-2.0.7}/validmind/tests/prompt_validation/Conciseness.py +0 -0
- {validmind-2.0.0 → validmind-2.0.7}/validmind/tests/prompt_validation/Delimitation.py +0 -0
- {validmind-2.0.0 → validmind-2.0.7}/validmind/tests/prompt_validation/NegativeInstruction.py +0 -0
- {validmind-2.0.0 → validmind-2.0.7}/validmind/tests/prompt_validation/Robustness.py +0 -0
- {validmind-2.0.0 → validmind-2.0.7}/validmind/tests/prompt_validation/Specificity.py +0 -0
- {validmind-2.0.0 → validmind-2.0.7}/validmind/tests/prompt_validation/__init__.py +0 -0
- {validmind-2.0.0 → validmind-2.0.7}/validmind/tests/test_providers.py +0 -0
- {validmind-2.0.0 → validmind-2.0.7}/validmind/vm_models/__init__.py +0 -0
- {validmind-2.0.0 → validmind-2.0.7}/validmind/vm_models/model.py +0 -0
- {validmind-2.0.0 → validmind-2.0.7}/validmind/vm_models/test/metric_result.py +0 -0
- {validmind-2.0.0 → validmind-2.0.7}/validmind/vm_models/test/result_summary.py +0 -0
- {validmind-2.0.0 → validmind-2.0.7}/validmind/vm_models/test/threshold_test_result.py +0 -0
- {validmind-2.0.0 → validmind-2.0.7}/validmind/vm_models/test/unit_metric.py +0 -0
- {validmind-2.0.0 → validmind-2.0.7}/validmind/vm_models/test_suite/summary.py +0 -0
- {validmind-2.0.0 → validmind-2.0.7}/validmind/vm_models/test_suite/test_suite.py +0 -0
@@ -1,16 +1,17 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: validmind
|
3
|
-
Version: 2.0.0
|
3
|
+
Version: 2.0.7
|
4
4
|
Summary: ValidMind Developer Framework
|
5
5
|
License: Commercial License
|
6
6
|
Author: Andres Rodriguez
|
7
7
|
Author-email: andres@validmind.ai
|
8
|
-
Requires-Python: >=3.8,<3.11
|
8
|
+
Requires-Python: >=3.8,<3.12
|
9
9
|
Classifier: License :: Other/Proprietary License
|
10
10
|
Classifier: Programming Language :: Python :: 3
|
11
11
|
Classifier: Programming Language :: Python :: 3.8
|
12
12
|
Classifier: Programming Language :: Python :: 3.9
|
13
13
|
Classifier: Programming Language :: Python :: 3.10
|
14
|
+
Classifier: Programming Language :: Python :: 3.11
|
14
15
|
Provides-Extra: all
|
15
16
|
Provides-Extra: llm
|
16
17
|
Provides-Extra: pytorch
|
@@ -21,27 +22,22 @@ Requires-Dist: arch (>=5.4.0,<6.0.0)
|
|
21
22
|
Requires-Dist: bert-score (>=0.3.13,<0.4.0)
|
22
23
|
Requires-Dist: catboost (>=1.2,<2.0)
|
23
24
|
Requires-Dist: click (>=8.0.4,<9.0.0)
|
24
|
-
Requires-Dist: datasets (>=2.14.5,<3.0.0) ; extra == "all" or extra == "llm"
|
25
|
-
Requires-Dist: dython (>=0.7.1,<0.8.0)
|
26
25
|
Requires-Dist: evaluate (>=0.4.0,<0.5.0)
|
27
|
-
Requires-Dist: ipython (==7.34.0)
|
28
26
|
Requires-Dist: ipywidgets (>=8.0.6,<9.0.0)
|
29
|
-
Requires-Dist:
|
30
|
-
Requires-Dist: kaleido (==0.2.1)
|
27
|
+
Requires-Dist: kaleido (>=0.2.1,<0.3.0,!=0.2.1.post1)
|
31
28
|
Requires-Dist: langdetect (>=1.0.9,<2.0.0)
|
32
29
|
Requires-Dist: levenshtein (>=0.21.1,<0.22.0) ; extra == "all" or extra == "llm"
|
33
30
|
Requires-Dist: markdown (>=3.4.3,<4.0.0)
|
34
|
-
Requires-Dist: matplotlib (<3.8)
|
31
|
+
Requires-Dist: matplotlib (<3.8.3)
|
35
32
|
Requires-Dist: nltk (>=3.8.1,<4.0.0)
|
33
|
+
Requires-Dist: numba (<0.59.0)
|
36
34
|
Requires-Dist: numpy (>=1.23.3,<2.0.0)
|
37
35
|
Requires-Dist: openai (>=1.3.7,<2.0.0) ; extra == "all" or extra == "llm"
|
38
36
|
Requires-Dist: pandas (==1.5.3)
|
39
|
-
Requires-Dist: pandas-profiling (>=3.6.6,<4.0.0)
|
40
|
-
Requires-Dist: pdoc (>=13.1.1,<14.0.0)
|
41
37
|
Requires-Dist: plotly (>=5.14.1,<6.0.0)
|
42
38
|
Requires-Dist: plotly-express (>=0.4.1,<0.5.0)
|
39
|
+
Requires-Dist: polars (>=0.20.15,<0.21.0)
|
43
40
|
Requires-Dist: pycocoevalcap (>=1.2,<2.0) ; extra == "all" or extra == "llm"
|
44
|
-
Requires-Dist: pydantic (>=1.9.1,<2.0.0)
|
45
41
|
Requires-Dist: pypmml (>=0.9.17,<0.10.0)
|
46
42
|
Requires-Dist: python-dotenv (>=0.20.0,<0.21.0)
|
47
43
|
Requires-Dist: requests (>=2.27.1,<3.0.0)
|
@@ -50,7 +46,7 @@ Requires-Dist: rpy2 (>=3.5.10,<4.0.0) ; extra == "all" or extra == "r-support"
|
|
50
46
|
Requires-Dist: scikit-learn (>=1.0.2,<2.0.0)
|
51
47
|
Requires-Dist: scorecardpy (>=0.1.9.6,<0.2.0.0)
|
52
48
|
Requires-Dist: seaborn (>=0.11.2,<0.12.0)
|
53
|
-
Requires-Dist:
|
49
|
+
Requires-Dist: selfcheckgpt (>=0.1.7,<0.2.0)
|
54
50
|
Requires-Dist: sentry-sdk (>=1.24.0,<2.0.0)
|
55
51
|
Requires-Dist: shap (>=0.42.0,<0.43.0)
|
56
52
|
Requires-Dist: statsmodels (>=0.13.5,<0.14.0)
|
@@ -60,8 +56,8 @@ Requires-Dist: torch (>=1.10.0) ; extra == "all" or extra == "llm" or extra == "
|
|
60
56
|
Requires-Dist: torchmetrics (>=1.1.1,<2.0.0) ; extra == "all" or extra == "llm"
|
61
57
|
Requires-Dist: tqdm (>=4.64.0,<5.0.0)
|
62
58
|
Requires-Dist: transformers (>=4.32.0,<5.0.0) ; extra == "all" or extra == "llm" or extra == "transformers"
|
63
|
-
Requires-Dist: wget (>=3.2,<4.0)
|
64
59
|
Requires-Dist: xgboost (>=1.5.2,<2.0.0)
|
60
|
+
Requires-Dist: ydata-profiling (>=4.7.0,<5.0.0)
|
65
61
|
Description-Content-Type: text/markdown
|
66
62
|
|
67
63
|
# ValidMind Developer Framework
|
@@ -10,7 +10,7 @@ description = "ValidMind Developer Framework"
|
|
10
10
|
license = "Commercial License"
|
11
11
|
name = "validmind"
|
12
12
|
readme = "README.pypi.md"
|
13
|
-
version = "2.0.0"
|
13
|
+
version = "2.0.7"
|
14
14
|
|
15
15
|
[tool.poetry.dependencies]
|
16
16
|
aiohttp = {extras = ["speedups"], version = "^3.8.4"}
|
@@ -18,29 +18,24 @@ arch = "^5.4.0"
|
|
18
18
|
bert-score = "^0.3.13"
|
19
19
|
catboost = "^1.2"
|
20
20
|
click = "^8.0.4"
|
21
|
-
datasets = {version = "^2.14.5", optional = true}
|
22
|
-
dython = "^0.7.1"
|
23
21
|
evaluate = "^0.4.0"
|
24
|
-
ipython = "7.34.0"
|
25
22
|
ipywidgets = "^8.0.6"
|
26
|
-
|
27
|
-
kaleido = "0.2.1"
|
23
|
+
kaleido = "^0.2.1,!=0.2.1.post1"
|
28
24
|
langdetect = "^1.0.9"
|
29
25
|
levenshtein = {version = "^0.21.1", optional = true}
|
30
26
|
markdown = "^3.4.3"
|
31
|
-
matplotlib = "<3.8"
|
27
|
+
matplotlib = "<3.8.3"
|
32
28
|
nltk = "^3.8.1"
|
29
|
+
numba = "<0.59.0" # TODO: https://github.com/validmind/developer-framework/pull/28
|
33
30
|
numpy = "^1.23.3"
|
34
31
|
openai = {version = "^1.3.7", optional = true}
|
35
32
|
pandas = "1.5.3"
|
36
|
-
pandas-profiling = "^3.6.6"
|
37
|
-
pdoc = "^13.1.1"
|
38
33
|
plotly = "^5.14.1"
|
39
34
|
plotly-express = "^0.4.1"
|
35
|
+
polars = "^0.20.15"
|
40
36
|
pycocoevalcap = {version = "^1.2", optional = true}
|
41
|
-
pydantic = "^1.9.1"
|
42
37
|
pypmml = "^0.9.17"
|
43
|
-
python = ">=3.8,<3.11"
|
38
|
+
python = ">=3.8,<3.12"
|
44
39
|
python-dotenv = "^0.20.0"
|
45
40
|
requests = "^2.27.1"
|
46
41
|
rouge = "^1.0.1"
|
@@ -48,7 +43,7 @@ rpy2 = {version = "^3.5.10", optional = true}
|
|
48
43
|
scikit-learn = "^1.0.2"
|
49
44
|
scorecardpy = "^0.1.9.6"
|
50
45
|
seaborn = "^0.11.2"
|
51
|
-
|
46
|
+
selfcheckgpt = "^0.1.7"
|
52
47
|
sentry-sdk = "^1.24.0"
|
53
48
|
shap = "^0.42.0"
|
54
49
|
statsmodels = "^0.13.5"
|
@@ -57,9 +52,9 @@ textstat = "^0.7.3"
|
|
57
52
|
torch = {version = ">=1.10.0", optional = true}
|
58
53
|
torchmetrics = {version = "^1.1.1", optional = true}
|
59
54
|
tqdm = "^4.64.0"
|
60
|
-
transformers = "^4.32.0"
|
61
|
-
wget = "^3.2"
|
55
|
+
transformers = {version = "^4.32.0", optional = true}
|
62
56
|
xgboost = "^1.5.2"
|
57
|
+
ydata-profiling = "^4.7.0"
|
63
58
|
|
64
59
|
[tool.poetry.group.dev.dependencies]
|
65
60
|
black = "^22.1.0"
|
@@ -69,9 +64,10 @@ flake8 = "^4.0.1"
|
|
69
64
|
gradio = "^3.43.2"
|
70
65
|
ipykernel = "^6.22.0"
|
71
66
|
isort = "^5.12.0"
|
72
|
-
|
67
|
+
jupyter = "^1.0.0"
|
73
68
|
openai = "^1.3.7"
|
74
69
|
papermill = "^2.4.0"
|
70
|
+
pdoc = "^14.4.0"
|
75
71
|
pre-commit = "^3.3.3"
|
76
72
|
pytest = "^5.2"
|
77
73
|
sphinx = "^6.1.3"
|
@@ -88,7 +84,6 @@ all = [
|
|
88
84
|
"pycocoevalcap",
|
89
85
|
"torchmetrics",
|
90
86
|
"levenshtein",
|
91
|
-
"datasets",
|
92
87
|
]
|
93
88
|
llm = [
|
94
89
|
"torch",
|
@@ -97,7 +92,6 @@ llm = [
|
|
97
92
|
"pycocoevalcap",
|
98
93
|
"torchmetrics",
|
99
94
|
"levenshtein",
|
100
|
-
"datasets",
|
101
95
|
]
|
102
96
|
pytorch = ["torch"]
|
103
97
|
r-support = ["rpy2"]
|
@@ -110,3 +104,6 @@ requires = ["poetry-core>=1.0.0"]
|
|
110
104
|
[tool.isort]
|
111
105
|
known_first_party = "validmind"
|
112
106
|
profile = "black"
|
107
|
+
|
108
|
+
[tool.poetry.scripts]
|
109
|
+
vm-create-new-test = "scripts.create_new_test:generate_test"
|
@@ -50,7 +50,7 @@ from .__version__ import __version__ # noqa: E402
|
|
50
50
|
from .api_client import init
|
51
51
|
from .api_client import log_figure as _log_figure_async
|
52
52
|
from .api_client import log_metrics as _log_metrics_async
|
53
|
-
from .api_client import log_test_results
|
53
|
+
from .api_client import log_test_results, reload
|
54
54
|
from .client import ( # noqa: E402
|
55
55
|
get_test_suite,
|
56
56
|
init_dataset,
|
@@ -60,6 +60,7 @@ from .client import ( # noqa: E402
|
|
60
60
|
run_documentation_tests,
|
61
61
|
run_test_suite,
|
62
62
|
)
|
63
|
+
from .tests.decorator import metric
|
63
64
|
from .unit_metrics import run_metric
|
64
65
|
from .utils import run_async # noqa: E402
|
65
66
|
|
@@ -105,7 +106,9 @@ __all__ = [ # noqa
|
|
105
106
|
"init_dataset",
|
106
107
|
"init_model",
|
107
108
|
"init_r_model",
|
109
|
+
"metric",
|
108
110
|
"preview_template",
|
111
|
+
"reload",
|
109
112
|
"run_documentation_tests",
|
110
113
|
"run_test_suite",
|
111
114
|
"tests",
|
@@ -0,0 +1 @@
|
|
1
|
+
__version__ = "2.0.7"
|
@@ -0,0 +1,197 @@
|
|
1
|
+
# Copyright © 2023-2024 ValidMind Inc. All rights reserved.
|
2
|
+
# See the LICENSE file in the root of this repository for details.
|
3
|
+
# SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
|
4
|
+
|
5
|
+
import concurrent.futures
|
6
|
+
import os
|
7
|
+
|
8
|
+
from openai import AzureOpenAI, OpenAI
|
9
|
+
|
10
|
+
from .utils import clean_docstring
|
11
|
+
|
12
|
+
# System message sent with every description request. It tells the model what
# role to assume and the exact Markdown layout the generated text must follow.
# NOTE: the whole literal is stripped, so the prompt starts at "You are ...".
SYSTEM_PROMPT = """
You are an expert data scientist and MRM specialist tasked with providing concise and
objective insights based on the results of quantitative model or dataset analysis.

Examine the provided statistical test results and compose a brief summary. Highlight crucial
insights, focusing on the distribution characteristics, central tendencies (such as mean or median),
and the variability (including standard deviation and range) of the metrics. Evaluate how
these statistics might influence the development and performance of a predictive model. Identify
and explain any discernible trends or anomalies in the test results.

Your analysis will act as the description of the result in the model documentation.

Avoid long sentences and complex vocabulary.
Structure the response clearly and logically.
Use Markdown syntax to format the response.
Use the Test ID that is provided to form the Test Name e.g. "ClassImbalance" -> "Class Imbalance".
Use the following format for the response:
```
**<Test Name>** <continue to explain what it does in detail>...

The results of this test <detailed explanation of the results>...

In summary the following key insights can be gained from this <Test Type>

- **<key insight 1 - title>**: <explanation of key insight 1>
- ...<continue with any other key insights using the same format>
```
It is very important that the text is nicely formatted and contains enough information to be useful to the user as documentation.

- use valid markdown syntax: make sure to have two newlines between paragraphs and before bullet points etc.
""".strip()
|
43
|
+
# User message template for the text-only request: filled with the test id,
# its description, the raw results and the summary shown in the documentation.
USER_PROMPT = """
Test ID: {test_name}
Test Description: {test_description}
Test Results (the raw results of the test):
{test_results}
Test Summary (what the user sees in the documentation):
{test_summary}
""".strip()
# User message template for the figures-only request: the plots themselves are
# attached to the message as separate image parts.
USER_PROMPT_FIGURES = """
Test ID: {test_name}
Test Description: {test_description}
The attached plots show the results of the test.
""".strip()

# Client and model name cached by __get_client_and_model() after its first
# successful call; both stay None until then.
__client = None
__model = None

# Thread pool on which generate_description() schedules the LLM requests.
__executor = concurrent.futures.ThreadPoolExecutor()
|
61
|
+
|
62
|
+
|
63
|
+
def __get_client_and_model():
    """
    Return the ``(client, model)`` pair used for generating interpretations.

    The pair is built from environment variables on first use and cached in
    the module globals, so later calls return the same objects.

    ``OPENAI_API_KEY`` selects the OpenAI client (model taken from
    ``VM_OPENAI_MODEL``, defaulting to ``gpt-4-turbo-preview``). Otherwise
    ``AZURE_OPENAI_KEY`` selects the Azure client, which additionally needs
    ``AZURE_OPENAI_ENDPOINT`` and ``AZURE_OPENAI_MODEL``.

    Raises:
        ValueError: If neither key is set, or an Azure setting is missing.
    """
    global __client, __model

    # Fast path: a previous call already resolved both values.
    already_configured = __client and __model
    if already_configured:
        return __client, __model

    env = os.environ
    use_openai = "OPENAI_API_KEY" in env
    use_azure = "AZURE_OPENAI_KEY" in env

    if not use_openai and not use_azure:
        raise ValueError("OPENAI_API_KEY or AZURE_OPENAI_KEY must be set")

    # OpenAI takes precedence when both keys are present.
    if use_openai:
        __client = OpenAI(api_key=env.get("OPENAI_API_KEY"))
        __model = env.get("VM_OPENAI_MODEL", "gpt-4-turbo-preview")
        return __client, __model

    # Azure: validate the extra settings before constructing the client.
    if "AZURE_OPENAI_ENDPOINT" not in env:
        raise ValueError(
            "AZURE_OPENAI_ENDPOINT must be set to run LLM tests with Azure"
        )
    if "AZURE_OPENAI_MODEL" not in env:
        raise ValueError(
            "AZURE_OPENAI_MODEL must be set to run LLM tests with Azure"
        )

    __client = AzureOpenAI(
        azure_endpoint=env.get("AZURE_OPENAI_ENDPOINT"),
        api_key=env.get("AZURE_OPENAI_KEY"),
        api_version=env.get("AZURE_OPENAI_VERSION", "2023-05-15"),
    )
    __model = env.get("AZURE_OPENAI_MODEL")

    return __client, __model
|
98
|
+
|
99
|
+
|
100
|
+
class DescriptionFuture:
    """Handle for a description that is still being generated.

    ``generate_description`` returns one of these right away so that tests
    can keep running in parallel while the description is produced in the
    background. Asking for the description later blocks until it is ready.
    """

    def __init__(self, future):
        # The wrapped future whose result is the raw description text
        self._future = future

    def get_description(self):
        """Wait for the wrapped future and return its cleaned-up result."""
        # .result() blocks until the future has completed
        raw_description = self._future.result()
        return clean_docstring(raw_description)
|
115
|
+
|
116
|
+
|
117
|
+
def generate_description_async(
    test_name: str,
    test_description: str,
    test_results: str,
    test_summary: str,
    figures: list = None,
):
    """Generate the description for the test results.

    Sends one chat-completion request and returns the model's answer. When
    neither results nor a summary are given, the figures are sent as images
    instead; otherwise the results and summary are sent as text.

    Args:
        test_name: Test ID; only the part after the last "." is sent.
        test_description: Description of what the test does.
        test_results: Raw results of the test (may be empty).
        test_summary: Summary shown in the documentation (may be empty).
        figures: Figures to attach when there are no results/summary. Each
            must provide ``_get_b64_url()``.

    Returns:
        The generated text with surrounding backticks and whitespace removed.

    Raises:
        ValueError: If no results, summary or figures are provided, or if the
            client cannot be configured from the environment.
    """
    # Use the model resolved from the environment (VM_OPENAI_MODEL or
    # AZURE_OPENAI_MODEL) for text requests instead of a hard-coded name.
    client, model = __get_client_and_model()

    # get last part of test id
    test_name = test_name.split(".")[-1]

    if not test_results and not test_summary:
        if not figures:
            raise ValueError("No results, summary or figures provided")

        # The figures request keeps its own fixed model name.
        # NOTE(review): presumably because images need a vision-capable model;
        # confirm how this should behave with an Azure deployment.
        response = client.chat.completions.create(
            model="gpt-4-1106-vision-preview",
            messages=[
                {"role": "system", "content": SYSTEM_PROMPT},
                {
                    "role": "user",
                    "content": [
                        {
                            "type": "text",
                            "text": USER_PROMPT_FIGURES.format(
                                test_name=test_name,
                                test_description=test_description,
                            ),
                        },
                        *[
                            {
                                "type": "image_url",
                                "image_url": {
                                    "url": figure._get_b64_url(),
                                },
                            }
                            for figure in figures
                        ],
                    ],
                },
            ],
        )
    else:
        response = client.chat.completions.create(
            model=model,
            messages=[
                {"role": "system", "content": SYSTEM_PROMPT},
                {
                    "role": "user",
                    "content": USER_PROMPT.format(
                        test_name=test_name,
                        test_description=test_description,
                        test_results=test_results,
                        test_summary=test_summary,
                    ),
                },
            ],
        )

    # str.strip treats its argument as a set of characters, so this removes
    # every leading/trailing backtick (e.g. a wrapping ``` fence).
    return response.choices[0].message.content.strip("```").strip()
|
179
|
+
|
180
|
+
|
181
|
+
def generate_description(
    test_name: str,
    test_description: str,
    test_results: str,
    test_summary: str,
    figures: list = None,
):
    """Start generating a description in the background.

    The request is submitted to the module's thread pool and a
    ``DescriptionFuture`` wrapping the pending work is returned immediately.
    All arguments are forwarded unchanged to ``generate_description_async``.
    """
    call_args = (
        test_name,
        test_description,
        test_results,
        test_summary,
        figures,
    )
    pending = __executor.submit(generate_description_async, *call_args)

    return DescriptionFuture(pending)
|
@@ -171,6 +171,18 @@ def __ping() -> Dict[str, Any]:
|
|
171
171
|
)
|
172
172
|
|
173
173
|
|
174
|
+
def reload():
    """Reconnect to the ValidMind API and reload the project configuration

    Calls the module's `__ping()` helper and lets any failure propagate.

    Raises:
        Exception: Whatever `__ping()` raised, re-raised after optional reporting.
    """
    # NOTE(review): presumably __ping() is what refreshes the project
    # configuration; its body is not visible here, so confirm.

    try:
        __ping()
    except Exception as e:
        # if the api host is https, assume we're not in dev mode and send to sentry
        if _api_host.startswith("https://"):
            send_single_error(e)
        # The error is reported at most once above, then handed to the caller
        raise e
|
184
|
+
|
185
|
+
|
174
186
|
async def __get_url(endpoint: str, params: Optional[Dict[str, str]] = None) -> str:
|
175
187
|
if not _run_cuid:
|
176
188
|
start_run()
|
@@ -313,14 +325,14 @@ async def log_figures(figures: List[Figure]) -> Dict[str, Any]:
|
|
313
325
|
async def log_metadata(
|
314
326
|
content_id: str,
|
315
327
|
text: Optional[str] = None,
|
316
|
-
|
328
|
+
_json: Optional[Dict[str, Any]] = None,
|
317
329
|
) -> Dict[str, Any]:
|
318
330
|
"""Logs free-form metadata to ValidMind API.
|
319
331
|
|
320
332
|
Args:
|
321
333
|
content_id (str): Unique content identifier for the metadata
|
322
334
|
text (str, optional): Free-form text to assign to the metadata. Defaults to None.
|
323
|
-
|
335
|
+
_json (dict, optional): Free-form key-value pairs to assign to the metadata. Defaults to None.
|
324
336
|
|
325
337
|
Raises:
|
326
338
|
Exception: If the API call fails
|
@@ -331,8 +343,8 @@ async def log_metadata(
|
|
331
343
|
metadata_dict = {"content_id": content_id}
|
332
344
|
if text is not None:
|
333
345
|
metadata_dict["text"] = text
|
334
|
-
if
|
335
|
-
metadata_dict["
|
346
|
+
if _json is not None:
|
347
|
+
metadata_dict["json"] = _json
|
336
348
|
|
337
349
|
try:
|
338
350
|
return await _post(
|
@@ -7,6 +7,7 @@ Client interface for all data and model validation functions
|
|
7
7
|
"""
|
8
8
|
|
9
9
|
import pandas as pd
|
10
|
+
import polars as pl
|
10
11
|
|
11
12
|
from .api_client import _log_input as log_input
|
12
13
|
from .client_config import client_config
|
@@ -26,7 +27,13 @@ from .template import preview_template as _preview_template
|
|
26
27
|
from .test_suites import get_by_id as get_test_suite_by_id
|
27
28
|
from .utils import get_dataset_info, get_model_info
|
28
29
|
from .vm_models import TestInput, TestSuite, TestSuiteRunner
|
29
|
-
from .vm_models.dataset import
|
30
|
+
from .vm_models.dataset import (
|
31
|
+
DataFrameDataset,
|
32
|
+
NumpyDataset,
|
33
|
+
PolarsDataset,
|
34
|
+
TorchDataset,
|
35
|
+
VMDataset,
|
36
|
+
)
|
30
37
|
from .vm_models.model import VMModel, get_model_class
|
31
38
|
|
32
39
|
pd.option_context("format.precision", 2)
|
@@ -58,7 +65,7 @@ def init_dataset(
|
|
58
65
|
DataFrames at the moment.
|
59
66
|
|
60
67
|
Args:
|
61
|
-
dataset
|
68
|
+
dataset : dataset from various python libraries
|
62
69
|
model (VMModel): ValidMind model object
|
63
70
|
options (dict): A dictionary of options for the dataset
|
64
71
|
targets (vm.vm.DatasetTargets): A list of target variables
|
@@ -89,7 +96,7 @@ def init_dataset(
|
|
89
96
|
input_id = input_id or "dataset"
|
90
97
|
|
91
98
|
# Instantiate supported dataset types here
|
92
|
-
if
|
99
|
+
if isinstance(dataset, pd.DataFrame):
|
93
100
|
logger.info("Pandas dataset detected. Initializing VM Dataset instance...")
|
94
101
|
vm_dataset = DataFrameDataset(
|
95
102
|
input_id=input_id,
|
@@ -102,6 +109,19 @@ def init_dataset(
|
|
102
109
|
target_class_labels=class_labels,
|
103
110
|
date_time_index=date_time_index,
|
104
111
|
)
|
112
|
+
elif isinstance(dataset, pl.DataFrame):
|
113
|
+
logger.info("Polars dataset detected. Initializing VM Dataset instance...")
|
114
|
+
vm_dataset = PolarsDataset(
|
115
|
+
input_id=input_id,
|
116
|
+
raw_dataset=dataset,
|
117
|
+
model=model,
|
118
|
+
target_column=target_column,
|
119
|
+
feature_columns=feature_columns,
|
120
|
+
text_column=text_column,
|
121
|
+
extra_columns=extra_columns,
|
122
|
+
target_class_labels=class_labels,
|
123
|
+
date_time_index=date_time_index,
|
124
|
+
)
|
105
125
|
elif dataset_class == "ndarray":
|
106
126
|
logger.info("Numpy ndarray detected. Initializing VM Dataset instance...")
|
107
127
|
vm_dataset = NumpyDataset(
|
@@ -60,7 +60,7 @@ def preprocess(df):
|
|
60
60
|
return train_df, validation_df, test_df
|
61
61
|
|
62
62
|
|
63
|
-
def get_demo_test_config():
|
63
|
+
def get_demo_test_config(test_suite=None):
|
64
64
|
"""
|
65
65
|
Returns input configuration for the default documentation
|
66
66
|
template assigned to this demo model
|
@@ -81,7 +81,7 @@ def get_demo_test_config():
|
|
81
81
|
- The only exception is ClassifierPerformance since that runs twice: once
|
82
82
|
with the train_dataset (in sample) and once with the test_dataset (out of sample)
|
83
83
|
"""
|
84
|
-
default_config = vm.get_test_suite().get_default_config()
|
84
|
+
default_config = (test_suite or vm.get_test_suite()).get_default_config()
|
85
85
|
|
86
86
|
for _, test_config in default_config.items():
|
87
87
|
if "model" in test_config["inputs"]:
|
@@ -0,0 +1,98 @@
|
|
1
|
+
# Copyright © 2023-2024 ValidMind Inc. All rights reserved.
|
2
|
+
# See the LICENSE file in the root of this repository for details.
|
3
|
+
# SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
|
4
|
+
|
5
|
+
import os
|
6
|
+
import textwrap
|
7
|
+
|
8
|
+
import pandas as pd
|
9
|
+
from datasets import load_dataset
|
10
|
+
from IPython.display import HTML, display
|
11
|
+
from tabulate import tabulate
|
12
|
+
|
13
|
+
# Define column names
# These match the column labels that load_data() selects from the data.
text_column = "article"
target_column = "highlights"
gpt_35_prediction_column = "gpt_35_prediction"
# NOTE(review): named without the `_column` suffix used by the constant above;
# kept as-is because other modules may import it under this name.
t5_prediction = "t5_prediction"

# Define the path to the dataset directory
# (the "datasets" folder next to this module, where the offline CSVs are read from)
current_path = os.path.dirname(os.path.abspath(__file__))
dataset_path = os.path.join(current_path, "datasets")
|
22
|
+
|
23
|
+
|
24
|
+
def load_data(source="online", dataset_size=None):
    """
    Load the CNN/DailyMail data from either the online source or offline files.

    Online data has no prediction columns. Offline data is read from the CSV
    files in ``dataset_path`` and also carries the ``gpt_35_prediction`` and
    ``t5_prediction`` columns. It is split 70/30 into train and test with a
    fixed random seed, so the split is reproducible.

    :param source: 'online' for online data, 'offline' for offline data. Defaults to 'online'.
    :param dataset_size: Applicable if source is 'offline'. '100' or '500'
        (the integers 100 and 500 are accepted as well). Defaults to None.
    :return: Tuple ``(train_df, test_df)`` of DataFrames.
    :raises ValueError: If ``source`` is not 'online' or 'offline', or if
        ``dataset_size`` is not a supported size for offline data.
    """
    if source == "online":
        # Load online data without predictions
        cnn_dataset = load_dataset("cnn_dailymail", "3.0.0")
        train_df = cnn_dataset["train"].to_pandas()
        test_df = cnn_dataset["test"].to_pandas()

        # Process the DataFrame to include necessary columns
        train_df = train_df[["article", "highlights"]]
        test_df = test_df[["article", "highlights"]]

        return train_df, test_df

    elif source == "offline":
        # Determine the file name based on the dataset size
        offline_files = {
            "100": "cnn_dailymail_100_with_predictions.csv",
            "500": "cnn_dailymail_500_with_predictions.csv",
        }
        # str() lets callers pass 100/500 as integers too
        data_file_name = offline_files.get(str(dataset_size))
        if data_file_name is None:
            raise ValueError("Invalid dataset_size specified. Choose '100' or '500'.")

        # Construct the file path
        data_file = os.path.join(dataset_path, data_file_name)

        # Load the dataset
        df = pd.read_csv(data_file)
        df = df[["article", "highlights", "gpt_35_prediction", "t5_prediction"]]

        # 70% of the rows go to train; everything not sampled becomes test
        train_df = df.sample(frac=0.7, random_state=42)
        test_df = df.drop(train_df.index)
        return train_df, test_df

    else:
        raise ValueError("Invalid source specified. Choose 'online' or 'offline'.")
|
66
|
+
|
67
|
+
|
68
|
+
def _format_cell_text(text, width=50):
    """Private function to format a cell's text.

    Wraps every line of ``text`` to at most ``width`` characters while
    keeping the existing line breaks. Values that are not strings (for
    example NaN or None found in an object column) are returned unchanged
    instead of raising.
    """
    if not isinstance(text, str):
        return text
    return "\n".join(textwrap.fill(line, width=width) for line in text.split("\n"))
|
71
|
+
|
72
|
+
|
73
|
+
def _format_dataframe_for_tabulate(df):
    """Private function returning a copy of ``df`` prepared for tabulation.

    Every column whose dtype is ``object`` (likely strings) has its cells
    passed through ``_format_cell_text``; other columns are left untouched.
    The input DataFrame is not modified.
    """
    formatted = df.copy()

    for name in formatted.columns:
        holds_objects = formatted[name].dtype == object
        if not holds_objects:
            continue
        # Wrap the text of each cell in this column
        formatted[name] = formatted[name].apply(_format_cell_text)

    return formatted
|
83
|
+
|
84
|
+
|
85
|
+
def _dataframe_to_html_table(df):
    """Private function rendering ``df`` as an HTML table string.

    The column labels become the table headers and each row of values
    becomes one table row.
    """
    return tabulate(
        df.values.tolist(),
        headers=df.columns.tolist(),
        tablefmt="html",
    )
|
90
|
+
|
91
|
+
|
92
|
+
def display_nice(df, num_rows=None):
    """Primary function to format and display a DataFrame.

    Optionally limits the output to the first ``num_rows`` rows, wraps the
    text cells, converts the result to an HTML table and displays it.
    """
    subset = df if num_rows is None else df.head(num_rows)
    wrapped = _format_dataframe_for_tabulate(subset)
    display(HTML(_dataframe_to_html_table(wrapped)))
|