validmind 2.2.2__tar.gz → 2.2.5__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {validmind-2.2.2 → validmind-2.2.5}/PKG-INFO +1 -1
- {validmind-2.2.2 → validmind-2.2.5}/pyproject.toml +1 -1
- validmind-2.2.5/validmind/__version__.py +1 -0
- {validmind-2.2.2 → validmind-2.2.5}/validmind/ai.py +71 -50
- {validmind-2.2.2 → validmind-2.2.5}/validmind/api_client.py +40 -13
- {validmind-2.2.2 → validmind-2.2.5}/validmind/errors.py +1 -1
- {validmind-2.2.2 → validmind-2.2.5}/validmind/tests/__init__.py +1 -1
- {validmind-2.2.2 → validmind-2.2.5}/validmind/tests/decorator.py +10 -5
- {validmind-2.2.2 → validmind-2.2.5}/validmind/unit_metrics/composite.py +8 -6
- {validmind-2.2.2 → validmind-2.2.5}/validmind/utils.py +51 -1
- {validmind-2.2.2 → validmind-2.2.5}/validmind/vm_models/dataset/dataset.py +4 -2
- {validmind-2.2.2 → validmind-2.2.5}/validmind/vm_models/test/metric.py +9 -24
- {validmind-2.2.2 → validmind-2.2.5}/validmind/vm_models/test/result_wrapper.py +64 -5
- {validmind-2.2.2 → validmind-2.2.5}/validmind/vm_models/test/threshold_test.py +11 -28
- validmind-2.2.2/validmind/__version__.py +0 -1
- {validmind-2.2.2 → validmind-2.2.5}/LICENSE +0 -0
- {validmind-2.2.2 → validmind-2.2.5}/README.pypi.md +0 -0
- {validmind-2.2.2 → validmind-2.2.5}/validmind/__init__.py +0 -0
- {validmind-2.2.2 → validmind-2.2.5}/validmind/client.py +0 -0
- {validmind-2.2.2 → validmind-2.2.5}/validmind/client_config.py +0 -0
- {validmind-2.2.2 → validmind-2.2.5}/validmind/datasets/__init__.py +0 -0
- {validmind-2.2.2 → validmind-2.2.5}/validmind/datasets/classification/__init__.py +0 -0
- {validmind-2.2.2 → validmind-2.2.5}/validmind/datasets/classification/customer_churn.py +0 -0
- {validmind-2.2.2 → validmind-2.2.5}/validmind/datasets/classification/datasets/bank_customer_churn.csv +0 -0
- {validmind-2.2.2 → validmind-2.2.5}/validmind/datasets/classification/datasets/taiwan_credit.csv +0 -0
- {validmind-2.2.2 → validmind-2.2.5}/validmind/datasets/classification/taiwan_credit.py +0 -0
- {validmind-2.2.2 → validmind-2.2.5}/validmind/datasets/cluster/digits.py +0 -0
- {validmind-2.2.2 → validmind-2.2.5}/validmind/datasets/credit_risk/__init__.py +0 -0
- {validmind-2.2.2 → validmind-2.2.5}/validmind/datasets/credit_risk/datasets/lending_club_loan_data_2007_2014_clean.csv.gz +0 -0
- {validmind-2.2.2 → validmind-2.2.5}/validmind/datasets/credit_risk/lending_club.py +0 -0
- {validmind-2.2.2 → validmind-2.2.5}/validmind/datasets/llm/rag/__init__.py +0 -0
- {validmind-2.2.2 → validmind-2.2.5}/validmind/datasets/llm/rag/datasets/rfp_existing_questions_client_1.csv +0 -0
- {validmind-2.2.2 → validmind-2.2.5}/validmind/datasets/llm/rag/datasets/rfp_existing_questions_client_2.csv +0 -0
- {validmind-2.2.2 → validmind-2.2.5}/validmind/datasets/llm/rag/datasets/rfp_existing_questions_client_3.csv +0 -0
- {validmind-2.2.2 → validmind-2.2.5}/validmind/datasets/llm/rag/datasets/rfp_existing_questions_client_4.csv +0 -0
- {validmind-2.2.2 → validmind-2.2.5}/validmind/datasets/llm/rag/datasets/rfp_existing_questions_client_5.csv +0 -0
- {validmind-2.2.2 → validmind-2.2.5}/validmind/datasets/llm/rag/rfp.py +0 -0
- {validmind-2.2.2 → validmind-2.2.5}/validmind/datasets/nlp/__init__.py +0 -0
- {validmind-2.2.2 → validmind-2.2.5}/validmind/datasets/nlp/cnn_dailymail.py +0 -0
- {validmind-2.2.2 → validmind-2.2.5}/validmind/datasets/nlp/datasets/Covid_19.csv +0 -0
- {validmind-2.2.2 → validmind-2.2.5}/validmind/datasets/nlp/datasets/cnn_dailymail_100_with_predictions.csv +0 -0
- {validmind-2.2.2 → validmind-2.2.5}/validmind/datasets/nlp/datasets/cnn_dailymail_500_with_predictions.csv +0 -0
- {validmind-2.2.2 → validmind-2.2.5}/validmind/datasets/nlp/datasets/sentiments_with_predictions.csv +0 -0
- {validmind-2.2.2 → validmind-2.2.5}/validmind/datasets/nlp/twitter_covid_19.py +0 -0
- {validmind-2.2.2 → validmind-2.2.5}/validmind/datasets/regression/__init__.py +0 -0
- {validmind-2.2.2 → validmind-2.2.5}/validmind/datasets/regression/california_housing.py +0 -0
- {validmind-2.2.2 → validmind-2.2.5}/validmind/datasets/regression/datasets/fred/CPIAUCSL.csv +0 -0
- {validmind-2.2.2 → validmind-2.2.5}/validmind/datasets/regression/datasets/fred/CSUSHPISA.csv +0 -0
- {validmind-2.2.2 → validmind-2.2.5}/validmind/datasets/regression/datasets/fred/DRSFRMACBS.csv +0 -0
- {validmind-2.2.2 → validmind-2.2.5}/validmind/datasets/regression/datasets/fred/FEDFUNDS.csv +0 -0
- {validmind-2.2.2 → validmind-2.2.5}/validmind/datasets/regression/datasets/fred/GDP.csv +0 -0
- {validmind-2.2.2 → validmind-2.2.5}/validmind/datasets/regression/datasets/fred/GDPC1.csv +0 -0
- {validmind-2.2.2 → validmind-2.2.5}/validmind/datasets/regression/datasets/fred/GS10.csv +0 -0
- {validmind-2.2.2 → validmind-2.2.5}/validmind/datasets/regression/datasets/fred/GS3.csv +0 -0
- {validmind-2.2.2 → validmind-2.2.5}/validmind/datasets/regression/datasets/fred/GS5.csv +0 -0
- {validmind-2.2.2 → validmind-2.2.5}/validmind/datasets/regression/datasets/fred/MORTGAGE30US.csv +0 -0
- {validmind-2.2.2 → validmind-2.2.5}/validmind/datasets/regression/datasets/fred/UNRATE.csv +0 -0
- {validmind-2.2.2 → validmind-2.2.5}/validmind/datasets/regression/datasets/fred_loan_rates.csv +0 -0
- {validmind-2.2.2 → validmind-2.2.5}/validmind/datasets/regression/datasets/fred_loan_rates_test_1.csv +0 -0
- {validmind-2.2.2 → validmind-2.2.5}/validmind/datasets/regression/datasets/fred_loan_rates_test_2.csv +0 -0
- {validmind-2.2.2 → validmind-2.2.5}/validmind/datasets/regression/datasets/fred_loan_rates_test_3.csv +0 -0
- {validmind-2.2.2 → validmind-2.2.5}/validmind/datasets/regression/datasets/fred_loan_rates_test_4.csv +0 -0
- {validmind-2.2.2 → validmind-2.2.5}/validmind/datasets/regression/datasets/fred_loan_rates_test_5.csv +0 -0
- {validmind-2.2.2 → validmind-2.2.5}/validmind/datasets/regression/datasets/lending_club_loan_rates.csv +0 -0
- {validmind-2.2.2 → validmind-2.2.5}/validmind/datasets/regression/fred.py +0 -0
- {validmind-2.2.2 → validmind-2.2.5}/validmind/datasets/regression/lending_club.py +0 -0
- {validmind-2.2.2 → validmind-2.2.5}/validmind/datasets/regression/models/fred_loan_rates_model_1.pkl +0 -0
- {validmind-2.2.2 → validmind-2.2.5}/validmind/datasets/regression/models/fred_loan_rates_model_2.pkl +0 -0
- {validmind-2.2.2 → validmind-2.2.5}/validmind/datasets/regression/models/fred_loan_rates_model_3.pkl +0 -0
- {validmind-2.2.2 → validmind-2.2.5}/validmind/datasets/regression/models/fred_loan_rates_model_4.pkl +0 -0
- {validmind-2.2.2 → validmind-2.2.5}/validmind/datasets/regression/models/fred_loan_rates_model_5.pkl +0 -0
- {validmind-2.2.2 → validmind-2.2.5}/validmind/html_templates/__init__.py +0 -0
- {validmind-2.2.2 → validmind-2.2.5}/validmind/html_templates/content_blocks.py +0 -0
- {validmind-2.2.2 → validmind-2.2.5}/validmind/input_registry.py +0 -0
- {validmind-2.2.2 → validmind-2.2.5}/validmind/logging.py +0 -0
- {validmind-2.2.2 → validmind-2.2.5}/validmind/models/__init__.py +0 -0
- {validmind-2.2.2 → validmind-2.2.5}/validmind/models/foundation.py +0 -0
- {validmind-2.2.2 → validmind-2.2.5}/validmind/models/function.py +0 -0
- {validmind-2.2.2 → validmind-2.2.5}/validmind/models/huggingface.py +0 -0
- {validmind-2.2.2 → validmind-2.2.5}/validmind/models/metadata.py +0 -0
- {validmind-2.2.2 → validmind-2.2.5}/validmind/models/pipeline.py +0 -0
- {validmind-2.2.2 → validmind-2.2.5}/validmind/models/pytorch.py +0 -0
- {validmind-2.2.2 → validmind-2.2.5}/validmind/models/r_model.py +0 -0
- {validmind-2.2.2 → validmind-2.2.5}/validmind/models/sklearn.py +0 -0
- {validmind-2.2.2 → validmind-2.2.5}/validmind/template.py +0 -0
- {validmind-2.2.2 → validmind-2.2.5}/validmind/test_suites/__init__.py +0 -0
- {validmind-2.2.2 → validmind-2.2.5}/validmind/test_suites/classifier.py +0 -0
- {validmind-2.2.2 → validmind-2.2.5}/validmind/test_suites/cluster.py +0 -0
- {validmind-2.2.2 → validmind-2.2.5}/validmind/test_suites/embeddings.py +0 -0
- {validmind-2.2.2 → validmind-2.2.5}/validmind/test_suites/llm.py +0 -0
- {validmind-2.2.2 → validmind-2.2.5}/validmind/test_suites/nlp.py +0 -0
- {validmind-2.2.2 → validmind-2.2.5}/validmind/test_suites/parameters_optimization.py +0 -0
- {validmind-2.2.2 → validmind-2.2.5}/validmind/test_suites/regression.py +0 -0
- {validmind-2.2.2 → validmind-2.2.5}/validmind/test_suites/statsmodels_timeseries.py +0 -0
- {validmind-2.2.2 → validmind-2.2.5}/validmind/test_suites/summarization.py +0 -0
- {validmind-2.2.2 → validmind-2.2.5}/validmind/test_suites/tabular_datasets.py +0 -0
- {validmind-2.2.2 → validmind-2.2.5}/validmind/test_suites/text_data.py +0 -0
- {validmind-2.2.2 → validmind-2.2.5}/validmind/test_suites/time_series.py +0 -0
- {validmind-2.2.2 → validmind-2.2.5}/validmind/tests/data_validation/ACFandPACFPlot.py +0 -0
- {validmind-2.2.2 → validmind-2.2.5}/validmind/tests/data_validation/ANOVAOneWayTable.py +0 -0
- {validmind-2.2.2 → validmind-2.2.5}/validmind/tests/data_validation/AutoAR.py +0 -0
- {validmind-2.2.2 → validmind-2.2.5}/validmind/tests/data_validation/AutoMA.py +0 -0
- {validmind-2.2.2 → validmind-2.2.5}/validmind/tests/data_validation/AutoSeasonality.py +0 -0
- {validmind-2.2.2 → validmind-2.2.5}/validmind/tests/data_validation/AutoStationarity.py +0 -0
- {validmind-2.2.2 → validmind-2.2.5}/validmind/tests/data_validation/BivariateFeaturesBarPlots.py +0 -0
- {validmind-2.2.2 → validmind-2.2.5}/validmind/tests/data_validation/BivariateHistograms.py +0 -0
- {validmind-2.2.2 → validmind-2.2.5}/validmind/tests/data_validation/BivariateScatterPlots.py +0 -0
- {validmind-2.2.2 → validmind-2.2.5}/validmind/tests/data_validation/ChiSquaredFeaturesTable.py +0 -0
- {validmind-2.2.2 → validmind-2.2.5}/validmind/tests/data_validation/ClassImbalance.py +0 -0
- {validmind-2.2.2 → validmind-2.2.5}/validmind/tests/data_validation/DatasetDescription.py +0 -0
- {validmind-2.2.2 → validmind-2.2.5}/validmind/tests/data_validation/DatasetSplit.py +0 -0
- {validmind-2.2.2 → validmind-2.2.5}/validmind/tests/data_validation/DefaultRatesbyRiskBandPlot.py +0 -0
- {validmind-2.2.2 → validmind-2.2.5}/validmind/tests/data_validation/DescriptiveStatistics.py +0 -0
- {validmind-2.2.2 → validmind-2.2.5}/validmind/tests/data_validation/Duplicates.py +0 -0
- {validmind-2.2.2 → validmind-2.2.5}/validmind/tests/data_validation/EngleGrangerCoint.py +0 -0
- {validmind-2.2.2 → validmind-2.2.5}/validmind/tests/data_validation/FeatureTargetCorrelationPlot.py +0 -0
- {validmind-2.2.2 → validmind-2.2.5}/validmind/tests/data_validation/HeatmapFeatureCorrelations.py +0 -0
- {validmind-2.2.2 → validmind-2.2.5}/validmind/tests/data_validation/HighCardinality.py +0 -0
- {validmind-2.2.2 → validmind-2.2.5}/validmind/tests/data_validation/HighPearsonCorrelation.py +0 -0
- {validmind-2.2.2 → validmind-2.2.5}/validmind/tests/data_validation/IQROutliersBarPlot.py +0 -0
- {validmind-2.2.2 → validmind-2.2.5}/validmind/tests/data_validation/IQROutliersTable.py +0 -0
- {validmind-2.2.2 → validmind-2.2.5}/validmind/tests/data_validation/IsolationForestOutliers.py +0 -0
- {validmind-2.2.2 → validmind-2.2.5}/validmind/tests/data_validation/LaggedCorrelationHeatmap.py +0 -0
- {validmind-2.2.2 → validmind-2.2.5}/validmind/tests/data_validation/MissingValues.py +0 -0
- {validmind-2.2.2 → validmind-2.2.5}/validmind/tests/data_validation/MissingValuesBarPlot.py +0 -0
- {validmind-2.2.2 → validmind-2.2.5}/validmind/tests/data_validation/MissingValuesRisk.py +0 -0
- {validmind-2.2.2 → validmind-2.2.5}/validmind/tests/data_validation/PearsonCorrelationMatrix.py +0 -0
- {validmind-2.2.2 → validmind-2.2.5}/validmind/tests/data_validation/PiTCreditScoresHistogram.py +0 -0
- {validmind-2.2.2 → validmind-2.2.5}/validmind/tests/data_validation/PiTPDHistogram.py +0 -0
- {validmind-2.2.2 → validmind-2.2.5}/validmind/tests/data_validation/RollingStatsPlot.py +0 -0
- {validmind-2.2.2 → validmind-2.2.5}/validmind/tests/data_validation/ScatterPlot.py +0 -0
- {validmind-2.2.2 → validmind-2.2.5}/validmind/tests/data_validation/SeasonalDecompose.py +0 -0
- {validmind-2.2.2 → validmind-2.2.5}/validmind/tests/data_validation/Skewness.py +0 -0
- {validmind-2.2.2 → validmind-2.2.5}/validmind/tests/data_validation/SpreadPlot.py +0 -0
- {validmind-2.2.2 → validmind-2.2.5}/validmind/tests/data_validation/TabularCategoricalBarPlots.py +0 -0
- {validmind-2.2.2 → validmind-2.2.5}/validmind/tests/data_validation/TabularDateTimeHistograms.py +0 -0
- {validmind-2.2.2 → validmind-2.2.5}/validmind/tests/data_validation/TabularDescriptionTables.py +0 -0
- {validmind-2.2.2 → validmind-2.2.5}/validmind/tests/data_validation/TabularNumericalHistograms.py +0 -0
- {validmind-2.2.2 → validmind-2.2.5}/validmind/tests/data_validation/TargetRateBarPlots.py +0 -0
- {validmind-2.2.2 → validmind-2.2.5}/validmind/tests/data_validation/TimeSeriesFrequency.py +0 -0
- {validmind-2.2.2 → validmind-2.2.5}/validmind/tests/data_validation/TimeSeriesHistogram.py +0 -0
- {validmind-2.2.2 → validmind-2.2.5}/validmind/tests/data_validation/TimeSeriesLinePlot.py +0 -0
- {validmind-2.2.2 → validmind-2.2.5}/validmind/tests/data_validation/TimeSeriesMissingValues.py +0 -0
- {validmind-2.2.2 → validmind-2.2.5}/validmind/tests/data_validation/TimeSeriesOutliers.py +0 -0
- {validmind-2.2.2 → validmind-2.2.5}/validmind/tests/data_validation/TooManyZeroValues.py +0 -0
- {validmind-2.2.2 → validmind-2.2.5}/validmind/tests/data_validation/UniqueRows.py +0 -0
- {validmind-2.2.2 → validmind-2.2.5}/validmind/tests/data_validation/WOEBinPlots.py +0 -0
- {validmind-2.2.2 → validmind-2.2.5}/validmind/tests/data_validation/WOEBinTable.py +0 -0
- {validmind-2.2.2 → validmind-2.2.5}/validmind/tests/data_validation/__init__.py +0 -0
- {validmind-2.2.2 → validmind-2.2.5}/validmind/tests/data_validation/nlp/CommonWords.py +0 -0
- {validmind-2.2.2 → validmind-2.2.5}/validmind/tests/data_validation/nlp/Hashtags.py +0 -0
- {validmind-2.2.2 → validmind-2.2.5}/validmind/tests/data_validation/nlp/LanguageDetection.py +0 -0
- {validmind-2.2.2 → validmind-2.2.5}/validmind/tests/data_validation/nlp/Mentions.py +0 -0
- {validmind-2.2.2 → validmind-2.2.5}/validmind/tests/data_validation/nlp/PolarityAndSubjectivity.py +0 -0
- {validmind-2.2.2 → validmind-2.2.5}/validmind/tests/data_validation/nlp/Punctuations.py +0 -0
- {validmind-2.2.2 → validmind-2.2.5}/validmind/tests/data_validation/nlp/Sentiment.py +0 -0
- {validmind-2.2.2 → validmind-2.2.5}/validmind/tests/data_validation/nlp/StopWords.py +0 -0
- {validmind-2.2.2 → validmind-2.2.5}/validmind/tests/data_validation/nlp/TextDescription.py +0 -0
- {validmind-2.2.2 → validmind-2.2.5}/validmind/tests/data_validation/nlp/Toxicity.py +0 -0
- {validmind-2.2.2 → validmind-2.2.5}/validmind/tests/data_validation/nlp/__init__.py +0 -0
- {validmind-2.2.2 → validmind-2.2.5}/validmind/tests/model_validation/BertScore.py +0 -0
- {validmind-2.2.2 → validmind-2.2.5}/validmind/tests/model_validation/BleuScore.py +0 -0
- {validmind-2.2.2 → validmind-2.2.5}/validmind/tests/model_validation/ClusterSizeDistribution.py +0 -0
- {validmind-2.2.2 → validmind-2.2.5}/validmind/tests/model_validation/ContextualRecall.py +0 -0
- {validmind-2.2.2 → validmind-2.2.5}/validmind/tests/model_validation/FeaturesAUC.py +0 -0
- {validmind-2.2.2 → validmind-2.2.5}/validmind/tests/model_validation/MeteorScore.py +0 -0
- {validmind-2.2.2 → validmind-2.2.5}/validmind/tests/model_validation/ModelMetadata.py +0 -0
- {validmind-2.2.2 → validmind-2.2.5}/validmind/tests/model_validation/RegardScore.py +0 -0
- {validmind-2.2.2 → validmind-2.2.5}/validmind/tests/model_validation/RegressionResidualsPlot.py +0 -0
- {validmind-2.2.2 → validmind-2.2.5}/validmind/tests/model_validation/RougeScore.py +0 -0
- {validmind-2.2.2 → validmind-2.2.5}/validmind/tests/model_validation/TokenDisparity.py +0 -0
- {validmind-2.2.2 → validmind-2.2.5}/validmind/tests/model_validation/ToxicityScore.py +0 -0
- {validmind-2.2.2 → validmind-2.2.5}/validmind/tests/model_validation/__init__.py +0 -0
- {validmind-2.2.2 → validmind-2.2.5}/validmind/tests/model_validation/embeddings/ClusterDistribution.py +0 -0
- {validmind-2.2.2 → validmind-2.2.5}/validmind/tests/model_validation/embeddings/CosineSimilarityComparison.py +0 -0
- {validmind-2.2.2 → validmind-2.2.5}/validmind/tests/model_validation/embeddings/CosineSimilarityDistribution.py +0 -0
- {validmind-2.2.2 → validmind-2.2.5}/validmind/tests/model_validation/embeddings/CosineSimilarityHeatmap.py +0 -0
- {validmind-2.2.2 → validmind-2.2.5}/validmind/tests/model_validation/embeddings/DescriptiveAnalytics.py +0 -0
- {validmind-2.2.2 → validmind-2.2.5}/validmind/tests/model_validation/embeddings/EmbeddingsVisualization2D.py +0 -0
- {validmind-2.2.2 → validmind-2.2.5}/validmind/tests/model_validation/embeddings/EuclideanDistanceComparison.py +0 -0
- {validmind-2.2.2 → validmind-2.2.5}/validmind/tests/model_validation/embeddings/EuclideanDistanceHeatmap.py +0 -0
- {validmind-2.2.2 → validmind-2.2.5}/validmind/tests/model_validation/embeddings/PCAComponentsPairwisePlots.py +0 -0
- {validmind-2.2.2 → validmind-2.2.5}/validmind/tests/model_validation/embeddings/StabilityAnalysis.py +0 -0
- {validmind-2.2.2 → validmind-2.2.5}/validmind/tests/model_validation/embeddings/StabilityAnalysisKeyword.py +0 -0
- {validmind-2.2.2 → validmind-2.2.5}/validmind/tests/model_validation/embeddings/StabilityAnalysisRandomNoise.py +0 -0
- {validmind-2.2.2 → validmind-2.2.5}/validmind/tests/model_validation/embeddings/StabilityAnalysisSynonyms.py +0 -0
- {validmind-2.2.2 → validmind-2.2.5}/validmind/tests/model_validation/embeddings/StabilityAnalysisTranslation.py +0 -0
- {validmind-2.2.2 → validmind-2.2.5}/validmind/tests/model_validation/embeddings/TSNEComponentsPairwisePlots.py +0 -0
- {validmind-2.2.2 → validmind-2.2.5}/validmind/tests/model_validation/ragas/AnswerCorrectness.py +0 -0
- {validmind-2.2.2 → validmind-2.2.5}/validmind/tests/model_validation/ragas/AnswerRelevance.py +0 -0
- {validmind-2.2.2 → validmind-2.2.5}/validmind/tests/model_validation/ragas/AnswerSimilarity.py +0 -0
- {validmind-2.2.2 → validmind-2.2.5}/validmind/tests/model_validation/ragas/AspectCritique.py +0 -0
- {validmind-2.2.2 → validmind-2.2.5}/validmind/tests/model_validation/ragas/ContextEntityRecall.py +0 -0
- {validmind-2.2.2 → validmind-2.2.5}/validmind/tests/model_validation/ragas/ContextPrecision.py +0 -0
- {validmind-2.2.2 → validmind-2.2.5}/validmind/tests/model_validation/ragas/ContextRecall.py +0 -0
- {validmind-2.2.2 → validmind-2.2.5}/validmind/tests/model_validation/ragas/ContextRelevancy.py +0 -0
- {validmind-2.2.2 → validmind-2.2.5}/validmind/tests/model_validation/ragas/Faithfulness.py +0 -0
- {validmind-2.2.2 → validmind-2.2.5}/validmind/tests/model_validation/ragas/utils.py +0 -0
- {validmind-2.2.2 → validmind-2.2.5}/validmind/tests/model_validation/sklearn/AdjustedMutualInformation.py +0 -0
- {validmind-2.2.2 → validmind-2.2.5}/validmind/tests/model_validation/sklearn/AdjustedRandIndex.py +0 -0
- {validmind-2.2.2 → validmind-2.2.5}/validmind/tests/model_validation/sklearn/ClassifierPerformance.py +0 -0
- {validmind-2.2.2 → validmind-2.2.5}/validmind/tests/model_validation/sklearn/ClusterCosineSimilarity.py +0 -0
- {validmind-2.2.2 → validmind-2.2.5}/validmind/tests/model_validation/sklearn/ClusterPerformance.py +0 -0
- {validmind-2.2.2 → validmind-2.2.5}/validmind/tests/model_validation/sklearn/ClusterPerformanceMetrics.py +0 -0
- {validmind-2.2.2 → validmind-2.2.5}/validmind/tests/model_validation/sklearn/CompletenessScore.py +0 -0
- {validmind-2.2.2 → validmind-2.2.5}/validmind/tests/model_validation/sklearn/ConfusionMatrix.py +0 -0
- {validmind-2.2.2 → validmind-2.2.5}/validmind/tests/model_validation/sklearn/FowlkesMallowsScore.py +0 -0
- {validmind-2.2.2 → validmind-2.2.5}/validmind/tests/model_validation/sklearn/HomogeneityScore.py +0 -0
- {validmind-2.2.2 → validmind-2.2.5}/validmind/tests/model_validation/sklearn/HyperParametersTuning.py +0 -0
- {validmind-2.2.2 → validmind-2.2.5}/validmind/tests/model_validation/sklearn/KMeansClustersOptimization.py +0 -0
- {validmind-2.2.2 → validmind-2.2.5}/validmind/tests/model_validation/sklearn/MinimumAccuracy.py +0 -0
- {validmind-2.2.2 → validmind-2.2.5}/validmind/tests/model_validation/sklearn/MinimumF1Score.py +0 -0
- {validmind-2.2.2 → validmind-2.2.5}/validmind/tests/model_validation/sklearn/MinimumROCAUCScore.py +0 -0
- {validmind-2.2.2 → validmind-2.2.5}/validmind/tests/model_validation/sklearn/ModelsPerformanceComparison.py +0 -0
- {validmind-2.2.2 → validmind-2.2.5}/validmind/tests/model_validation/sklearn/OverfitDiagnosis.py +0 -0
- {validmind-2.2.2 → validmind-2.2.5}/validmind/tests/model_validation/sklearn/PermutationFeatureImportance.py +0 -0
- {validmind-2.2.2 → validmind-2.2.5}/validmind/tests/model_validation/sklearn/PopulationStabilityIndex.py +0 -0
- {validmind-2.2.2 → validmind-2.2.5}/validmind/tests/model_validation/sklearn/PrecisionRecallCurve.py +0 -0
- {validmind-2.2.2 → validmind-2.2.5}/validmind/tests/model_validation/sklearn/ROCCurve.py +0 -0
- {validmind-2.2.2 → validmind-2.2.5}/validmind/tests/model_validation/sklearn/RegressionErrors.py +0 -0
- {validmind-2.2.2 → validmind-2.2.5}/validmind/tests/model_validation/sklearn/RegressionModelsPerformanceComparison.py +0 -0
- {validmind-2.2.2 → validmind-2.2.5}/validmind/tests/model_validation/sklearn/RegressionR2Square.py +0 -0
- {validmind-2.2.2 → validmind-2.2.5}/validmind/tests/model_validation/sklearn/RobustnessDiagnosis.py +0 -0
- {validmind-2.2.2 → validmind-2.2.5}/validmind/tests/model_validation/sklearn/SHAPGlobalImportance.py +0 -0
- {validmind-2.2.2 → validmind-2.2.5}/validmind/tests/model_validation/sklearn/SilhouettePlot.py +0 -0
- {validmind-2.2.2 → validmind-2.2.5}/validmind/tests/model_validation/sklearn/TrainingTestDegradation.py +0 -0
- {validmind-2.2.2 → validmind-2.2.5}/validmind/tests/model_validation/sklearn/VMeasure.py +0 -0
- {validmind-2.2.2 → validmind-2.2.5}/validmind/tests/model_validation/sklearn/WeakspotsDiagnosis.py +0 -0
- {validmind-2.2.2 → validmind-2.2.5}/validmind/tests/model_validation/sklearn/__init__.py +0 -0
- {validmind-2.2.2 → validmind-2.2.5}/validmind/tests/model_validation/statsmodels/ADF.py +0 -0
- {validmind-2.2.2 → validmind-2.2.5}/validmind/tests/model_validation/statsmodels/ADFTest.py +0 -0
- {validmind-2.2.2 → validmind-2.2.5}/validmind/tests/model_validation/statsmodels/AutoARIMA.py +0 -0
- {validmind-2.2.2 → validmind-2.2.5}/validmind/tests/model_validation/statsmodels/BoxPierce.py +0 -0
- {validmind-2.2.2 → validmind-2.2.5}/validmind/tests/model_validation/statsmodels/CumulativePredictionProbabilities.py +0 -0
- {validmind-2.2.2 → validmind-2.2.5}/validmind/tests/model_validation/statsmodels/DFGLSArch.py +0 -0
- {validmind-2.2.2 → validmind-2.2.5}/validmind/tests/model_validation/statsmodels/DurbinWatsonTest.py +0 -0
- {validmind-2.2.2 → validmind-2.2.5}/validmind/tests/model_validation/statsmodels/FeatureImportanceAndSignificance.py +0 -0
- {validmind-2.2.2 → validmind-2.2.5}/validmind/tests/model_validation/statsmodels/GINITable.py +0 -0
- {validmind-2.2.2 → validmind-2.2.5}/validmind/tests/model_validation/statsmodels/JarqueBera.py +0 -0
- {validmind-2.2.2 → validmind-2.2.5}/validmind/tests/model_validation/statsmodels/KPSS.py +0 -0
- {validmind-2.2.2 → validmind-2.2.5}/validmind/tests/model_validation/statsmodels/KolmogorovSmirnov.py +0 -0
- {validmind-2.2.2 → validmind-2.2.5}/validmind/tests/model_validation/statsmodels/LJungBox.py +0 -0
- {validmind-2.2.2 → validmind-2.2.5}/validmind/tests/model_validation/statsmodels/Lilliefors.py +0 -0
- {validmind-2.2.2 → validmind-2.2.5}/validmind/tests/model_validation/statsmodels/PDRatingClassPlot.py +0 -0
- {validmind-2.2.2 → validmind-2.2.5}/validmind/tests/model_validation/statsmodels/PhillipsPerronArch.py +0 -0
- {validmind-2.2.2 → validmind-2.2.5}/validmind/tests/model_validation/statsmodels/PredictionProbabilitiesHistogram.py +0 -0
- {validmind-2.2.2 → validmind-2.2.5}/validmind/tests/model_validation/statsmodels/RegressionCoeffsPlot.py +0 -0
- {validmind-2.2.2 → validmind-2.2.5}/validmind/tests/model_validation/statsmodels/RegressionFeatureSignificance.py +0 -0
- {validmind-2.2.2 → validmind-2.2.5}/validmind/tests/model_validation/statsmodels/RegressionModelForecastPlot.py +0 -0
- {validmind-2.2.2 → validmind-2.2.5}/validmind/tests/model_validation/statsmodels/RegressionModelForecastPlotLevels.py +0 -0
- {validmind-2.2.2 → validmind-2.2.5}/validmind/tests/model_validation/statsmodels/RegressionModelInsampleComparison.py +0 -0
- {validmind-2.2.2 → validmind-2.2.5}/validmind/tests/model_validation/statsmodels/RegressionModelOutsampleComparison.py +0 -0
- {validmind-2.2.2 → validmind-2.2.5}/validmind/tests/model_validation/statsmodels/RegressionModelSensitivityPlot.py +0 -0
- {validmind-2.2.2 → validmind-2.2.5}/validmind/tests/model_validation/statsmodels/RegressionModelSummary.py +0 -0
- {validmind-2.2.2 → validmind-2.2.5}/validmind/tests/model_validation/statsmodels/RegressionModelsCoeffs.py +0 -0
- {validmind-2.2.2 → validmind-2.2.5}/validmind/tests/model_validation/statsmodels/RegressionModelsPerformance.py +0 -0
- {validmind-2.2.2 → validmind-2.2.5}/validmind/tests/model_validation/statsmodels/RegressionPermutationFeatureImportance.py +0 -0
- {validmind-2.2.2 → validmind-2.2.5}/validmind/tests/model_validation/statsmodels/ResidualsVisualInspection.py +0 -0
- {validmind-2.2.2 → validmind-2.2.5}/validmind/tests/model_validation/statsmodels/RunsTest.py +0 -0
- {validmind-2.2.2 → validmind-2.2.5}/validmind/tests/model_validation/statsmodels/ScorecardHistogram.py +0 -0
- {validmind-2.2.2 → validmind-2.2.5}/validmind/tests/model_validation/statsmodels/ShapiroWilk.py +0 -0
- {validmind-2.2.2 → validmind-2.2.5}/validmind/tests/model_validation/statsmodels/ZivotAndrewsArch.py +0 -0
- {validmind-2.2.2 → validmind-2.2.5}/validmind/tests/model_validation/statsmodels/__init__.py +0 -0
- {validmind-2.2.2 → validmind-2.2.5}/validmind/tests/model_validation/statsmodels/statsutils.py +0 -0
- {validmind-2.2.2 → validmind-2.2.5}/validmind/tests/prompt_validation/Bias.py +0 -0
- {validmind-2.2.2 → validmind-2.2.5}/validmind/tests/prompt_validation/Clarity.py +0 -0
- {validmind-2.2.2 → validmind-2.2.5}/validmind/tests/prompt_validation/Conciseness.py +0 -0
- {validmind-2.2.2 → validmind-2.2.5}/validmind/tests/prompt_validation/Delimitation.py +0 -0
- {validmind-2.2.2 → validmind-2.2.5}/validmind/tests/prompt_validation/NegativeInstruction.py +0 -0
- {validmind-2.2.2 → validmind-2.2.5}/validmind/tests/prompt_validation/Robustness.py +0 -0
- {validmind-2.2.2 → validmind-2.2.5}/validmind/tests/prompt_validation/Specificity.py +0 -0
- {validmind-2.2.2 → validmind-2.2.5}/validmind/tests/prompt_validation/__init__.py +0 -0
- {validmind-2.2.2 → validmind-2.2.5}/validmind/tests/prompt_validation/ai_powered_test.py +0 -0
- {validmind-2.2.2 → validmind-2.2.5}/validmind/tests/test_providers.py +0 -0
- {validmind-2.2.2 → validmind-2.2.5}/validmind/unit_metrics/__init__.py +0 -0
- {validmind-2.2.2 → validmind-2.2.5}/validmind/unit_metrics/classification/sklearn/Accuracy.py +0 -0
- {validmind-2.2.2 → validmind-2.2.5}/validmind/unit_metrics/classification/sklearn/F1.py +0 -0
- {validmind-2.2.2 → validmind-2.2.5}/validmind/unit_metrics/classification/sklearn/Precision.py +0 -0
- {validmind-2.2.2 → validmind-2.2.5}/validmind/unit_metrics/classification/sklearn/ROC_AUC.py +0 -0
- {validmind-2.2.2 → validmind-2.2.5}/validmind/unit_metrics/classification/sklearn/Recall.py +0 -0
- {validmind-2.2.2 → validmind-2.2.5}/validmind/unit_metrics/regression/GiniCoefficient.py +0 -0
- {validmind-2.2.2 → validmind-2.2.5}/validmind/unit_metrics/regression/HuberLoss.py +0 -0
- {validmind-2.2.2 → validmind-2.2.5}/validmind/unit_metrics/regression/KolmogorovSmirnovStatistic.py +0 -0
- {validmind-2.2.2 → validmind-2.2.5}/validmind/unit_metrics/regression/MeanAbsolutePercentageError.py +0 -0
- {validmind-2.2.2 → validmind-2.2.5}/validmind/unit_metrics/regression/MeanBiasDeviation.py +0 -0
- {validmind-2.2.2 → validmind-2.2.5}/validmind/unit_metrics/regression/QuantileLoss.py +0 -0
- {validmind-2.2.2 → validmind-2.2.5}/validmind/unit_metrics/regression/sklearn/AdjustedRSquaredScore.py +0 -0
- {validmind-2.2.2 → validmind-2.2.5}/validmind/unit_metrics/regression/sklearn/MeanAbsoluteError.py +0 -0
- {validmind-2.2.2 → validmind-2.2.5}/validmind/unit_metrics/regression/sklearn/MeanSquaredError.py +0 -0
- {validmind-2.2.2 → validmind-2.2.5}/validmind/unit_metrics/regression/sklearn/RSquaredScore.py +0 -0
- {validmind-2.2.2 → validmind-2.2.5}/validmind/unit_metrics/regression/sklearn/RootMeanSquaredError.py +0 -0
- {validmind-2.2.2 → validmind-2.2.5}/validmind/vm_models/__init__.py +0 -0
- {validmind-2.2.2 → validmind-2.2.5}/validmind/vm_models/dataset/__init__.py +0 -0
- {validmind-2.2.2 → validmind-2.2.5}/validmind/vm_models/dataset/utils.py +0 -0
- {validmind-2.2.2 → validmind-2.2.5}/validmind/vm_models/figure.py +0 -0
- {validmind-2.2.2 → validmind-2.2.5}/validmind/vm_models/model.py +0 -0
- {validmind-2.2.2 → validmind-2.2.5}/validmind/vm_models/test/metric_result.py +0 -0
- {validmind-2.2.2 → validmind-2.2.5}/validmind/vm_models/test/output_template.py +0 -0
- {validmind-2.2.2 → validmind-2.2.5}/validmind/vm_models/test/result_summary.py +0 -0
- {validmind-2.2.2 → validmind-2.2.5}/validmind/vm_models/test/test.py +0 -0
- {validmind-2.2.2 → validmind-2.2.5}/validmind/vm_models/test/threshold_test_result.py +0 -0
- {validmind-2.2.2 → validmind-2.2.5}/validmind/vm_models/test_context.py +0 -0
- {validmind-2.2.2 → validmind-2.2.5}/validmind/vm_models/test_suite/runner.py +0 -0
- {validmind-2.2.2 → validmind-2.2.5}/validmind/vm_models/test_suite/summary.py +0 -0
- {validmind-2.2.2 → validmind-2.2.5}/validmind/vm_models/test_suite/test.py +0 -0
- {validmind-2.2.2 → validmind-2.2.5}/validmind/vm_models/test_suite/test_suite.py +0 -0
@@ -0,0 +1 @@
|
|
1
|
+
__version__ = "2.2.5"
|
@@ -8,45 +8,65 @@ import os
|
|
8
8
|
from openai import AzureOpenAI, OpenAI
|
9
9
|
|
10
10
|
SYSTEM_PROMPT = """
|
11
|
-
You are an expert data scientist and MRM specialist
|
12
|
-
|
11
|
+
You are an expert data scientist and MRM specialist.
|
12
|
+
You are tasked with analyzing the results of a quantitative test run on some model or dataset.
|
13
|
+
Your goal is to create a test description that will act as part of the model documentation.
|
14
|
+
You will provide both the developer and other consumers of the documentation with a clear and concise "interpretation" of the results they will see.
|
15
|
+
The overarching theme to maintain is MRM documentation.
|
13
16
|
|
14
|
-
Examine the provided statistical test results and compose a
|
15
|
-
|
16
|
-
|
17
|
-
these statistics might influence the development and performance of a predictive model. Identify
|
18
|
-
and explain any discernible trends or anomalies in the test results.
|
19
|
-
|
20
|
-
Your analysis will act as the description of the result in the model documentation.
|
17
|
+
Examine the provided statistical test results and compose a description of the results.
|
18
|
+
This will act as the description and interpretation of the result in the model documentation.
|
19
|
+
It will be displayed alongside the test results table and figures.
|
21
20
|
|
22
21
|
Avoid long sentences and complex vocabulary.
|
23
22
|
Structure the response clearly and logically.
|
24
|
-
Use valid Markdown syntax to format the response
|
23
|
+
Use valid Markdown syntax to format the response.
|
24
|
+
Respond only with your analysis and insights, not the verbatim test results.
|
25
|
+
Respond only with the markdown content, no explanation or context for your response is necessary.
|
25
26
|
Use the Test ID that is provided to form the Test Name e.g. "ClassImbalance" -> "Class Imbalance".
|
27
|
+
|
28
|
+
Explain the test, its purpose, its mechanism/formula etc and why it is useful.
|
29
|
+
If relevant, provide a very brief description of the way this test is used in model/dataset evaluation and how it is interpreted.
|
30
|
+
Highlight the key insights from the test results. The key insights should be concise and easily understood.
|
31
|
+
End the response with any closing remarks, summary or additional useful information.
|
32
|
+
|
26
33
|
Use the following format for the response (feel free to modify slightly if necessary):
|
27
|
-
```
|
28
|
-
**<Test Name>** <continue to explain what it does in detail>...
|
29
34
|
|
30
|
-
|
35
|
+
<ResponseFormat>
|
36
|
+
**<Test Name>** calculates the xyz <continue to explain what it does in detail>...
|
37
|
+
|
38
|
+
This test is useful for <explain why and for what this test is useful>...
|
31
39
|
|
32
|
-
|
40
|
+
**Key Insights:**
|
33
41
|
|
34
|
-
|
42
|
+
The following key insights can be identified in the test results:
|
43
|
+
|
44
|
+
- **<key insight 1 - title>**: <concise explanation of key insight 1>
|
35
45
|
- ...<continue with any other key insights using the same format>
|
36
|
-
|
37
|
-
It is very important that the text is nicely formatted and contains enough information to be useful to the user as documentation.
|
46
|
+
</ResponseFormat>
|
38
47
|
""".strip()
|
48
|
+
|
49
|
+
|
39
50
|
USER_PROMPT = """
|
40
|
-
Test ID: {test_name}
|
41
|
-
|
42
|
-
Test
|
43
|
-
{
|
44
|
-
Test
|
51
|
+
Test ID: `{test_name}`
|
52
|
+
|
53
|
+
<Test Docstring>
|
54
|
+
{test_description}
|
55
|
+
</Test Docstring>
|
56
|
+
|
57
|
+
<Test Results Summary>
|
45
58
|
{test_summary}
|
59
|
+
</Test Results Summary>
|
46
60
|
""".strip()
|
61
|
+
|
62
|
+
|
47
63
|
USER_PROMPT_FIGURES = """
|
48
|
-
Test ID: {test_name}
|
49
|
-
|
64
|
+
Test ID: `{test_name}`
|
65
|
+
|
66
|
+
<Test Docstring>
|
67
|
+
{test_description}
|
68
|
+
</Test Docstring>
|
69
|
+
|
50
70
|
The attached plots show the results of the test.
|
51
71
|
""".strip()
|
52
72
|
|
@@ -113,21 +133,39 @@ class DescriptionFuture:
|
|
113
133
|
def generate_description_async(
|
114
134
|
test_name: str,
|
115
135
|
test_description: str,
|
116
|
-
test_results: str,
|
117
136
|
test_summary: str,
|
118
137
|
figures: list = None,
|
119
138
|
):
|
120
139
|
"""Generate the description for the test results"""
|
121
|
-
|
140
|
+
if not test_summary and not figures:
|
141
|
+
raise ValueError("No summary or figures provided - cannot generate description")
|
122
142
|
|
143
|
+
client, _ = __get_client_and_model()
|
123
144
|
# get last part of test id
|
124
145
|
test_name = test_name.split(".")[-1]
|
125
146
|
|
126
|
-
if
|
127
|
-
|
128
|
-
|
147
|
+
if test_summary:
|
148
|
+
return (
|
149
|
+
client.chat.completions.create(
|
150
|
+
model="gpt-4o",
|
151
|
+
messages=[
|
152
|
+
{"role": "system", "content": SYSTEM_PROMPT},
|
153
|
+
{
|
154
|
+
"role": "user",
|
155
|
+
"content": USER_PROMPT.format(
|
156
|
+
test_name=test_name,
|
157
|
+
test_description=test_description,
|
158
|
+
test_summary=test_summary,
|
159
|
+
),
|
160
|
+
},
|
161
|
+
],
|
162
|
+
)
|
163
|
+
.choices[0]
|
164
|
+
.message.content.strip()
|
165
|
+
)
|
129
166
|
|
130
|
-
|
167
|
+
return (
|
168
|
+
client.chat.completions.create(
|
131
169
|
model="gpt-4o",
|
132
170
|
messages=[
|
133
171
|
{"role": "system", "content": SYSTEM_PROMPT},
|
@@ -154,30 +192,14 @@ def generate_description_async(
|
|
154
192
|
},
|
155
193
|
],
|
156
194
|
)
|
157
|
-
|
158
|
-
|
159
|
-
|
160
|
-
messages=[
|
161
|
-
{"role": "system", "content": SYSTEM_PROMPT},
|
162
|
-
{
|
163
|
-
"role": "user",
|
164
|
-
"content": USER_PROMPT.format(
|
165
|
-
test_name=test_name,
|
166
|
-
test_description=test_description,
|
167
|
-
test_results=test_results,
|
168
|
-
test_summary=test_summary,
|
169
|
-
),
|
170
|
-
},
|
171
|
-
],
|
172
|
-
)
|
173
|
-
|
174
|
-
return response.choices[0].message.content.strip("```").strip()
|
195
|
+
.choices[0]
|
196
|
+
.message.content.strip()
|
197
|
+
)
|
175
198
|
|
176
199
|
|
177
200
|
def generate_description(
|
178
201
|
test_name: str,
|
179
202
|
test_description: str,
|
180
|
-
test_results: str,
|
181
203
|
test_summary: str,
|
182
204
|
figures: list = None,
|
183
205
|
):
|
@@ -185,7 +207,6 @@ def generate_description(
|
|
185
207
|
generate_description_async,
|
186
208
|
test_name,
|
187
209
|
test_description,
|
188
|
-
test_results,
|
189
210
|
test_summary,
|
190
211
|
figures,
|
191
212
|
)
|
@@ -161,14 +161,20 @@ def __ping() -> Dict[str, Any]:
|
|
161
161
|
|
162
162
|
init_sentry(client_info.get("sentry_config", {}))
|
163
163
|
|
164
|
+
# Only show this confirmation the first time we connect to the API
|
165
|
+
ack_connected = False
|
166
|
+
if client_config.project is None:
|
167
|
+
ack_connected = True
|
168
|
+
|
164
169
|
client_config.project = client_info["project"]
|
165
170
|
client_config.documentation_template = client_info.get("documentation_template", {})
|
166
171
|
client_config.feature_flags = client_info.get("feature_flags", {})
|
167
172
|
|
168
|
-
|
169
|
-
|
170
|
-
|
171
|
-
|
173
|
+
if ack_connected:
|
174
|
+
logger.info(
|
175
|
+
f"Connected to ValidMind. Project: {client_config.project['name']}"
|
176
|
+
f" ({client_config.project['cuid']})"
|
177
|
+
)
|
172
178
|
|
173
179
|
|
174
180
|
def reload():
|
@@ -358,7 +364,11 @@ async def log_metadata(
|
|
358
364
|
|
359
365
|
|
360
366
|
async def log_metrics(
|
361
|
-
metrics: List[MetricResult],
|
367
|
+
metrics: List[MetricResult],
|
368
|
+
inputs: List[str],
|
369
|
+
output_template: str = None,
|
370
|
+
section_id: str = None,
|
371
|
+
position: int = None,
|
362
372
|
) -> Dict[str, Any]:
|
363
373
|
"""Logs metrics to ValidMind API.
|
364
374
|
|
@@ -366,6 +376,8 @@ async def log_metrics(
|
|
366
376
|
metrics (list): A list of MetricResult objects
|
367
377
|
inputs (list): A list of input keys (names) that were used to run the test
|
368
378
|
output_template (str): The optional output template for the test
|
379
|
+
section_id (str): The section ID add a test driven block to the documentation
|
380
|
+
position (int): The position in the section to add the test driven block
|
369
381
|
|
370
382
|
Raises:
|
371
383
|
Exception: If the API call fails
|
@@ -373,7 +385,14 @@ async def log_metrics(
|
|
373
385
|
Returns:
|
374
386
|
dict: The response from the API
|
375
387
|
"""
|
388
|
+
params = {}
|
389
|
+
if section_id:
|
390
|
+
params["section_id"] = section_id
|
391
|
+
if position is not None:
|
392
|
+
params["position"] = position
|
393
|
+
|
376
394
|
data = []
|
395
|
+
|
377
396
|
for metric in metrics:
|
378
397
|
metric_data = {
|
379
398
|
**metric.serialize(),
|
@@ -388,6 +407,7 @@ async def log_metrics(
|
|
388
407
|
try:
|
389
408
|
return await _post(
|
390
409
|
"log_metrics",
|
410
|
+
params=params,
|
391
411
|
data=json.dumps(data, cls=NumpyEncoder, allow_nan=False),
|
392
412
|
)
|
393
413
|
except Exception as e:
|
@@ -396,7 +416,10 @@ async def log_metrics(
|
|
396
416
|
|
397
417
|
|
398
418
|
async def log_test_result(
|
399
|
-
result: ThresholdTestResults,
|
419
|
+
result: ThresholdTestResults,
|
420
|
+
inputs: List[str],
|
421
|
+
section_id: str = None,
|
422
|
+
position: int = None,
|
400
423
|
) -> Dict[str, Any]:
|
401
424
|
"""Logs test results information
|
402
425
|
|
@@ -406,8 +429,8 @@ async def log_test_result(
|
|
406
429
|
Args:
|
407
430
|
result (validmind.ThresholdTestResults): A ThresholdTestResults object
|
408
431
|
inputs (list): A list of input keys (names) that were used to run the test
|
409
|
-
|
410
|
-
|
432
|
+
section_id (str, optional): The section ID add a test driven block to the documentation
|
433
|
+
position (int): The position in the section to add the test driven block
|
411
434
|
|
412
435
|
Raises:
|
413
436
|
Exception: If the API call fails
|
@@ -415,10 +438,16 @@ async def log_test_result(
|
|
415
438
|
Returns:
|
416
439
|
dict: The response from the API
|
417
440
|
"""
|
441
|
+
params = {}
|
442
|
+
if section_id:
|
443
|
+
params["section_id"] = section_id
|
444
|
+
if position is not None:
|
445
|
+
params["position"] = position
|
446
|
+
|
418
447
|
try:
|
419
448
|
return await _post(
|
420
449
|
"log_test_results",
|
421
|
-
params=
|
450
|
+
params=params,
|
422
451
|
data=json.dumps(
|
423
452
|
{
|
424
453
|
**result.serialize(),
|
@@ -434,7 +463,7 @@ async def log_test_result(
|
|
434
463
|
|
435
464
|
|
436
465
|
def log_test_results(
|
437
|
-
results: List[ThresholdTestResults], inputs
|
466
|
+
results: List[ThresholdTestResults], inputs
|
438
467
|
) -> List[Callable[..., Dict[str, Any]]]:
|
439
468
|
"""Logs test results information
|
440
469
|
|
@@ -444,8 +473,6 @@ def log_test_results(
|
|
444
473
|
Args:
|
445
474
|
results (list): A list of ThresholdTestResults objects
|
446
475
|
inputs (list): A list of input keys (names) that were used to run the test
|
447
|
-
dataset_type (str, optional): The type of dataset. Can be one of "training",
|
448
|
-
"test", or "validation". Defaults to "training".
|
449
476
|
|
450
477
|
Raises:
|
451
478
|
Exception: If the API call fails
|
@@ -456,7 +483,7 @@ def log_test_results(
|
|
456
483
|
try:
|
457
484
|
responses = [] # TODO: use asyncio.gather
|
458
485
|
for result in results:
|
459
|
-
responses.append(run_async(log_test_result, result, inputs
|
486
|
+
responses.append(run_async(log_test_result, result, inputs))
|
460
487
|
except Exception as e:
|
461
488
|
logger.error("Error logging test results to ValidMind API")
|
462
489
|
raise e
|
@@ -339,7 +339,7 @@ def raise_api_error(error_string):
|
|
339
339
|
try:
|
340
340
|
json_response = json.loads(error_string)
|
341
341
|
api_code = json_response.get("code")
|
342
|
-
api_description = json_response.get("description")
|
342
|
+
api_description = json_response.get("description", json_response.get("message"))
|
343
343
|
except json.decoder.JSONDecodeError:
|
344
344
|
api_code = "unknown"
|
345
345
|
api_description = error_string
|
@@ -388,7 +388,7 @@ def describe_test(test_id: str = None, raw: bool = False, show: bool = True):
|
|
388
388
|
),
|
389
389
|
table_display="table" if details["Params"] else "none",
|
390
390
|
example_inputs=json.dumps(
|
391
|
-
{name: f"my_vm_{name}" for name in details["Required Inputs"]},
|
391
|
+
{name: f"my_vm_{name}" for name in (details["Required Inputs"] or [])},
|
392
392
|
indent=4,
|
393
393
|
),
|
394
394
|
example_params=json.dumps(details["Params"] or {}, indent=4, cls=NumpyEncoder),
|
@@ -15,6 +15,7 @@ import pandas as pd
|
|
15
15
|
|
16
16
|
from validmind.errors import MissingRequiredTestInputError
|
17
17
|
from validmind.logging import get_logger
|
18
|
+
from validmind.utils import get_description_metadata
|
18
19
|
from validmind.vm_models import (
|
19
20
|
Metric,
|
20
21
|
MetricResult,
|
@@ -113,20 +114,24 @@ def _build_result(results, test_id, description, output_template, inputs): # no
|
|
113
114
|
else:
|
114
115
|
process_item(results)
|
115
116
|
|
117
|
+
result_summary = ResultSummary(results=tables)
|
118
|
+
|
116
119
|
return MetricResultWrapper(
|
117
120
|
result_id=test_id,
|
118
121
|
metric=MetricResult(
|
119
122
|
key=test_id,
|
120
123
|
ref_id=ref_id,
|
121
124
|
value="Empty",
|
122
|
-
summary=
|
125
|
+
summary=result_summary,
|
123
126
|
),
|
124
127
|
figures=figures,
|
125
128
|
result_metadata=[
|
126
|
-
|
127
|
-
|
128
|
-
|
129
|
-
|
129
|
+
get_description_metadata(
|
130
|
+
test_id=test_id,
|
131
|
+
default_description=description,
|
132
|
+
summary=result_summary.serialize(),
|
133
|
+
figures=figures,
|
134
|
+
)
|
130
135
|
],
|
131
136
|
inputs=inputs,
|
132
137
|
output_template=output_template,
|
@@ -8,7 +8,7 @@ from uuid import uuid4
|
|
8
8
|
|
9
9
|
from ..logging import get_logger
|
10
10
|
from ..tests.decorator import _inspect_signature
|
11
|
-
from ..utils import run_async, test_id_to_name
|
11
|
+
from ..utils import get_description_metadata, run_async, test_id_to_name
|
12
12
|
from ..vm_models.test.metric import Metric
|
13
13
|
from ..vm_models.test.metric_result import MetricResult
|
14
14
|
from ..vm_models.test.result_summary import ResultSummary, ResultTable
|
@@ -200,13 +200,15 @@ def run_metrics(
|
|
200
200
|
</style>
|
201
201
|
"""
|
202
202
|
|
203
|
+
result_summary = ResultSummary(results=[ResultTable(data=[results])])
|
203
204
|
result_wrapper = MetricResultWrapper(
|
204
205
|
result_id=test_id,
|
205
206
|
result_metadata=[
|
206
|
-
|
207
|
-
|
208
|
-
|
209
|
-
|
207
|
+
get_description_metadata(
|
208
|
+
test_id=test_id,
|
209
|
+
default_description=description,
|
210
|
+
summary=result_summary.serialize(),
|
211
|
+
),
|
210
212
|
{
|
211
213
|
"content_id": f"composite_metric_def:{test_id}:unit_metrics",
|
212
214
|
"json": metric_ids,
|
@@ -222,7 +224,7 @@ def run_metrics(
|
|
222
224
|
key=test_id,
|
223
225
|
ref_id=str(uuid4()),
|
224
226
|
value=results,
|
225
|
-
summary=
|
227
|
+
summary=result_summary,
|
226
228
|
),
|
227
229
|
)
|
228
230
|
|
@@ -6,6 +6,7 @@ import asyncio
|
|
6
6
|
import difflib
|
7
7
|
import json
|
8
8
|
import math
|
9
|
+
import os
|
9
10
|
import re
|
10
11
|
import sys
|
11
12
|
from platform import python_version
|
@@ -25,6 +26,7 @@ from matplotlib.axes._axes import _log as matplotlib_axes_logger
|
|
25
26
|
from numpy import ndarray
|
26
27
|
from tabulate import tabulate
|
27
28
|
|
29
|
+
from .ai import generate_description
|
28
30
|
from .html_templates.content_blocks import math_jax_snippet, python_syntax_highlighting
|
29
31
|
|
30
32
|
DEFAULT_BIG_NUMBER_DECIMALS = 2
|
@@ -432,7 +434,9 @@ def display(widget_or_html, syntax_highlighting=True, mathjax=True):
|
|
432
434
|
def md_to_html(md: str, mathml=False) -> str:
|
433
435
|
"""Converts Markdown to HTML using mistune with plugins"""
|
434
436
|
# use mistune with math plugin to convert to html
|
435
|
-
html = mistune.create_markdown(
|
437
|
+
html = mistune.create_markdown(
|
438
|
+
plugins=["math", "table", "strikethrough", "footnotes"]
|
439
|
+
)(md)
|
436
440
|
|
437
441
|
if not mathml:
|
438
442
|
# return the html as is (with latex that will be rendered by MathJax)
|
@@ -453,3 +457,49 @@ def md_to_html(md: str, mathml=False) -> str:
|
|
453
457
|
)
|
454
458
|
|
455
459
|
return html
|
460
|
+
|
461
|
+
|
462
|
+
def get_description_metadata(
|
463
|
+
test_id,
|
464
|
+
default_description,
|
465
|
+
summary=None,
|
466
|
+
figures=None,
|
467
|
+
prefix="metric_description",
|
468
|
+
):
|
469
|
+
"""Get Metadata Dictionary for a Test or Metric Result
|
470
|
+
|
471
|
+
Generates an LLM interpretation of the test results or uses the default
|
472
|
+
description and returns a metadata object that can be logged with the test results.
|
473
|
+
|
474
|
+
To enable LLM-generated descriptions, set the VALIDMIND_LLM_DESCRIPTIONS_ENABLED
|
475
|
+
environment variable to "true". The default description will be used if LLM
|
476
|
+
descriptions are disabled.
|
477
|
+
|
478
|
+
Note: Either the summary or figures must be provided to generate the description.
|
479
|
+
|
480
|
+
Args:
|
481
|
+
test_id (str): The test ID
|
482
|
+
default_description (str): The default description for the test
|
483
|
+
summary (Any): The test summary or results to interpret
|
484
|
+
figures (List[Figure]): The figures to attach to the test suite result
|
485
|
+
prefix (str): The prefix to use for the content ID (Default: "metric_description")
|
486
|
+
|
487
|
+
Returns:
|
488
|
+
dict: The metadata object to be logged with the test results
|
489
|
+
"""
|
490
|
+
if os.environ.get("VALIDMIND_LLM_DESCRIPTIONS_ENABLED", "false").lower() == "true":
|
491
|
+
revision_name = "Generated by ValidMind AI"
|
492
|
+
description = generate_description(
|
493
|
+
test_name=test_id,
|
494
|
+
test_description=default_description,
|
495
|
+
test_summary=summary,
|
496
|
+
figures=figures,
|
497
|
+
)
|
498
|
+
else:
|
499
|
+
revision_name = "Default Description"
|
500
|
+
description = default_description
|
501
|
+
|
502
|
+
return {
|
503
|
+
"content_id": f"{prefix}:{test_id}::{revision_name}",
|
504
|
+
"text": description,
|
505
|
+
}
|
@@ -114,7 +114,7 @@ class VMDataset:
|
|
114
114
|
if model:
|
115
115
|
self.assign_predictions(model)
|
116
116
|
|
117
|
-
def _set_feature_columns(self, feature_columns):
|
117
|
+
def _set_feature_columns(self, feature_columns=None):
|
118
118
|
if feature_columns is not None and (
|
119
119
|
not isinstance(feature_columns, list)
|
120
120
|
or not all(isinstance(col, str) for col in feature_columns)
|
@@ -269,7 +269,9 @@ class VMDataset:
|
|
269
269
|
column_name (str): The name of the extra column.
|
270
270
|
column_values (np.ndarray, optional): The values of the extra column.
|
271
271
|
"""
|
272
|
-
if column_name not in self.columns and
|
272
|
+
if column_name not in self.columns and (
|
273
|
+
column_values is None or len(column_values) == 0
|
274
|
+
):
|
273
275
|
raise ValueError(
|
274
276
|
"Column values must be provided when the column doesn't exist in the dataset"
|
275
277
|
)
|
@@ -6,15 +6,14 @@
|
|
6
6
|
Class for storing ValidMind metric objects and associated
|
7
7
|
data for display and reporting purposes
|
8
8
|
"""
|
9
|
-
import os
|
10
9
|
from abc import abstractmethod
|
11
10
|
from dataclasses import dataclass
|
12
11
|
from typing import ClassVar, List, Optional, Union
|
13
12
|
|
14
13
|
import pandas as pd
|
15
14
|
|
16
|
-
from ...ai import generate_description
|
17
15
|
from ...errors import MissingCacheResultsArgumentsError
|
16
|
+
from ...utils import get_description_metadata
|
18
17
|
from ..figure import Figure
|
19
18
|
from .metric_result import MetricResult
|
20
19
|
from .result_wrapper import MetricResultWrapper
|
@@ -83,30 +82,16 @@ class Metric(Test):
|
|
83
82
|
summary=self.summary(metric_value),
|
84
83
|
)
|
85
84
|
|
86
|
-
if (
|
87
|
-
os.environ.get("VALIDMIND_LLM_DESCRIPTIONS_ENABLED", "false").lower()
|
88
|
-
== "true"
|
89
|
-
):
|
90
|
-
revision_name = "Generated by ValidMind AI"
|
91
|
-
description = generate_description(
|
92
|
-
test_name=self.test_id,
|
93
|
-
test_description=self.description().splitlines()[0],
|
94
|
-
test_results=metric.serialize()["value"],
|
95
|
-
test_summary=metric.serialize()["summary"],
|
96
|
-
figures=figures,
|
97
|
-
)
|
98
|
-
else:
|
99
|
-
revision_name = "Default Description"
|
100
|
-
description = self.description()
|
101
|
-
|
102
|
-
description_metadata = {
|
103
|
-
"content_id": f"metric_description:{self.test_id}::{revision_name}",
|
104
|
-
"text": description,
|
105
|
-
}
|
106
|
-
|
107
85
|
self.result = MetricResultWrapper(
|
108
86
|
result_id=self.test_id,
|
109
|
-
result_metadata=[
|
87
|
+
result_metadata=[
|
88
|
+
get_description_metadata(
|
89
|
+
test_id=self.test_id,
|
90
|
+
default_description=self.description(),
|
91
|
+
summary=metric.serialize()["summary"],
|
92
|
+
figures=figures,
|
93
|
+
)
|
94
|
+
],
|
110
95
|
metric=metric,
|
111
96
|
figures=figures,
|
112
97
|
inputs=self.get_accessed_inputs(),
|