validmind 2.2.6__tar.gz → 2.3.3__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {validmind-2.2.6 → validmind-2.3.3}/PKG-INFO +1 -1
- {validmind-2.2.6 → validmind-2.3.3}/pyproject.toml +1 -1
- {validmind-2.2.6 → validmind-2.3.3}/validmind/__init__.py +2 -1
- validmind-2.3.3/validmind/__version__.py +1 -0
- validmind-2.2.6/validmind/ai.py → validmind-2.3.3/validmind/ai/test_descriptions.py +74 -82
- validmind-2.3.3/validmind/ai/utils.py +104 -0
- {validmind-2.2.6 → validmind-2.3.3}/validmind/api_client.py +58 -19
- {validmind-2.2.6 → validmind-2.3.3}/validmind/client.py +5 -5
- {validmind-2.2.6 → validmind-2.3.3}/validmind/models/foundation.py +10 -6
- {validmind-2.2.6 → validmind-2.3.3}/validmind/models/function.py +3 -1
- {validmind-2.2.6 → validmind-2.3.3}/validmind/models/metadata.py +1 -1
- {validmind-2.2.6 → validmind-2.3.3}/validmind/test_suites/__init__.py +1 -9
- {validmind-2.2.6 → validmind-2.3.3}/validmind/test_suites/regression.py +0 -16
- {validmind-2.2.6 → validmind-2.3.3}/validmind/test_suites/statsmodels_timeseries.py +1 -1
- {validmind-2.2.6 → validmind-2.3.3}/validmind/tests/__init__.py +7 -7
- validmind-2.3.3/validmind/tests/__types__.py +170 -0
- {validmind-2.2.6 → validmind-2.3.3}/validmind/tests/data_validation/ACFandPACFPlot.py +36 -27
- {validmind-2.2.6/validmind/tests/model_validation/statsmodels → validmind-2.3.3/validmind/tests/data_validation}/ADF.py +42 -13
- {validmind-2.2.6 → validmind-2.3.3}/validmind/tests/data_validation/BivariateScatterPlots.py +38 -41
- {validmind-2.2.6/validmind/tests/model_validation/statsmodels → validmind-2.3.3/validmind/tests/data_validation}/DFGLSArch.py +67 -11
- {validmind-2.2.6 → validmind-2.3.3}/validmind/tests/data_validation/HeatmapFeatureCorrelations.py +1 -1
- {validmind-2.2.6 → validmind-2.3.3}/validmind/tests/data_validation/HighPearsonCorrelation.py +12 -3
- {validmind-2.2.6 → validmind-2.3.3}/validmind/tests/data_validation/IsolationForestOutliers.py +2 -2
- {validmind-2.2.6/validmind/tests/model_validation/statsmodels → validmind-2.3.3/validmind/tests/data_validation}/KPSS.py +64 -11
- {validmind-2.2.6/validmind/tests/model_validation/statsmodels → validmind-2.3.3/validmind/tests/data_validation}/PhillipsPerronArch.py +65 -11
- {validmind-2.2.6 → validmind-2.3.3}/validmind/tests/data_validation/ScatterPlot.py +1 -1
- {validmind-2.2.6 → validmind-2.3.3}/validmind/tests/data_validation/SeasonalDecompose.py +12 -7
- {validmind-2.2.6 → validmind-2.3.3}/validmind/tests/data_validation/TabularDateTimeHistograms.py +29 -33
- {validmind-2.2.6 → validmind-2.3.3}/validmind/tests/data_validation/WOEBinPlots.py +1 -1
- {validmind-2.2.6 → validmind-2.3.3}/validmind/tests/data_validation/WOEBinTable.py +1 -1
- {validmind-2.2.6/validmind/tests/model_validation/statsmodels → validmind-2.3.3/validmind/tests/data_validation}/ZivotAndrewsArch.py +65 -11
- {validmind-2.2.6 → validmind-2.3.3}/validmind/tests/data_validation/nlp/CommonWords.py +1 -1
- {validmind-2.2.6 → validmind-2.3.3}/validmind/tests/data_validation/nlp/Hashtags.py +1 -1
- {validmind-2.2.6 → validmind-2.3.3}/validmind/tests/data_validation/nlp/Mentions.py +1 -1
- {validmind-2.2.6 → validmind-2.3.3}/validmind/tests/data_validation/nlp/PolarityAndSubjectivity.py +2 -1
- {validmind-2.2.6 → validmind-2.3.3}/validmind/tests/data_validation/nlp/Punctuations.py +1 -1
- {validmind-2.2.6 → validmind-2.3.3}/validmind/tests/data_validation/nlp/Sentiment.py +1 -1
- {validmind-2.2.6 → validmind-2.3.3}/validmind/tests/data_validation/nlp/TextDescription.py +5 -1
- {validmind-2.2.6 → validmind-2.3.3}/validmind/tests/data_validation/nlp/Toxicity.py +1 -1
- {validmind-2.2.6 → validmind-2.3.3}/validmind/tests/decorator.py +13 -1
- {validmind-2.2.6 → validmind-2.3.3}/validmind/tests/model_validation/FeaturesAUC.py +5 -3
- {validmind-2.2.6 → validmind-2.3.3}/validmind/tests/model_validation/embeddings/CosineSimilarityComparison.py +4 -0
- {validmind-2.2.6 → validmind-2.3.3}/validmind/tests/model_validation/embeddings/CosineSimilarityHeatmap.py +4 -0
- {validmind-2.2.6 → validmind-2.3.3}/validmind/tests/model_validation/embeddings/EuclideanDistanceComparison.py +4 -0
- {validmind-2.2.6 → validmind-2.3.3}/validmind/tests/model_validation/embeddings/EuclideanDistanceHeatmap.py +4 -0
- {validmind-2.2.6 → validmind-2.3.3}/validmind/tests/model_validation/embeddings/PCAComponentsPairwisePlots.py +4 -0
- {validmind-2.2.6 → validmind-2.3.3}/validmind/tests/model_validation/embeddings/TSNEComponentsPairwisePlots.py +4 -0
- {validmind-2.2.6 → validmind-2.3.3}/validmind/tests/model_validation/ragas/AnswerCorrectness.py +3 -3
- {validmind-2.2.6 → validmind-2.3.3}/validmind/tests/model_validation/ragas/AnswerRelevance.py +5 -4
- {validmind-2.2.6 → validmind-2.3.3}/validmind/tests/model_validation/ragas/AnswerSimilarity.py +5 -4
- {validmind-2.2.6 → validmind-2.3.3}/validmind/tests/model_validation/ragas/AspectCritique.py +14 -8
- {validmind-2.2.6 → validmind-2.3.3}/validmind/tests/model_validation/ragas/ContextEntityRecall.py +3 -4
- {validmind-2.2.6 → validmind-2.3.3}/validmind/tests/model_validation/ragas/ContextPrecision.py +4 -5
- {validmind-2.2.6 → validmind-2.3.3}/validmind/tests/model_validation/ragas/ContextRecall.py +3 -4
- {validmind-2.2.6 → validmind-2.3.3}/validmind/tests/model_validation/ragas/ContextRelevancy.py +5 -4
- {validmind-2.2.6 → validmind-2.3.3}/validmind/tests/model_validation/ragas/Faithfulness.py +6 -5
- {validmind-2.2.6 → validmind-2.3.3}/validmind/tests/model_validation/ragas/utils.py +35 -9
- {validmind-2.2.6 → validmind-2.3.3}/validmind/tests/model_validation/sklearn/ClusterPerformance.py +2 -2
- {validmind-2.2.6 → validmind-2.3.3}/validmind/tests/model_validation/sklearn/ClusterPerformanceMetrics.py +1 -1
- {validmind-2.2.6 → validmind-2.3.3}/validmind/tests/model_validation/sklearn/ModelsPerformanceComparison.py +6 -8
- {validmind-2.2.6 → validmind-2.3.3}/validmind/tests/model_validation/sklearn/RegressionErrors.py +1 -1
- {validmind-2.2.6 → validmind-2.3.3}/validmind/tests/model_validation/sklearn/RegressionModelsPerformanceComparison.py +14 -8
- {validmind-2.2.6 → validmind-2.3.3}/validmind/tests/model_validation/sklearn/RegressionR2Square.py +1 -1
- {validmind-2.2.6 → validmind-2.3.3}/validmind/tests/model_validation/statsmodels/DurbinWatsonTest.py +1 -1
- {validmind-2.2.6 → validmind-2.3.3}/validmind/tests/model_validation/statsmodels/GINITable.py +1 -1
- {validmind-2.2.6 → validmind-2.3.3}/validmind/tests/model_validation/statsmodels/JarqueBera.py +1 -1
- {validmind-2.2.6 → validmind-2.3.3}/validmind/tests/model_validation/statsmodels/KolmogorovSmirnov.py +1 -1
- {validmind-2.2.6 → validmind-2.3.3}/validmind/tests/model_validation/statsmodels/LJungBox.py +1 -1
- {validmind-2.2.6 → validmind-2.3.3}/validmind/tests/model_validation/statsmodels/Lilliefors.py +1 -1
- {validmind-2.2.6 → validmind-2.3.3}/validmind/tests/model_validation/statsmodels/RegressionCoeffsPlot.py +4 -0
- {validmind-2.2.6 → validmind-2.3.3}/validmind/tests/model_validation/statsmodels/RegressionFeatureSignificance.py +9 -4
- {validmind-2.2.6 → validmind-2.3.3}/validmind/tests/model_validation/statsmodels/RegressionModelsCoeffs.py +2 -2
- {validmind-2.2.6 → validmind-2.3.3}/validmind/tests/model_validation/statsmodels/RunsTest.py +1 -1
- {validmind-2.2.6 → validmind-2.3.3}/validmind/tests/model_validation/statsmodels/ShapiroWilk.py +1 -1
- {validmind-2.2.6 → validmind-2.3.3}/validmind/tests/prompt_validation/Bias.py +14 -11
- {validmind-2.2.6 → validmind-2.3.3}/validmind/tests/prompt_validation/Clarity.py +14 -11
- {validmind-2.2.6 → validmind-2.3.3}/validmind/tests/prompt_validation/Conciseness.py +14 -11
- {validmind-2.2.6 → validmind-2.3.3}/validmind/tests/prompt_validation/Delimitation.py +14 -11
- {validmind-2.2.6 → validmind-2.3.3}/validmind/tests/prompt_validation/NegativeInstruction.py +14 -11
- {validmind-2.2.6 → validmind-2.3.3}/validmind/tests/prompt_validation/Robustness.py +11 -11
- {validmind-2.2.6 → validmind-2.3.3}/validmind/tests/prompt_validation/Specificity.py +14 -11
- validmind-2.3.3/validmind/tests/prompt_validation/ai_powered_test.py +69 -0
- {validmind-2.2.6 → validmind-2.3.3}/validmind/unit_metrics/composite.py +2 -1
- {validmind-2.2.6 → validmind-2.3.3}/validmind/utils.py +34 -59
- {validmind-2.2.6 → validmind-2.3.3}/validmind/vm_models/dataset/dataset.py +17 -3
- {validmind-2.2.6 → validmind-2.3.3}/validmind/vm_models/dataset/utils.py +2 -2
- {validmind-2.2.6 → validmind-2.3.3}/validmind/vm_models/model.py +1 -1
- {validmind-2.2.6 → validmind-2.3.3}/validmind/vm_models/test/metric.py +1 -8
- {validmind-2.2.6 → validmind-2.3.3}/validmind/vm_models/test/result_wrapper.py +2 -2
- {validmind-2.2.6 → validmind-2.3.3}/validmind/vm_models/test/test.py +3 -0
- {validmind-2.2.6 → validmind-2.3.3}/validmind/vm_models/test/threshold_test.py +1 -1
- {validmind-2.2.6 → validmind-2.3.3}/validmind/vm_models/test_suite/runner.py +7 -4
- validmind-2.2.6/validmind/__version__.py +0 -1
- validmind-2.2.6/validmind/tests/data_validation/DefaultRatesbyRiskBandPlot.py +0 -114
- validmind-2.2.6/validmind/tests/data_validation/PiTCreditScoresHistogram.py +0 -150
- validmind-2.2.6/validmind/tests/data_validation/PiTPDHistogram.py +0 -152
- validmind-2.2.6/validmind/tests/model_validation/statsmodels/ADFTest.py +0 -88
- validmind-2.2.6/validmind/tests/model_validation/statsmodels/FeatureImportanceAndSignificance.py +0 -198
- validmind-2.2.6/validmind/tests/model_validation/statsmodels/PDRatingClassPlot.py +0 -151
- validmind-2.2.6/validmind/tests/model_validation/statsmodels/RegressionModelInsampleComparison.py +0 -146
- validmind-2.2.6/validmind/tests/model_validation/statsmodels/RegressionModelOutsampleComparison.py +0 -144
- validmind-2.2.6/validmind/tests/model_validation/statsmodels/RegressionModelsPerformance.py +0 -127
- validmind-2.2.6/validmind/tests/model_validation/statsmodels/ResidualsVisualInspection.py +0 -130
- validmind-2.2.6/validmind/tests/prompt_validation/ai_powered_test.py +0 -91
- {validmind-2.2.6 → validmind-2.3.3}/LICENSE +0 -0
- {validmind-2.2.6 → validmind-2.3.3}/README.pypi.md +0 -0
- {validmind-2.2.6 → validmind-2.3.3}/validmind/client_config.py +0 -0
- {validmind-2.2.6 → validmind-2.3.3}/validmind/datasets/__init__.py +0 -0
- {validmind-2.2.6 → validmind-2.3.3}/validmind/datasets/classification/__init__.py +0 -0
- {validmind-2.2.6 → validmind-2.3.3}/validmind/datasets/classification/customer_churn.py +0 -0
- {validmind-2.2.6 → validmind-2.3.3}/validmind/datasets/classification/datasets/bank_customer_churn.csv +0 -0
- {validmind-2.2.6 → validmind-2.3.3}/validmind/datasets/classification/datasets/taiwan_credit.csv +0 -0
- {validmind-2.2.6 → validmind-2.3.3}/validmind/datasets/classification/taiwan_credit.py +0 -0
- {validmind-2.2.6 → validmind-2.3.3}/validmind/datasets/cluster/digits.py +0 -0
- {validmind-2.2.6 → validmind-2.3.3}/validmind/datasets/credit_risk/__init__.py +0 -0
- {validmind-2.2.6 → validmind-2.3.3}/validmind/datasets/credit_risk/datasets/lending_club_loan_data_2007_2014_clean.csv.gz +0 -0
- {validmind-2.2.6 → validmind-2.3.3}/validmind/datasets/credit_risk/lending_club.py +0 -0
- {validmind-2.2.6 → validmind-2.3.3}/validmind/datasets/llm/rag/__init__.py +0 -0
- {validmind-2.2.6 → validmind-2.3.3}/validmind/datasets/llm/rag/datasets/rfp_existing_questions_client_1.csv +0 -0
- {validmind-2.2.6 → validmind-2.3.3}/validmind/datasets/llm/rag/datasets/rfp_existing_questions_client_2.csv +0 -0
- {validmind-2.2.6 → validmind-2.3.3}/validmind/datasets/llm/rag/datasets/rfp_existing_questions_client_3.csv +0 -0
- {validmind-2.2.6 → validmind-2.3.3}/validmind/datasets/llm/rag/datasets/rfp_existing_questions_client_4.csv +0 -0
- {validmind-2.2.6 → validmind-2.3.3}/validmind/datasets/llm/rag/datasets/rfp_existing_questions_client_5.csv +0 -0
- {validmind-2.2.6 → validmind-2.3.3}/validmind/datasets/llm/rag/rfp.py +0 -0
- {validmind-2.2.6 → validmind-2.3.3}/validmind/datasets/nlp/__init__.py +0 -0
- {validmind-2.2.6 → validmind-2.3.3}/validmind/datasets/nlp/cnn_dailymail.py +0 -0
- {validmind-2.2.6 → validmind-2.3.3}/validmind/datasets/nlp/datasets/Covid_19.csv +0 -0
- {validmind-2.2.6 → validmind-2.3.3}/validmind/datasets/nlp/datasets/cnn_dailymail_100_with_predictions.csv +0 -0
- {validmind-2.2.6 → validmind-2.3.3}/validmind/datasets/nlp/datasets/cnn_dailymail_500_with_predictions.csv +0 -0
- {validmind-2.2.6 → validmind-2.3.3}/validmind/datasets/nlp/datasets/sentiments_with_predictions.csv +0 -0
- {validmind-2.2.6 → validmind-2.3.3}/validmind/datasets/nlp/twitter_covid_19.py +0 -0
- {validmind-2.2.6 → validmind-2.3.3}/validmind/datasets/regression/__init__.py +0 -0
- {validmind-2.2.6 → validmind-2.3.3}/validmind/datasets/regression/california_housing.py +0 -0
- {validmind-2.2.6 → validmind-2.3.3}/validmind/datasets/regression/datasets/fred/CPIAUCSL.csv +0 -0
- {validmind-2.2.6 → validmind-2.3.3}/validmind/datasets/regression/datasets/fred/CSUSHPISA.csv +0 -0
- {validmind-2.2.6 → validmind-2.3.3}/validmind/datasets/regression/datasets/fred/DRSFRMACBS.csv +0 -0
- {validmind-2.2.6 → validmind-2.3.3}/validmind/datasets/regression/datasets/fred/FEDFUNDS.csv +0 -0
- {validmind-2.2.6 → validmind-2.3.3}/validmind/datasets/regression/datasets/fred/GDP.csv +0 -0
- {validmind-2.2.6 → validmind-2.3.3}/validmind/datasets/regression/datasets/fred/GDPC1.csv +0 -0
- {validmind-2.2.6 → validmind-2.3.3}/validmind/datasets/regression/datasets/fred/GS10.csv +0 -0
- {validmind-2.2.6 → validmind-2.3.3}/validmind/datasets/regression/datasets/fred/GS3.csv +0 -0
- {validmind-2.2.6 → validmind-2.3.3}/validmind/datasets/regression/datasets/fred/GS5.csv +0 -0
- {validmind-2.2.6 → validmind-2.3.3}/validmind/datasets/regression/datasets/fred/MORTGAGE30US.csv +0 -0
- {validmind-2.2.6 → validmind-2.3.3}/validmind/datasets/regression/datasets/fred/UNRATE.csv +0 -0
- {validmind-2.2.6 → validmind-2.3.3}/validmind/datasets/regression/datasets/fred_loan_rates.csv +0 -0
- {validmind-2.2.6 → validmind-2.3.3}/validmind/datasets/regression/datasets/fred_loan_rates_test_1.csv +0 -0
- {validmind-2.2.6 → validmind-2.3.3}/validmind/datasets/regression/datasets/fred_loan_rates_test_2.csv +0 -0
- {validmind-2.2.6 → validmind-2.3.3}/validmind/datasets/regression/datasets/fred_loan_rates_test_3.csv +0 -0
- {validmind-2.2.6 → validmind-2.3.3}/validmind/datasets/regression/datasets/fred_loan_rates_test_4.csv +0 -0
- {validmind-2.2.6 → validmind-2.3.3}/validmind/datasets/regression/datasets/fred_loan_rates_test_5.csv +0 -0
- {validmind-2.2.6 → validmind-2.3.3}/validmind/datasets/regression/datasets/lending_club_loan_rates.csv +0 -0
- {validmind-2.2.6 → validmind-2.3.3}/validmind/datasets/regression/fred.py +0 -0
- {validmind-2.2.6 → validmind-2.3.3}/validmind/datasets/regression/lending_club.py +0 -0
- {validmind-2.2.6 → validmind-2.3.3}/validmind/datasets/regression/models/fred_loan_rates_model_1.pkl +0 -0
- {validmind-2.2.6 → validmind-2.3.3}/validmind/datasets/regression/models/fred_loan_rates_model_2.pkl +0 -0
- {validmind-2.2.6 → validmind-2.3.3}/validmind/datasets/regression/models/fred_loan_rates_model_3.pkl +0 -0
- {validmind-2.2.6 → validmind-2.3.3}/validmind/datasets/regression/models/fred_loan_rates_model_4.pkl +0 -0
- {validmind-2.2.6 → validmind-2.3.3}/validmind/datasets/regression/models/fred_loan_rates_model_5.pkl +0 -0
- {validmind-2.2.6 → validmind-2.3.3}/validmind/errors.py +0 -0
- {validmind-2.2.6 → validmind-2.3.3}/validmind/html_templates/__init__.py +0 -0
- {validmind-2.2.6 → validmind-2.3.3}/validmind/html_templates/content_blocks.py +0 -0
- {validmind-2.2.6 → validmind-2.3.3}/validmind/input_registry.py +0 -0
- {validmind-2.2.6 → validmind-2.3.3}/validmind/logging.py +0 -0
- {validmind-2.2.6 → validmind-2.3.3}/validmind/models/__init__.py +0 -0
- {validmind-2.2.6 → validmind-2.3.3}/validmind/models/huggingface.py +0 -0
- {validmind-2.2.6 → validmind-2.3.3}/validmind/models/pipeline.py +0 -0
- {validmind-2.2.6 → validmind-2.3.3}/validmind/models/pytorch.py +0 -0
- {validmind-2.2.6 → validmind-2.3.3}/validmind/models/r_model.py +0 -0
- {validmind-2.2.6 → validmind-2.3.3}/validmind/models/sklearn.py +0 -0
- {validmind-2.2.6 → validmind-2.3.3}/validmind/template.py +0 -0
- {validmind-2.2.6 → validmind-2.3.3}/validmind/test_suites/classifier.py +0 -0
- {validmind-2.2.6 → validmind-2.3.3}/validmind/test_suites/cluster.py +0 -0
- {validmind-2.2.6 → validmind-2.3.3}/validmind/test_suites/embeddings.py +0 -0
- {validmind-2.2.6 → validmind-2.3.3}/validmind/test_suites/llm.py +0 -0
- {validmind-2.2.6 → validmind-2.3.3}/validmind/test_suites/nlp.py +0 -0
- {validmind-2.2.6 → validmind-2.3.3}/validmind/test_suites/parameters_optimization.py +0 -0
- {validmind-2.2.6 → validmind-2.3.3}/validmind/test_suites/summarization.py +0 -0
- {validmind-2.2.6 → validmind-2.3.3}/validmind/test_suites/tabular_datasets.py +0 -0
- {validmind-2.2.6 → validmind-2.3.3}/validmind/test_suites/text_data.py +0 -0
- {validmind-2.2.6 → validmind-2.3.3}/validmind/test_suites/time_series.py +0 -0
- {validmind-2.2.6 → validmind-2.3.3}/validmind/tests/data_validation/ANOVAOneWayTable.py +0 -0
- {validmind-2.2.6 → validmind-2.3.3}/validmind/tests/data_validation/AutoAR.py +0 -0
- {validmind-2.2.6 → validmind-2.3.3}/validmind/tests/data_validation/AutoMA.py +0 -0
- {validmind-2.2.6 → validmind-2.3.3}/validmind/tests/data_validation/AutoSeasonality.py +0 -0
- {validmind-2.2.6 → validmind-2.3.3}/validmind/tests/data_validation/AutoStationarity.py +0 -0
- {validmind-2.2.6 → validmind-2.3.3}/validmind/tests/data_validation/BivariateFeaturesBarPlots.py +0 -0
- {validmind-2.2.6 → validmind-2.3.3}/validmind/tests/data_validation/BivariateHistograms.py +0 -0
- {validmind-2.2.6 → validmind-2.3.3}/validmind/tests/data_validation/ChiSquaredFeaturesTable.py +0 -0
- {validmind-2.2.6 → validmind-2.3.3}/validmind/tests/data_validation/ClassImbalance.py +0 -0
- {validmind-2.2.6 → validmind-2.3.3}/validmind/tests/data_validation/DatasetDescription.py +0 -0
- {validmind-2.2.6 → validmind-2.3.3}/validmind/tests/data_validation/DatasetSplit.py +0 -0
- {validmind-2.2.6 → validmind-2.3.3}/validmind/tests/data_validation/DescriptiveStatistics.py +0 -0
- {validmind-2.2.6 → validmind-2.3.3}/validmind/tests/data_validation/Duplicates.py +0 -0
- {validmind-2.2.6 → validmind-2.3.3}/validmind/tests/data_validation/EngleGrangerCoint.py +0 -0
- {validmind-2.2.6 → validmind-2.3.3}/validmind/tests/data_validation/FeatureTargetCorrelationPlot.py +0 -0
- {validmind-2.2.6 → validmind-2.3.3}/validmind/tests/data_validation/HighCardinality.py +0 -0
- {validmind-2.2.6 → validmind-2.3.3}/validmind/tests/data_validation/IQROutliersBarPlot.py +0 -0
- {validmind-2.2.6 → validmind-2.3.3}/validmind/tests/data_validation/IQROutliersTable.py +0 -0
- {validmind-2.2.6 → validmind-2.3.3}/validmind/tests/data_validation/LaggedCorrelationHeatmap.py +0 -0
- {validmind-2.2.6 → validmind-2.3.3}/validmind/tests/data_validation/MissingValues.py +0 -0
- {validmind-2.2.6 → validmind-2.3.3}/validmind/tests/data_validation/MissingValuesBarPlot.py +0 -0
- {validmind-2.2.6 → validmind-2.3.3}/validmind/tests/data_validation/MissingValuesRisk.py +0 -0
- {validmind-2.2.6 → validmind-2.3.3}/validmind/tests/data_validation/PearsonCorrelationMatrix.py +0 -0
- {validmind-2.2.6 → validmind-2.3.3}/validmind/tests/data_validation/RollingStatsPlot.py +0 -0
- {validmind-2.2.6 → validmind-2.3.3}/validmind/tests/data_validation/Skewness.py +0 -0
- {validmind-2.2.6 → validmind-2.3.3}/validmind/tests/data_validation/SpreadPlot.py +0 -0
- {validmind-2.2.6 → validmind-2.3.3}/validmind/tests/data_validation/TabularCategoricalBarPlots.py +0 -0
- {validmind-2.2.6 → validmind-2.3.3}/validmind/tests/data_validation/TabularDescriptionTables.py +0 -0
- {validmind-2.2.6 → validmind-2.3.3}/validmind/tests/data_validation/TabularNumericalHistograms.py +0 -0
- {validmind-2.2.6 → validmind-2.3.3}/validmind/tests/data_validation/TargetRateBarPlots.py +0 -0
- {validmind-2.2.6 → validmind-2.3.3}/validmind/tests/data_validation/TimeSeriesFrequency.py +0 -0
- {validmind-2.2.6 → validmind-2.3.3}/validmind/tests/data_validation/TimeSeriesHistogram.py +0 -0
- {validmind-2.2.6 → validmind-2.3.3}/validmind/tests/data_validation/TimeSeriesLinePlot.py +0 -0
- {validmind-2.2.6 → validmind-2.3.3}/validmind/tests/data_validation/TimeSeriesMissingValues.py +0 -0
- {validmind-2.2.6 → validmind-2.3.3}/validmind/tests/data_validation/TimeSeriesOutliers.py +0 -0
- {validmind-2.2.6 → validmind-2.3.3}/validmind/tests/data_validation/TooManyZeroValues.py +0 -0
- {validmind-2.2.6 → validmind-2.3.3}/validmind/tests/data_validation/UniqueRows.py +0 -0
- {validmind-2.2.6 → validmind-2.3.3}/validmind/tests/data_validation/__init__.py +0 -0
- {validmind-2.2.6 → validmind-2.3.3}/validmind/tests/data_validation/nlp/LanguageDetection.py +0 -0
- {validmind-2.2.6 → validmind-2.3.3}/validmind/tests/data_validation/nlp/StopWords.py +0 -0
- {validmind-2.2.6 → validmind-2.3.3}/validmind/tests/data_validation/nlp/__init__.py +0 -0
- {validmind-2.2.6 → validmind-2.3.3}/validmind/tests/model_validation/BertScore.py +0 -0
- {validmind-2.2.6 → validmind-2.3.3}/validmind/tests/model_validation/BleuScore.py +0 -0
- {validmind-2.2.6 → validmind-2.3.3}/validmind/tests/model_validation/ClusterSizeDistribution.py +0 -0
- {validmind-2.2.6 → validmind-2.3.3}/validmind/tests/model_validation/ContextualRecall.py +0 -0
- {validmind-2.2.6 → validmind-2.3.3}/validmind/tests/model_validation/MeteorScore.py +0 -0
- {validmind-2.2.6 → validmind-2.3.3}/validmind/tests/model_validation/ModelMetadata.py +0 -0
- {validmind-2.2.6 → validmind-2.3.3}/validmind/tests/model_validation/RegardScore.py +0 -0
- {validmind-2.2.6 → validmind-2.3.3}/validmind/tests/model_validation/RegressionResidualsPlot.py +0 -0
- {validmind-2.2.6 → validmind-2.3.3}/validmind/tests/model_validation/RougeScore.py +0 -0
- {validmind-2.2.6 → validmind-2.3.3}/validmind/tests/model_validation/TokenDisparity.py +0 -0
- {validmind-2.2.6 → validmind-2.3.3}/validmind/tests/model_validation/ToxicityScore.py +0 -0
- {validmind-2.2.6 → validmind-2.3.3}/validmind/tests/model_validation/__init__.py +0 -0
- {validmind-2.2.6 → validmind-2.3.3}/validmind/tests/model_validation/embeddings/ClusterDistribution.py +0 -0
- {validmind-2.2.6 → validmind-2.3.3}/validmind/tests/model_validation/embeddings/CosineSimilarityDistribution.py +0 -0
- {validmind-2.2.6 → validmind-2.3.3}/validmind/tests/model_validation/embeddings/DescriptiveAnalytics.py +0 -0
- {validmind-2.2.6 → validmind-2.3.3}/validmind/tests/model_validation/embeddings/EmbeddingsVisualization2D.py +0 -0
- {validmind-2.2.6 → validmind-2.3.3}/validmind/tests/model_validation/embeddings/StabilityAnalysis.py +0 -0
- {validmind-2.2.6 → validmind-2.3.3}/validmind/tests/model_validation/embeddings/StabilityAnalysisKeyword.py +0 -0
- {validmind-2.2.6 → validmind-2.3.3}/validmind/tests/model_validation/embeddings/StabilityAnalysisRandomNoise.py +0 -0
- {validmind-2.2.6 → validmind-2.3.3}/validmind/tests/model_validation/embeddings/StabilityAnalysisSynonyms.py +0 -0
- {validmind-2.2.6 → validmind-2.3.3}/validmind/tests/model_validation/embeddings/StabilityAnalysisTranslation.py +0 -0
- {validmind-2.2.6 → validmind-2.3.3}/validmind/tests/model_validation/sklearn/AdjustedMutualInformation.py +0 -0
- {validmind-2.2.6 → validmind-2.3.3}/validmind/tests/model_validation/sklearn/AdjustedRandIndex.py +0 -0
- {validmind-2.2.6 → validmind-2.3.3}/validmind/tests/model_validation/sklearn/ClassifierPerformance.py +0 -0
- {validmind-2.2.6 → validmind-2.3.3}/validmind/tests/model_validation/sklearn/ClusterCosineSimilarity.py +0 -0
- {validmind-2.2.6 → validmind-2.3.3}/validmind/tests/model_validation/sklearn/CompletenessScore.py +0 -0
- {validmind-2.2.6 → validmind-2.3.3}/validmind/tests/model_validation/sklearn/ConfusionMatrix.py +0 -0
- {validmind-2.2.6 → validmind-2.3.3}/validmind/tests/model_validation/sklearn/FowlkesMallowsScore.py +0 -0
- {validmind-2.2.6 → validmind-2.3.3}/validmind/tests/model_validation/sklearn/HomogeneityScore.py +0 -0
- {validmind-2.2.6 → validmind-2.3.3}/validmind/tests/model_validation/sklearn/HyperParametersTuning.py +0 -0
- {validmind-2.2.6 → validmind-2.3.3}/validmind/tests/model_validation/sklearn/KMeansClustersOptimization.py +0 -0
- {validmind-2.2.6 → validmind-2.3.3}/validmind/tests/model_validation/sklearn/MinimumAccuracy.py +0 -0
- {validmind-2.2.6 → validmind-2.3.3}/validmind/tests/model_validation/sklearn/MinimumF1Score.py +0 -0
- {validmind-2.2.6 → validmind-2.3.3}/validmind/tests/model_validation/sklearn/MinimumROCAUCScore.py +0 -0
- {validmind-2.2.6 → validmind-2.3.3}/validmind/tests/model_validation/sklearn/OverfitDiagnosis.py +0 -0
- {validmind-2.2.6 → validmind-2.3.3}/validmind/tests/model_validation/sklearn/PermutationFeatureImportance.py +0 -0
- {validmind-2.2.6 → validmind-2.3.3}/validmind/tests/model_validation/sklearn/PopulationStabilityIndex.py +0 -0
- {validmind-2.2.6 → validmind-2.3.3}/validmind/tests/model_validation/sklearn/PrecisionRecallCurve.py +0 -0
- {validmind-2.2.6 → validmind-2.3.3}/validmind/tests/model_validation/sklearn/ROCCurve.py +0 -0
- {validmind-2.2.6 → validmind-2.3.3}/validmind/tests/model_validation/sklearn/RobustnessDiagnosis.py +0 -0
- {validmind-2.2.6 → validmind-2.3.3}/validmind/tests/model_validation/sklearn/SHAPGlobalImportance.py +0 -0
- {validmind-2.2.6 → validmind-2.3.3}/validmind/tests/model_validation/sklearn/SilhouettePlot.py +0 -0
- {validmind-2.2.6 → validmind-2.3.3}/validmind/tests/model_validation/sklearn/TrainingTestDegradation.py +0 -0
- {validmind-2.2.6 → validmind-2.3.3}/validmind/tests/model_validation/sklearn/VMeasure.py +0 -0
- {validmind-2.2.6 → validmind-2.3.3}/validmind/tests/model_validation/sklearn/WeakspotsDiagnosis.py +0 -0
- {validmind-2.2.6 → validmind-2.3.3}/validmind/tests/model_validation/sklearn/__init__.py +0 -0
- {validmind-2.2.6 → validmind-2.3.3}/validmind/tests/model_validation/statsmodels/AutoARIMA.py +0 -0
- {validmind-2.2.6 → validmind-2.3.3}/validmind/tests/model_validation/statsmodels/BoxPierce.py +0 -0
- {validmind-2.2.6 → validmind-2.3.3}/validmind/tests/model_validation/statsmodels/CumulativePredictionProbabilities.py +0 -0
- {validmind-2.2.6 → validmind-2.3.3}/validmind/tests/model_validation/statsmodels/PredictionProbabilitiesHistogram.py +0 -0
- {validmind-2.2.6 → validmind-2.3.3}/validmind/tests/model_validation/statsmodels/RegressionModelForecastPlot.py +0 -0
- {validmind-2.2.6 → validmind-2.3.3}/validmind/tests/model_validation/statsmodels/RegressionModelForecastPlotLevels.py +0 -0
- {validmind-2.2.6 → validmind-2.3.3}/validmind/tests/model_validation/statsmodels/RegressionModelSensitivityPlot.py +0 -0
- {validmind-2.2.6 → validmind-2.3.3}/validmind/tests/model_validation/statsmodels/RegressionModelSummary.py +0 -0
- {validmind-2.2.6 → validmind-2.3.3}/validmind/tests/model_validation/statsmodels/RegressionPermutationFeatureImportance.py +0 -0
- {validmind-2.2.6 → validmind-2.3.3}/validmind/tests/model_validation/statsmodels/ScorecardHistogram.py +0 -0
- {validmind-2.2.6 → validmind-2.3.3}/validmind/tests/model_validation/statsmodels/__init__.py +0 -0
- {validmind-2.2.6 → validmind-2.3.3}/validmind/tests/model_validation/statsmodels/statsutils.py +0 -0
- {validmind-2.2.6 → validmind-2.3.3}/validmind/tests/prompt_validation/__init__.py +0 -0
- {validmind-2.2.6 → validmind-2.3.3}/validmind/tests/test_providers.py +0 -0
- {validmind-2.2.6 → validmind-2.3.3}/validmind/unit_metrics/__init__.py +0 -0
- {validmind-2.2.6 → validmind-2.3.3}/validmind/unit_metrics/classification/sklearn/Accuracy.py +0 -0
- {validmind-2.2.6 → validmind-2.3.3}/validmind/unit_metrics/classification/sklearn/F1.py +0 -0
- {validmind-2.2.6 → validmind-2.3.3}/validmind/unit_metrics/classification/sklearn/Precision.py +0 -0
- {validmind-2.2.6 → validmind-2.3.3}/validmind/unit_metrics/classification/sklearn/ROC_AUC.py +0 -0
- {validmind-2.2.6 → validmind-2.3.3}/validmind/unit_metrics/classification/sklearn/Recall.py +0 -0
- {validmind-2.2.6 → validmind-2.3.3}/validmind/unit_metrics/regression/GiniCoefficient.py +0 -0
- {validmind-2.2.6 → validmind-2.3.3}/validmind/unit_metrics/regression/HuberLoss.py +0 -0
- {validmind-2.2.6 → validmind-2.3.3}/validmind/unit_metrics/regression/KolmogorovSmirnovStatistic.py +0 -0
- {validmind-2.2.6 → validmind-2.3.3}/validmind/unit_metrics/regression/MeanAbsolutePercentageError.py +0 -0
- {validmind-2.2.6 → validmind-2.3.3}/validmind/unit_metrics/regression/MeanBiasDeviation.py +0 -0
- {validmind-2.2.6 → validmind-2.3.3}/validmind/unit_metrics/regression/QuantileLoss.py +0 -0
- {validmind-2.2.6 → validmind-2.3.3}/validmind/unit_metrics/regression/sklearn/AdjustedRSquaredScore.py +0 -0
- {validmind-2.2.6 → validmind-2.3.3}/validmind/unit_metrics/regression/sklearn/MeanAbsoluteError.py +0 -0
- {validmind-2.2.6 → validmind-2.3.3}/validmind/unit_metrics/regression/sklearn/MeanSquaredError.py +0 -0
- {validmind-2.2.6 → validmind-2.3.3}/validmind/unit_metrics/regression/sklearn/RSquaredScore.py +0 -0
- {validmind-2.2.6 → validmind-2.3.3}/validmind/unit_metrics/regression/sklearn/RootMeanSquaredError.py +0 -0
- {validmind-2.2.6 → validmind-2.3.3}/validmind/vm_models/__init__.py +0 -0
- {validmind-2.2.6 → validmind-2.3.3}/validmind/vm_models/dataset/__init__.py +0 -0
- {validmind-2.2.6 → validmind-2.3.3}/validmind/vm_models/figure.py +0 -0
- {validmind-2.2.6 → validmind-2.3.3}/validmind/vm_models/test/metric_result.py +0 -0
- {validmind-2.2.6 → validmind-2.3.3}/validmind/vm_models/test/output_template.py +0 -0
- {validmind-2.2.6 → validmind-2.3.3}/validmind/vm_models/test/result_summary.py +0 -0
- {validmind-2.2.6 → validmind-2.3.3}/validmind/vm_models/test/threshold_test_result.py +0 -0
- {validmind-2.2.6 → validmind-2.3.3}/validmind/vm_models/test_context.py +0 -0
- {validmind-2.2.6 → validmind-2.3.3}/validmind/vm_models/test_suite/summary.py +0 -0
- {validmind-2.2.6 → validmind-2.3.3}/validmind/vm_models/test_suite/test.py +0 -0
- {validmind-2.2.6 → validmind-2.3.3}/validmind/vm_models/test_suite/test_suite.py +0 -0
@@ -60,7 +60,7 @@ from .client import ( # noqa: E402
|
|
60
60
|
run_documentation_tests,
|
61
61
|
run_test_suite,
|
62
62
|
)
|
63
|
-
from .tests.decorator import metric, tags, tasks
|
63
|
+
from .tests.decorator import metric, tags, tasks, test
|
64
64
|
from .utils import run_async # noqa: E402
|
65
65
|
|
66
66
|
|
@@ -112,6 +112,7 @@ __all__ = [ # noqa
|
|
112
112
|
"run_test_suite",
|
113
113
|
"tags",
|
114
114
|
"tasks",
|
115
|
+
"test",
|
115
116
|
"tests",
|
116
117
|
"test_suites",
|
117
118
|
"vm_models",
|
@@ -0,0 +1 @@
|
|
1
|
+
__version__ = "2.3.3"
|
@@ -2,17 +2,22 @@
|
|
2
2
|
# See the LICENSE file in the root of this repository for details.
|
3
3
|
# SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
|
4
4
|
|
5
|
-
import concurrent.futures
|
6
5
|
import os
|
6
|
+
from concurrent.futures import ThreadPoolExecutor
|
7
7
|
|
8
|
-
from
|
8
|
+
from validmind.utils import md_to_html
|
9
9
|
|
10
|
-
from
|
10
|
+
from ..logging import get_logger
|
11
|
+
|
12
|
+
__executor = ThreadPoolExecutor()
|
11
13
|
|
12
14
|
logger = get_logger(__name__)
|
13
15
|
|
14
16
|
|
15
|
-
|
17
|
+
AI_REVISION_NAME = "Generated by ValidMind AI"
|
18
|
+
DEFAULT_REVISION_NAME = "Default Description"
|
19
|
+
|
20
|
+
SYSTEM_PROMPT = """ # noqa
|
16
21
|
You are an expert data scientist and MRM specialist.
|
17
22
|
You are tasked with analyzing the results of a quantitative test run on some model or dataset.
|
18
23
|
Your goal is to create a test description that will act as part of the model documentation.
|
@@ -20,13 +25,14 @@ You will provide both the developer and other consumers of the documentation wit
|
|
20
25
|
The overarching theme to maintain is MRM documentation.
|
21
26
|
|
22
27
|
Examine the provided statistical test results and compose a description of the results.
|
23
|
-
|
24
|
-
|
28
|
+
The results are either in the form of serialized tables or images of plots.
|
29
|
+
Compose a description and interpretation of the result to accompany it in MRM documentation.
|
30
|
+
It will be read by other data scientists and developers and by validators and stakeholders.
|
25
31
|
|
32
|
+
Use valid Markdown syntax to format the response.
|
26
33
|
Avoid long sentences and complex vocabulary.
|
27
34
|
Avoid overly verbose explanations - the goal is to explain to a user what they are seeing in the results.
|
28
35
|
Structure the response clearly and logically.
|
29
|
-
Use valid Markdown syntax to format the response.
|
30
36
|
Respond only with your analysis and insights, not the verbatim test results.
|
31
37
|
Respond only with the markdown content, no explanation or context for your response is necessary.
|
32
38
|
Use the Test ID that is provided to form the Test Name e.g. "ClassImbalance" -> "Class Imbalance".
|
@@ -61,9 +67,9 @@ Test ID: `{test_name}`
|
|
61
67
|
{test_description}
|
62
68
|
</Test Docstring>
|
63
69
|
|
64
|
-
<Test Results
|
70
|
+
<Test Results Table(s)>
|
65
71
|
{test_summary}
|
66
|
-
</Test Results
|
72
|
+
</Test Results Table(s)>
|
67
73
|
""".strip()
|
68
74
|
|
69
75
|
|
@@ -77,57 +83,6 @@ Test ID: `{test_name}`
|
|
77
83
|
The attached plots show the results of the test.
|
78
84
|
""".strip()
|
79
85
|
|
80
|
-
__client = None
|
81
|
-
__model = None
|
82
|
-
|
83
|
-
# can be None, True or False (ternary to represent initial state, ack and failed ack)
|
84
|
-
__ack = None
|
85
|
-
|
86
|
-
__executor = concurrent.futures.ThreadPoolExecutor()
|
87
|
-
|
88
|
-
|
89
|
-
def __get_client_and_model():
|
90
|
-
"""Get model and client to use for generating interpretations
|
91
|
-
|
92
|
-
On first call, it will look in the environment for the API key endpoint, model etc.
|
93
|
-
and store them in a global variable to avoid loading them up again.
|
94
|
-
"""
|
95
|
-
global __client, __model
|
96
|
-
|
97
|
-
if __client and __model:
|
98
|
-
return __client, __model
|
99
|
-
|
100
|
-
if "OPENAI_API_KEY" in os.environ:
|
101
|
-
__client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
|
102
|
-
__model = os.getenv("VM_OPENAI_MODEL", "gpt-4o")
|
103
|
-
|
104
|
-
logger.debug(f"Using OpenAI {__model} for generating descriptions")
|
105
|
-
|
106
|
-
elif "AZURE_OPENAI_KEY" in os.environ:
|
107
|
-
if "AZURE_OPENAI_ENDPOINT" not in os.environ:
|
108
|
-
raise ValueError(
|
109
|
-
"AZURE_OPENAI_ENDPOINT must be set to run LLM tests with Azure"
|
110
|
-
)
|
111
|
-
|
112
|
-
if "AZURE_OPENAI_MODEL" not in os.environ:
|
113
|
-
raise ValueError(
|
114
|
-
"AZURE_OPENAI_MODEL must be set to run LLM tests with Azure"
|
115
|
-
)
|
116
|
-
|
117
|
-
__client = AzureOpenAI(
|
118
|
-
azure_endpoint=os.getenv("AZURE_OPENAI_ENDPOINT"),
|
119
|
-
api_key=os.getenv("AZURE_OPENAI_KEY"),
|
120
|
-
api_version=os.getenv("AZURE_OPENAI_VERSION", "2023-05-15"),
|
121
|
-
)
|
122
|
-
__model = os.getenv("AZURE_OPENAI_MODEL")
|
123
|
-
|
124
|
-
logger.debug(f"Using Azure OpenAI {__model} for generating descriptions")
|
125
|
-
|
126
|
-
else:
|
127
|
-
raise ValueError("OPENAI_API_KEY or AZURE_OPENAI_KEY must be set")
|
128
|
-
|
129
|
-
return __client, __model
|
130
|
-
|
131
86
|
|
132
87
|
class DescriptionFuture:
|
133
88
|
"""This will be immediately returned from generate_description so that
|
@@ -142,8 +97,6 @@ class DescriptionFuture:
|
|
142
97
|
self._future = future
|
143
98
|
|
144
99
|
def get_description(self):
|
145
|
-
from .utils import md_to_html
|
146
|
-
|
147
100
|
if isinstance(self._future, str):
|
148
101
|
description = self._future
|
149
102
|
else:
|
@@ -163,7 +116,11 @@ def generate_description(
|
|
163
116
|
if not test_summary and not figures:
|
164
117
|
raise ValueError("No summary or figures provided - cannot generate description")
|
165
118
|
|
166
|
-
|
119
|
+
# TODO: fix circular import
|
120
|
+
from validmind.ai.utils import get_client_and_model
|
121
|
+
|
122
|
+
client, model = get_client_and_model()
|
123
|
+
|
167
124
|
# get last part of test id
|
168
125
|
test_name = test_id.split(".")[-1]
|
169
126
|
# truncate the test description to save time
|
@@ -255,26 +212,61 @@ def background_generate_description(
|
|
255
212
|
return DescriptionFuture(__executor.submit(wrapped))
|
256
213
|
|
257
214
|
|
258
|
-
def
|
259
|
-
|
215
|
+
def get_description_metadata(
|
216
|
+
test_id,
|
217
|
+
default_description,
|
218
|
+
summary=None,
|
219
|
+
figures=None,
|
220
|
+
prefix="metric_description",
|
221
|
+
):
|
222
|
+
"""Get Metadata Dictionary for a Test or Metric Result
|
223
|
+
|
224
|
+
Generates an LLM interpretation of the test results or uses the default
|
225
|
+
description and returns a metadata object that can be logged with the test results.
|
260
226
|
|
261
|
-
|
262
|
-
|
227
|
+
By default, the description is generated by an LLM that will interpret the test
|
228
|
+
results and provide a human-readable description. If the summary or figures are
|
229
|
+
not provided, or the `VALIDMIND_LLM_DESCRIPTIONS_ENABLED` environment variable is
|
230
|
+
set to `0` or `false` or no LLM has been configured, the default description will
|
231
|
+
be used as the test result description.
|
263
232
|
|
264
|
-
|
265
|
-
|
266
|
-
|
267
|
-
|
268
|
-
|
269
|
-
|
270
|
-
|
271
|
-
)
|
272
|
-
|
273
|
-
|
233
|
+
Note: Either the summary or figures must be provided to generate the description.
|
234
|
+
|
235
|
+
Args:
|
236
|
+
test_id (str): The test ID
|
237
|
+
default_description (str): The default description for the test
|
238
|
+
summary (Any): The test summary or results to interpret
|
239
|
+
figures (List[Figure]): The figures to attach to the test suite result
|
240
|
+
prefix (str): The prefix to use for the content ID (Default: "metric_description")
|
241
|
+
|
242
|
+
Returns:
|
243
|
+
dict: The metadata object to be logged with the test results
|
244
|
+
"""
|
245
|
+
env_disabled = os.getenv("VALIDMIND_LLM_DESCRIPTIONS_ENABLED", "1") in [
|
246
|
+
"0",
|
247
|
+
"false",
|
248
|
+
]
|
249
|
+
|
250
|
+
# TODO: fix circular import
|
251
|
+
from validmind.ai.utils import is_configured
|
252
|
+
|
253
|
+
if (summary or figures) and not env_disabled and is_configured():
|
254
|
+
revision_name = AI_REVISION_NAME
|
255
|
+
|
256
|
+
# get description future and set it as the description in the metadata
|
257
|
+
# this will lazily retrieved so it can run in the background in parallel
|
258
|
+
description = background_generate_description(
|
259
|
+
test_id=test_id,
|
260
|
+
test_description=default_description,
|
261
|
+
test_summary=summary,
|
262
|
+
figures=figures,
|
274
263
|
)
|
275
|
-
__ack = True
|
276
|
-
except Exception as e:
|
277
|
-
logger.debug(f"Failed to connect to OpenAI: {e}")
|
278
|
-
__ack = False
|
279
264
|
|
280
|
-
|
265
|
+
else:
|
266
|
+
revision_name = DEFAULT_REVISION_NAME
|
267
|
+
description = md_to_html(default_description, mathml=True)
|
268
|
+
|
269
|
+
return {
|
270
|
+
"content_id": f"{prefix}:{test_id}::{revision_name}",
|
271
|
+
"text": description,
|
272
|
+
}
|
@@ -0,0 +1,104 @@
|
|
1
|
+
# Copyright © 2023-2024 ValidMind Inc. All rights reserved.
|
2
|
+
# See the LICENSE file in the root of this repository for details.
|
3
|
+
# SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
|
4
|
+
|
5
|
+
import os
|
6
|
+
|
7
|
+
from openai import AzureOpenAI, Client, OpenAI
|
8
|
+
|
9
|
+
from ..api_client import get_ai_key, get_api_host
|
10
|
+
from ..logging import get_logger
|
11
|
+
|
12
|
+
logger = get_logger(__name__)
|
13
|
+
|
14
|
+
|
15
|
+
__client = None
|
16
|
+
__model = None
|
17
|
+
# can be None, True or False (ternary to represent initial state, ack and failed ack)
|
18
|
+
__ack = None
|
19
|
+
|
20
|
+
|
21
|
+
def get_client_and_model():
|
22
|
+
"""Get model and client to use for generating interpretations
|
23
|
+
|
24
|
+
On first call, it will look in the environment for the API key endpoint, model etc.
|
25
|
+
and store them in a global variable to avoid loading them up again.
|
26
|
+
"""
|
27
|
+
global __client, __model
|
28
|
+
|
29
|
+
if __client and __model:
|
30
|
+
return __client, __model
|
31
|
+
|
32
|
+
if "OPENAI_API_KEY" in os.environ:
|
33
|
+
__client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
|
34
|
+
__model = os.getenv("VM_OPENAI_MODEL", "gpt-4o")
|
35
|
+
|
36
|
+
logger.debug(f"Using OpenAI {__model} for generating descriptions")
|
37
|
+
|
38
|
+
elif "AZURE_OPENAI_KEY" in os.environ:
|
39
|
+
if "AZURE_OPENAI_ENDPOINT" not in os.environ:
|
40
|
+
raise ValueError(
|
41
|
+
"AZURE_OPENAI_ENDPOINT must be set to run LLM tests with Azure"
|
42
|
+
)
|
43
|
+
|
44
|
+
if "AZURE_OPENAI_MODEL" not in os.environ:
|
45
|
+
raise ValueError(
|
46
|
+
"AZURE_OPENAI_MODEL must be set to run LLM tests with Azure"
|
47
|
+
)
|
48
|
+
|
49
|
+
__client = AzureOpenAI(
|
50
|
+
azure_endpoint=os.getenv("AZURE_OPENAI_ENDPOINT"),
|
51
|
+
api_key=os.getenv("AZURE_OPENAI_KEY"),
|
52
|
+
api_version=os.getenv("AZURE_OPENAI_VERSION", "2023-05-15"),
|
53
|
+
)
|
54
|
+
__model = os.getenv("AZURE_OPENAI_MODEL")
|
55
|
+
|
56
|
+
logger.debug(f"Using Azure OpenAI {__model} for generating descriptions")
|
57
|
+
|
58
|
+
else:
|
59
|
+
try:
|
60
|
+
response = get_ai_key()
|
61
|
+
__client = Client(
|
62
|
+
base_url=(
|
63
|
+
# TODO: improve this to be a bit more dynamic
|
64
|
+
"http://localhost:4000/genai"
|
65
|
+
if "localhost" in get_api_host()
|
66
|
+
else f"{get_api_host()}/genai"
|
67
|
+
),
|
68
|
+
api_key=response["key"],
|
69
|
+
)
|
70
|
+
__model = "gpt-4o" # TODO: backend should tell us which model to use
|
71
|
+
logger.debug(f"Using ValidMind {__model} for generating descriptions")
|
72
|
+
except Exception as e:
|
73
|
+
logger.debug(f"Failed to get API key: {e}")
|
74
|
+
raise ValueError(
|
75
|
+
"OPENAI_API_KEY, AZURE_OPENAI_KEY must be set, or your account "
|
76
|
+
"must be setup to use ValidMind's LLM in order to use LLM features"
|
77
|
+
)
|
78
|
+
|
79
|
+
return __client, __model
|
80
|
+
|
81
|
+
|
82
|
+
def is_configured():
|
83
|
+
global __ack
|
84
|
+
|
85
|
+
if __ack:
|
86
|
+
return True
|
87
|
+
|
88
|
+
try:
|
89
|
+
client, model = get_client_and_model()
|
90
|
+
# send an empty message with max_tokens=1 to "ping" the API
|
91
|
+
response = client.chat.completions.create(
|
92
|
+
model=model,
|
93
|
+
messages=[{"role": "user", "content": ""}],
|
94
|
+
max_tokens=1,
|
95
|
+
)
|
96
|
+
logger.debug(
|
97
|
+
f"Received response from OpenAI: {response.choices[0].message.content}"
|
98
|
+
)
|
99
|
+
__ack = True
|
100
|
+
except Exception as e:
|
101
|
+
logger.debug(f"Failed to connect to OpenAI: {e}")
|
102
|
+
__ack = False
|
103
|
+
|
104
|
+
return __ack
|
@@ -11,9 +11,9 @@ import asyncio
|
|
11
11
|
import atexit
|
12
12
|
import json
|
13
13
|
import os
|
14
|
-
import urllib.parse
|
15
14
|
from io import BytesIO
|
16
15
|
from typing import Any, Callable, Dict, List, Optional, Tuple, Union
|
16
|
+
from urllib.parse import urlencode, urljoin
|
17
17
|
|
18
18
|
import aiohttp
|
19
19
|
import requests
|
@@ -69,6 +69,14 @@ def get_api_project() -> Optional[str]:
|
|
69
69
|
return _project
|
70
70
|
|
71
71
|
|
72
|
+
def get_api_headers() -> Dict[str, str]:
|
73
|
+
return {
|
74
|
+
"X-API-KEY": _api_key,
|
75
|
+
"X-API-SECRET": _api_secret,
|
76
|
+
"X-PROJECT-CUID": _project,
|
77
|
+
}
|
78
|
+
|
79
|
+
|
72
80
|
def init(
|
73
81
|
project: Optional[str] = None,
|
74
82
|
api_key: Optional[str] = None,
|
@@ -97,10 +105,7 @@ def init(
|
|
97
105
|
# special case to detect when running a notebook with the standard init snippet
|
98
106
|
# will override with environment variables so we don't have to keep updating
|
99
107
|
# the notebook
|
100
|
-
api_host = None
|
101
|
-
api_key = None
|
102
|
-
api_secret = None
|
103
|
-
project = None
|
108
|
+
api_host = api_key = api_secret = project = None
|
104
109
|
|
105
110
|
_project = project or os.getenv("VM_API_PROJECT")
|
106
111
|
|
@@ -114,8 +119,9 @@ def init(
|
|
114
119
|
raise MissingAPICredentialsError()
|
115
120
|
|
116
121
|
_api_host = api_host or os.getenv(
|
117
|
-
"VM_API_HOST", "http://127.0.0.1:5000/api/v1/tracking"
|
122
|
+
"VM_API_HOST", "http://127.0.0.1:5000/api/v1/tracking/"
|
118
123
|
)
|
124
|
+
|
119
125
|
_run_cuid = os.getenv("VM_RUN_CUID", None)
|
120
126
|
|
121
127
|
try:
|
@@ -127,7 +133,7 @@ def init(
|
|
127
133
|
raise e
|
128
134
|
|
129
135
|
|
130
|
-
|
136
|
+
def _get_session() -> aiohttp.ClientSession:
|
131
137
|
"""Initializes the async client session"""
|
132
138
|
global __api_session
|
133
139
|
|
@@ -147,7 +153,7 @@ async def _get_session() -> aiohttp.ClientSession:
|
|
147
153
|
def __ping() -> Dict[str, Any]:
|
148
154
|
"""Validates that we can connect to the ValidMind API (does not use the async session)"""
|
149
155
|
r = requests.get(
|
150
|
-
|
156
|
+
__get_url("ping", should_start_run=False),
|
151
157
|
headers={
|
152
158
|
"X-API-KEY": _api_key,
|
153
159
|
"X-API-SECRET": _api_secret,
|
@@ -189,21 +195,35 @@ def reload():
|
|
189
195
|
raise e
|
190
196
|
|
191
197
|
|
192
|
-
|
193
|
-
|
194
|
-
|
198
|
+
def __get_url(
|
199
|
+
endpoint: str,
|
200
|
+
params: Optional[Dict[str, str]] = None,
|
201
|
+
should_start_run: bool = True,
|
202
|
+
) -> str:
|
203
|
+
global _api_host
|
195
204
|
|
196
205
|
params = params or {}
|
197
|
-
params["run_cuid"] = _run_cuid
|
198
206
|
|
199
|
-
|
207
|
+
if not _run_cuid and should_start_run:
|
208
|
+
start_run()
|
209
|
+
|
210
|
+
if should_start_run:
|
211
|
+
params["run_cuid"] = _run_cuid
|
212
|
+
|
213
|
+
if not _api_host.endswith("/"):
|
214
|
+
_api_host += "/"
|
215
|
+
|
216
|
+
if params:
|
217
|
+
return f"{urljoin(_api_host, endpoint)}?{urlencode(params)}"
|
218
|
+
|
219
|
+
return urljoin(_api_host, endpoint)
|
200
220
|
|
201
221
|
|
202
222
|
async def _get(
|
203
223
|
endpoint: str, params: Optional[Dict[str, str]] = None
|
204
224
|
) -> Dict[str, Any]:
|
205
|
-
url =
|
206
|
-
session =
|
225
|
+
url = __get_url(endpoint, params)
|
226
|
+
session = _get_session()
|
207
227
|
session.headers.update({"X-RUN-CUID": _run_cuid})
|
208
228
|
|
209
229
|
async with session.get(url) as r:
|
@@ -219,8 +239,8 @@ async def _post(
|
|
219
239
|
data: Optional[Union[dict, FormData]] = None,
|
220
240
|
files: Optional[Dict[str, Tuple[str, BytesIO, str]]] = None,
|
221
241
|
) -> Dict[str, Any]:
|
222
|
-
url =
|
223
|
-
session =
|
242
|
+
url = __get_url(endpoint, params)
|
243
|
+
session = _get_session()
|
224
244
|
session.headers.update({"X-RUN-CUID": _run_cuid})
|
225
245
|
|
226
246
|
if not isinstance(data, (dict)) and files is not None:
|
@@ -491,7 +511,7 @@ def log_test_results(
|
|
491
511
|
return responses
|
492
512
|
|
493
513
|
|
494
|
-
def
|
514
|
+
def log_input(name: str, type: str, metadata: Dict[str, Any]) -> Dict[str, Any]:
|
495
515
|
"""Logs input information - internal use for now (don't expose via public API)
|
496
516
|
|
497
517
|
Args:
|
@@ -539,7 +559,7 @@ def start_run() -> str:
|
|
539
559
|
global _run_cuid
|
540
560
|
|
541
561
|
r = requests.post(
|
542
|
-
|
562
|
+
__get_url("start_run", should_start_run=False),
|
543
563
|
headers={
|
544
564
|
"X-API-KEY": _api_key,
|
545
565
|
"X-API-SECRET": _api_secret,
|
@@ -555,3 +575,22 @@ def start_run() -> str:
|
|
555
575
|
_run_cuid = test_run["cuid"]
|
556
576
|
|
557
577
|
return test_run["cuid"]
|
578
|
+
|
579
|
+
|
580
|
+
def get_ai_key() -> str:
|
581
|
+
"""Calls the api to get an api key for our LLM proxy"""
|
582
|
+
r = requests.get(
|
583
|
+
__get_url("ai/key", should_start_run=False),
|
584
|
+
headers={
|
585
|
+
"X-API-KEY": _api_key,
|
586
|
+
"X-API-SECRET": _api_secret,
|
587
|
+
"X-PROJECT-CUID": _project,
|
588
|
+
},
|
589
|
+
)
|
590
|
+
|
591
|
+
if r.status_code != 200:
|
592
|
+
# TODO: improve error handling when there's no Open AI API or AI key available
|
593
|
+
# logger.error("Could not get AI key from ValidMind API")
|
594
|
+
raise_api_error(r.text)
|
595
|
+
|
596
|
+
return r.json()
|
@@ -9,7 +9,7 @@ Client interface for all data and model validation functions
|
|
9
9
|
import pandas as pd
|
10
10
|
import polars as pl
|
11
11
|
|
12
|
-
from .api_client import
|
12
|
+
from .api_client import log_input as log_input
|
13
13
|
from .client_config import client_config
|
14
14
|
from .errors import (
|
15
15
|
GetTestSuiteError,
|
@@ -180,6 +180,7 @@ def init_model(
|
|
180
180
|
attributes: dict = None,
|
181
181
|
predict_fn: callable = None,
|
182
182
|
__log=True,
|
183
|
+
**kwargs,
|
183
184
|
) -> VMModel:
|
184
185
|
"""
|
185
186
|
Initializes a VM Model, which can then be passed to other functions
|
@@ -194,6 +195,7 @@ def init_model(
|
|
194
195
|
this to the same key.
|
195
196
|
attributes (dict): A dictionary of model attributes
|
196
197
|
predict_fn (callable): A function that takes an input and returns a prediction
|
198
|
+
**kwargs: Additional arguments to pass to the model
|
197
199
|
|
198
200
|
Raises:
|
199
201
|
ValueError: If the model type is not supported
|
@@ -246,6 +248,7 @@ def init_model(
|
|
246
248
|
input_id=input_id,
|
247
249
|
model=model, # Trained model instance
|
248
250
|
predict_fn=predict_fn,
|
251
|
+
**kwargs,
|
249
252
|
)
|
250
253
|
metadata = get_model_info(vm_model)
|
251
254
|
else:
|
@@ -351,10 +354,7 @@ def get_test_suite(
|
|
351
354
|
)
|
352
355
|
|
353
356
|
return get_template_test_suite(
|
354
|
-
client_config.documentation_template,
|
355
|
-
section=section,
|
356
|
-
*args,
|
357
|
-
**kwargs,
|
357
|
+
client_config.documentation_template, section=section
|
358
358
|
)
|
359
359
|
|
360
360
|
return get_test_suite_by_id(test_suite_id)(*args, **kwargs)
|
@@ -15,7 +15,7 @@ logger = get_logger(__name__)
|
|
15
15
|
@dataclass
|
16
16
|
class Prompt:
|
17
17
|
template: str
|
18
|
-
variables: list
|
18
|
+
variables: list = None
|
19
19
|
|
20
20
|
|
21
21
|
class FoundationModel(FunctionModel):
|
@@ -33,17 +33,21 @@ class FoundationModel(FunctionModel):
|
|
33
33
|
"""
|
34
34
|
|
35
35
|
def __post_init__(self):
|
36
|
-
|
37
|
-
raise ValueError("FoundationModel requires a callable predict_fn")
|
36
|
+
super().__post_init__()
|
38
37
|
|
39
|
-
|
38
|
+
if not hasattr(self, "prompt") or not isinstance(self.prompt, Prompt):
|
39
|
+
raise ValueError("FoundationModel requires a Prompt object")
|
40
40
|
|
41
41
|
def _build_prompt(self, x: pd.DataFrame):
|
42
42
|
"""
|
43
43
|
Builds the prompt for the model
|
44
44
|
"""
|
45
|
-
return
|
46
|
-
|
45
|
+
return (
|
46
|
+
self.prompt.template.format(
|
47
|
+
**{key: x[key] for key in self.prompt.variables}
|
48
|
+
)
|
49
|
+
if self.prompt.variables
|
50
|
+
else self.prompt.template
|
47
51
|
)
|
48
52
|
|
49
53
|
def predict(self, X: pd.DataFrame):
|
@@ -31,10 +31,12 @@ class FunctionModel(VMModel):
|
|
31
31
|
input features and return a prediction.
|
32
32
|
input_id (str, optional): The input ID for the model. Defaults to None.
|
33
33
|
name (str, optional): The name of the model. Defaults to the name of the predict_fn.
|
34
|
+
prompt (Prompt, optional): If using a prompt, the prompt object that defines the template
|
35
|
+
and the variables (if any). Defaults to None.
|
34
36
|
"""
|
35
37
|
|
36
38
|
def __post_init__(self):
|
37
|
-
if not
|
39
|
+
if not hasattr(self, "predict_fn") or not callable(self.predict_fn):
|
38
40
|
raise ValueError("FunctionModel requires a callable predict_fn")
|
39
41
|
|
40
42
|
self.name = self.name or self.predict_fn.__name__
|
@@ -25,12 +25,7 @@ from .embeddings import EmbeddingsFullSuite, EmbeddingsMetrics, EmbeddingsPerfor
|
|
25
25
|
from .llm import LLMClassifierFullSuite, PromptValidation
|
26
26
|
from .nlp import NLPClassifierFullSuite
|
27
27
|
from .parameters_optimization import KmeansParametersOptimization
|
28
|
-
from .regression import
|
29
|
-
RegressionFullSuite,
|
30
|
-
RegressionMetrics,
|
31
|
-
RegressionModelsComparison,
|
32
|
-
RegressionPerformance,
|
33
|
-
)
|
28
|
+
from .regression import RegressionFullSuite, RegressionMetrics, RegressionPerformance
|
34
29
|
from .statsmodels_timeseries import (
|
35
30
|
RegressionModelDescription,
|
36
31
|
RegressionModelsEvaluation,
|
@@ -72,7 +67,6 @@ core_test_suites = {
|
|
72
67
|
RegressionMetrics.suite_id: RegressionMetrics,
|
73
68
|
RegressionModelDescription.suite_id: RegressionModelDescription,
|
74
69
|
RegressionModelsEvaluation.suite_id: RegressionModelsEvaluation,
|
75
|
-
RegressionModelsComparison.suite_id: RegressionModelsComparison,
|
76
70
|
RegressionFullSuite.suite_id: RegressionFullSuite,
|
77
71
|
RegressionPerformance.suite_id: RegressionPerformance,
|
78
72
|
SummarizationMetrics.suite_id: SummarizationMetrics,
|
@@ -188,7 +182,6 @@ def describe_suite(test_suite_id: str, verbose=False):
|
|
188
182
|
"Test Suite Section": "",
|
189
183
|
"Test ID": item,
|
190
184
|
"Test Name": test.__name__,
|
191
|
-
"Test Type": test.test_type,
|
192
185
|
}
|
193
186
|
)
|
194
187
|
elif isinstance(item, dict):
|
@@ -201,7 +194,6 @@ def describe_suite(test_suite_id: str, verbose=False):
|
|
201
194
|
"Test Suite Section": item["section_id"],
|
202
195
|
"Test ID": test_id,
|
203
196
|
"Test Name": test_id_to_name(test_id),
|
204
|
-
"Test Type": test.test_type,
|
205
197
|
}
|
206
198
|
)
|
207
199
|
else:
|