validmind 2.8.28__py3-none-any.whl → 2.9.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (233) hide show
  1. validmind/__version__.py +1 -1
  2. validmind/ai/utils.py +4 -24
  3. validmind/api_client.py +6 -17
  4. validmind/logging.py +48 -0
  5. validmind/models/function.py +11 -3
  6. validmind/tests/__init__.py +2 -0
  7. validmind/tests/__types__.py +18 -0
  8. validmind/tests/data_validation/ACFandPACFPlot.py +3 -1
  9. validmind/tests/data_validation/ADF.py +3 -1
  10. validmind/tests/data_validation/AutoAR.py +3 -1
  11. validmind/tests/data_validation/AutoMA.py +5 -1
  12. validmind/tests/data_validation/AutoStationarity.py +5 -1
  13. validmind/tests/data_validation/BivariateScatterPlots.py +3 -1
  14. validmind/tests/data_validation/BoxPierce.py +4 -1
  15. validmind/tests/data_validation/ChiSquaredFeaturesTable.py +1 -1
  16. validmind/tests/data_validation/ClassImbalance.py +1 -1
  17. validmind/tests/data_validation/DatasetDescription.py +4 -1
  18. validmind/tests/data_validation/DatasetSplit.py +3 -2
  19. validmind/tests/data_validation/DescriptiveStatistics.py +3 -1
  20. validmind/tests/data_validation/DickeyFullerGLS.py +3 -1
  21. validmind/tests/data_validation/Duplicates.py +3 -1
  22. validmind/tests/data_validation/EngleGrangerCoint.py +6 -1
  23. validmind/tests/data_validation/FeatureTargetCorrelationPlot.py +3 -1
  24. validmind/tests/data_validation/HighCardinality.py +3 -1
  25. validmind/tests/data_validation/HighPearsonCorrelation.py +4 -1
  26. validmind/tests/data_validation/IQROutliersBarPlot.py +4 -1
  27. validmind/tests/data_validation/IQROutliersTable.py +6 -1
  28. validmind/tests/data_validation/IsolationForestOutliers.py +3 -1
  29. validmind/tests/data_validation/JarqueBera.py +3 -1
  30. validmind/tests/data_validation/KPSS.py +3 -1
  31. validmind/tests/data_validation/LJungBox.py +3 -1
  32. validmind/tests/data_validation/LaggedCorrelationHeatmap.py +6 -1
  33. validmind/tests/data_validation/MissingValues.py +5 -1
  34. validmind/tests/data_validation/MissingValuesBarPlot.py +3 -1
  35. validmind/tests/data_validation/MutualInformation.py +4 -1
  36. validmind/tests/data_validation/PearsonCorrelationMatrix.py +3 -1
  37. validmind/tests/data_validation/PhillipsPerronArch.py +3 -1
  38. validmind/tests/data_validation/ProtectedClassesCombination.py +5 -1
  39. validmind/tests/data_validation/ProtectedClassesDescription.py +5 -1
  40. validmind/tests/data_validation/ProtectedClassesDisparity.py +5 -3
  41. validmind/tests/data_validation/ProtectedClassesThresholdOptimizer.py +9 -2
  42. validmind/tests/data_validation/RollingStatsPlot.py +5 -1
  43. validmind/tests/data_validation/RunsTest.py +1 -1
  44. validmind/tests/data_validation/ScatterPlot.py +2 -1
  45. validmind/tests/data_validation/ScoreBandDefaultRates.py +3 -1
  46. validmind/tests/data_validation/SeasonalDecompose.py +6 -1
  47. validmind/tests/data_validation/ShapiroWilk.py +4 -1
  48. validmind/tests/data_validation/Skewness.py +3 -1
  49. validmind/tests/data_validation/SpreadPlot.py +3 -1
  50. validmind/tests/data_validation/TabularCategoricalBarPlots.py +4 -1
  51. validmind/tests/data_validation/TabularDateTimeHistograms.py +3 -1
  52. validmind/tests/data_validation/TabularDescriptionTables.py +4 -1
  53. validmind/tests/data_validation/TabularNumericalHistograms.py +3 -1
  54. validmind/tests/data_validation/TargetRateBarPlots.py +4 -1
  55. validmind/tests/data_validation/TimeSeriesDescription.py +1 -1
  56. validmind/tests/data_validation/TimeSeriesDescriptiveStatistics.py +1 -1
  57. validmind/tests/data_validation/TimeSeriesFrequency.py +5 -1
  58. validmind/tests/data_validation/TimeSeriesHistogram.py +4 -1
  59. validmind/tests/data_validation/TimeSeriesLinePlot.py +3 -1
  60. validmind/tests/data_validation/TimeSeriesMissingValues.py +6 -1
  61. validmind/tests/data_validation/TimeSeriesOutliers.py +5 -1
  62. validmind/tests/data_validation/TooManyZeroValues.py +6 -1
  63. validmind/tests/data_validation/UniqueRows.py +5 -1
  64. validmind/tests/data_validation/WOEBinPlots.py +4 -1
  65. validmind/tests/data_validation/WOEBinTable.py +5 -1
  66. validmind/tests/data_validation/ZivotAndrewsArch.py +3 -1
  67. validmind/tests/data_validation/nlp/CommonWords.py +2 -1
  68. validmind/tests/data_validation/nlp/Hashtags.py +2 -1
  69. validmind/tests/data_validation/nlp/LanguageDetection.py +4 -1
  70. validmind/tests/data_validation/nlp/Mentions.py +3 -1
  71. validmind/tests/data_validation/nlp/PolarityAndSubjectivity.py +6 -1
  72. validmind/tests/data_validation/nlp/Punctuations.py +2 -1
  73. validmind/tests/data_validation/nlp/Sentiment.py +3 -1
  74. validmind/tests/data_validation/nlp/StopWords.py +2 -1
  75. validmind/tests/data_validation/nlp/TextDescription.py +3 -1
  76. validmind/tests/data_validation/nlp/Toxicity.py +3 -1
  77. validmind/tests/load.py +91 -17
  78. validmind/tests/model_validation/BertScore.py +6 -3
  79. validmind/tests/model_validation/BleuScore.py +6 -1
  80. validmind/tests/model_validation/ClusterSizeDistribution.py +5 -1
  81. validmind/tests/model_validation/ContextualRecall.py +6 -1
  82. validmind/tests/model_validation/FeaturesAUC.py +5 -1
  83. validmind/tests/model_validation/MeteorScore.py +6 -1
  84. validmind/tests/model_validation/ModelMetadata.py +2 -1
  85. validmind/tests/model_validation/ModelPredictionResiduals.py +10 -2
  86. validmind/tests/model_validation/RegardScore.py +7 -1
  87. validmind/tests/model_validation/RegressionResidualsPlot.py +5 -1
  88. validmind/tests/model_validation/RougeScore.py +8 -1
  89. validmind/tests/model_validation/TimeSeriesPredictionWithCI.py +8 -1
  90. validmind/tests/model_validation/TimeSeriesPredictionsPlot.py +7 -1
  91. validmind/tests/model_validation/TimeSeriesR2SquareBySegments.py +6 -1
  92. validmind/tests/model_validation/TokenDisparity.py +6 -1
  93. validmind/tests/model_validation/ToxicityScore.py +6 -1
  94. validmind/tests/model_validation/embeddings/ClusterDistribution.py +6 -1
  95. validmind/tests/model_validation/embeddings/CosineSimilarityComparison.py +6 -1
  96. validmind/tests/model_validation/embeddings/CosineSimilarityDistribution.py +6 -1
  97. validmind/tests/model_validation/embeddings/CosineSimilarityHeatmap.py +7 -3
  98. validmind/tests/model_validation/embeddings/DescriptiveAnalytics.py +6 -1
  99. validmind/tests/model_validation/embeddings/EmbeddingsVisualization2D.py +4 -3
  100. validmind/tests/model_validation/embeddings/EuclideanDistanceComparison.py +6 -1
  101. validmind/tests/model_validation/embeddings/EuclideanDistanceHeatmap.py +7 -3
  102. validmind/tests/model_validation/embeddings/PCAComponentsPairwisePlots.py +6 -1
  103. validmind/tests/model_validation/embeddings/StabilityAnalysisKeyword.py +5 -2
  104. validmind/tests/model_validation/embeddings/StabilityAnalysisRandomNoise.py +5 -1
  105. validmind/tests/model_validation/embeddings/StabilityAnalysisSynonyms.py +4 -1
  106. validmind/tests/model_validation/embeddings/StabilityAnalysisTranslation.py +5 -1
  107. validmind/tests/model_validation/embeddings/TSNEComponentsPairwisePlots.py +9 -6
  108. validmind/tests/model_validation/ragas/AnswerCorrectness.py +8 -5
  109. validmind/tests/model_validation/ragas/AspectCritic.py +11 -8
  110. validmind/tests/model_validation/ragas/ContextEntityRecall.py +5 -2
  111. validmind/tests/model_validation/ragas/ContextPrecision.py +5 -2
  112. validmind/tests/model_validation/ragas/ContextPrecisionWithoutReference.py +5 -2
  113. validmind/tests/model_validation/ragas/ContextRecall.py +6 -2
  114. validmind/tests/model_validation/ragas/Faithfulness.py +9 -5
  115. validmind/tests/model_validation/ragas/NoiseSensitivity.py +10 -7
  116. validmind/tests/model_validation/ragas/ResponseRelevancy.py +9 -6
  117. validmind/tests/model_validation/ragas/SemanticSimilarity.py +7 -4
  118. validmind/tests/model_validation/sklearn/AdjustedMutualInformation.py +5 -1
  119. validmind/tests/model_validation/sklearn/AdjustedRandIndex.py +5 -1
  120. validmind/tests/model_validation/sklearn/CalibrationCurve.py +5 -1
  121. validmind/tests/model_validation/sklearn/ClassifierPerformance.py +5 -1
  122. validmind/tests/model_validation/sklearn/ClusterCosineSimilarity.py +5 -1
  123. validmind/tests/model_validation/sklearn/ClusterPerformanceMetrics.py +5 -1
  124. validmind/tests/model_validation/sklearn/CompletenessScore.py +5 -1
  125. validmind/tests/model_validation/sklearn/ConfusionMatrix.py +4 -1
  126. validmind/tests/model_validation/sklearn/FeatureImportance.py +5 -1
  127. validmind/tests/model_validation/sklearn/FowlkesMallowsScore.py +5 -1
  128. validmind/tests/model_validation/sklearn/HomogeneityScore.py +5 -1
  129. validmind/tests/model_validation/sklearn/HyperParametersTuning.py +2 -4
  130. validmind/tests/model_validation/sklearn/KMeansClustersOptimization.py +3 -3
  131. validmind/tests/model_validation/sklearn/MinimumAccuracy.py +5 -1
  132. validmind/tests/model_validation/sklearn/MinimumF1Score.py +5 -1
  133. validmind/tests/model_validation/sklearn/MinimumROCAUCScore.py +5 -1
  134. validmind/tests/model_validation/sklearn/ModelParameters.py +6 -1
  135. validmind/tests/model_validation/sklearn/ModelsPerformanceComparison.py +5 -1
  136. validmind/tests/model_validation/sklearn/OverfitDiagnosis.py +3 -2
  137. validmind/tests/model_validation/sklearn/PermutationFeatureImportance.py +4 -4
  138. validmind/tests/model_validation/sklearn/PopulationStabilityIndex.py +2 -2
  139. validmind/tests/model_validation/sklearn/PrecisionRecallCurve.py +5 -1
  140. validmind/tests/model_validation/sklearn/ROCCurve.py +3 -1
  141. validmind/tests/model_validation/sklearn/RegressionErrors.py +6 -1
  142. validmind/tests/model_validation/sklearn/RegressionErrorsComparison.py +6 -1
  143. validmind/tests/model_validation/sklearn/RegressionPerformance.py +5 -1
  144. validmind/tests/model_validation/sklearn/RegressionR2Square.py +6 -1
  145. validmind/tests/model_validation/sklearn/RegressionR2SquareComparison.py +6 -1
  146. validmind/tests/model_validation/sklearn/RobustnessDiagnosis.py +2 -2
  147. validmind/tests/model_validation/sklearn/ScoreProbabilityAlignment.py +3 -1
  148. validmind/tests/model_validation/sklearn/SilhouettePlot.py +6 -1
  149. validmind/tests/model_validation/sklearn/TrainingTestDegradation.py +2 -2
  150. validmind/tests/model_validation/sklearn/VMeasure.py +5 -1
  151. validmind/tests/model_validation/sklearn/WeakspotsDiagnosis.py +6 -5
  152. validmind/tests/model_validation/statsmodels/AutoARIMA.py +3 -1
  153. validmind/tests/model_validation/statsmodels/CumulativePredictionProbabilities.py +6 -1
  154. validmind/tests/model_validation/statsmodels/DurbinWatsonTest.py +6 -1
  155. validmind/tests/model_validation/statsmodels/GINITable.py +4 -1
  156. validmind/tests/model_validation/statsmodels/KolmogorovSmirnov.py +5 -1
  157. validmind/tests/model_validation/statsmodels/Lilliefors.py +3 -1
  158. validmind/tests/model_validation/statsmodels/PredictionProbabilitiesHistogram.py +6 -2
  159. validmind/tests/model_validation/statsmodels/RegressionCoeffs.py +4 -1
  160. validmind/tests/model_validation/statsmodels/RegressionFeatureSignificance.py +7 -2
  161. validmind/tests/model_validation/statsmodels/RegressionModelForecastPlot.py +5 -4
  162. validmind/tests/model_validation/statsmodels/RegressionModelForecastPlotLevels.py +4 -1
  163. validmind/tests/model_validation/statsmodels/RegressionModelSensitivityPlot.py +3 -2
  164. validmind/tests/model_validation/statsmodels/RegressionModelSummary.py +5 -1
  165. validmind/tests/model_validation/statsmodels/RegressionPermutationFeatureImportance.py +3 -1
  166. validmind/tests/model_validation/statsmodels/ScorecardHistogram.py +6 -1
  167. validmind/tests/ongoing_monitoring/CalibrationCurveDrift.py +2 -2
  168. validmind/tests/ongoing_monitoring/ClassDiscriminationDrift.py +2 -2
  169. validmind/tests/ongoing_monitoring/ClassImbalanceDrift.py +2 -2
  170. validmind/tests/ongoing_monitoring/ClassificationAccuracyDrift.py +2 -2
  171. validmind/tests/ongoing_monitoring/ConfusionMatrixDrift.py +2 -2
  172. validmind/tests/ongoing_monitoring/CumulativePredictionProbabilitiesDrift.py +2 -2
  173. validmind/tests/ongoing_monitoring/FeatureDrift.py +5 -2
  174. validmind/tests/ongoing_monitoring/PredictionAcrossEachFeature.py +6 -1
  175. validmind/tests/ongoing_monitoring/PredictionCorrelation.py +8 -1
  176. validmind/tests/ongoing_monitoring/PredictionProbabilitiesHistogramDrift.py +2 -2
  177. validmind/tests/ongoing_monitoring/PredictionQuantilesAcrossFeatures.py +6 -1
  178. validmind/tests/ongoing_monitoring/ROCCurveDrift.py +4 -2
  179. validmind/tests/ongoing_monitoring/ScoreBandsDrift.py +2 -2
  180. validmind/tests/ongoing_monitoring/ScorecardHistogramDrift.py +2 -2
  181. validmind/tests/ongoing_monitoring/TargetPredictionDistributionPlot.py +8 -1
  182. validmind/tests/output.py +9 -2
  183. validmind/tests/plots/BoxPlot.py +260 -0
  184. validmind/tests/plots/CorrelationHeatmap.py +235 -0
  185. validmind/tests/plots/HistogramPlot.py +233 -0
  186. validmind/tests/plots/ViolinPlot.py +125 -0
  187. validmind/tests/plots/__init__.py +0 -0
  188. validmind/tests/prompt_validation/Bias.py +5 -1
  189. validmind/tests/prompt_validation/Clarity.py +5 -1
  190. validmind/tests/prompt_validation/Conciseness.py +5 -1
  191. validmind/tests/prompt_validation/Delimitation.py +5 -1
  192. validmind/tests/prompt_validation/NegativeInstruction.py +5 -1
  193. validmind/tests/prompt_validation/Robustness.py +5 -1
  194. validmind/tests/prompt_validation/Specificity.py +5 -1
  195. validmind/tests/stats/CorrelationAnalysis.py +251 -0
  196. validmind/tests/stats/DescriptiveStats.py +197 -0
  197. validmind/tests/stats/NormalityTests.py +147 -0
  198. validmind/tests/stats/OutlierDetection.py +173 -0
  199. validmind/tests/stats/__init__.py +0 -0
  200. validmind/unit_metrics/classification/Accuracy.py +2 -1
  201. validmind/unit_metrics/classification/F1.py +2 -1
  202. validmind/unit_metrics/classification/Precision.py +2 -1
  203. validmind/unit_metrics/classification/ROC_AUC.py +2 -1
  204. validmind/unit_metrics/classification/Recall.py +2 -1
  205. validmind/unit_metrics/classification/individual/AbsoluteError.py +42 -0
  206. validmind/unit_metrics/classification/individual/BrierScore.py +56 -0
  207. validmind/unit_metrics/classification/individual/CalibrationError.py +77 -0
  208. validmind/unit_metrics/classification/individual/ClassBalance.py +65 -0
  209. validmind/unit_metrics/classification/individual/Confidence.py +52 -0
  210. validmind/unit_metrics/classification/individual/Correctness.py +41 -0
  211. validmind/unit_metrics/classification/individual/LogLoss.py +61 -0
  212. validmind/unit_metrics/classification/individual/OutlierScore.py +86 -0
  213. validmind/unit_metrics/classification/individual/ProbabilityError.py +54 -0
  214. validmind/unit_metrics/classification/individual/Uncertainty.py +60 -0
  215. validmind/unit_metrics/classification/individual/__init__.py +0 -0
  216. validmind/unit_metrics/regression/AdjustedRSquaredScore.py +2 -1
  217. validmind/unit_metrics/regression/GiniCoefficient.py +2 -1
  218. validmind/unit_metrics/regression/HuberLoss.py +2 -1
  219. validmind/unit_metrics/regression/KolmogorovSmirnovStatistic.py +2 -1
  220. validmind/unit_metrics/regression/MeanAbsoluteError.py +2 -1
  221. validmind/unit_metrics/regression/MeanAbsolutePercentageError.py +2 -1
  222. validmind/unit_metrics/regression/MeanBiasDeviation.py +2 -1
  223. validmind/unit_metrics/regression/MeanSquaredError.py +2 -1
  224. validmind/unit_metrics/regression/QuantileLoss.py +1 -1
  225. validmind/unit_metrics/regression/RSquaredScore.py +2 -1
  226. validmind/unit_metrics/regression/RootMeanSquaredError.py +2 -1
  227. validmind/vm_models/dataset/dataset.py +291 -38
  228. validmind/vm_models/result/result.py +26 -4
  229. {validmind-2.8.28.dist-info → validmind-2.9.1.dist-info}/METADATA +2 -2
  230. {validmind-2.8.28.dist-info → validmind-2.9.1.dist-info}/RECORD +233 -212
  231. {validmind-2.8.28.dist-info → validmind-2.9.1.dist-info}/LICENSE +0 -0
  232. {validmind-2.8.28.dist-info → validmind-2.9.1.dist-info}/WHEEL +0 -0
  233. {validmind-2.8.28.dist-info → validmind-2.9.1.dist-info}/entry_points.txt +0 -0
@@ -2,17 +2,24 @@
2
2
  # See the LICENSE file in the root of this repository for details.
3
3
  # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
4
4
 
5
+ from typing import Tuple
6
+
5
7
  import numpy as np
6
8
  import pandas as pd
7
9
  import plotly.graph_objects as go
8
10
  from scipy.stats import norm
9
11
 
10
12
  from validmind import RawData, tags, tasks
13
+ from validmind.vm_models import VMDataset, VMModel
11
14
 
12
15
 
13
16
  @tags("model_predictions", "visualization")
14
17
  @tasks("regression", "time_series_forecasting")
15
- def TimeSeriesPredictionWithCI(dataset, model, confidence=0.95):
18
+ def TimeSeriesPredictionWithCI(
19
+ dataset: VMDataset,
20
+ model: VMModel,
21
+ confidence: float = 0.95,
22
+ ) -> Tuple[pd.DataFrame, go.Figure, RawData]:
16
23
  """
17
24
  Assesses predictive accuracy and uncertainty in time series models, highlighting breaches beyond confidence
18
25
  intervals.
@@ -2,14 +2,20 @@
2
2
  # See the LICENSE file in the root of this repository for details.
3
3
  # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
4
4
 
5
+ from typing import Tuple
6
+
5
7
  import plotly.graph_objects as go
6
8
 
7
9
  from validmind import RawData, tags, tasks
10
+ from validmind.vm_models import VMDataset, VMModel
8
11
 
9
12
 
10
13
  @tags("model_predictions", "visualization")
11
14
  @tasks("regression", "time_series_forecasting")
12
- def TimeSeriesPredictionsPlot(dataset, model):
15
+ def TimeSeriesPredictionsPlot(
16
+ dataset: VMDataset,
17
+ model: VMModel,
18
+ ) -> Tuple[go.Figure, RawData]:
13
19
  """
14
20
  Plot actual vs predicted values for time series data and generate a visual comparison for the model.
15
21
 
@@ -2,17 +2,22 @@
2
2
  # See the LICENSE file in the root of this repository for details.
3
3
  # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
4
4
 
5
+ from typing import Optional, Tuple
5
6
 
6
7
  import pandas as pd
7
8
  import plotly.express as px
9
+ import plotly.graph_objects as go
8
10
  from sklearn import metrics
9
11
 
10
12
  from validmind import RawData, tags, tasks
13
+ from validmind.vm_models import VMDataset, VMModel
11
14
 
12
15
 
13
16
  @tags("model_performance", "sklearn")
14
17
  @tasks("regression", "time_series_forecasting")
15
- def TimeSeriesR2SquareBySegments(dataset, model, segments=None):
18
+ def TimeSeriesR2SquareBySegments(
19
+ dataset: VMDataset, model: VMModel, segments: Optional[int] = None
20
+ ) -> Tuple[pd.DataFrame, go.Figure, RawData]:
16
21
  """
17
22
  Evaluates the R-Squared values of regression models over specified time segments in time series data to assess
18
23
  segment-wise model performance.
@@ -2,15 +2,20 @@
2
2
  # See the LICENSE file in the root of this repository for details.
3
3
  # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
4
4
 
5
+ from typing import Tuple
6
+
5
7
  import pandas as pd
6
8
  import plotly.graph_objects as go
7
9
 
8
10
  from validmind import RawData, tags, tasks
11
+ from validmind.vm_models import VMDataset, VMModel
9
12
 
10
13
 
11
14
  @tags("nlp", "text_data", "visualization")
12
15
  @tasks("text_classification", "text_summarization")
13
- def TokenDisparity(dataset, model):
16
+ def TokenDisparity(
17
+ dataset: VMDataset, model: VMModel
18
+ ) -> Tuple[pd.DataFrame, go.Figure, RawData]:
14
19
  """
15
20
  Evaluates the token disparity between reference and generated texts, visualizing the results through histograms and
16
21
  bar charts, alongside compiling a comprehensive table of descriptive statistics for token counts.
@@ -2,16 +2,21 @@
2
2
  # See the LICENSE file in the root of this repository for details.
3
3
  # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
4
4
 
5
+ from typing import Tuple
6
+
5
7
  import evaluate
6
8
  import pandas as pd
7
9
  import plotly.graph_objects as go
8
10
 
9
11
  from validmind import RawData, tags, tasks
12
+ from validmind.vm_models import VMDataset, VMModel
10
13
 
11
14
 
12
15
  @tags("nlp", "text_data", "visualization")
13
16
  @tasks("text_classification", "text_summarization")
14
- def ToxicityScore(dataset, model):
17
+ def ToxicityScore(
18
+ dataset: VMDataset, model: VMModel
19
+ ) -> Tuple[pd.DataFrame, go.Figure, RawData]:
15
20
  """
16
21
  Assesses the toxicity levels of texts generated by NLP models to identify and mitigate harmful or offensive content.
17
22
 
@@ -2,7 +2,10 @@
2
2
  # See the LICENSE file in the root of this repository for details.
3
3
  # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
4
4
 
5
+ from typing import Tuple
6
+
5
7
  import plotly.express as px
8
+ import plotly.graph_objects as go
6
9
  from sklearn.cluster import KMeans
7
10
 
8
11
  from validmind import RawData, tags, tasks
@@ -11,7 +14,9 @@ from validmind.vm_models import VMDataset, VMModel
11
14
 
12
15
  @tags("llm", "text_data", "embeddings", "visualization")
13
16
  @tasks("feature_extraction")
14
- def ClusterDistribution(model: VMModel, dataset: VMDataset, num_clusters: int = 5):
17
+ def ClusterDistribution(
18
+ model: VMModel, dataset: VMDataset, num_clusters: int = 5
19
+ ) -> Tuple[go.Figure, RawData]:
15
20
  """
16
21
  Assesses the distribution of text embeddings across clusters produced by a model using KMeans clustering.
17
22
 
@@ -3,18 +3,23 @@
3
3
  # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
4
4
 
5
5
  from itertools import combinations
6
+ from typing import List, Tuple
6
7
 
7
8
  import numpy as np
8
9
  import pandas as pd
9
10
  import plotly.express as px
11
+ import plotly.graph_objects as go
10
12
  from sklearn.metrics.pairwise import cosine_similarity
11
13
 
12
14
  from validmind import RawData, tags, tasks
15
+ from validmind.vm_models import VMDataset, VMModel
13
16
 
14
17
 
15
18
  @tags("visualization", "dimensionality_reduction", "embeddings")
16
19
  @tasks("text_qa", "text_generation", "text_summarization")
17
- def CosineSimilarityComparison(dataset, models):
20
+ def CosineSimilarityComparison(
21
+ dataset: VMDataset, models: List[VMModel]
22
+ ) -> Tuple[go.Figure, RawData, pd.DataFrame]:
18
23
  """
19
24
  Assesses the similarity between embeddings generated by different models using Cosine Similarity, providing both
20
25
  statistical and visual insights.
@@ -2,7 +2,10 @@
2
2
  # See the LICENSE file in the root of this repository for details.
3
3
  # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
4
4
 
5
+ from typing import Tuple
6
+
5
7
  import plotly.express as px
8
+ import plotly.graph_objects as go
6
9
  from sklearn.metrics.pairwise import cosine_similarity
7
10
 
8
11
  from validmind import RawData, tags, tasks
@@ -11,7 +14,9 @@ from validmind.vm_models import VMDataset, VMModel
11
14
 
12
15
  @tags("llm", "text_data", "embeddings", "visualization")
13
16
  @tasks("feature_extraction")
14
- def CosineSimilarityDistribution(dataset: VMDataset, model: VMModel):
17
+ def CosineSimilarityDistribution(
18
+ dataset: VMDataset, model: VMModel
19
+ ) -> Tuple[go.Figure, RawData]:
15
20
  """
16
21
  Assesses the similarity between predicted text embeddings from a model using a Cosine Similarity distribution
17
22
  histogram.
@@ -2,24 +2,28 @@
2
2
  # See the LICENSE file in the root of this repository for details.
3
3
  # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
4
4
 
5
+ from typing import Tuple
6
+
5
7
  import numpy as np
6
8
  import plotly.express as px
9
+ import plotly.graph_objects as go
7
10
  from sklearn.metrics.pairwise import cosine_similarity
8
11
 
9
12
  from validmind import RawData, tags, tasks
13
+ from validmind.vm_models import VMDataset, VMModel
10
14
 
11
15
 
12
16
  @tags("visualization", "dimensionality_reduction", "embeddings")
13
17
  @tasks("text_qa", "text_generation", "text_summarization")
14
18
  def CosineSimilarityHeatmap(
15
- dataset,
16
- model,
19
+ dataset: VMDataset,
20
+ model: VMModel,
17
21
  title="Cosine Similarity Matrix",
18
22
  color="Cosine Similarity",
19
23
  xaxis_title="Index",
20
24
  yaxis_title="Index",
21
25
  color_scale="Blues",
22
- ):
26
+ ) -> Tuple[go.Figure, RawData]:
23
27
  """
24
28
  Generates an interactive heatmap to visualize the cosine similarities among embeddings derived from a given model.
25
29
 
@@ -2,8 +2,11 @@
2
2
  # See the LICENSE file in the root of this repository for details.
3
3
  # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
4
4
 
5
+ from typing import Tuple
6
+
5
7
  import numpy as np
6
8
  import plotly.express as px
9
+ import plotly.graph_objects as go
7
10
 
8
11
  from validmind import RawData, tags, tasks
9
12
  from validmind.vm_models import VMDataset, VMModel
@@ -11,7 +14,9 @@ from validmind.vm_models import VMDataset, VMModel
11
14
 
12
15
  @tags("llm", "text_data", "embeddings", "visualization")
13
16
  @tasks("feature_extraction")
14
- def DescriptiveAnalytics(dataset: VMDataset, model: VMModel):
17
+ def DescriptiveAnalytics(
18
+ dataset: VMDataset, model: VMModel
19
+ ) -> Tuple[go.Figure, go.Figure, go.Figure, RawData]:
15
20
  """
16
21
  Evaluates statistical properties of text embeddings in an ML model via mean, median, and standard deviation
17
22
  histograms.
@@ -2,9 +2,10 @@
2
2
  # See the LICENSE file in the root of this repository for details.
3
3
  # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
4
4
 
5
- from typing import Union
5
+ from typing import Tuple, Union
6
6
 
7
7
  import plotly.express as px
8
+ import plotly.graph_objects as go
8
9
  from sklearn.manifold import TSNE
9
10
 
10
11
  from validmind import RawData, tags, tasks
@@ -17,11 +18,11 @@ logger = get_logger(__name__)
17
18
  @tags("llm", "text_data", "embeddings", "visualization")
18
19
  @tasks("feature_extraction")
19
20
  def EmbeddingsVisualization2D(
20
- model: VMModel,
21
21
  dataset: VMDataset,
22
+ model: VMModel,
22
23
  cluster_column: Union[str, None] = None,
23
24
  perplexity: int = 30,
24
- ):
25
+ ) -> Tuple[go.Figure, RawData]:
25
26
  """
26
27
  Visualizes 2D representation of text embeddings generated by a model using t-SNE technique.
27
28
 
@@ -3,18 +3,23 @@
3
3
  # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
4
4
 
5
5
  from itertools import combinations
6
+ from typing import List, Tuple
6
7
 
7
8
  import numpy as np
8
9
  import pandas as pd
9
10
  import plotly.express as px
11
+ import plotly.graph_objects as go
10
12
  from sklearn.metrics.pairwise import euclidean_distances
11
13
 
12
14
  from validmind import RawData, tags, tasks
15
+ from validmind.vm_models import VMDataset, VMModel
13
16
 
14
17
 
15
18
  @tags("visualization", "dimensionality_reduction", "embeddings")
16
19
  @tasks("text_qa", "text_generation", "text_summarization")
17
- def EuclideanDistanceComparison(dataset, models):
20
+ def EuclideanDistanceComparison(
21
+ dataset: VMDataset, models: List[VMModel]
22
+ ) -> Tuple[pd.DataFrame, go.Figure, RawData]:
18
23
  """
19
24
  Assesses and visualizes the dissimilarity between model embeddings using Euclidean distance, providing insights
20
25
  into model behavior and potential redundancy or diversity.
@@ -2,24 +2,28 @@
2
2
  # See the LICENSE file in the root of this repository for details.
3
3
  # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
4
4
 
5
+ from typing import Tuple
6
+
5
7
  import numpy as np
6
8
  import plotly.express as px
9
+ import plotly.graph_objects as go
7
10
  from sklearn.metrics.pairwise import euclidean_distances
8
11
 
9
12
  from validmind import RawData, tags, tasks
13
+ from validmind.vm_models import VMDataset, VMModel
10
14
 
11
15
 
12
16
  @tags("visualization", "dimensionality_reduction", "embeddings")
13
17
  @tasks("text_qa", "text_generation", "text_summarization")
14
18
  def EuclideanDistanceHeatmap(
15
- dataset,
16
- model,
19
+ dataset: VMDataset,
20
+ model: VMModel,
17
21
  title="Euclidean Distance Matrix",
18
22
  color="Euclidean Distance",
19
23
  xaxis_title="Index",
20
24
  yaxis_title="Index",
21
25
  color_scale="Blues",
22
- ):
26
+ ) -> Tuple[go.Figure, RawData]:
23
27
  """
24
28
  Generates an interactive heatmap to visualize the Euclidean distances among embeddings derived from a given model.
25
29
 
@@ -3,19 +3,24 @@
3
3
  # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
4
4
 
5
5
  import itertools
6
+ from typing import Tuple
6
7
 
7
8
  import numpy as np
8
9
  import pandas as pd
9
10
  import plotly.express as px
11
+ import plotly.graph_objects as go
10
12
  from sklearn.decomposition import PCA
11
13
  from sklearn.preprocessing import StandardScaler
12
14
 
13
15
  from validmind import RawData, tags, tasks
16
+ from validmind.vm_models import VMDataset, VMModel
14
17
 
15
18
 
16
19
  @tags("visualization", "dimensionality_reduction", "embeddings")
17
20
  @tasks("text_qa", "text_generation", "text_summarization")
18
- def PCAComponentsPairwisePlots(dataset, model, n_components=3):
21
+ def PCAComponentsPairwisePlots(
22
+ dataset: VMDataset, model: VMModel, n_components: int = 3
23
+ ) -> Tuple[go.Figure, RawData]:
19
24
  """
20
25
  Generates scatter plots for pairwise combinations of principal component analysis (PCA) components of model
21
26
  embeddings.
@@ -3,7 +3,10 @@
3
3
  # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
4
4
 
5
5
  import re
6
- from typing import Dict
6
+ from typing import Dict, Tuple
7
+
8
+ import pandas as pd
9
+ import plotly.graph_objects as go
7
10
 
8
11
  from validmind import RawData, tags, tasks
9
12
  from validmind.vm_models import VMDataset, VMModel
@@ -18,7 +21,7 @@ def StabilityAnalysisKeyword(
18
21
  model: VMModel,
19
22
  keyword_dict: Dict[str, str],
20
23
  mean_similarity_threshold: float = 0.7,
21
- ):
24
+ ) -> Tuple[go.Figure, pd.DataFrame, RawData]:
22
25
  """
23
26
  Evaluates robustness of embedding models to keyword swaps in the test dataset.
24
27
 
@@ -4,6 +4,10 @@
4
4
 
5
5
  import random
6
6
  import string
7
+ from typing import Tuple
8
+
9
+ import pandas as pd
10
+ import plotly.graph_objects as go
7
11
 
8
12
  from validmind import RawData, tags, tasks
9
13
  from validmind.vm_models import VMDataset, VMModel
@@ -69,7 +73,7 @@ def StabilityAnalysisRandomNoise(
69
73
  model: VMModel,
70
74
  probability: float = 0.02,
71
75
  mean_similarity_threshold: float = 0.7,
72
- ):
76
+ ) -> Tuple[go.Figure, pd.DataFrame, RawData]:
73
77
  """
74
78
  Assesses the robustness of text embeddings models to random noise introduced via text perturbations.
75
79
 
@@ -3,8 +3,11 @@
3
3
  # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
4
4
 
5
5
  import random
6
+ from typing import Tuple
6
7
 
7
8
  import nltk
9
+ import pandas as pd
10
+ import plotly.graph_objects as go
8
11
  from nltk.corpus import wordnet as wn
9
12
 
10
13
  from validmind import RawData, tags, tasks
@@ -20,7 +23,7 @@ def StabilityAnalysisSynonyms(
20
23
  model: VMModel,
21
24
  probability: float = 0.02,
22
25
  mean_similarity_threshold: float = 0.7,
23
- ):
26
+ ) -> Tuple[go.Figure, pd.DataFrame, RawData]:
24
27
  """
25
28
  Evaluates the stability of text embeddings models when words in test data are replaced by their synonyms randomly.
26
29
 
@@ -2,6 +2,10 @@
2
2
  # See the LICENSE file in the root of this repository for details.
3
3
  # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
4
4
 
5
+ from typing import Tuple
6
+
7
+ import pandas as pd
8
+ import plotly.graph_objects as go
5
9
  from transformers import MarianMTModel, MarianTokenizer
6
10
 
7
11
  from validmind import RawData, tags, tasks
@@ -21,7 +25,7 @@ def StabilityAnalysisTranslation(
21
25
  source_lang: str = "en",
22
26
  target_lang: str = "fr",
23
27
  mean_similarity_threshold: float = 0.7,
24
- ):
28
+ ) -> Tuple[go.Figure, pd.DataFrame, RawData]:
25
29
  """
26
30
  Evaluates robustness of text embeddings models to noise introduced by translating the original text to another
27
31
  language and back.
@@ -3,25 +3,28 @@
3
3
  # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
4
4
 
5
5
  import itertools
6
+ from typing import Tuple
6
7
 
7
8
  import numpy as np
8
9
  import pandas as pd
9
10
  import plotly.express as px
11
+ import plotly.graph_objects as go
10
12
  from sklearn.manifold import TSNE
11
13
  from sklearn.preprocessing import StandardScaler
12
14
 
13
15
  from validmind import RawData, tags, tasks
16
+ from validmind.vm_models import VMDataset, VMModel
14
17
 
15
18
 
16
19
  @tags("visualization", "dimensionality_reduction", "embeddings")
17
20
  @tasks("text_qa", "text_generation", "text_summarization")
18
21
  def TSNEComponentsPairwisePlots(
19
- dataset,
20
- model,
21
- n_components=2,
22
- perplexity=30,
23
- title="t-SNE",
24
- ):
22
+ dataset: VMDataset,
23
+ model: VMModel,
24
+ n_components: int = 2,
25
+ perplexity: int = 30,
26
+ title: str = "t-SNE",
27
+ ) -> Tuple[go.Figure, RawData]:
25
28
  """
26
29
  Creates scatter plots for pairwise combinations of t-SNE components to visualize embeddings and highlight potential
27
30
  clustering structures.
@@ -3,12 +3,15 @@
3
3
  # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
4
4
 
5
5
  import warnings
6
+ from typing import Dict, Tuple
6
7
 
7
8
  import plotly.express as px
9
+ import plotly.graph_objects as go
8
10
  from datasets import Dataset
9
11
 
10
12
  from validmind import RawData, tags, tasks
11
13
  from validmind.errors import MissingDependencyError
14
+ from validmind.vm_models import VMDataset
12
15
 
13
16
  from .utils import get_ragas_config, get_renamed_columns
14
17
 
@@ -30,13 +33,13 @@ except ImportError as e:
30
33
  @tags("ragas", "llm")
31
34
  @tasks("text_qa", "text_generation", "text_summarization")
32
35
  def AnswerCorrectness(
33
- dataset,
34
- user_input_column="user_input",
35
- response_column="response",
36
- reference_column="reference",
36
+ dataset: VMDataset,
37
+ user_input_column: str = "user_input",
38
+ response_column: str = "response",
39
+ reference_column: str = "reference",
37
40
  judge_llm=None,
38
41
  judge_embeddings=None,
39
- ):
42
+ ) -> Tuple[Dict[str, list], go.Figure, go.Figure, RawData]:
40
43
  """
41
44
  Evaluates the correctness of answers in a dataset with respect to the provided ground
42
45
  truths and visualizes the results in a histogram.
@@ -3,12 +3,15 @@
3
3
  # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
4
4
 
5
5
  import warnings
6
+ from typing import Dict, List, Optional, Tuple
6
7
 
7
8
  import plotly.express as px
9
+ import plotly.graph_objects as go
8
10
  from datasets import Dataset
9
11
 
10
12
  from validmind import RawData, tags, tasks
11
13
  from validmind.errors import MissingDependencyError
14
+ from validmind.vm_models import VMDataset
12
15
 
13
16
  from .utils import get_ragas_config, get_renamed_columns
14
17
 
@@ -39,21 +42,21 @@ LOWER_IS_BETTER_ASPECTS = ["harmfulness", "maliciousness"]
39
42
  @tags("ragas", "llm", "qualitative")
40
43
  @tasks("text_summarization", "text_generation", "text_qa")
41
44
  def AspectCritic(
42
- dataset,
43
- user_input_column="user_input",
44
- response_column="response",
45
- retrieved_contexts_column=None,
46
- aspects: list = [
45
+ dataset: VMDataset,
46
+ user_input_column: str = "user_input",
47
+ response_column: str = "response",
48
+ retrieved_contexts_column: Optional[str] = None,
49
+ aspects: List[str] = [
47
50
  "coherence",
48
51
  "conciseness",
49
52
  "correctness",
50
53
  "harmfulness",
51
54
  "maliciousness",
52
55
  ],
53
- additional_aspects: list = None,
56
+ additional_aspects: Optional[List[Tuple[str, str]]] = None,
54
57
  judge_llm=None,
55
58
  judge_embeddings=None,
56
- ):
59
+ ) -> Tuple[Dict[str, list], go.Figure, RawData]:
57
60
  """
58
61
  Evaluates generations against the following aspects: harmfulness, maliciousness,
59
62
  coherence, correctness, and conciseness.
@@ -146,8 +149,8 @@ def AspectCritic(
146
149
 
147
150
  if retrieved_contexts_column:
148
151
  required_columns["retrieved_contexts"] = retrieved_contexts_column
149
-
150
152
  df = get_renamed_columns(dataset._df, required_columns)
153
+ df = df[required_columns.keys()]
151
154
 
152
155
  custom_aspects = (
153
156
  [
@@ -3,12 +3,15 @@
3
3
  # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
4
4
 
5
5
  import warnings
6
+ from typing import Dict, Tuple
6
7
 
7
8
  import plotly.express as px
9
+ import plotly.graph_objects as go
8
10
  from datasets import Dataset
9
11
 
10
12
  from validmind import RawData, tags, tasks
11
13
  from validmind.errors import MissingDependencyError
14
+ from validmind.vm_models import VMDataset
12
15
 
13
16
  from .utils import get_ragas_config, get_renamed_columns
14
17
 
@@ -30,12 +33,12 @@ except ImportError as e:
30
33
  @tags("ragas", "llm", "retrieval_performance")
31
34
  @tasks("text_qa", "text_generation", "text_summarization")
32
35
  def ContextEntityRecall(
33
- dataset,
36
+ dataset: VMDataset,
34
37
  retrieved_contexts_column: str = "retrieved_contexts",
35
38
  reference_column: str = "reference",
36
39
  judge_llm=None,
37
40
  judge_embeddings=None,
38
- ):
41
+ ) -> Tuple[Dict[str, list], go.Figure, go.Figure, RawData]:
39
42
  """
40
43
  Evaluates the context entity recall for dataset entries and visualizes the results.
41
44
 
@@ -3,12 +3,15 @@
3
3
  # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
4
4
 
5
5
  import warnings
6
+ from typing import Dict, Tuple
6
7
 
7
8
  import plotly.express as px
9
+ import plotly.graph_objects as go
8
10
  from datasets import Dataset
9
11
 
10
12
  from validmind import RawData, tags, tasks
11
13
  from validmind.errors import MissingDependencyError
14
+ from validmind.vm_models import VMDataset
12
15
 
13
16
  from .utils import get_ragas_config, get_renamed_columns
14
17
 
@@ -30,13 +33,13 @@ except ImportError as e:
30
33
  @tags("ragas", "llm", "retrieval_performance")
31
34
  @tasks("text_qa", "text_generation", "text_summarization", "text_classification")
32
35
  def ContextPrecision(
33
- dataset,
36
+ dataset: VMDataset,
34
37
  user_input_column: str = "user_input",
35
38
  retrieved_contexts_column: str = "retrieved_contexts",
36
39
  reference_column: str = "reference",
37
40
  judge_llm=None,
38
41
  judge_embeddings=None,
39
- ): # noqa: B950
42
+ ) -> Tuple[Dict[str, list], go.Figure, go.Figure, RawData]:
40
43
  """
41
44
  Context Precision is a metric that evaluates whether all of the ground-truth
42
45
  relevant items present in the contexts are ranked higher or not. Ideally all the
@@ -3,12 +3,15 @@
3
3
  # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
4
4
 
5
5
  import warnings
6
+ from typing import Dict, Tuple
6
7
 
7
8
  import plotly.express as px
9
+ import plotly.graph_objects as go
8
10
  from datasets import Dataset
9
11
 
10
12
  from validmind import RawData, tags, tasks
11
13
  from validmind.errors import MissingDependencyError
14
+ from validmind.vm_models import VMDataset
12
15
 
13
16
  from .utils import get_ragas_config, get_renamed_columns
14
17
 
@@ -30,13 +33,13 @@ except ImportError as e:
30
33
  @tags("ragas", "llm", "retrieval_performance")
31
34
  @tasks("text_qa", "text_generation", "text_summarization", "text_classification")
32
35
  def ContextPrecisionWithoutReference(
33
- dataset,
36
+ dataset: VMDataset,
34
37
  user_input_column: str = "user_input",
35
38
  retrieved_contexts_column: str = "retrieved_contexts",
36
39
  response_column: str = "response",
37
40
  judge_llm=None,
38
41
  judge_embeddings=None,
39
- ): # noqa: B950
42
+ ) -> Tuple[Dict[str, list], go.Figure, go.Figure, RawData]:
40
43
  """
41
44
  Context Precision Without Reference is a metric used to evaluate the relevance of
42
45
  retrieved contexts compared to the expected response for a given user input. This