validmind 2.8.27__py3-none-any.whl → 2.8.29__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (207)
  1. validmind/__version__.py +1 -1
  2. validmind/ai/utils.py +1 -1
  3. validmind/models/function.py +11 -3
  4. validmind/tests/data_validation/ACFandPACFPlot.py +3 -1
  5. validmind/tests/data_validation/ADF.py +3 -1
  6. validmind/tests/data_validation/AutoAR.py +3 -1
  7. validmind/tests/data_validation/AutoMA.py +5 -1
  8. validmind/tests/data_validation/AutoStationarity.py +5 -1
  9. validmind/tests/data_validation/BivariateScatterPlots.py +3 -1
  10. validmind/tests/data_validation/BoxPierce.py +4 -1
  11. validmind/tests/data_validation/ChiSquaredFeaturesTable.py +1 -1
  12. validmind/tests/data_validation/ClassImbalance.py +1 -1
  13. validmind/tests/data_validation/DatasetDescription.py +4 -1
  14. validmind/tests/data_validation/DatasetSplit.py +3 -2
  15. validmind/tests/data_validation/DescriptiveStatistics.py +3 -1
  16. validmind/tests/data_validation/DickeyFullerGLS.py +3 -1
  17. validmind/tests/data_validation/Duplicates.py +3 -1
  18. validmind/tests/data_validation/EngleGrangerCoint.py +6 -1
  19. validmind/tests/data_validation/FeatureTargetCorrelationPlot.py +3 -1
  20. validmind/tests/data_validation/HighCardinality.py +3 -1
  21. validmind/tests/data_validation/HighPearsonCorrelation.py +4 -1
  22. validmind/tests/data_validation/IQROutliersBarPlot.py +4 -1
  23. validmind/tests/data_validation/IQROutliersTable.py +6 -1
  24. validmind/tests/data_validation/IsolationForestOutliers.py +3 -1
  25. validmind/tests/data_validation/JarqueBera.py +3 -1
  26. validmind/tests/data_validation/KPSS.py +3 -1
  27. validmind/tests/data_validation/LJungBox.py +3 -1
  28. validmind/tests/data_validation/LaggedCorrelationHeatmap.py +6 -1
  29. validmind/tests/data_validation/MissingValues.py +5 -1
  30. validmind/tests/data_validation/MissingValuesBarPlot.py +3 -1
  31. validmind/tests/data_validation/MutualInformation.py +4 -1
  32. validmind/tests/data_validation/PearsonCorrelationMatrix.py +3 -1
  33. validmind/tests/data_validation/PhillipsPerronArch.py +3 -1
  34. validmind/tests/data_validation/ProtectedClassesCombination.py +5 -1
  35. validmind/tests/data_validation/ProtectedClassesDescription.py +5 -1
  36. validmind/tests/data_validation/ProtectedClassesDisparity.py +5 -3
  37. validmind/tests/data_validation/ProtectedClassesThresholdOptimizer.py +9 -2
  38. validmind/tests/data_validation/RollingStatsPlot.py +5 -1
  39. validmind/tests/data_validation/RunsTest.py +1 -1
  40. validmind/tests/data_validation/ScatterPlot.py +2 -1
  41. validmind/tests/data_validation/ScoreBandDefaultRates.py +3 -1
  42. validmind/tests/data_validation/SeasonalDecompose.py +6 -1
  43. validmind/tests/data_validation/ShapiroWilk.py +4 -1
  44. validmind/tests/data_validation/Skewness.py +3 -1
  45. validmind/tests/data_validation/SpreadPlot.py +3 -1
  46. validmind/tests/data_validation/TabularCategoricalBarPlots.py +4 -1
  47. validmind/tests/data_validation/TabularDateTimeHistograms.py +3 -1
  48. validmind/tests/data_validation/TabularDescriptionTables.py +4 -1
  49. validmind/tests/data_validation/TabularNumericalHistograms.py +3 -1
  50. validmind/tests/data_validation/TargetRateBarPlots.py +4 -1
  51. validmind/tests/data_validation/TimeSeriesDescription.py +1 -1
  52. validmind/tests/data_validation/TimeSeriesDescriptiveStatistics.py +1 -1
  53. validmind/tests/data_validation/TimeSeriesFrequency.py +5 -1
  54. validmind/tests/data_validation/TimeSeriesHistogram.py +4 -1
  55. validmind/tests/data_validation/TimeSeriesLinePlot.py +3 -1
  56. validmind/tests/data_validation/TimeSeriesMissingValues.py +6 -1
  57. validmind/tests/data_validation/TimeSeriesOutliers.py +5 -1
  58. validmind/tests/data_validation/TooManyZeroValues.py +6 -1
  59. validmind/tests/data_validation/UniqueRows.py +5 -1
  60. validmind/tests/data_validation/WOEBinPlots.py +4 -1
  61. validmind/tests/data_validation/WOEBinTable.py +5 -1
  62. validmind/tests/data_validation/ZivotAndrewsArch.py +3 -1
  63. validmind/tests/data_validation/nlp/CommonWords.py +2 -1
  64. validmind/tests/data_validation/nlp/Hashtags.py +2 -1
  65. validmind/tests/data_validation/nlp/LanguageDetection.py +4 -1
  66. validmind/tests/data_validation/nlp/Mentions.py +3 -1
  67. validmind/tests/data_validation/nlp/PolarityAndSubjectivity.py +6 -1
  68. validmind/tests/data_validation/nlp/Punctuations.py +2 -1
  69. validmind/tests/data_validation/nlp/Sentiment.py +3 -1
  70. validmind/tests/data_validation/nlp/StopWords.py +2 -1
  71. validmind/tests/data_validation/nlp/TextDescription.py +3 -1
  72. validmind/tests/data_validation/nlp/Toxicity.py +3 -1
  73. validmind/tests/load.py +91 -17
  74. validmind/tests/model_validation/BertScore.py +6 -3
  75. validmind/tests/model_validation/BleuScore.py +6 -1
  76. validmind/tests/model_validation/ClusterSizeDistribution.py +5 -1
  77. validmind/tests/model_validation/ContextualRecall.py +6 -1
  78. validmind/tests/model_validation/FeaturesAUC.py +5 -1
  79. validmind/tests/model_validation/MeteorScore.py +6 -1
  80. validmind/tests/model_validation/ModelMetadata.py +2 -1
  81. validmind/tests/model_validation/ModelPredictionResiduals.py +10 -2
  82. validmind/tests/model_validation/RegardScore.py +7 -1
  83. validmind/tests/model_validation/RegressionResidualsPlot.py +5 -1
  84. validmind/tests/model_validation/RougeScore.py +8 -1
  85. validmind/tests/model_validation/TimeSeriesPredictionWithCI.py +8 -1
  86. validmind/tests/model_validation/TimeSeriesPredictionsPlot.py +7 -1
  87. validmind/tests/model_validation/TimeSeriesR2SquareBySegments.py +6 -1
  88. validmind/tests/model_validation/TokenDisparity.py +6 -1
  89. validmind/tests/model_validation/ToxicityScore.py +6 -1
  90. validmind/tests/model_validation/embeddings/ClusterDistribution.py +6 -1
  91. validmind/tests/model_validation/embeddings/CosineSimilarityComparison.py +6 -1
  92. validmind/tests/model_validation/embeddings/CosineSimilarityDistribution.py +6 -1
  93. validmind/tests/model_validation/embeddings/CosineSimilarityHeatmap.py +7 -3
  94. validmind/tests/model_validation/embeddings/DescriptiveAnalytics.py +6 -1
  95. validmind/tests/model_validation/embeddings/EmbeddingsVisualization2D.py +4 -3
  96. validmind/tests/model_validation/embeddings/EuclideanDistanceComparison.py +6 -1
  97. validmind/tests/model_validation/embeddings/EuclideanDistanceHeatmap.py +7 -3
  98. validmind/tests/model_validation/embeddings/PCAComponentsPairwisePlots.py +6 -1
  99. validmind/tests/model_validation/embeddings/StabilityAnalysisKeyword.py +5 -2
  100. validmind/tests/model_validation/embeddings/StabilityAnalysisRandomNoise.py +5 -1
  101. validmind/tests/model_validation/embeddings/StabilityAnalysisSynonyms.py +4 -1
  102. validmind/tests/model_validation/embeddings/StabilityAnalysisTranslation.py +5 -1
  103. validmind/tests/model_validation/embeddings/TSNEComponentsPairwisePlots.py +9 -6
  104. validmind/tests/model_validation/ragas/AnswerCorrectness.py +8 -5
  105. validmind/tests/model_validation/ragas/AspectCritic.py +11 -8
  106. validmind/tests/model_validation/ragas/ContextEntityRecall.py +5 -2
  107. validmind/tests/model_validation/ragas/ContextPrecision.py +5 -2
  108. validmind/tests/model_validation/ragas/ContextPrecisionWithoutReference.py +5 -2
  109. validmind/tests/model_validation/ragas/ContextRecall.py +6 -2
  110. validmind/tests/model_validation/ragas/Faithfulness.py +9 -5
  111. validmind/tests/model_validation/ragas/NoiseSensitivity.py +10 -7
  112. validmind/tests/model_validation/ragas/ResponseRelevancy.py +9 -6
  113. validmind/tests/model_validation/ragas/SemanticSimilarity.py +7 -4
  114. validmind/tests/model_validation/sklearn/AdjustedMutualInformation.py +5 -1
  115. validmind/tests/model_validation/sklearn/AdjustedRandIndex.py +5 -1
  116. validmind/tests/model_validation/sklearn/CalibrationCurve.py +5 -1
  117. validmind/tests/model_validation/sklearn/ClassifierPerformance.py +5 -1
  118. validmind/tests/model_validation/sklearn/ClusterCosineSimilarity.py +5 -1
  119. validmind/tests/model_validation/sklearn/ClusterPerformanceMetrics.py +5 -1
  120. validmind/tests/model_validation/sklearn/CompletenessScore.py +5 -1
  121. validmind/tests/model_validation/sklearn/ConfusionMatrix.py +4 -1
  122. validmind/tests/model_validation/sklearn/FeatureImportance.py +5 -1
  123. validmind/tests/model_validation/sklearn/FowlkesMallowsScore.py +5 -1
  124. validmind/tests/model_validation/sklearn/HomogeneityScore.py +5 -1
  125. validmind/tests/model_validation/sklearn/HyperParametersTuning.py +2 -4
  126. validmind/tests/model_validation/sklearn/KMeansClustersOptimization.py +3 -3
  127. validmind/tests/model_validation/sklearn/MinimumAccuracy.py +5 -1
  128. validmind/tests/model_validation/sklearn/MinimumF1Score.py +5 -1
  129. validmind/tests/model_validation/sklearn/MinimumROCAUCScore.py +5 -1
  130. validmind/tests/model_validation/sklearn/ModelParameters.py +6 -1
  131. validmind/tests/model_validation/sklearn/ModelsPerformanceComparison.py +5 -1
  132. validmind/tests/model_validation/sklearn/OverfitDiagnosis.py +3 -2
  133. validmind/tests/model_validation/sklearn/PermutationFeatureImportance.py +4 -4
  134. validmind/tests/model_validation/sklearn/PopulationStabilityIndex.py +2 -2
  135. validmind/tests/model_validation/sklearn/PrecisionRecallCurve.py +5 -1
  136. validmind/tests/model_validation/sklearn/ROCCurve.py +3 -1
  137. validmind/tests/model_validation/sklearn/RegressionErrors.py +6 -1
  138. validmind/tests/model_validation/sklearn/RegressionErrorsComparison.py +6 -1
  139. validmind/tests/model_validation/sklearn/RegressionPerformance.py +5 -1
  140. validmind/tests/model_validation/sklearn/RegressionR2Square.py +6 -1
  141. validmind/tests/model_validation/sklearn/RegressionR2SquareComparison.py +6 -1
  142. validmind/tests/model_validation/sklearn/RobustnessDiagnosis.py +2 -2
  143. validmind/tests/model_validation/sklearn/ScoreProbabilityAlignment.py +3 -1
  144. validmind/tests/model_validation/sklearn/SilhouettePlot.py +6 -1
  145. validmind/tests/model_validation/sklearn/TrainingTestDegradation.py +2 -2
  146. validmind/tests/model_validation/sklearn/VMeasure.py +5 -1
  147. validmind/tests/model_validation/sklearn/WeakspotsDiagnosis.py +6 -5
  148. validmind/tests/model_validation/statsmodels/AutoARIMA.py +3 -1
  149. validmind/tests/model_validation/statsmodels/CumulativePredictionProbabilities.py +6 -1
  150. validmind/tests/model_validation/statsmodels/DurbinWatsonTest.py +6 -1
  151. validmind/tests/model_validation/statsmodels/GINITable.py +4 -1
  152. validmind/tests/model_validation/statsmodels/KolmogorovSmirnov.py +5 -1
  153. validmind/tests/model_validation/statsmodels/Lilliefors.py +3 -1
  154. validmind/tests/model_validation/statsmodels/PredictionProbabilitiesHistogram.py +6 -2
  155. validmind/tests/model_validation/statsmodels/RegressionCoeffs.py +4 -1
  156. validmind/tests/model_validation/statsmodels/RegressionFeatureSignificance.py +7 -2
  157. validmind/tests/model_validation/statsmodels/RegressionModelForecastPlot.py +5 -4
  158. validmind/tests/model_validation/statsmodels/RegressionModelForecastPlotLevels.py +4 -1
  159. validmind/tests/model_validation/statsmodels/RegressionModelSensitivityPlot.py +3 -2
  160. validmind/tests/model_validation/statsmodels/RegressionModelSummary.py +5 -1
  161. validmind/tests/model_validation/statsmodels/RegressionPermutationFeatureImportance.py +3 -1
  162. validmind/tests/model_validation/statsmodels/ScorecardHistogram.py +6 -1
  163. validmind/tests/ongoing_monitoring/CalibrationCurveDrift.py +2 -2
  164. validmind/tests/ongoing_monitoring/ClassDiscriminationDrift.py +2 -2
  165. validmind/tests/ongoing_monitoring/ClassImbalanceDrift.py +2 -2
  166. validmind/tests/ongoing_monitoring/ClassificationAccuracyDrift.py +2 -2
  167. validmind/tests/ongoing_monitoring/ConfusionMatrixDrift.py +2 -2
  168. validmind/tests/ongoing_monitoring/CumulativePredictionProbabilitiesDrift.py +2 -2
  169. validmind/tests/ongoing_monitoring/FeatureDrift.py +5 -2
  170. validmind/tests/ongoing_monitoring/PredictionAcrossEachFeature.py +6 -1
  171. validmind/tests/ongoing_monitoring/PredictionCorrelation.py +8 -1
  172. validmind/tests/ongoing_monitoring/PredictionProbabilitiesHistogramDrift.py +2 -2
  173. validmind/tests/ongoing_monitoring/PredictionQuantilesAcrossFeatures.py +6 -1
  174. validmind/tests/ongoing_monitoring/ROCCurveDrift.py +4 -2
  175. validmind/tests/ongoing_monitoring/ScoreBandsDrift.py +2 -2
  176. validmind/tests/ongoing_monitoring/ScorecardHistogramDrift.py +2 -2
  177. validmind/tests/ongoing_monitoring/TargetPredictionDistributionPlot.py +8 -1
  178. validmind/tests/prompt_validation/Bias.py +5 -1
  179. validmind/tests/prompt_validation/Clarity.py +5 -1
  180. validmind/tests/prompt_validation/Conciseness.py +5 -1
  181. validmind/tests/prompt_validation/Delimitation.py +5 -1
  182. validmind/tests/prompt_validation/NegativeInstruction.py +5 -1
  183. validmind/tests/prompt_validation/Robustness.py +5 -1
  184. validmind/tests/prompt_validation/Specificity.py +5 -1
  185. validmind/unit_metrics/classification/Accuracy.py +2 -1
  186. validmind/unit_metrics/classification/F1.py +2 -1
  187. validmind/unit_metrics/classification/Precision.py +2 -1
  188. validmind/unit_metrics/classification/ROC_AUC.py +2 -1
  189. validmind/unit_metrics/classification/Recall.py +2 -1
  190. validmind/unit_metrics/regression/AdjustedRSquaredScore.py +2 -1
  191. validmind/unit_metrics/regression/GiniCoefficient.py +2 -1
  192. validmind/unit_metrics/regression/HuberLoss.py +2 -1
  193. validmind/unit_metrics/regression/KolmogorovSmirnovStatistic.py +2 -1
  194. validmind/unit_metrics/regression/MeanAbsoluteError.py +2 -1
  195. validmind/unit_metrics/regression/MeanAbsolutePercentageError.py +2 -1
  196. validmind/unit_metrics/regression/MeanBiasDeviation.py +2 -1
  197. validmind/unit_metrics/regression/MeanSquaredError.py +2 -1
  198. validmind/unit_metrics/regression/QuantileLoss.py +1 -1
  199. validmind/unit_metrics/regression/RSquaredScore.py +2 -1
  200. validmind/unit_metrics/regression/RootMeanSquaredError.py +2 -1
  201. validmind/vm_models/dataset/dataset.py +145 -38
  202. validmind/vm_models/result/result.py +14 -12
  203. {validmind-2.8.27.dist-info → validmind-2.8.29.dist-info}/METADATA +1 -1
  204. {validmind-2.8.27.dist-info → validmind-2.8.29.dist-info}/RECORD +207 -207
  205. {validmind-2.8.27.dist-info → validmind-2.8.29.dist-info}/LICENSE +0 -0
  206. {validmind-2.8.27.dist-info → validmind-2.8.29.dist-info}/WHEEL +0 -0
  207. {validmind-2.8.27.dist-info → validmind-2.8.29.dist-info}/entry_points.txt +0 -0

validmind/tests/model_validation/embeddings/CosineSimilarityHeatmap.py
@@ -2,24 +2,28 @@
 # See the LICENSE file in the root of this repository for details.
 # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
 
+from typing import Tuple
+
 import numpy as np
 import plotly.express as px
+import plotly.graph_objects as go
 from sklearn.metrics.pairwise import cosine_similarity
 
 from validmind import RawData, tags, tasks
+from validmind.vm_models import VMDataset, VMModel
 
 
 @tags("visualization", "dimensionality_reduction", "embeddings")
 @tasks("text_qa", "text_generation", "text_summarization")
 def CosineSimilarityHeatmap(
-    dataset,
-    model,
+    dataset: VMDataset,
+    model: VMModel,
     title="Cosine Similarity Matrix",
     color="Cosine Similarity",
     xaxis_title="Index",
     yaxis_title="Index",
     color_scale="Blues",
-):
+) -> Tuple[go.Figure, RawData]:
     """
     Generates an interactive heatmap to visualize the cosine similarities among embeddings derived from a given model.
 
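
The recurring change in these embeddings tests is that inputs gain explicit `VMDataset`/`VMModel` annotations and the return type now spells out the artifacts a test yields (Plotly figures, tables, `RawData`). Below is a minimal sketch of a custom test written in the same style; the function name, the plotted statistic, and the use of `dataset.y_pred(model)` to fetch stored embeddings are illustrative assumptions, not code from this release.

from typing import Tuple

import numpy as np
import plotly.graph_objects as go

from validmind import RawData, tags, tasks
from validmind.vm_models import VMDataset, VMModel


@tags("visualization", "embeddings")
@tasks("text_qa")
def EmbeddingNormHistogram(dataset: VMDataset, model: VMModel) -> Tuple[go.Figure, RawData]:
    # Illustrative only: plot the distribution of L2 norms of the embeddings
    # previously assigned to this dataset for the given model.
    embeddings = np.stack(dataset.y_pred(model))
    norms = np.linalg.norm(embeddings, axis=1)
    fig = go.Figure(go.Histogram(x=norms, nbinsx=30))
    fig.update_layout(title="Embedding L2 Norms")
    return fig, RawData(norms=norms)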

validmind/tests/model_validation/embeddings/DescriptiveAnalytics.py
@@ -2,8 +2,11 @@
 # See the LICENSE file in the root of this repository for details.
 # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
 
+from typing import Tuple
+
 import numpy as np
 import plotly.express as px
+import plotly.graph_objects as go
 
 from validmind import RawData, tags, tasks
 from validmind.vm_models import VMDataset, VMModel
@@ -11,7 +14,9 @@ from validmind.vm_models import VMDataset, VMModel
 
 @tags("llm", "text_data", "embeddings", "visualization")
 @tasks("feature_extraction")
-def DescriptiveAnalytics(dataset: VMDataset, model: VMModel):
+def DescriptiveAnalytics(
+    dataset: VMDataset, model: VMModel
+) -> Tuple[go.Figure, go.Figure, go.Figure, RawData]:
     """
     Evaluates statistical properties of text embeddings in an ML model via mean, median, and standard deviation
     histograms.

validmind/tests/model_validation/embeddings/EmbeddingsVisualization2D.py
@@ -2,9 +2,10 @@
 # See the LICENSE file in the root of this repository for details.
 # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
 
-from typing import Union
+from typing import Tuple, Union
 
 import plotly.express as px
+import plotly.graph_objects as go
 from sklearn.manifold import TSNE
 
 from validmind import RawData, tags, tasks
@@ -17,11 +18,11 @@ logger = get_logger(__name__)
 @tags("llm", "text_data", "embeddings", "visualization")
 @tasks("feature_extraction")
 def EmbeddingsVisualization2D(
-    model: VMModel,
     dataset: VMDataset,
+    model: VMModel,
     cluster_column: Union[str, None] = None,
     perplexity: int = 30,
-):
+) -> Tuple[go.Figure, RawData]:
     """
     Visualizes 2D representation of text embeddings generated by a model using t-SNE technique.
 

validmind/tests/model_validation/embeddings/EuclideanDistanceComparison.py
@@ -3,18 +3,23 @@
 # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
 
 from itertools import combinations
+from typing import List, Tuple
 
 import numpy as np
 import pandas as pd
 import plotly.express as px
+import plotly.graph_objects as go
 from sklearn.metrics.pairwise import euclidean_distances
 
 from validmind import RawData, tags, tasks
+from validmind.vm_models import VMDataset, VMModel
 
 
 @tags("visualization", "dimensionality_reduction", "embeddings")
 @tasks("text_qa", "text_generation", "text_summarization")
-def EuclideanDistanceComparison(dataset, models):
+def EuclideanDistanceComparison(
+    dataset: VMDataset, models: List[VMModel]
+) -> Tuple[pd.DataFrame, go.Figure, RawData]:
     """
     Assesses and visualizes the dissimilarity between model embeddings using Euclidean distance, providing insights
     into model behavior and potential redundancy or diversity.

validmind/tests/model_validation/embeddings/EuclideanDistanceHeatmap.py
@@ -2,24 +2,28 @@
 # See the LICENSE file in the root of this repository for details.
 # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
 
+from typing import Tuple
+
 import numpy as np
 import plotly.express as px
+import plotly.graph_objects as go
 from sklearn.metrics.pairwise import euclidean_distances
 
 from validmind import RawData, tags, tasks
+from validmind.vm_models import VMDataset, VMModel
 
 
 @tags("visualization", "dimensionality_reduction", "embeddings")
 @tasks("text_qa", "text_generation", "text_summarization")
 def EuclideanDistanceHeatmap(
-    dataset,
-    model,
+    dataset: VMDataset,
+    model: VMModel,
     title="Euclidean Distance Matrix",
     color="Euclidean Distance",
     xaxis_title="Index",
     yaxis_title="Index",
     color_scale="Blues",
-):
+) -> Tuple[go.Figure, RawData]:
     """
     Generates an interactive heatmap to visualize the Euclidean distances among embeddings derived from a given model.
 

validmind/tests/model_validation/embeddings/PCAComponentsPairwisePlots.py
@@ -3,19 +3,24 @@
 # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
 
 import itertools
+from typing import Tuple
 
 import numpy as np
 import pandas as pd
 import plotly.express as px
+import plotly.graph_objects as go
 from sklearn.decomposition import PCA
 from sklearn.preprocessing import StandardScaler
 
 from validmind import RawData, tags, tasks
+from validmind.vm_models import VMDataset, VMModel
 
 
 @tags("visualization", "dimensionality_reduction", "embeddings")
 @tasks("text_qa", "text_generation", "text_summarization")
-def PCAComponentsPairwisePlots(dataset, model, n_components=3):
+def PCAComponentsPairwisePlots(
+    dataset: VMDataset, model: VMModel, n_components: int = 3
+) -> Tuple[go.Figure, RawData]:
     """
     Generates scatter plots for pairwise combinations of principal component analysis (PCA) components of model
     embeddings.

validmind/tests/model_validation/embeddings/StabilityAnalysisKeyword.py
@@ -3,7 +3,10 @@
 # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
 
 import re
-from typing import Dict
+from typing import Dict, Tuple
+
+import pandas as pd
+import plotly.graph_objects as go
 
 from validmind import RawData, tags, tasks
 from validmind.vm_models import VMDataset, VMModel
@@ -18,7 +21,7 @@ def StabilityAnalysisKeyword(
     model: VMModel,
     keyword_dict: Dict[str, str],
     mean_similarity_threshold: float = 0.7,
-):
+) -> Tuple[go.Figure, pd.DataFrame, RawData]:
     """
     Evaluates robustness of embedding models to keyword swaps in the test dataset.
 

validmind/tests/model_validation/embeddings/StabilityAnalysisRandomNoise.py
@@ -4,6 +4,10 @@
 
 import random
 import string
+from typing import Tuple
+
+import pandas as pd
+import plotly.graph_objects as go
 
 from validmind import RawData, tags, tasks
 from validmind.vm_models import VMDataset, VMModel
@@ -69,7 +73,7 @@ def StabilityAnalysisRandomNoise(
     model: VMModel,
     probability: float = 0.02,
     mean_similarity_threshold: float = 0.7,
-):
+) -> Tuple[go.Figure, pd.DataFrame, RawData]:
     """
     Assesses the robustness of text embeddings models to random noise introduced via text perturbations.
 

validmind/tests/model_validation/embeddings/StabilityAnalysisSynonyms.py
@@ -3,8 +3,11 @@
 # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
 
 import random
+from typing import Tuple
 
 import nltk
+import pandas as pd
+import plotly.graph_objects as go
 from nltk.corpus import wordnet as wn
 
 from validmind import RawData, tags, tasks
@@ -20,7 +23,7 @@ def StabilityAnalysisSynonyms(
     model: VMModel,
     probability: float = 0.02,
     mean_similarity_threshold: float = 0.7,
-):
+) -> Tuple[go.Figure, pd.DataFrame, RawData]:
     """
     Evaluates the stability of text embeddings models when words in test data are replaced by their synonyms randomly.
 

validmind/tests/model_validation/embeddings/StabilityAnalysisTranslation.py
@@ -2,6 +2,10 @@
 # See the LICENSE file in the root of this repository for details.
 # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
 
+from typing import Tuple
+
+import pandas as pd
+import plotly.graph_objects as go
 from transformers import MarianMTModel, MarianTokenizer
 
 from validmind import RawData, tags, tasks
@@ -21,7 +25,7 @@ def StabilityAnalysisTranslation(
     source_lang: str = "en",
     target_lang: str = "fr",
     mean_similarity_threshold: float = 0.7,
-):
+) -> Tuple[go.Figure, pd.DataFrame, RawData]:
     """
     Evaluates robustness of text embeddings models to noise introduced by translating the original text to another
     language and back.

validmind/tests/model_validation/embeddings/TSNEComponentsPairwisePlots.py
@@ -3,25 +3,28 @@
 # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
 
 import itertools
+from typing import Tuple
 
 import numpy as np
 import pandas as pd
 import plotly.express as px
+import plotly.graph_objects as go
 from sklearn.manifold import TSNE
 from sklearn.preprocessing import StandardScaler
 
 from validmind import RawData, tags, tasks
+from validmind.vm_models import VMDataset, VMModel
 
 
 @tags("visualization", "dimensionality_reduction", "embeddings")
 @tasks("text_qa", "text_generation", "text_summarization")
 def TSNEComponentsPairwisePlots(
-    dataset,
-    model,
-    n_components=2,
-    perplexity=30,
-    title="t-SNE",
-):
+    dataset: VMDataset,
+    model: VMModel,
+    n_components: int = 2,
+    perplexity: int = 30,
+    title: str = "t-SNE",
+) -> Tuple[go.Figure, RawData]:
     """
     Creates scatter plots for pairwise combinations of t-SNE components to visualize embeddings and highlight potential
     clustering structures.
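
With the annotated signature above, `n_components`, `perplexity`, and `title` are plain keyword parameters of the test function. A hedged sketch of running this test through ValidMind's test harness follows, assuming the documented `run_test` helper behaves as in ValidMind's examples and that `vm_dataset`/`vm_model` are placeholder objects created earlier with `vm.init_dataset()` and `vm.init_model()`:

from validmind.tests import run_test

# vm_dataset and vm_model are placeholders assumed to exist in the session.
result = run_test(
    "validmind.model_validation.embeddings.TSNEComponentsPairwisePlots",
    inputs={"dataset": vm_dataset, "model": vm_model},
    params={"n_components": 2, "perplexity": 30, "title": "t-SNE"},
)
result.log()  # optionally push the figures and RawData to the ValidMind platform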

validmind/tests/model_validation/ragas/AnswerCorrectness.py
@@ -3,12 +3,15 @@
 # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
 
 import warnings
+from typing import Dict, Tuple
 
 import plotly.express as px
+import plotly.graph_objects as go
 from datasets import Dataset
 
 from validmind import RawData, tags, tasks
 from validmind.errors import MissingDependencyError
+from validmind.vm_models import VMDataset
 
 from .utils import get_ragas_config, get_renamed_columns
 
@@ -30,13 +33,13 @@ except ImportError as e:
 @tags("ragas", "llm")
 @tasks("text_qa", "text_generation", "text_summarization")
 def AnswerCorrectness(
-    dataset,
-    user_input_column="user_input",
-    response_column="response",
-    reference_column="reference",
+    dataset: VMDataset,
+    user_input_column: str = "user_input",
+    response_column: str = "response",
+    reference_column: str = "reference",
     judge_llm=None,
     judge_embeddings=None,
-):
+) -> Tuple[Dict[str, list], go.Figure, go.Figure, RawData]:
     """
     Evaluates the correctness of answers in a dataset with respect to the provided ground
     truths and visualizes the results in a histogram.

validmind/tests/model_validation/ragas/AspectCritic.py
@@ -3,12 +3,15 @@
 # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
 
 import warnings
+from typing import Dict, List, Optional, Tuple
 
 import plotly.express as px
+import plotly.graph_objects as go
 from datasets import Dataset
 
 from validmind import RawData, tags, tasks
 from validmind.errors import MissingDependencyError
+from validmind.vm_models import VMDataset
 
 from .utils import get_ragas_config, get_renamed_columns
 
@@ -39,21 +42,21 @@ LOWER_IS_BETTER_ASPECTS = ["harmfulness", "maliciousness"]
 @tags("ragas", "llm", "qualitative")
 @tasks("text_summarization", "text_generation", "text_qa")
 def AspectCritic(
-    dataset,
-    user_input_column="user_input",
-    response_column="response",
-    retrieved_contexts_column=None,
-    aspects: list = [
+    dataset: VMDataset,
+    user_input_column: str = "user_input",
+    response_column: str = "response",
+    retrieved_contexts_column: Optional[str] = None,
+    aspects: List[str] = [
         "coherence",
         "conciseness",
         "correctness",
         "harmfulness",
         "maliciousness",
     ],
-    additional_aspects: list = None,
+    additional_aspects: Optional[List[Tuple[str, str]]] = None,
     judge_llm=None,
     judge_embeddings=None,
-):
+) -> Tuple[Dict[str, list], go.Figure, RawData]:
     """
     Evaluates generations against the following aspects: harmfulness, maliciousness,
     coherence, correctness, and conciseness.
@@ -146,8 +149,8 @@ def AspectCritic(
 
     if retrieved_contexts_column:
         required_columns["retrieved_contexts"] = retrieved_contexts_column
-
     df = get_renamed_columns(dataset._df, required_columns)
+    df = df[required_columns.keys()]
 
     custom_aspects = (
         [
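
The `df = df[required_columns.keys()]` line added here (and to several of the ragas tests below) restricts the renamed DataFrame to exactly the columns the metric needs, so extra columns in the source dataset are not forwarded to `ragas.evaluate`. The pandas behaviour it relies on is sketched below with made-up column names; `get_renamed_columns` is a ValidMind helper and is only imitated here with a plain rename.

import pandas as pd

# Hypothetical dataset with one extra column the metric does not need.
df = pd.DataFrame(
    {
        "question": ["What drives PD?"],
        "answer": ["Borrower risk factors."],
        "model_version": ["v2"],  # extra column
    }
)

# Mapping from the names ragas expects to the dataset's own column names.
required_columns = {"user_input": "question", "response": "answer"}

# Rename to the ragas schema, then keep only the required columns.
df = df.rename(columns={v: k for k, v in required_columns.items()})
df = df[required_columns.keys()]

print(list(df.columns))  # ['user_input', 'response']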

validmind/tests/model_validation/ragas/ContextEntityRecall.py
@@ -3,12 +3,15 @@
 # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
 
 import warnings
+from typing import Dict, Tuple
 
 import plotly.express as px
+import plotly.graph_objects as go
 from datasets import Dataset
 
 from validmind import RawData, tags, tasks
 from validmind.errors import MissingDependencyError
+from validmind.vm_models import VMDataset
 
 from .utils import get_ragas_config, get_renamed_columns
 
@@ -30,12 +33,12 @@ except ImportError as e:
 @tags("ragas", "llm", "retrieval_performance")
 @tasks("text_qa", "text_generation", "text_summarization")
 def ContextEntityRecall(
-    dataset,
+    dataset: VMDataset,
     retrieved_contexts_column: str = "retrieved_contexts",
     reference_column: str = "reference",
     judge_llm=None,
     judge_embeddings=None,
-):
+) -> Tuple[Dict[str, list], go.Figure, go.Figure, RawData]:
     """
     Evaluates the context entity recall for dataset entries and visualizes the results.
 

validmind/tests/model_validation/ragas/ContextPrecision.py
@@ -3,12 +3,15 @@
 # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
 
 import warnings
+from typing import Dict, Tuple
 
 import plotly.express as px
+import plotly.graph_objects as go
 from datasets import Dataset
 
 from validmind import RawData, tags, tasks
 from validmind.errors import MissingDependencyError
+from validmind.vm_models import VMDataset
 
 from .utils import get_ragas_config, get_renamed_columns
 
@@ -30,13 +33,13 @@ except ImportError as e:
 @tags("ragas", "llm", "retrieval_performance")
 @tasks("text_qa", "text_generation", "text_summarization", "text_classification")
 def ContextPrecision(
-    dataset,
+    dataset: VMDataset,
     user_input_column: str = "user_input",
     retrieved_contexts_column: str = "retrieved_contexts",
     reference_column: str = "reference",
     judge_llm=None,
     judge_embeddings=None,
-):  # noqa: B950
+) -> Tuple[Dict[str, list], go.Figure, go.Figure, RawData]:
     """
     Context Precision is a metric that evaluates whether all of the ground-truth
     relevant items present in the contexts are ranked higher or not. Ideally all the

validmind/tests/model_validation/ragas/ContextPrecisionWithoutReference.py
@@ -3,12 +3,15 @@
 # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
 
 import warnings
+from typing import Dict, Tuple
 
 import plotly.express as px
+import plotly.graph_objects as go
 from datasets import Dataset
 
 from validmind import RawData, tags, tasks
 from validmind.errors import MissingDependencyError
+from validmind.vm_models import VMDataset
 
 from .utils import get_ragas_config, get_renamed_columns
 
@@ -30,13 +33,13 @@ except ImportError as e:
 @tags("ragas", "llm", "retrieval_performance")
 @tasks("text_qa", "text_generation", "text_summarization", "text_classification")
 def ContextPrecisionWithoutReference(
-    dataset,
+    dataset: VMDataset,
     user_input_column: str = "user_input",
     retrieved_contexts_column: str = "retrieved_contexts",
     response_column: str = "response",
     judge_llm=None,
     judge_embeddings=None,
-):  # noqa: B950
+) -> Tuple[Dict[str, list], go.Figure, go.Figure, RawData]:
     """
     Context Precision Without Reference is a metric used to evaluate the relevance of
     retrieved contexts compared to the expected response for a given user input. This

validmind/tests/model_validation/ragas/ContextRecall.py
@@ -3,12 +3,15 @@
 # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
 
 import warnings
+from typing import Dict, Tuple
 
 import plotly.express as px
+import plotly.graph_objects as go
 from datasets import Dataset
 
 from validmind import RawData, tags, tasks
 from validmind.errors import MissingDependencyError
+from validmind.vm_models import VMDataset
 
 from .utils import get_ragas_config, get_renamed_columns
 
@@ -30,13 +33,13 @@ except ImportError as e:
 @tags("ragas", "llm", "retrieval_performance")
 @tasks("text_qa", "text_generation", "text_summarization", "text_classification")
 def ContextRecall(
-    dataset,
+    dataset: VMDataset,
     user_input_column: str = "user_input",
     retrieved_contexts_column: str = "retrieved_contexts",
     reference_column: str = "reference",
     judge_llm=None,
     judge_embeddings=None,
-):
+) -> Tuple[Dict[str, list], go.Figure, go.Figure, RawData]:
     """
     Context recall measures the extent to which the retrieved context aligns with the
     annotated answer, treated as the ground truth. It is computed based on the `ground
@@ -109,6 +112,7 @@ def ContextRecall(
     }
 
     df = get_renamed_columns(dataset._df, required_columns)
+    df = df[required_columns.keys()]
 
     result_df = evaluate(
         Dataset.from_pandas(df),

validmind/tests/model_validation/ragas/Faithfulness.py
@@ -3,12 +3,15 @@
 # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
 
 import warnings
+from typing import Dict, Tuple
 
 import plotly.express as px
+import plotly.graph_objects as go
 from datasets import Dataset
 
 from validmind import RawData, tags, tasks
 from validmind.errors import MissingDependencyError
+from validmind.vm_models import VMDataset
 
 from .utils import get_ragas_config, get_renamed_columns
 
@@ -30,13 +33,13 @@ except ImportError as e:
 @tags("ragas", "llm", "rag_performance")
 @tasks("text_qa", "text_generation", "text_summarization")
 def Faithfulness(
-    dataset,
-    user_input_column="user_input",
-    response_column="response",
-    retrieved_contexts_column="retrieved_contexts",
+    dataset: VMDataset,
+    user_input_column: str = "user_input",
+    response_column: str = "response",
+    retrieved_contexts_column: str = "retrieved_contexts",
     judge_llm=None,
     judge_embeddings=None,
-):  # noqa
+) -> Tuple[Dict[str, list], go.Figure, go.Figure, RawData]:
     """
     Evaluates the faithfulness of the generated answers with respect to retrieved contexts.
 
@@ -115,6 +118,7 @@ def Faithfulness(
 
     df = get_renamed_columns(dataset._df, required_columns)
 
+    df = df[required_columns.keys()]
     result_df = evaluate(
         Dataset.from_pandas(df),
         metrics=[faithfulness()],

validmind/tests/model_validation/ragas/NoiseSensitivity.py
@@ -3,12 +3,15 @@
 # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
 
 import warnings
+from typing import Dict, Tuple
 
 import plotly.express as px
+import plotly.graph_objects as go
 from datasets import Dataset
 
 from validmind import RawData, tags, tasks
 from validmind.errors import MissingDependencyError
+from validmind.vm_models import VMDataset
 
 from .utils import get_ragas_config, get_renamed_columns
 
@@ -32,15 +35,15 @@ VALID_FOCUS_VALUES = ["relevant", "irrelevant"]
 @tags("ragas", "llm", "rag_performance")
 @tasks("text_qa", "text_generation", "text_summarization")
 def NoiseSensitivity(
-    dataset,
-    response_column="response",
-    retrieved_contexts_column="retrieved_contexts",
-    reference_column="reference",
-    focus="relevant",
-    user_input_column="user_input",
+    dataset: VMDataset,
+    response_column: str = "response",
+    retrieved_contexts_column: str = "retrieved_contexts",
+    reference_column: str = "reference",
+    focus: str = "relevant",
+    user_input_column: str = "user_input",
     judge_llm=None,
     judge_embeddings=None,
-):
+) -> Tuple[Dict[str, list], go.Figure, go.Figure, RawData]:
     """
     Assesses the sensitivity of a Large Language Model (LLM) to noise in retrieved context by measuring how often it
     generates incorrect responses.

validmind/tests/model_validation/ragas/ResponseRelevancy.py
@@ -3,12 +3,15 @@
 # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
 
 import warnings
+from typing import Dict, Tuple
 
 import plotly.express as px
+import plotly.graph_objects as go
 from datasets import Dataset
 
 from validmind import RawData, tags, tasks
 from validmind.errors import MissingDependencyError
+from validmind.vm_models import VMDataset
 
 from .utils import get_ragas_config, get_renamed_columns
 
@@ -30,13 +33,13 @@ except ImportError as e:
 @tags("ragas", "llm", "rag_performance")
 @tasks("text_qa", "text_generation", "text_summarization")
 def ResponseRelevancy(
-    dataset,
-    user_input_column="user_input",
-    retrieved_contexts_column=None,
-    response_column="response",
+    dataset: VMDataset,
+    user_input_column: str = "user_input",
+    retrieved_contexts_column: str = None,
+    response_column: str = "response",
     judge_llm=None,
     judge_embeddings=None,
-):
+) -> Tuple[Dict[str, list], go.Figure, go.Figure, RawData]:
     """
     Assesses how pertinent the generated answer is to the given prompt.
 
@@ -124,6 +127,7 @@ def ResponseRelevancy(
         required_columns["retrieved_contexts"] = retrieved_contexts_column
 
     df = get_renamed_columns(dataset._df, required_columns)
+    df = df[required_columns.keys()]
 
     metrics = [response_relevancy()]
 
@@ -134,7 +138,6 @@ def ResponseRelevancy(
     ).to_pandas()
 
     score_column = "answer_relevancy"
-
     fig_histogram = px.histogram(
         x=result_df[score_column].to_list(), nbins=10, title="Response Relevancy"
     )