validmind 2.5.8__py3-none-any.whl → 2.5.15__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (212)
  1. validmind/__version__.py +1 -1
  2. validmind/ai/test_descriptions.py +26 -7
  3. validmind/api_client.py +89 -43
  4. validmind/client.py +2 -2
  5. validmind/client_config.py +11 -14
  6. validmind/datasets/regression/fred_timeseries.py +67 -138
  7. validmind/template.py +1 -0
  8. validmind/test_suites/__init__.py +0 -2
  9. validmind/test_suites/statsmodels_timeseries.py +1 -1
  10. validmind/test_suites/summarization.py +0 -1
  11. validmind/test_suites/time_series.py +0 -43
  12. validmind/tests/__types__.py +3 -13
  13. validmind/tests/data_validation/ACFandPACFPlot.py +15 -13
  14. validmind/tests/data_validation/ADF.py +31 -24
  15. validmind/tests/data_validation/AutoAR.py +9 -9
  16. validmind/tests/data_validation/AutoMA.py +23 -16
  17. validmind/tests/data_validation/AutoSeasonality.py +18 -16
  18. validmind/tests/data_validation/AutoStationarity.py +21 -16
  19. validmind/tests/data_validation/BivariateScatterPlots.py +67 -96
  20. validmind/tests/data_validation/ChiSquaredFeaturesTable.py +82 -124
  21. validmind/tests/data_validation/ClassImbalance.py +15 -12
  22. validmind/tests/data_validation/DFGLSArch.py +19 -13
  23. validmind/tests/data_validation/DatasetDescription.py +17 -11
  24. validmind/tests/data_validation/DatasetSplit.py +7 -5
  25. validmind/tests/data_validation/DescriptiveStatistics.py +28 -21
  26. validmind/tests/data_validation/Duplicates.py +33 -25
  27. validmind/tests/data_validation/EngleGrangerCoint.py +35 -33
  28. validmind/tests/data_validation/FeatureTargetCorrelationPlot.py +59 -71
  29. validmind/tests/data_validation/HighCardinality.py +19 -12
  30. validmind/tests/data_validation/HighPearsonCorrelation.py +27 -22
  31. validmind/tests/data_validation/IQROutliersBarPlot.py +13 -10
  32. validmind/tests/data_validation/IQROutliersTable.py +40 -36
  33. validmind/tests/data_validation/IsolationForestOutliers.py +21 -14
  34. validmind/tests/data_validation/KPSS.py +34 -29
  35. validmind/tests/data_validation/LaggedCorrelationHeatmap.py +22 -15
  36. validmind/tests/data_validation/MissingValues.py +32 -27
  37. validmind/tests/data_validation/MissingValuesBarPlot.py +25 -21
  38. validmind/tests/data_validation/PearsonCorrelationMatrix.py +71 -84
  39. validmind/tests/data_validation/PhillipsPerronArch.py +37 -30
  40. validmind/tests/data_validation/RollingStatsPlot.py +31 -23
  41. validmind/tests/data_validation/ScatterPlot.py +63 -78
  42. validmind/tests/data_validation/SeasonalDecompose.py +38 -34
  43. validmind/tests/data_validation/Skewness.py +35 -37
  44. validmind/tests/data_validation/SpreadPlot.py +35 -35
  45. validmind/tests/data_validation/TabularCategoricalBarPlots.py +23 -17
  46. validmind/tests/data_validation/TabularDateTimeHistograms.py +21 -13
  47. validmind/tests/data_validation/TabularDescriptionTables.py +51 -16
  48. validmind/tests/data_validation/TabularNumericalHistograms.py +25 -22
  49. validmind/tests/data_validation/TargetRateBarPlots.py +21 -14
  50. validmind/tests/data_validation/TimeSeriesDescription.py +25 -18
  51. validmind/tests/data_validation/TimeSeriesDescriptiveStatistics.py +23 -17
  52. validmind/tests/data_validation/TimeSeriesFrequency.py +24 -17
  53. validmind/tests/data_validation/TimeSeriesHistogram.py +33 -32
  54. validmind/tests/data_validation/TimeSeriesLinePlot.py +17 -10
  55. validmind/tests/data_validation/TimeSeriesMissingValues.py +15 -10
  56. validmind/tests/data_validation/TimeSeriesOutliers.py +37 -33
  57. validmind/tests/data_validation/TooManyZeroValues.py +16 -11
  58. validmind/tests/data_validation/UniqueRows.py +11 -6
  59. validmind/tests/data_validation/WOEBinPlots.py +23 -16
  60. validmind/tests/data_validation/WOEBinTable.py +35 -30
  61. validmind/tests/data_validation/ZivotAndrewsArch.py +34 -28
  62. validmind/tests/data_validation/nlp/CommonWords.py +21 -14
  63. validmind/tests/data_validation/nlp/Hashtags.py +27 -20
  64. validmind/tests/data_validation/nlp/LanguageDetection.py +33 -14
  65. validmind/tests/data_validation/nlp/Mentions.py +21 -15
  66. validmind/tests/data_validation/nlp/PolarityAndSubjectivity.py +32 -9
  67. validmind/tests/data_validation/nlp/Punctuations.py +24 -20
  68. validmind/tests/data_validation/nlp/Sentiment.py +27 -8
  69. validmind/tests/data_validation/nlp/StopWords.py +26 -19
  70. validmind/tests/data_validation/nlp/TextDescription.py +36 -35
  71. validmind/tests/data_validation/nlp/Toxicity.py +32 -9
  72. validmind/tests/decorator.py +81 -42
  73. validmind/tests/model_validation/BertScore.py +36 -27
  74. validmind/tests/model_validation/BleuScore.py +25 -19
  75. validmind/tests/model_validation/ClusterSizeDistribution.py +38 -34
  76. validmind/tests/model_validation/ContextualRecall.py +35 -13
  77. validmind/tests/model_validation/FeaturesAUC.py +32 -13
  78. validmind/tests/model_validation/MeteorScore.py +46 -33
  79. validmind/tests/model_validation/ModelMetadata.py +32 -64
  80. validmind/tests/model_validation/ModelPredictionResiduals.py +75 -73
  81. validmind/tests/model_validation/RegardScore.py +30 -14
  82. validmind/tests/model_validation/RegressionResidualsPlot.py +10 -5
  83. validmind/tests/model_validation/RougeScore.py +36 -30
  84. validmind/tests/model_validation/TimeSeriesPredictionWithCI.py +30 -14
  85. validmind/tests/model_validation/TimeSeriesPredictionsPlot.py +27 -30
  86. validmind/tests/model_validation/TimeSeriesR2SquareBySegments.py +68 -63
  87. validmind/tests/model_validation/TokenDisparity.py +31 -23
  88. validmind/tests/model_validation/ToxicityScore.py +26 -17
  89. validmind/tests/model_validation/embeddings/ClusterDistribution.py +24 -20
  90. validmind/tests/model_validation/embeddings/CosineSimilarityComparison.py +30 -27
  91. validmind/tests/model_validation/embeddings/CosineSimilarityDistribution.py +7 -5
  92. validmind/tests/model_validation/embeddings/CosineSimilarityHeatmap.py +32 -23
  93. validmind/tests/model_validation/embeddings/DescriptiveAnalytics.py +7 -5
  94. validmind/tests/model_validation/embeddings/EmbeddingsVisualization2D.py +15 -11
  95. validmind/tests/model_validation/embeddings/EuclideanDistanceComparison.py +29 -29
  96. validmind/tests/model_validation/embeddings/EuclideanDistanceHeatmap.py +34 -25
  97. validmind/tests/model_validation/embeddings/PCAComponentsPairwisePlots.py +38 -26
  98. validmind/tests/model_validation/embeddings/StabilityAnalysis.py +40 -1
  99. validmind/tests/model_validation/embeddings/StabilityAnalysisKeyword.py +18 -17
  100. validmind/tests/model_validation/embeddings/StabilityAnalysisRandomNoise.py +40 -45
  101. validmind/tests/model_validation/embeddings/StabilityAnalysisSynonyms.py +17 -19
  102. validmind/tests/model_validation/embeddings/StabilityAnalysisTranslation.py +29 -25
  103. validmind/tests/model_validation/embeddings/TSNEComponentsPairwisePlots.py +38 -28
  104. validmind/tests/model_validation/ragas/AnswerCorrectness.py +5 -4
  105. validmind/tests/model_validation/ragas/AnswerRelevance.py +5 -4
  106. validmind/tests/model_validation/ragas/AnswerSimilarity.py +5 -4
  107. validmind/tests/model_validation/ragas/AspectCritique.py +7 -0
  108. validmind/tests/model_validation/ragas/ContextEntityRecall.py +9 -8
  109. validmind/tests/model_validation/ragas/ContextPrecision.py +5 -4
  110. validmind/tests/model_validation/ragas/ContextRecall.py +5 -4
  111. validmind/tests/model_validation/ragas/Faithfulness.py +5 -4
  112. validmind/tests/model_validation/ragas/utils.py +6 -0
  113. validmind/tests/model_validation/sklearn/AdjustedMutualInformation.py +19 -12
  114. validmind/tests/model_validation/sklearn/AdjustedRandIndex.py +22 -17
  115. validmind/tests/model_validation/sklearn/ClassifierPerformance.py +27 -25
  116. validmind/tests/model_validation/sklearn/ClusterCosineSimilarity.py +7 -5
  117. validmind/tests/model_validation/sklearn/ClusterPerformance.py +40 -78
  118. validmind/tests/model_validation/sklearn/ClusterPerformanceMetrics.py +15 -17
  119. validmind/tests/model_validation/sklearn/CompletenessScore.py +17 -11
  120. validmind/tests/model_validation/sklearn/ConfusionMatrix.py +22 -15
  121. validmind/tests/model_validation/sklearn/FeatureImportance.py +95 -0
  122. validmind/tests/model_validation/sklearn/FowlkesMallowsScore.py +7 -7
  123. validmind/tests/model_validation/sklearn/HomogeneityScore.py +19 -12
  124. validmind/tests/model_validation/sklearn/HyperParametersTuning.py +35 -30
  125. validmind/tests/model_validation/sklearn/KMeansClustersOptimization.py +10 -5
  126. validmind/tests/model_validation/sklearn/MinimumAccuracy.py +32 -32
  127. validmind/tests/model_validation/sklearn/MinimumF1Score.py +23 -23
  128. validmind/tests/model_validation/sklearn/MinimumROCAUCScore.py +15 -10
  129. validmind/tests/model_validation/sklearn/ModelsPerformanceComparison.py +26 -19
  130. validmind/tests/model_validation/sklearn/OverfitDiagnosis.py +38 -18
  131. validmind/tests/model_validation/sklearn/PermutationFeatureImportance.py +31 -25
  132. validmind/tests/model_validation/sklearn/PopulationStabilityIndex.py +8 -6
  133. validmind/tests/model_validation/sklearn/PrecisionRecallCurve.py +24 -17
  134. validmind/tests/model_validation/sklearn/ROCCurve.py +12 -7
  135. validmind/tests/model_validation/sklearn/RegressionErrors.py +74 -130
  136. validmind/tests/model_validation/sklearn/RegressionErrorsComparison.py +27 -12
  137. validmind/tests/model_validation/sklearn/{RegressionModelsPerformanceComparison.py → RegressionPerformance.py} +18 -20
  138. validmind/tests/model_validation/sklearn/RegressionR2Square.py +55 -93
  139. validmind/tests/model_validation/sklearn/RegressionR2SquareComparison.py +32 -13
  140. validmind/tests/model_validation/sklearn/RobustnessDiagnosis.py +36 -32
  141. validmind/tests/model_validation/sklearn/SHAPGlobalImportance.py +7 -5
  142. validmind/tests/model_validation/sklearn/SilhouettePlot.py +27 -19
  143. validmind/tests/model_validation/sklearn/TrainingTestDegradation.py +25 -18
  144. validmind/tests/model_validation/sklearn/VMeasure.py +14 -13
  145. validmind/tests/model_validation/sklearn/WeakspotsDiagnosis.py +7 -5
  146. validmind/tests/model_validation/statsmodels/AutoARIMA.py +24 -18
  147. validmind/tests/model_validation/statsmodels/BoxPierce.py +14 -10
  148. validmind/tests/model_validation/statsmodels/CumulativePredictionProbabilities.py +73 -104
  149. validmind/tests/model_validation/statsmodels/DurbinWatsonTest.py +19 -12
  150. validmind/tests/model_validation/statsmodels/GINITable.py +44 -77
  151. validmind/tests/model_validation/statsmodels/JarqueBera.py +27 -22
  152. validmind/tests/model_validation/statsmodels/KolmogorovSmirnov.py +33 -34
  153. validmind/tests/model_validation/statsmodels/LJungBox.py +32 -28
  154. validmind/tests/model_validation/statsmodels/Lilliefors.py +27 -24
  155. validmind/tests/model_validation/statsmodels/PredictionProbabilitiesHistogram.py +87 -119
  156. validmind/tests/model_validation/statsmodels/RegressionCoeffs.py +100 -0
  157. validmind/tests/model_validation/statsmodels/RegressionFeatureSignificance.py +14 -9
  158. validmind/tests/model_validation/statsmodels/RegressionModelForecastPlot.py +17 -13
  159. validmind/tests/model_validation/statsmodels/RegressionModelForecastPlotLevels.py +46 -43
  160. validmind/tests/model_validation/statsmodels/RegressionModelSensitivityPlot.py +38 -36
  161. validmind/tests/model_validation/statsmodels/RegressionModelSummary.py +30 -28
  162. validmind/tests/model_validation/statsmodels/RegressionPermutationFeatureImportance.py +18 -11
  163. validmind/tests/model_validation/statsmodels/RunsTest.py +32 -28
  164. validmind/tests/model_validation/statsmodels/ScorecardHistogram.py +75 -107
  165. validmind/tests/model_validation/statsmodels/ShapiroWilk.py +15 -8
  166. validmind/tests/ongoing_monitoring/FeatureDrift.py +10 -6
  167. validmind/tests/ongoing_monitoring/PredictionAcrossEachFeature.py +31 -25
  168. validmind/tests/ongoing_monitoring/PredictionCorrelation.py +29 -21
  169. validmind/tests/ongoing_monitoring/TargetPredictionDistributionPlot.py +31 -23
  170. validmind/tests/prompt_validation/Bias.py +14 -11
  171. validmind/tests/prompt_validation/Clarity.py +16 -14
  172. validmind/tests/prompt_validation/Conciseness.py +7 -5
  173. validmind/tests/prompt_validation/Delimitation.py +23 -22
  174. validmind/tests/prompt_validation/NegativeInstruction.py +7 -5
  175. validmind/tests/prompt_validation/Robustness.py +12 -10
  176. validmind/tests/prompt_validation/Specificity.py +13 -11
  177. validmind/tests/prompt_validation/ai_powered_test.py +6 -0
  178. validmind/tests/run.py +68 -23
  179. validmind/unit_metrics/__init__.py +81 -144
  180. validmind/unit_metrics/classification/{sklearn/Accuracy.py → Accuracy.py} +1 -1
  181. validmind/unit_metrics/classification/{sklearn/F1.py → F1.py} +1 -1
  182. validmind/unit_metrics/classification/{sklearn/Precision.py → Precision.py} +1 -1
  183. validmind/unit_metrics/classification/{sklearn/ROC_AUC.py → ROC_AUC.py} +1 -2
  184. validmind/unit_metrics/classification/{sklearn/Recall.py → Recall.py} +1 -1
  185. validmind/unit_metrics/regression/{sklearn/AdjustedRSquaredScore.py → AdjustedRSquaredScore.py} +1 -1
  186. validmind/unit_metrics/regression/GiniCoefficient.py +1 -1
  187. validmind/unit_metrics/regression/HuberLoss.py +1 -1
  188. validmind/unit_metrics/regression/KolmogorovSmirnovStatistic.py +1 -1
  189. validmind/unit_metrics/regression/{sklearn/MeanAbsoluteError.py → MeanAbsoluteError.py} +1 -1
  190. validmind/unit_metrics/regression/MeanAbsolutePercentageError.py +1 -1
  191. validmind/unit_metrics/regression/MeanBiasDeviation.py +1 -1
  192. validmind/unit_metrics/regression/{sklearn/MeanSquaredError.py → MeanSquaredError.py} +1 -1
  193. validmind/unit_metrics/regression/QuantileLoss.py +1 -1
  194. validmind/unit_metrics/regression/{sklearn/RSquaredScore.py → RSquaredScore.py} +1 -1
  195. validmind/unit_metrics/regression/{sklearn/RootMeanSquaredError.py → RootMeanSquaredError.py} +1 -1
  196. validmind/vm_models/dataset/dataset.py +2 -0
  197. validmind/vm_models/figure.py +5 -0
  198. validmind/vm_models/test/result_wrapper.py +93 -132
  199. {validmind-2.5.8.dist-info → validmind-2.5.15.dist-info}/METADATA +1 -1
  200. {validmind-2.5.8.dist-info → validmind-2.5.15.dist-info}/RECORD +203 -210
  201. validmind/tests/data_validation/ANOVAOneWayTable.py +0 -138
  202. validmind/tests/data_validation/BivariateFeaturesBarPlots.py +0 -142
  203. validmind/tests/data_validation/BivariateHistograms.py +0 -117
  204. validmind/tests/data_validation/HeatmapFeatureCorrelations.py +0 -124
  205. validmind/tests/data_validation/MissingValuesRisk.py +0 -88
  206. validmind/tests/model_validation/ModelMetadataComparison.py +0 -59
  207. validmind/tests/model_validation/sklearn/FeatureImportanceComparison.py +0 -83
  208. validmind/tests/model_validation/statsmodels/RegressionCoeffsPlot.py +0 -135
  209. validmind/tests/model_validation/statsmodels/RegressionModelsCoeffs.py +0 -103
  210. {validmind-2.5.8.dist-info → validmind-2.5.15.dist-info}/LICENSE +0 -0
  211. {validmind-2.5.8.dist-info → validmind-2.5.15.dist-info}/WHEEL +0 -0
  212. {validmind-2.5.8.dist-info → validmind-2.5.15.dist-info}/entry_points.txt +0 -0
@@ -20,37 +20,44 @@ class Hashtags(ThresholdTest):
  """
  Assesses hashtag frequency in a text column, highlighting usage trends and potential dataset bias or spam.

- **Purpose**: The Hashtags test is designed to measure the frequency of hashtags used within a given text column in
- a dataset. It is particularly useful for natural language processing tasks such as text classification and text
- summarization. The goal is to identify common trends and patterns in the use of hashtags, which can serve as
- critical indicators or features within a machine learning model.
+ ### Purpose

- **Test Mechanism**: The test implements a regular expression (regex) to extract all hashtags from the specified
- text column. For each hashtag found, it makes a tally of its occurrences. It then outputs a list of the top N
- hashtags (default is 25, but customizable), sorted by their counts in descending order. The results are also
- visualized in a bar plot, with frequency counts on the y-axis and the corresponding hashtags on the x-axis.
+ The Hashtags test is designed to measure the frequency of hashtags used within a given text column in a dataset. It
+ is particularly useful for natural language processing tasks such as text classification and text summarization.
+ The goal is to identify common trends and patterns in the use of hashtags, which can serve as critical indicators
+ or features within a machine learning model.
+
+ ### Test Mechanism
+
+ The test implements a regular expression (regex) to extract all hashtags from the specified text column. For each
+ hashtag found, it makes a tally of its occurrences. It then outputs a list of the top N hashtags (default is 25,
+ but customizable), sorted by their counts in descending order. The results are also visualized in a bar plot, with
+ frequency counts on the y-axis and the corresponding hashtags on the x-axis.
+
+ ### Signs of High Risk

- **Signs of High Risk**:
  - A low diversity in the usage of hashtags, as indicated by a few hashtags being used disproportionately more than
  others.
  - Repeated usage of one or few hashtags can be indicative of spam or a biased dataset.
  - If there are no or extremely few hashtags found in the dataset, it perhaps signifies that the text data does not
  contain structured social media data.

- **Strengths**:
- - It provides a concise visual representation of the frequency of hashtags, which can be critical for understanding
+ ### Strengths
+
+ - Provides a concise visual representation of the frequency of hashtags, which can be critical for understanding
  trends about a particular topic in text data.
- - It is instrumental in tasks specifically related to social media text analytics, such as opinion analysis and
- trend discovery.
- - The test is adaptable, allowing the flexibility to determine the number of top hashtags to be analyzed.
+ - Instrumental in tasks specifically related to social media text analytics, such as opinion analysis and trend
+ discovery.
+ - Adaptable, allowing the flexibility to determine the number of top hashtags to be analyzed.
+
+ ### Limitations

- **Limitations**:
- - The test assumes the presence of hashtags and therefore may not be applicable for text datasets that do not
- contain hashtags (e.g., formal documents, scientific literature).
+ - Assumes the presence of hashtags and therefore may not be applicable for text datasets that do not contain
+ hashtags (e.g., formal documents, scientific literature).
  - Language-specific limitations of hashtag formulations are not taken into account.
- - It does not account for typographical errors, variations, or synonyms in hashtags.
- - This test does not provide context or sentiment associated with the hashtags, so the information provided may
- have limited utility on its own.
+ - Does not account for typographical errors, variations, or synonyms in hashtags.
+ - Does not provide context or sentiment associated with the hashtags, so the information provided may have limited
+ utility on its own.
  """

  name = "hashtags"
@@ -17,24 +17,43 @@ from validmind import tags, tasks
  @tasks("text_classification", "text_summarization")
  def LanguageDetection(dataset):
  """
- Detects the language of each text entry in a dataset and visualizes the distribution of languages
- as a histogram.
+ Assesses the diversity of languages in a textual dataset by detecting and visualizing the distribution of languages.

- This method checks for a specified text column in the dataset's dataframe, uses a language detection
- library to determine the language of each text entry, and returns a histogram plot of the language
- distribution.
+ ### Purpose

- Args:
- dataset (Dataset): A dataset object which must have a `df` attribute (a pandas DataFrame)
- and a `text_column` attribute indicating the name of the column containing text. If the
- `text_column` attribute is not set, a ValueError is raised.
+ The Language Detection test aims to identify and visualize the distribution of languages present within a textual
+ dataset. This test helps in understanding the diversity of languages in the data, which is crucial for developing
+ and validating multilingual models.

- Returns:
- plotly.graph_objs._figure.Figure: A Plotly histogram plot showing the distribution of detected
- languages across the dataset's text entries.
+ ### Test Mechanism

- Raises:
- ValueError: If the `text_column` is not specified in the dataset object.
+ This test operates by:
+
+ - Checking if the dataset has a specified text column.
+ - Using a language detection library to determine the language of each text entry in the dataset.
+ - Generating a histogram plot of the language distribution, with language codes on the x-axis and their frequencies
+ on the y-axis.
+
+ If the text column is not specified, a ValueError is raised to ensure proper dataset configuration.
+
+ ### Signs of High Risk
+
+ - A high proportion of entries returning "Unknown" language codes.
+ - Detection of unexpectedly diverse or incorrect language codes, indicating potential data quality issues.
+ - Significant imbalance in language distribution, which might indicate potential biases in the dataset.
+
+ ### Strengths
+
+ - Provides a visual representation of language diversity within the dataset.
+ - Helps identify data quality issues related to incorrect or unknown language detection.
+ - Useful for ensuring that multilingual models have adequate and appropriate representation from various languages.
+
+ ### Limitations
+
+ - Dependency on the accuracy of the language detection library, which may not be perfect.
+ - Languages with similar structures or limited text length may be incorrectly classified.
+ - The test returns "Unknown" for entries where language detection fails, which might mask underlying issues with
+ certain languages or text formats.
  """
  # check text column
  if not dataset.text_column:
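A minimal sketch of the per-row detection loop described above, assuming the `langdetect` package as the detection library (the hunk does not name the library the test actually uses); failed detections fall back to "Unknown" as the docstring notes.

```python
import pandas as pd
import plotly.express as px
from langdetect import detect  # assumed detection library


def language_histogram(texts: pd.Series):
    def safe_detect(text):
        try:
            return detect(str(text))
        except Exception:
            return "Unknown"  # detection failures surface as "Unknown"

    languages = texts.apply(safe_detect)
    # Language codes on the x-axis, frequencies on the y-axis.
    return px.histogram(x=languages, labels={"x": "Language code"})
```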
@@ -20,31 +20,37 @@ class Mentions(ThresholdTest):
  """
  Calculates and visualizes frequencies of '@' prefixed mentions in a text-based dataset for NLP model analysis.

- **Purpose**: This test, termed "Mentions", is designed to gauge the quality of data in a Natural Language
- Processing (NLP) or text-focused Machine Learning model. The primary objective is to identify and calculate the
- frequency of 'mentions' within a chosen text column of a dataset. A 'mention' in this context refers to individual
- text elements that are prefixed by '@'. The output of this test reveals the most frequently mentioned entities or
- usernames, which can be integral for applications such as social media analyses, customer sentiment analyses, and
- so on.
-
- **Test Mechanism**: The test first verifies the existence of a text column in the provided dataset. It then employs
- a regular expression pattern to extract mentions from the text. Subsequently, the frequency of each unique mention
- is calculated. The test selects the most frequent mentions based on default or user-defined parameters, the default
+ ### Purpose
+
+ The "Mentions" test is designed to gauge the quality of data in a Natural Language Processing (NLP) or text-focused
+ Machine Learning model. The primary objective is to identify and calculate the frequency of 'mentions' within a
+ chosen text column of a dataset. A 'mention' in this context refers to individual text elements that are prefixed
+ by '@'. The output of this test reveals the most frequently mentioned entities or usernames, which can be integral
+ for applications such as social media analyses or customer sentiment analyses.
+
+ ### Test Mechanism
+
+ The test first verifies the existence of a text column in the provided dataset. It then employs a regular
+ expression pattern to extract mentions from the text. Subsequently, the frequency of each unique mention is
+ calculated. The test selects the most frequent mentions based on default or user-defined parameters, the default
  being the top 25, for representation. This process of thresholding forms the core of the test. A treemap plot
  visualizes the test results, where the size of each rectangle corresponds to the frequency of a particular mention.

- **Signs of High Risk**:
+ ### Signs of High Risk
+
  - The lack of a valid text column in the dataset, which would result in the failure of the test execution.
  - The absence of any mentions within the text data, indicating that there might not be any text associated with
- '@'. This situation could point towards sparse or poor-quality data, thereby hampering the model's generalization
- or learning capabilities.
+ '@'. This situation could point toward sparse or poor-quality data, thereby hampering the model's generalization or
+ learning capabilities.
+
+ ### Strengths

- **Strengths**:
  - The test is specifically optimized for text-based datasets which gives it distinct power in the context of NLP.
  - It enables quick identification and visually appealing representation of the predominant elements or mentions.
  - It can provide crucial insights about the most frequently mentioned entities or usernames.

- **Limitations**:
+ ### Limitations
+
  - The test only recognizes mentions that are prefixed by '@', hence useful textual aspects not preceded by '@'
  might be ignored.
  - This test isn't suited for datasets devoid of textual data.
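A hedged sketch of the mechanism above: regex extraction of '@'-prefixed tokens, a top-N cutoff, and a treemap whose rectangle sizes track frequency. The `@\w+` pattern and the Plotly Express call are assumptions, not the packaged code.

```python
import re
from collections import Counter

import pandas as pd
import plotly.express as px


def mentions_treemap(texts: pd.Series, top_n: int = 25):
    counts = Counter()
    for text in texts.dropna():
        # Assumed pattern: '@' followed by word characters.
        counts.update(re.findall(r"@\w+", str(text)))
    top = pd.DataFrame(counts.most_common(top_n), columns=["mention", "count"])
    # Rectangle size corresponds to the frequency of each mention.
    return px.treemap(top, path=["mention"], values="count")
```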
@@ -14,18 +14,41 @@ from validmind import tags, tasks
  @tasks("nlp")
  def PolarityAndSubjectivity(dataset):
  """
- Analyzes the polarity and subjectivity of text data within a dataset.
+ Analyzes the polarity and subjectivity of text data within a given dataset to visualize the sentiment distribution.

- This method processes a dataset containing textual data to compute the polarity and
- subjectivity scores using TextBlob, and returns a Plotly scatter plot visualizing
- these scores.
+ ### Purpose

- Args:
- dataset (Dataset): A dataset object which must have a `df` attribute (a pandas DataFrame)
- and a `text_column` attribute indicating the name of the column containing text.
+ The Polarity and Subjectivity test is designed to evaluate the sentiment expressed in textual data. By analyzing
+ these aspects, it helps to identify the emotional tone and subjectivity of the dataset, which could be crucial in
+ understanding customer feedback, social media sentiments, or other text-related data.

- Returns:
- plotly.graph_objs._figure.Figure: A Plotly scatter plot of polarity vs subjectivity.
+ ### Test Mechanism
+
+ This test uses TextBlob to compute the polarity and subjectivity scores of textual data in a given dataset. The
+ mechanism includes:
+
+ - Iterating through each text entry in the specified column of the dataset.
+ - Applying the TextBlob library to compute the polarity (ranging from -1 for negative sentiment to +1 for positive
+ sentiment) and subjectivity (ranging from 0 for objective to 1 for subjective) for each entry.
+ - Creating a scatter plot using Plotly to visualize the relationship between polarity and subjectivity.
+
+ ### Signs of High Risk
+
+ - High concentration of negative polarity values indicating prevalent negative sentiments.
+ - High subjectivity scores suggesting the text data is largely opinion-based rather than factual.
+ - Disproportionate clusters of extreme scores (e.g., many points near -1 or +1 polarity).
+
+ ### Strengths
+
+ - Quantifies sentiment and subjectivity which can provide actionable insights.
+ - Visualizes sentiment distribution, aiding in easy interpretation.
+ - Utilizes well-established TextBlob library for sentiment analysis.
+
+ ### Limitations
+
+ - Polarity and subjectivity calculations may oversimplify nuanced text sentiments.
+ - Reliance on TextBlob which may not be accurate for all domains or contexts.
+ - Visualization could become cluttered with very large datasets, making interpretation difficult.
  """

  # Function to calculate sentiment and subjectivity
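The TextBlob scoring step reduces to a few lines; this sketch assumes per-row scoring with `pandas.Series.map` and Plotly Express for the scatter plot, and the helper name is illustrative.

```python
import pandas as pd
import plotly.express as px
from textblob import TextBlob


def polarity_subjectivity_scatter(texts: pd.Series):
    scores = pd.DataFrame(
        {
            # Polarity spans [-1, 1]; subjectivity spans [0, 1].
            "polarity": texts.map(lambda t: TextBlob(str(t)).sentiment.polarity),
            "subjectivity": texts.map(lambda t: TextBlob(str(t)).sentiment.subjectivity),
        }
    )
    return px.scatter(scores, x="polarity", y="subjectivity")
```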
@@ -20,34 +20,38 @@ class Punctuations(Metric):
  """
  Analyzes and visualizes the frequency distribution of punctuation usage in a given text dataset.

- **1. Purpose:** The Punctuations Metric's primary purpose is to analyze the frequency of punctuation usage within a
- given text dataset. This is often used in Natural Language Processing tasks, such as text classification and text
+ ### Purpose
+
+ The Punctuations Metric's primary purpose is to analyze the frequency of punctuation usage within a given text
+ dataset. This is often used in Natural Language Processing tasks, such as text classification and text
  summarization.

- **2. Test Mechanism:** The test begins by verifying that the input "dataset" is of the type VMDataset. Following
- that, a corpus is created from the dataset by splitting its text on spaces. Each unique punctuation character in
- the text corpus is then tallied. Then, the frequency distribution of each punctuation symbol is visualized as a bar
- graph, with these results being stored as Figures and associated with the main Punctuations object.
+ ### Test Mechanism
+
+ The test begins by verifying that the input "dataset" is of the type VMDataset. Following that, a corpus is created
+ from the dataset by splitting its text on spaces. Each unique punctuation character in the text corpus is then
+ tallied. The frequency distribution of each punctuation symbol is visualized as a bar graph, with these results
+ being stored as Figures and associated with the main Punctuations object.

- **3. Signs of High Risk:**
+ ### Signs of High Risk

- - High risk can be indicated by the excessive or unusual frequency of specific punctuation marks, potentially
- denoting dubious quality, data corruption, or skewed data.
+ - Excessive or unusual frequency of specific punctuation marks, potentially denoting dubious quality, data
+ corruption, or skewed data.

- **4. Strengths:**
+ ### Strengths

- - The Punctuations Metric provides valuable insights into the distribution of punctuation usage in a text dataset.
- - This insight can be important in validating the quality, consistency, and nature of the data.
- - It can provide hints about the style or tonality of the text corpus. For example, frequent usage of exclamation
- marks may suggest a more informal and emotional context.
+ - Provides valuable insights into the distribution of punctuation usage in a text dataset.
+ - Important in validating the quality, consistency, and nature of the data.
+ - Can provide hints about the style or tonality of the text corpus, such as informal and emotional context
+ indicated by frequent exclamation marks.

- **5. Limitations:**
+ ### Limitations

- - The metric focuses solely on punctuation usage and can miss other important textual characteristics.
- - It's important not to make general cultural or tonality assumptions based solely on punctuation distribution,
- since these can vary greatly across different languages and contexts.
- - The metric may be less effective with languages that use non-standard or different punctuation.
- - The visualization may lack interpretability when there are many unique punctuation marks in the dataset.
+ - Focuses solely on punctuation usage, potentially missing other important textual characteristics.
+ - General cultural or tonality assumptions based on punctuation distribution can be misguiding, as these vary
+ across different languages and contexts.
+ - Less effective with languages that use non-standard or different punctuation.
+ - Visualization may lack interpretability when there are many unique punctuation marks in the dataset.
  """

  name = "punctuations"
@@ -17,16 +17,35 @@ def Sentiment(dataset):
  """
  Analyzes the sentiment of text data within a dataset using the VADER sentiment analysis tool.

- This method initializes the VADER SentimentIntensityAnalyzer and applies it to each text entry
- in the specified column of the dataset's dataframe. It returns a KDE plot visualizing the distribution
- of sentiment scores across the dataset.
+ ### Purpose

- Args:
- dataset (Dataset): A dataset object which must have a `df` attribute (a pandas DataFrame)
- and a `text_column` attribute indicating the name of the column containing text.
+ The Sentiment test evaluates the overall sentiment of text data within a dataset. By analyzing sentiment scores, it
+ aims to ensure that the model is interpreting text data accurately and is not biased towards a particular sentiment.

- Returns:
- matplotlib.figure.Figure: A KDE plot visualizing the distribution of sentiment scores.
+ ### Test Mechanism
+
+ This test uses the VADER (Valence Aware Dictionary and sEntiment Reasoner) SentimentIntensityAnalyzer. It processes
+ each text entry in a specified column of the dataset to calculate the compound sentiment score, which represents
+ the overall sentiment polarity. The distribution of these sentiment scores is then visualized using a KDE (Kernel
+ Density Estimation) plot, highlighting any skewness or concentration in sentiment.
+
+ ### Signs of High Risk
+
+ - Extreme polarity in sentiment scores, indicating potential bias.
+ - Unusual concentration of sentiment scores in a specific range.
+ - Significant deviation from expected sentiment distribution for the given text data.
+
+ ### Strengths
+
+ - Provides a clear visual representation of sentiment distribution.
+ - Uses a well-established sentiment analysis tool (VADER).
+ - Can handle a wide range of text data, making it flexible for various applications.
+
+ ### Limitations
+
+ - May not capture nuanced or context-specific sentiments.
+ - Relies heavily on the accuracy of the VADER sentiment analysis tool.
+ - Visualization alone may not provide comprehensive insights into underlying causes of sentiment distribution.
  """
  nltk.download("vader_lexicon", quiet=True)
  # Initialize VADER
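The trailing context confirms the NLTK VADER setup; a sketch of the score-and-plot flow follows, with the seaborn KDE call as an assumed plotting backend.

```python
import nltk
import pandas as pd
import seaborn as sns
from nltk.sentiment import SentimentIntensityAnalyzer

nltk.download("vader_lexicon", quiet=True)


def sentiment_kde(texts: pd.Series):
    analyzer = SentimentIntensityAnalyzer()
    # "compound" is VADER's normalized overall polarity in [-1, 1].
    scores = texts.map(lambda t: analyzer.polarity_scores(str(t))["compound"])
    return sns.kdeplot(scores, fill=True)
```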
@@ -30,40 +30,47 @@ class StopWords(ThresholdTest):
  """
  Evaluates and visualizes the frequency of English stop words in a text dataset against a defined threshold.

- **Purpose**: The StopWords threshold test is a tool designed for assessing the quality of text data in an ML model.
- It focuses on the identification and analysis of "stop words" in a given dataset. Stop words are frequent, common,
- yet semantically insignificant words (for example: "the", "and", "is") in a language. This test evaluates the
+ ### Purpose
+
+ The StopWords threshold test is a tool designed for assessing the quality of text data in an ML model. It focuses
+ on the identification and analysis of "stop words" in a given dataset. Stop words are frequent, common, yet
+ semantically insignificant words (for example: "the", "and", "is") in a language. This test evaluates the
  proportion of stop words to the total word count in the dataset, in essence, scrutinizing the frequency of stop
  word usage. The core objective is to highlight the prevalent stop words based on their usage frequency, which can
  be instrumental in cleaning the data from noise and improving ML model performance.

- **Test Mechanism**: The StopWords test initiates on receiving an input of a 'VMDataset' object. Absence of such an
- object will trigger an error. The methodology involves inspection of the text column of the VMDataset to create a
- 'corpus' (a collection of written texts). Leveraging the Natural Language Toolkit's (NLTK) stop word repository,
- the test screens the corpus for any stop words and documents their frequency. It further calculates the percentage
- usage of each stop word compared to the total word count in the corpus. This percentage is evaluated against a
- predefined 'min_percent_threshold'. If this threshold is breached, the test returns a failed output. Top prevailing
- stop words along with their usage percentages are returned, facilitated by a bar chart visualization of these stop
- words and their frequency.
+ ### Test Mechanism
+
+ The StopWords test initiates on receiving an input of a 'VMDataset' object. Absence of such an object will trigger
+ an error. The methodology involves inspection of the text column of the VMDataset to create a 'corpus' (a
+ collection of written texts). Leveraging the Natural Language Toolkit's (NLTK) stop word repository, the test
+ screens the corpus for any stop words and documents their frequency. It further calculates the percentage usage of
+ each stop word compared to the total word count in the corpus. This percentage is evaluated against a predefined
+ 'min_percent_threshold'. If this threshold is breached, the test returns a failed output. Top prevailing stop words
+ along with their usage percentages are returned, facilitated by a bar chart visualization of these stop words and
+ their frequency.
+
+ ### Signs of High Risk

- **Signs of High Risk**:
  - A percentage of any stop words exceeding the predefined 'min_percent_threshold'.
  - High frequency of stop words in the dataset which may adversely affect the application's analytical performance
  due to noise creation.

- **Strengths**:
+ ### Strengths
+
  - The ability to scrutinize and quantify the usage of stop words.
- - Provides insights into potential noise in the text data due to stop words. This can directly aid in enhancing
- model training efficiency.
- - The test includes a bar chart visualization feature to easily interpret and action upon the stop words frequency
+ - Provides insights into potential noise in the text data due to stop words.
+ - Directly aids in enhancing model training efficiency.
+ - Includes a bar chart visualization feature to easily interpret and action upon the stop words frequency
  information.

- **Limitations**:
+ ### Limitations
+
  - The test only supports English stop words, making it less effective with datasets of other languages.
  - The 'min_percent_threshold' parameter may require fine-tuning for different datasets, impacting the overall
  effectiveness of the test.
- - Contextual use of the stop words within the dataset is not considered which may lead to overlooking their
- significance in certain contexts.
+ - Contextual use of the stop words within the dataset is not considered, potentially overlooking their significance
+ in certain contexts.
  - The test focuses specifically on the frequency of stop words, not providing direct measures of model performance
  or predictive accuracy.
  """
@@ -17,46 +17,47 @@ from ....vm_models import Figure, Metric, VMDataset
  @dataclass
  class TextDescription(Metric):
  """
- Performs comprehensive textual analysis on a dataset using NLTK, evaluating various parameters and generating
+ Conducts comprehensive textual analysis on a dataset using NLTK to evaluate various parameters and generate
  visualizations.

- **Purpose**: This test uses the TextDescription metric to conduct a comprehensive textual analysis of a given
- dataset. Various parameters such as total words, total sentences, average sentence length, total paragraphs, total
- unique words, most common words, total punctuations, and lexical diversity are evaluated. This metric aids in
- comprehending the nature of the text and evaluating the potential challenges that machine learning algorithms
- deployed for textual analysis, language processing, or summarization might face.
-
- **Test Mechanism**: The test works by parsing the given dataset and utilizes the NLTK (Natural Language Toolkit)
- library for tokenizing the text into words, sentences, and paragraphs. Subsequently, it processes the text further
- by eliminating stopwords declared in 'unwanted_tokens' and punctuations. Next, it determines parameters like the
- total count of words, sentences, paragraphs, punctuations alongside the average sentence length and lexical
- diversity. Lastly, the result from these calculations is condensed and scatter plots for certain variable
- combinations (e.g. Total Words vs Total Sentences, Total Words vs Total Unique Words) are produced, providing a
- visual representation of the text's structure.
-
- **Signs of High Risk**:
- - Anomalies or an increase in complexity within the lexical diversity results.
+ ### Purpose
+
+ The TextDescription test aims to conduct a thorough textual analysis of a dataset using the NLTK (Natural Language
+ Toolkit) library. It evaluates various metrics such as total words, total sentences, average sentence length, total
+ paragraphs, total unique words, most common words, total punctuations, and lexical diversity. The goal is to
+ understand the nature of the text and anticipate challenges machine learning models might face in text processing,
+ language understanding, or summarization tasks.
+
+ ### Test Mechanism
+
+ The test works by:
+
+ - Parsing the dataset and tokenizing the text into words, sentences, and paragraphs using NLTK.
+ - Removing stopwords and unwanted tokens.
+ - Calculating parameters like total words, total sentences, average sentence length, total paragraphs, total unique
+ words, total punctuations, and lexical diversity.
+ - Generating scatter plots to visualize correlations between various metrics (e.g., Total Words vs Total Sentences).
+
+ ### Signs of High Risk
+
+ - Anomalies or increased complexity in lexical diversity.
  - Longer sentences and paragraphs.
  - High uniqueness of words.
- - Presence of a significant amount of unwanted tokens.
+ - Large number of unwanted tokens.
  - Missing or erroneous visualizations.
- These signs suggest potential risk in text processing ML models, indicating that the ability of the model to
- absorb and process text could be compromised.
-
- **Strengths**:
- - An essential pre-processing tool, specifically for textual analysis in machine learning model data.
- - Provides a comprehensive breakdown of a text dataset, which aids in understanding both structural and vocabulary
- complexity.
- - Generates visualizations of correlations between chosen variables to further comprehend the text's structure and
- complexity.
-
- **Limitations**:
- - Heavy reliance on the NLTK library, restricting its use to only the languages that NLTK supports.
- - Limited customization capacity as the undesirable tokens and stop words are predefined.
- - Lacks the ability to consider semantics or grammatical complexities, which could be crucial aspects in language
- processing.
- - Assumes that the document is well-structured (includes sentences and paragraphs); therefore, unstructured or
- poorly formatted text may distort the results.
+
+ ### Strengths
+
+ - Essential for pre-processing text data in machine learning models.
+ - Provides a comprehensive breakdown of text data, aiding in understanding its complexity.
+ - Generates visualizations to help comprehend text structure and complexity.
+
+ ### Limitations
+
+ - Highly dependent on the NLTK library, limiting the test to supported languages.
+ - Limited customization for removing undesirable tokens and stop words.
+ - Does not consider semantic or grammatical complexities.
+ - Assumes well-structured documents, which may result in inaccuracies with poorly formatted text.
  """

  name = "text_description"
@@ -13,18 +13,41 @@ from validmind import tags, tasks
  @tasks("nlp")
  def Toxicity(dataset):
  """
- Analyzes the toxicity of text data within a dataset using a pre-trained toxicity model.
+ Assesses the toxicity of text data within a dataset to visualize the distribution of toxicity scores.

- This method loads a toxicity evaluation model and applies it to each text entry
- in the specified column of the dataset's dataframe. It returns a KDE plot visualizing the distribution
- of toxicity scores across the dataset.
+ ### Purpose

- Args:
- dataset (Dataset): A dataset object which must have a `df` attribute (a pandas DataFrame)
- and a `text_column` attribute indicating the name of the column containing text.
+ The Toxicity test aims to evaluate the level of toxic content present in a text dataset by leveraging a pre-trained
+ toxicity model. It helps in identifying potentially harmful or offensive language that may negatively impact users
+ or stakeholders.

- Returns:
- matplotlib.figure.Figure: A KDE plot visualizing the distribution of toxicity scores.
+ ### Test Mechanism
+
+ This test uses a pre-trained toxicity evaluation model and applies it to each text entry in the specified column of
+ a dataset’s dataframe. The procedure involves:
+
+ - Loading a pre-trained toxicity model.
+ - Extracting the text from the specified column in the dataset.
+ - Computing toxicity scores for each text entry.
+ - Generating a KDE (Kernel Density Estimate) plot to visualize the distribution of these toxicity scores.
+
+ ### Signs of High Risk
+
+ - High concentration of high toxicity scores in the KDE plot.
+ - A significant proportion of text entries with toxicity scores above a predefined threshold.
+ - Wide distribution of toxicity scores, indicating inconsistency in content quality.
+
+ ### Strengths
+
+ - Provides a visual representation of toxicity distribution, making it easier to identify outliers.
+ - Uses a robust pre-trained model for toxicity evaluation.
+ - Can process large text datasets efficiently.
+
+ ### Limitations
+
+ - Depends on the accuracy and bias of the pre-trained toxicity model.
+ - Does not provide context-specific insights, which may be necessary for nuanced understanding.
+ - May not capture all forms of subtle or indirect toxic language.
  """
  toxicity = evaluate.load("toxicity")
  input_text = dataset.df[dataset.text_column]
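The trailing context shows the test loading the Hugging Face `evaluate` toxicity measurement; the rest of the flow can be sketched as follows, with seaborn as an assumed choice for the KDE plot.

```python
import evaluate
import pandas as pd
import seaborn as sns


def toxicity_kde(texts: pd.Series):
    # Loads the default toxicity measurement (a pre-trained classifier).
    toxicity = evaluate.load("toxicity")
    scores = toxicity.compute(predictions=list(texts.astype(str)))["toxicity"]
    return sns.kdeplot(scores, fill=True)
```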