validmind 2.5.6__py3-none-any.whl → 2.5.15__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (212)
  1. validmind/__version__.py +1 -1
  2. validmind/ai/test_descriptions.py +26 -7
  3. validmind/api_client.py +89 -43
  4. validmind/client.py +2 -2
  5. validmind/client_config.py +11 -14
  6. validmind/datasets/regression/fred_timeseries.py +67 -138
  7. validmind/template.py +1 -0
  8. validmind/test_suites/__init__.py +0 -2
  9. validmind/test_suites/statsmodels_timeseries.py +1 -1
  10. validmind/test_suites/summarization.py +0 -1
  11. validmind/test_suites/time_series.py +0 -43
  12. validmind/tests/__types__.py +3 -13
  13. validmind/tests/data_validation/ACFandPACFPlot.py +15 -13
  14. validmind/tests/data_validation/ADF.py +31 -24
  15. validmind/tests/data_validation/AutoAR.py +9 -9
  16. validmind/tests/data_validation/AutoMA.py +23 -16
  17. validmind/tests/data_validation/AutoSeasonality.py +18 -16
  18. validmind/tests/data_validation/AutoStationarity.py +21 -16
  19. validmind/tests/data_validation/BivariateScatterPlots.py +67 -96
  20. validmind/tests/data_validation/ChiSquaredFeaturesTable.py +82 -124
  21. validmind/tests/data_validation/ClassImbalance.py +15 -12
  22. validmind/tests/data_validation/DFGLSArch.py +19 -13
  23. validmind/tests/data_validation/DatasetDescription.py +17 -11
  24. validmind/tests/data_validation/DatasetSplit.py +7 -5
  25. validmind/tests/data_validation/DescriptiveStatistics.py +28 -21
  26. validmind/tests/data_validation/Duplicates.py +33 -25
  27. validmind/tests/data_validation/EngleGrangerCoint.py +35 -33
  28. validmind/tests/data_validation/FeatureTargetCorrelationPlot.py +59 -71
  29. validmind/tests/data_validation/HighCardinality.py +19 -12
  30. validmind/tests/data_validation/HighPearsonCorrelation.py +27 -22
  31. validmind/tests/data_validation/IQROutliersBarPlot.py +13 -10
  32. validmind/tests/data_validation/IQROutliersTable.py +40 -36
  33. validmind/tests/data_validation/IsolationForestOutliers.py +21 -14
  34. validmind/tests/data_validation/KPSS.py +34 -29
  35. validmind/tests/data_validation/LaggedCorrelationHeatmap.py +22 -15
  36. validmind/tests/data_validation/MissingValues.py +32 -27
  37. validmind/tests/data_validation/MissingValuesBarPlot.py +25 -21
  38. validmind/tests/data_validation/PearsonCorrelationMatrix.py +71 -84
  39. validmind/tests/data_validation/PhillipsPerronArch.py +37 -30
  40. validmind/tests/data_validation/RollingStatsPlot.py +31 -23
  41. validmind/tests/data_validation/ScatterPlot.py +63 -78
  42. validmind/tests/data_validation/SeasonalDecompose.py +38 -34
  43. validmind/tests/data_validation/Skewness.py +35 -37
  44. validmind/tests/data_validation/SpreadPlot.py +35 -35
  45. validmind/tests/data_validation/TabularCategoricalBarPlots.py +23 -17
  46. validmind/tests/data_validation/TabularDateTimeHistograms.py +21 -13
  47. validmind/tests/data_validation/TabularDescriptionTables.py +51 -16
  48. validmind/tests/data_validation/TabularNumericalHistograms.py +25 -22
  49. validmind/tests/data_validation/TargetRateBarPlots.py +21 -14
  50. validmind/tests/data_validation/TimeSeriesDescription.py +25 -18
  51. validmind/tests/data_validation/TimeSeriesDescriptiveStatistics.py +23 -17
  52. validmind/tests/data_validation/TimeSeriesFrequency.py +24 -17
  53. validmind/tests/data_validation/TimeSeriesHistogram.py +33 -32
  54. validmind/tests/data_validation/TimeSeriesLinePlot.py +17 -10
  55. validmind/tests/data_validation/TimeSeriesMissingValues.py +15 -10
  56. validmind/tests/data_validation/TimeSeriesOutliers.py +37 -33
  57. validmind/tests/data_validation/TooManyZeroValues.py +16 -11
  58. validmind/tests/data_validation/UniqueRows.py +11 -6
  59. validmind/tests/data_validation/WOEBinPlots.py +23 -16
  60. validmind/tests/data_validation/WOEBinTable.py +35 -30
  61. validmind/tests/data_validation/ZivotAndrewsArch.py +34 -28
  62. validmind/tests/data_validation/nlp/CommonWords.py +21 -14
  63. validmind/tests/data_validation/nlp/Hashtags.py +27 -20
  64. validmind/tests/data_validation/nlp/LanguageDetection.py +33 -14
  65. validmind/tests/data_validation/nlp/Mentions.py +21 -15
  66. validmind/tests/data_validation/nlp/PolarityAndSubjectivity.py +32 -9
  67. validmind/tests/data_validation/nlp/Punctuations.py +24 -20
  68. validmind/tests/data_validation/nlp/Sentiment.py +27 -8
  69. validmind/tests/data_validation/nlp/StopWords.py +26 -19
  70. validmind/tests/data_validation/nlp/TextDescription.py +36 -35
  71. validmind/tests/data_validation/nlp/Toxicity.py +32 -9
  72. validmind/tests/decorator.py +81 -42
  73. validmind/tests/model_validation/BertScore.py +36 -27
  74. validmind/tests/model_validation/BleuScore.py +25 -19
  75. validmind/tests/model_validation/ClusterSizeDistribution.py +38 -34
  76. validmind/tests/model_validation/ContextualRecall.py +35 -13
  77. validmind/tests/model_validation/FeaturesAUC.py +32 -13
  78. validmind/tests/model_validation/MeteorScore.py +46 -33
  79. validmind/tests/model_validation/ModelMetadata.py +32 -64
  80. validmind/tests/model_validation/ModelPredictionResiduals.py +75 -73
  81. validmind/tests/model_validation/RegardScore.py +30 -14
  82. validmind/tests/model_validation/RegressionResidualsPlot.py +10 -5
  83. validmind/tests/model_validation/RougeScore.py +36 -30
  84. validmind/tests/model_validation/TimeSeriesPredictionWithCI.py +30 -14
  85. validmind/tests/model_validation/TimeSeriesPredictionsPlot.py +27 -30
  86. validmind/tests/model_validation/TimeSeriesR2SquareBySegments.py +68 -63
  87. validmind/tests/model_validation/TokenDisparity.py +31 -23
  88. validmind/tests/model_validation/ToxicityScore.py +26 -17
  89. validmind/tests/model_validation/embeddings/ClusterDistribution.py +24 -20
  90. validmind/tests/model_validation/embeddings/CosineSimilarityComparison.py +30 -27
  91. validmind/tests/model_validation/embeddings/CosineSimilarityDistribution.py +7 -5
  92. validmind/tests/model_validation/embeddings/CosineSimilarityHeatmap.py +32 -23
  93. validmind/tests/model_validation/embeddings/DescriptiveAnalytics.py +7 -5
  94. validmind/tests/model_validation/embeddings/EmbeddingsVisualization2D.py +15 -11
  95. validmind/tests/model_validation/embeddings/EuclideanDistanceComparison.py +29 -29
  96. validmind/tests/model_validation/embeddings/EuclideanDistanceHeatmap.py +34 -25
  97. validmind/tests/model_validation/embeddings/PCAComponentsPairwisePlots.py +38 -26
  98. validmind/tests/model_validation/embeddings/StabilityAnalysis.py +40 -1
  99. validmind/tests/model_validation/embeddings/StabilityAnalysisKeyword.py +18 -17
  100. validmind/tests/model_validation/embeddings/StabilityAnalysisRandomNoise.py +40 -45
  101. validmind/tests/model_validation/embeddings/StabilityAnalysisSynonyms.py +17 -19
  102. validmind/tests/model_validation/embeddings/StabilityAnalysisTranslation.py +29 -25
  103. validmind/tests/model_validation/embeddings/TSNEComponentsPairwisePlots.py +38 -28
  104. validmind/tests/model_validation/ragas/AnswerCorrectness.py +5 -4
  105. validmind/tests/model_validation/ragas/AnswerRelevance.py +5 -4
  106. validmind/tests/model_validation/ragas/AnswerSimilarity.py +5 -4
  107. validmind/tests/model_validation/ragas/AspectCritique.py +7 -0
  108. validmind/tests/model_validation/ragas/ContextEntityRecall.py +9 -8
  109. validmind/tests/model_validation/ragas/ContextPrecision.py +5 -4
  110. validmind/tests/model_validation/ragas/ContextRecall.py +5 -4
  111. validmind/tests/model_validation/ragas/Faithfulness.py +5 -4
  112. validmind/tests/model_validation/ragas/utils.py +6 -0
  113. validmind/tests/model_validation/sklearn/AdjustedMutualInformation.py +19 -12
  114. validmind/tests/model_validation/sklearn/AdjustedRandIndex.py +22 -17
  115. validmind/tests/model_validation/sklearn/ClassifierPerformance.py +27 -25
  116. validmind/tests/model_validation/sklearn/ClusterCosineSimilarity.py +7 -5
  117. validmind/tests/model_validation/sklearn/ClusterPerformance.py +40 -78
  118. validmind/tests/model_validation/sklearn/ClusterPerformanceMetrics.py +15 -17
  119. validmind/tests/model_validation/sklearn/CompletenessScore.py +17 -11
  120. validmind/tests/model_validation/sklearn/ConfusionMatrix.py +22 -15
  121. validmind/tests/model_validation/sklearn/FeatureImportance.py +95 -0
  122. validmind/tests/model_validation/sklearn/FowlkesMallowsScore.py +7 -7
  123. validmind/tests/model_validation/sklearn/HomogeneityScore.py +19 -12
  124. validmind/tests/model_validation/sklearn/HyperParametersTuning.py +35 -30
  125. validmind/tests/model_validation/sklearn/KMeansClustersOptimization.py +10 -5
  126. validmind/tests/model_validation/sklearn/MinimumAccuracy.py +32 -32
  127. validmind/tests/model_validation/sklearn/MinimumF1Score.py +23 -23
  128. validmind/tests/model_validation/sklearn/MinimumROCAUCScore.py +15 -10
  129. validmind/tests/model_validation/sklearn/ModelsPerformanceComparison.py +26 -19
  130. validmind/tests/model_validation/sklearn/OverfitDiagnosis.py +38 -18
  131. validmind/tests/model_validation/sklearn/PermutationFeatureImportance.py +31 -25
  132. validmind/tests/model_validation/sklearn/PopulationStabilityIndex.py +8 -6
  133. validmind/tests/model_validation/sklearn/PrecisionRecallCurve.py +24 -17
  134. validmind/tests/model_validation/sklearn/ROCCurve.py +12 -7
  135. validmind/tests/model_validation/sklearn/RegressionErrors.py +74 -130
  136. validmind/tests/model_validation/sklearn/RegressionErrorsComparison.py +27 -12
  137. validmind/tests/model_validation/sklearn/{RegressionModelsPerformanceComparison.py → RegressionPerformance.py} +18 -20
  138. validmind/tests/model_validation/sklearn/RegressionR2Square.py +55 -93
  139. validmind/tests/model_validation/sklearn/RegressionR2SquareComparison.py +32 -13
  140. validmind/tests/model_validation/sklearn/RobustnessDiagnosis.py +113 -73
  141. validmind/tests/model_validation/sklearn/SHAPGlobalImportance.py +7 -5
  142. validmind/tests/model_validation/sklearn/SilhouettePlot.py +27 -19
  143. validmind/tests/model_validation/sklearn/TrainingTestDegradation.py +25 -18
  144. validmind/tests/model_validation/sklearn/VMeasure.py +14 -13
  145. validmind/tests/model_validation/sklearn/WeakspotsDiagnosis.py +7 -5
  146. validmind/tests/model_validation/statsmodels/AutoARIMA.py +24 -18
  147. validmind/tests/model_validation/statsmodels/BoxPierce.py +14 -10
  148. validmind/tests/model_validation/statsmodels/CumulativePredictionProbabilities.py +73 -104
  149. validmind/tests/model_validation/statsmodels/DurbinWatsonTest.py +19 -12
  150. validmind/tests/model_validation/statsmodels/GINITable.py +44 -77
  151. validmind/tests/model_validation/statsmodels/JarqueBera.py +27 -22
  152. validmind/tests/model_validation/statsmodels/KolmogorovSmirnov.py +33 -34
  153. validmind/tests/model_validation/statsmodels/LJungBox.py +32 -28
  154. validmind/tests/model_validation/statsmodels/Lilliefors.py +27 -24
  155. validmind/tests/model_validation/statsmodels/PredictionProbabilitiesHistogram.py +87 -119
  156. validmind/tests/model_validation/statsmodels/RegressionCoeffs.py +100 -0
  157. validmind/tests/model_validation/statsmodels/RegressionFeatureSignificance.py +14 -9
  158. validmind/tests/model_validation/statsmodels/RegressionModelForecastPlot.py +17 -13
  159. validmind/tests/model_validation/statsmodels/RegressionModelForecastPlotLevels.py +46 -43
  160. validmind/tests/model_validation/statsmodels/RegressionModelSensitivityPlot.py +38 -36
  161. validmind/tests/model_validation/statsmodels/RegressionModelSummary.py +30 -28
  162. validmind/tests/model_validation/statsmodels/RegressionPermutationFeatureImportance.py +18 -11
  163. validmind/tests/model_validation/statsmodels/RunsTest.py +32 -28
  164. validmind/tests/model_validation/statsmodels/ScorecardHistogram.py +75 -107
  165. validmind/tests/model_validation/statsmodels/ShapiroWilk.py +15 -8
  166. validmind/tests/ongoing_monitoring/FeatureDrift.py +10 -6
  167. validmind/tests/ongoing_monitoring/PredictionAcrossEachFeature.py +31 -25
  168. validmind/tests/ongoing_monitoring/PredictionCorrelation.py +29 -21
  169. validmind/tests/ongoing_monitoring/TargetPredictionDistributionPlot.py +31 -23
  170. validmind/tests/prompt_validation/Bias.py +14 -11
  171. validmind/tests/prompt_validation/Clarity.py +16 -14
  172. validmind/tests/prompt_validation/Conciseness.py +7 -5
  173. validmind/tests/prompt_validation/Delimitation.py +23 -22
  174. validmind/tests/prompt_validation/NegativeInstruction.py +7 -5
  175. validmind/tests/prompt_validation/Robustness.py +12 -10
  176. validmind/tests/prompt_validation/Specificity.py +13 -11
  177. validmind/tests/prompt_validation/ai_powered_test.py +6 -0
  178. validmind/tests/run.py +68 -23
  179. validmind/unit_metrics/__init__.py +81 -144
  180. validmind/unit_metrics/classification/{sklearn/Accuracy.py → Accuracy.py} +1 -1
  181. validmind/unit_metrics/classification/{sklearn/F1.py → F1.py} +1 -1
  182. validmind/unit_metrics/classification/{sklearn/Precision.py → Precision.py} +1 -1
  183. validmind/unit_metrics/classification/{sklearn/ROC_AUC.py → ROC_AUC.py} +1 -2
  184. validmind/unit_metrics/classification/{sklearn/Recall.py → Recall.py} +1 -1
  185. validmind/unit_metrics/regression/{sklearn/AdjustedRSquaredScore.py → AdjustedRSquaredScore.py} +1 -1
  186. validmind/unit_metrics/regression/GiniCoefficient.py +1 -1
  187. validmind/unit_metrics/regression/HuberLoss.py +1 -1
  188. validmind/unit_metrics/regression/KolmogorovSmirnovStatistic.py +1 -1
  189. validmind/unit_metrics/regression/{sklearn/MeanAbsoluteError.py → MeanAbsoluteError.py} +1 -1
  190. validmind/unit_metrics/regression/MeanAbsolutePercentageError.py +1 -1
  191. validmind/unit_metrics/regression/MeanBiasDeviation.py +1 -1
  192. validmind/unit_metrics/regression/{sklearn/MeanSquaredError.py → MeanSquaredError.py} +1 -1
  193. validmind/unit_metrics/regression/QuantileLoss.py +1 -1
  194. validmind/unit_metrics/regression/{sklearn/RSquaredScore.py → RSquaredScore.py} +1 -1
  195. validmind/unit_metrics/regression/{sklearn/RootMeanSquaredError.py → RootMeanSquaredError.py} +1 -1
  196. validmind/vm_models/dataset/dataset.py +2 -0
  197. validmind/vm_models/figure.py +5 -0
  198. validmind/vm_models/test/result_wrapper.py +93 -132
  199. {validmind-2.5.6.dist-info → validmind-2.5.15.dist-info}/METADATA +1 -1
  200. {validmind-2.5.6.dist-info → validmind-2.5.15.dist-info}/RECORD +203 -210
  201. validmind/tests/data_validation/ANOVAOneWayTable.py +0 -138
  202. validmind/tests/data_validation/BivariateFeaturesBarPlots.py +0 -142
  203. validmind/tests/data_validation/BivariateHistograms.py +0 -117
  204. validmind/tests/data_validation/HeatmapFeatureCorrelations.py +0 -124
  205. validmind/tests/data_validation/MissingValuesRisk.py +0 -88
  206. validmind/tests/model_validation/ModelMetadataComparison.py +0 -59
  207. validmind/tests/model_validation/sklearn/FeatureImportanceComparison.py +0 -83
  208. validmind/tests/model_validation/statsmodels/RegressionCoeffsPlot.py +0 -135
  209. validmind/tests/model_validation/statsmodels/RegressionModelsCoeffs.py +0 -103
  210. {validmind-2.5.6.dist-info → validmind-2.5.15.dist-info}/LICENSE +0 -0
  211. {validmind-2.5.6.dist-info → validmind-2.5.15.dist-info}/WHEEL +0 -0
  212. {validmind-2.5.6.dist-info → validmind-2.5.15.dist-info}/entry_points.txt +0 -0
@@ -12,22 +12,28 @@ class TimeSeriesLinePlot(Metric):
     """
     Generates and analyses time-series data through line plots revealing trends, patterns, anomalies over time.
 
-    **Purpose**: The TimeSeriesLinePlot metric is designed to generate and analyze time series data through the
-    creation of line plots. This assists in the initial inspection of the data by providing a visual representation of
-    patterns, trends, seasonality, irregularity, and anomalies that may be present in the dataset over a period of time.
+    ### Purpose
 
-    **Test Mechanism**: The mechanism for this Python class involves extracting the column names from the provided
-    dataset and subsequently generates line plots for each column using the Plotly Python library. For every column in
-    the dataset, a time-series line plot is created where the values are plotted against the dataset's datetime index.
-    It is important to note that indexes that are not of datetime type will result in a ValueError.
+    The TimeSeriesLinePlot metric is designed to generate and analyze time series data through the creation of line
+    plots. This assists in the initial inspection of the data by providing a visual representation of patterns, trends,
+    seasonality, irregularity, and anomalies that may be present in the dataset over a period of time.
+
+    ### Test Mechanism
+
+    The mechanism for this Python class involves extracting the column names from the provided dataset and subsequently
+    generating line plots for each column using the Plotly Python library. For every column in the dataset, a
+    time-series line plot is created where the values are plotted against the dataset's datetime index. It is important
+    to note that indexes that are not of datetime type will result in a ValueError.
+
+    ### Signs of High Risk
 
-    **Signs of High Risk**:
     - Presence of time-series data that does not have datetime indices.
     - Provided columns do not exist in the provided dataset.
     - The detection of anomalous patterns or irregularities in the time-series plots, indicating potential high model
     instability or probable predictive error.
 
-    **Strengths**:
+    ### Strengths
+
     - The visual representation of complex time series data, which simplifies understanding and helps in recognizing
     temporal trends, patterns, and anomalies.
     - The adaptability of the metric, which allows it to effectively work with multiple time series within the same
@@ -35,7 +41,8 @@ class TimeSeriesLinePlot(Metric):
     - Enables the identification of anomalies and irregular patterns through visual inspection, assisting in spotting
     potential data or model performance problems.
 
-    **Limitations**:
+    ### Limitations
+
     - The effectiveness of the metric is heavily reliant on the quality and patterns of the provided time series data.
     - Exclusively a visual tool, it lacks the capability to provide quantitative measurements, making it less effective
     for comparing and ranking multiple models or when specific numerical diagnostics are needed.
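For context, the plotting mechanism this docstring describes is easy to reproduce outside the test harness. A minimal sketch, assuming a pandas DataFrame with a DatetimeIndex; the column names and values are illustrative, not from the package:

```python
import pandas as pd
import plotly.graph_objects as go

df = pd.DataFrame(
    {"sales": [100, 120, 115], "visits": [40, 55, 53]},
    index=pd.to_datetime(["2024-01-01", "2024-02-01", "2024-03-01"]),
)

# Non-datetime indexes are rejected, mirroring the documented ValueError.
if not isinstance(df.index, pd.DatetimeIndex):
    raise ValueError("Dataset must have a datetime index")

# One line plot per column, values plotted against the datetime index.
for col in df.columns:
    fig = go.Figure(go.Scatter(x=df.index, y=df[col], mode="lines", name=col))
    fig.update_layout(title=f"Time Series for {col}", xaxis_title="Date", yaxis_title=col)
    fig.show()
```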
@@ -23,37 +23,42 @@ class TimeSeriesMissingValues(ThresholdTest):
     """
     Validates time-series data quality by confirming the count of missing values is below a certain threshold.
 
-    **Purpose**:
+    ### Purpose
+
     This test is designed to validate the quality of a historical time-series dataset by verifying that the number of
     missing values is below a specified threshold. As time-series models greatly depend on the continuity and
     temporality of data points, missing values could compromise the model's performance. Consequently, this test aims
     to ensure data quality and readiness for the machine learning model, safeguarding its predictive capacity.
 
-    **Test Mechanism**:
-    The test method commences by validating if the dataset has a datetime index, if not, an error is raised. It
+    ### Test Mechanism
+
+    The test method commences by validating if the dataset has a datetime index; if not, an error is raised. It
     establishes a lower limit threshold for missing values and performs a missing values check on each column of the
     dataset. An object for the test result is created stating whether the number of missing values is within the
     specified threshold. Additionally, the test calculates the percentage of missing values alongside the raw count.
 
-    To aid in data visualization, the test generates two plots - a bar plot and a heatmap, to better illustrate the
-    distribution and quantity of missing values per variable. The test results, a count of missing values, the
-    percentage of missing values, and a pass/fail status are returned in a results table.
+    To aid in data visualization, the test generates two plots - a bar plot and a heatmap - to better illustrate the
+    distribution and quantity of missing values per variable. The test results, including a count of missing values,
+    the percentage of missing values, and a pass/fail status, are returned in a results table.
+
+    ### Signs of High Risk
 
-    **Signs of High Risk**:
     - The number of missing values in any column of the dataset surpasses the threshold, marking a failure and a
     high-risk scenario. The reasons could range from incomplete data collection, faulty sensors to data preprocessing
     errors.
     - A continuous visual 'streak' in the heatmap may indicate a systematic error during data collection, pointing
     towards another potential risk source.
 
-    **Strengths**:
-    - Effectively identifies missing values which could adversely affect the model's performance.
+    ### Strengths
+
+    - Effectively identifies missing values which could adversely affect the model’s performance.
     - Applicable and customizable through the threshold parameter across different data sets.
     - Goes beyond raw numbers by calculating the percentage of missing values, offering a more relative understanding
     of data scarcity.
     - Includes a robust visualization mechanism for easy and fast understanding of data quality.
 
-    **Limitations**:
+    ### Limitations
+
     - Although it identifies missing values, the test does not provide solutions to handle them.
     - The test demands that the dataset should have a datetime index, hence limiting its use only to time series
     analysis.
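The core of the mechanism described above (datetime-index check, per-column count, percentage, pass/fail) can be sketched as follows. The function name and the default threshold of 1 are illustrative assumptions, not the package's API:

```python
import pandas as pd

def missing_values_summary(df: pd.DataFrame, min_threshold: int = 1) -> pd.DataFrame:
    """Count missing values per column and flag columns at or above the threshold."""
    if not isinstance(df.index, pd.DatetimeIndex):
        raise ValueError("Dataset must have a datetime index")
    missing = df.isnull().sum()
    return pd.DataFrame({
        "Column": df.columns,
        "Number of Missing Values": missing.values,
        "Percentage of Missing Values (%)": (missing.values / len(df)) * 100,
        "Pass/Fail": ["Pass" if n < min_threshold else "Fail" for n in missing.values],
    })
```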
@@ -20,43 +20,47 @@ from validmind.vm_models import (
 @dataclass
 class TimeSeriesOutliers(ThresholdTest):
     """
-    Identifies and visualizes outliers in time-series data using z-score method.
-
-    **Purpose**: This test is designed to identify outliers in time-series data using the z-score method. It's vital
-    for ensuring data quality before modeling, as outliers can skew predictive models and significantly impact their
-    overall performance.
-
-    **Test Mechanism**: The test processes a given dataset which must have datetime indexing, checks if a
-    'zscore_threshold' parameter has been supplied, and identifies columns with numeric data types. After finding
-    numeric columns, the implementer then applies the z-score method to each numeric column, identifying outliers based
-    on the threshold provided. Each outlier is listed together with their variable name, z-score, timestamp and
-    relative threshold in a dictionary and converted to a DataFrame for convenient output. Additionally, it produces
-    visual plots for each time series illustrating outliers in the context of the broader dataset. The
-    'zscore_threshold' parameter sets the limit beyond which a data point will be labeled as an outlier. The default
-    threshold is set at 3, indicating that any data point that falls 3 standard deviations away from the mean will be
-    marked as an outlier.
-
-    **Signs of High Risk**:
-    - If many or substantial outliers are present within a dataset, this may be an indicator of high risk as it
-    suggests that the dataset contains significant anomalies.
-    - This could potentially affect the performance of the machine learning models, if not properly addressed.
-    - Data points with z-scores higher than the set threshold would be flagged as outliers and could be considered as
-    high risk.
-
-    **Strengths**:
+    Identifies and visualizes outliers in time-series data using the z-score method.
+
+    ### Purpose
+
+    This test is designed to identify outliers in time-series data using the z-score method. It's vital for ensuring
+    data quality before modeling, as outliers can skew predictive models and significantly impact their overall
+    performance.
+
+    ### Test Mechanism
+
+    The test processes a given dataset which must have datetime indexing, checks if a 'zscore_threshold' parameter has
+    been supplied, and identifies columns with numeric data types. After finding numeric columns, the implementer then
+    applies the z-score method to each numeric column, identifying outliers based on the threshold provided. Each
+    outlier is listed together with their variable name, z-score, timestamp, and relative threshold in a dictionary and
+    converted to a DataFrame for convenient output. Additionally, it produces visual plots for each time series
+    illustrating outliers in the context of the broader dataset. The 'zscore_threshold' parameter sets the limit beyond
+    which a data point will be labeled as an outlier. The default threshold is set at 3, indicating that any data point
+    that falls 3 standard deviations away from the mean will be marked as an outlier.
+
+    ### Signs of High Risk
+
+    - Many or substantial outliers are present within the dataset, indicating significant anomalies.
+    - Data points with z-scores higher than the set threshold.
+    - Potential impact on the performance of machine learning models if outliers are not properly addressed.
+
+    ### Strengths
+
     - The z-score method is a popular and robust method for identifying outliers in a dataset.
-    - Time series maintenance is simplified through requiring a datetime index.
-    - Outliers are identified for each numeric feature individually.
-    - Provides an elaborate report which shows variables, date, z-score and whether the test passed or failed.
-    - Offers visual inspection for detected outliers in the respective time-series through plots.
+    - Simplifies time series maintenance by requiring a datetime index.
+    - Identifies outliers for each numeric feature individually.
+    - Provides an elaborate report showing variables, dates, z-scores, and pass/fail tests.
+    - Offers visual inspection for detected outliers through plots.
+
+    ### Limitations
 
-    **Limitations**:
-    - This test only identifies outliers in numeric columns, and won't identify outliers in categorical variables.
+    - The test only identifies outliers in numeric columns, not in categorical variables.
     - The utility and accuracy of z-scores can be limited if the data doesn't follow a normal distribution.
     - The method relies on a subjective z-score threshold for deciding what constitutes an outlier, which might not
-    always be suitable depending on the dataset and the use case.
+    always be suitable depending on the dataset and use case.
     - It does not address possible ways to handle identified outliers in the data.
-    - The necessity for a datetime index could limit the extent of its application.
+    - The requirement for a datetime index could limit its application.
     """
 
     name = "time_series_outliers"
@@ -215,7 +219,7 @@ class TimeSeriesOutliers(ThresholdTest):
             )
 
             fig.update_layout(
-                title=f"Time Series with Outliers for {col}",
+                title=f"Outliers for {col}",
                 xaxis_title="Date",
                 yaxis_title=col,
             )
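The z-score mechanism this docstring describes reduces to a short pandas loop. A minimal sketch; the function name and output columns are illustrative, and only the default threshold of 3 comes from the docstring:

```python
import numpy as np
import pandas as pd

def zscore_outliers(df: pd.DataFrame, zscore_threshold: float = 3.0) -> pd.DataFrame:
    """List every numeric data point more than `zscore_threshold` std devs from its column mean."""
    rows = []
    for col in df.select_dtypes(include=np.number).columns:
        z = (df[col] - df[col].mean()) / df[col].std()
        for ts, score in z[z.abs() > zscore_threshold].items():
            rows.append({"Variable": col, "Date": ts,
                         "z-score": score, "Threshold": zscore_threshold})
    return pd.DataFrame(rows)
```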
@@ -23,27 +23,31 @@ class TooManyZeroValues(ThresholdTest):
     Identifies numerical columns in a dataset that contain an excessive number of zero values, defined by a threshold
     percentage.
 
-    **Purpose**:
+    ### Purpose
+
     The 'TooManyZeroValues' test is utilized to identify numerical columns in the dataset that may present a quantity
     of zero values considered excessive. The aim is to detect situations where these may implicate data sparsity or a
     lack of variation, limiting their effectiveness within a machine learning model. The definition of 'too many' is
     quantified as a percentage of total values, with a default set to 3%.
 
-    **Test Mechanism**:
+    ### Test Mechanism
+
     This test is conducted by looping through each column in the dataset and categorizing those that pertain to
     numerical data. On identifying a numerical column, the function computes the total quantity of zero values and
     their ratio to the total row count. Should the proportion exceed a pre-set threshold parameter, set by default at
-    0.03 or 3%, the column is considered to have failed the test. The results for each column are summarised and
+    0.03 or 3%, the column is considered to have failed the test. The results for each column are summarized and
     reported, indicating the count and percentage of zero values for each numerical column, alongside a status
     indicating whether the column has passed or failed the test.
 
-    **Signs of High Risk**:
-    - Indicators evidencing a high risk connected with this test would include numerical columns showing a high ratio
-    of zero values when compared to the total count of rows (exceeding a pre-determined threshold).
+    ### Signs of High Risk
+
+    - Numerical columns showing a high ratio of zero values when compared to the total count of rows (exceeding the
+    predetermined threshold).
     - Columns characterized by zero values across the board suggest a complete lack of data variation, signifying high
     risk.
 
-    **Strengths**:
+    ### Strengths
+
     - Assists in highlighting columns featuring an excess of zero values that could otherwise go unnoticed within a
     large dataset.
     - Provides the flexibility to alter the threshold that determines when the quantity of zero values becomes 'too
@@ -53,12 +57,13 @@ class TooManyZeroValues(ThresholdTest):
     - Targets specifically numerical data, thereby avoiding inappropriate application to non-numerical columns and
     mitigating the risk of false test failures.
 
-    **Limitations**:
-    - Is exclusively designed to check for zero values, and doesn’t assesses the potential impact of other values that
-    could affect the dataset, such as extremely high or low figures, missing values or outliers.
+    ### Limitations
+
+    - Is exclusively designed to check for zero values and doesn’t assess the potential impact of other values that
+    could affect the dataset, such as extremely high or low figures, missing values, or outliers.
     - Lacks the ability to detect a repetitive pattern of zeros, which could be significant in time-series or
     longitudinal data.
-    - Zero values can actually be meaningful in some contexts, therefore tagging them as 'too many' could potentially
+    - Zero values can actually be meaningful in some contexts; therefore, tagging them as 'too many' could potentially
     misinterpret the data to some extent.
     - This test does not take into consideration the context of the dataset, and fails to recognize that within certain
     columns, a high number of zero values could be quite normal and not necessarily an indicator of poor data quality.
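The proportion check described above amounts to a per-column loop like the following sketch; the 3% default comes from the docstring, while the function name and report shape are illustrative:

```python
import numpy as np
import pandas as pd

def zero_value_report(df: pd.DataFrame, max_percent_threshold: float = 0.03) -> dict:
    """Flag numeric columns whose share of zero values exceeds the threshold."""
    report = {}
    for col in df.select_dtypes(include=np.number).columns:
        n_zeros = int((df[col] == 0).sum())
        ratio = n_zeros / len(df)
        report[col] = {"zeros": n_zeros,
                       "percentage": ratio * 100,
                       "passed": ratio < max_percent_threshold}
    return report
```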
@@ -19,31 +19,36 @@ class UniqueRows(ThresholdTest):
     """
     Verifies the diversity of the dataset by ensuring that the count of unique rows exceeds a prescribed threshold.
 
-    **Purpose**:
+    ### Purpose
+
     The UniqueRows test is designed to gauge the quality of the data supplied to the machine learning model by
     verifying that the count of distinct rows in the dataset exceeds a specific threshold, thereby ensuring a varied
     collection of data. Diversity in data is essential for training an unbiased and robust model that excels when faced
     with novel data.
 
-    **Test Mechanism**:
+    ### Test Mechanism
+
     The testing process starts with calculating the total number of rows in the dataset. Subsequently, the count of
     unique rows is determined for each column in the dataset. If the percentage of unique rows (calculated as the ratio
     of unique rows to the overall row count) is less than the prescribed minimum percentage threshold given as a
-    function parameter, the test is passed. The results are cached and a final pass or fail verdict is given based on
+    function parameter, the test passes. The results are cached and a final pass or fail verdict is given based on
     whether all columns have successfully passed the test.
 
-    **Signs of High Risk**:
+    ### Signs of High Risk
+
     - A lack of diversity in data columns, demonstrated by a count of unique rows that falls short of the preset
     minimum percentage threshold, is indicative of high risk.
     - This lack of variety in the data signals potential issues with data quality, possibly leading to overfitting in
     the model and issues with generalization, thus posing a significant risk.
 
-    **Strengths**:
+    ### Strengths
+
     - The UniqueRows test is efficient in evaluating the data's diversity across each information column in the dataset.
     - This test provides a quick, systematic method to assess data quality based on uniqueness, which can be pivotal in
     developing effective and unbiased machine learning models.
 
-    **Limitations**:
+    ### Limitations
+
     - A limitation of the UniqueRows test is its assumption that the data's quality is directly proportionate to its
     uniqueness, which may not always hold true. There might be contexts where certain non-unique rows are essential and
     should not be overlooked.
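As documented, a column passes when its unique-row percentage stays below the minimum percentage threshold. A minimal sketch of that rule; the 1% default here is a hypothetical value, not the package default:

```python
import pandas as pd

def unique_rows_check(df: pd.DataFrame, min_percent_threshold: float = 1.0) -> dict:
    """Per column: percentage of unique values, and pass/fail per the documented rule."""
    n_rows = len(df)
    return {
        col: {
            "unique_percent": df[col].nunique() / n_rows * 100,
            "passed": df[col].nunique() / n_rows * 100 < min_percent_threshold,
        }
        for col in df.columns
    }
```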
@@ -20,34 +20,41 @@ class WOEBinPlots(Metric):
     Generates visualizations of Weight of Evidence (WoE) and Information Value (IV) for understanding predictive power
     of categorical variables in a data set.
 
-    **Purpose**: This test is designed to visualize the Weight of Evidence (WoE) and Information Value (IV) for
-    categorical variables in a provided dataset. By showcasing the data distribution across different categories of
-    each feature, it aids in understanding each variable's predictive power in the context of a classification-based
-    machine learning model. Commonly used in credit scoring models, WoE and IV are robust statistical methods for
-    evaluating a variable's predictive power.
-
-    **Test Mechanism**: The test implementation follows defined steps. Initially, it selects non-numeric columns from
-    the dataset and changes them to string type, paving the way for accurate binning. It then performs an automated WoE
-    binning operation on these selected features, effectively categorizing the potential values of a variable into
-    distinct bins. After the binning process, the function generates two separate visualizations (a scatter chart for
-    WoE values and a bar chart for IV) for each variable. These visual presentations are formed according to the spread
-    of each metric across various categories of each feature.
-
-    **Signs of High Risk**:
+    ### Purpose
+
+    This test is designed to visualize the Weight of Evidence (WoE) and Information Value (IV) for categorical
+    variables in a provided dataset. By showcasing the data distribution across different categories of each feature,
+    it aids in understanding each variable's predictive power in the context of a classification-based machine learning
+    model. Commonly used in credit scoring models, WoE and IV are robust statistical methods for evaluating a
+    variable's predictive power.
+
+    ### Test Mechanism
+
+    The test implementation follows defined steps. Initially, it selects non-numeric columns from the dataset and
+    changes them to string type, paving the way for accurate binning. It then performs an automated WoE binning
+    operation on these selected features, effectively categorizing the potential values of a variable into distinct
+    bins. After the binning process, the function generates two separate visualizations (a scatter chart for WoE values
+    and a bar chart for IV) for each variable. These visual presentations are formed according to the spread of each
+    metric across various categories of each feature.
+
+    ### Signs of High Risk
+
     - Errors occurring during the binning process.
     - Challenges in converting non-numeric columns into string data type.
     - Misbalance in the distribution of WoE and IV, with certain bins overtaking others conspicuously. This could
     denote that the model is disproportionately dependent on certain variables or categories for predictions, an
     indication of potential risks to its robustness and generalizability.
 
-    **Strengths**:
+    ### Strengths
+
     - Provides a detailed visual representation of the relationship between feature categories and the target variable.
     This grants an intuitive understanding of each feature's contribution to the model.
     - Allows for easy identification of features with high impact, facilitating feature selection and enhancing
     comprehension of the model's decision logic.
     - WoE conversions are monotonic, upholding the rank ordering of the original data points, which simplifies analysis.
 
-    **Limitations**:
+    ### Limitations
+
     - The method is largely reliant on the binning process, and an inappropriate binning threshold or bin number choice
     might result in a misrepresentation of the variable's distribution.
     - While excellent for categorical data, the encoding of continuous variables into categorical can sometimes lead to
@@ -13,36 +13,41 @@ from validmind.vm_models import Metric, ResultSummary, ResultTable, ResultTableM
 @dataclass
 class WOEBinTable(Metric):
     """
-    Calculates and assesses the Weight of Evidence (WoE) and Information Value (IV) of each feature in a ML model.
-
-    **Purpose**: The Weight of Evidence (WoE) and Information Value (IV) test is intended to evaluate the predictive
-    power of each feature in the machine learning model. The test generates binned groups of values from each feature
-    in a dataset, computes the WoE value and the IV value for each bin. These values provide insights on the
-    relationship between each feature and the target variable and their contribution towards the predictive output of
-    the model.
-
-    **Test Mechanism**: The metric leverages the `scorecardpy.woebin` method to perform WoE-based automatic binning on
-    the dataset. Depending on the parameter `breaks_adj`, the method adjusts the cut-off points for binning numeric
-    variables. The bins are then used to calculate the WoE and IV. The metric requires a dataset with the target
-    variable defined. The metric outputs a dataframe that comprises the bin boundaries, WoE, and IV values for each
-    feature.
-
-    **Signs of High Risk**:
-    - High IV values, which denote variables with too much predictive power which might lead to overfitting
-    - Errors during the binning process, which might be due to inappropriate data types or poorly defined bins
-
-    **Strengths**:
-    - The WoE and IV test is highly effective for feature selection in binary classification problems, as it quantifies
-    how much predictive information is packed within each feature regarding the binary outcome
-    - The WoE transformation creates a monotonic relationship between the target and independent variables
-
-    **Limitations**:
-    - Mainly designed for binary classification tasks, therefore it might not be applicable or reliable for multi-class
-    classification or regression tasks
-    - If the dataset has many features or the features are not binnable or they are non-numeric, this process might
-    encounter difficulties
-    - This metric doesn't help in identifying if the predictive factor being observed is a coincidence or a real
-    phenomenon due to data randomness
+    Assesses the Weight of Evidence (WoE) and Information Value (IV) of each feature to evaluate its predictive power
+    in a binary classification model.
+
+    ### Purpose
+
+    The Weight of Evidence (WoE) and Information Value (IV) test is designed to evaluate the predictive power of each
+    feature in a machine learning model. This test generates binned groups of values from each feature, computes the
+    WoE and IV for each bin, and provides insights into the relationship between each feature and the target variable,
+    illustrating their contribution to the model's predictive capabilities.
+
+    ### Test Mechanism
+
+    The test uses the `scorecardpy.woebin` method to perform automatic binning of the dataset based on WoE. The method
+    adjusts the cut-off points for binning numeric variables based on the parameter `breaks_adj`. The bins are then
+    used to calculate the WoE and IV values, effectively creating a dataframe that includes the bin boundaries, WoE,
+    and IV values for each feature. A target variable is required in the dataset to perform this analysis.
+
+    ### Signs of High Risk
+
+    - High IV values, indicating variables with excessive predictive power which might lead to overfitting.
+    - Errors during the binning process, potentially due to inappropriate data types or poorly defined bins.
+
+    ### Strengths
+
+    - Highly effective for feature selection in binary classification problems, as it quantifies the predictive
+    information within each feature concerning the binary outcome.
+    - The WoE transformation creates a monotonic relationship between the target and independent variables.
+
+    ### Limitations
+
+    - Primarily designed for binary classification tasks, making it less applicable or reliable for multi-class
+    classification or regression tasks.
+    - Potential difficulties if the dataset has many features, non-binnable features, or non-numeric features.
+    - The metric does not help in distinguishing whether the observed predictive factor is due to data randomness or a
+    true phenomenon.
     """
 
     name = "woe_bin_table"
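Since the docstring names `scorecardpy.woebin` directly, a minimal standalone run looks like the sketch below. The bundled German credit dataset is used purely for illustration, and mapping the test's `breaks_adj` parameter onto scorecardpy's `breaks_list` argument is an assumption:

```python
import pandas as pd
import scorecardpy as sc

# Sample binary-classification dataset shipped with scorecardpy.
df = sc.germancredit()

# Automatic WoE binning; returns a dict mapping each variable to its bin table.
bins = sc.woebin(df, y="creditability")

# Concatenate per-feature tables into one dataframe of bin boundaries, WoE, and IV.
woe_iv = pd.concat(bins.values(), ignore_index=True)
print(woe_iv[["variable", "bin", "woe", "bin_iv", "total_iv"]].head())
```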
@@ -17,36 +17,42 @@ logger = get_logger(__name__)
 @dataclass
 class ZivotAndrewsArch(Metric):
     """
-    Evaluates the order of integration and stationarity of time series data using Zivot-Andrews unit root test.
-
-    **Purpose**: The Zivot-Andrews Arch metric is used to evaluate the order of integration for a time series data in a
-    machine learning model. It's designed to test for stationarity, a crucial aspect in time series analysis where data
-    points are not dependent on time. Stationarity means that the statistical properties such as mean, variance and
-    autocorrelation are all constant over time.
-
-    **Test Mechanism**: The Zivot-Andrews unit root test is performed on each feature in the dataset using the
-    `ZivotAndrews` function from the `arch.unitroot` module. This function returns the Zivot-Andrews metric for each
-    feature, which includes the statistical value, p-value (probability value), the number of used lags, and the number
-    of observations. The p-value is later used to decide on the null hypothesis (the time series has a unit root and is
-    non-stationary) based on a chosen level of significance.
-
-    **Signs of High Risk**:
-    - A high p-value can suggest high risk. This might indicate that there's insufficient evidence to reject the null
-    hypothesis, which would mean the time series has a unit root and is therefore non-stationary.
+    Evaluates the order of integration and stationarity of time series data using the Zivot-Andrews unit root test.
+
+    ### Purpose
+
+    The Zivot-Andrews Arch metric is used to evaluate the order of integration for time series data in a machine
+    learning model. It's designed to test for stationarity, a crucial aspect of time series analysis, where data points
+    are independent of time. Stationarity means that the statistical properties such as mean, variance, and
+    autocorrelation are constant over time.
+
+    ### Test Mechanism
+
+    The Zivot-Andrews unit root test is performed on each feature in the dataset using the `ZivotAndrews` function from
+    the `arch.unitroot` module. This function returns several metrics for each feature, including the statistical
+    value, p-value (probability value), the number of lags used, and the number of observations. The p-value is used to
+    decide on the null hypothesis (the time series has a unit root and is non-stationary) based on a chosen level of
+    significance.
+
+    ### Signs of High Risk
+
+    - A high p-value suggests high risk, indicating insufficient evidence to reject the null hypothesis, implying that
+    the time series has a unit root and is non-stationary.
     - Non-stationary time series data can lead to misleading statistics and unreliable machine learning models.
 
-    **Strengths**:
-    - The Zivot-Andrews Arch metric dynamically tests for stationarity against structural breaks in time series data,
-    offering robust evaluation of stationarity in features.
-    - This metric is especially beneficial with financial, economic, or other time-series data where data observations
-    lack a consistent pattern and structural breaks may occur.
-
-    **Limitations**:
-    - The Zivot-Andrews Arch metric assumes that data is derived from a single-equation, autoregressive model. It may,
-    therefore, not be appropriate for multivariate time series data or data which does not align with the
-    autoregressive model assumption.
-    - It might not take into account unexpected shocks or changes in the series trend which can both have a significant
-    impact on the stationarity of the data.
+    ### Strengths
+
+    - Dynamically tests for stationarity against structural breaks in time series data, offering robust evaluation of
+    stationarity in features.
+    - Especially beneficial with financial, economic, or other time-series data where data observations lack a
+    consistent pattern and structural breaks may occur.
+
+    ### Limitations
+
+    - Assumes data is derived from a single-equation, autoregressive model, making it less appropriate for multivariate
+    time series data or data not aligning with this model.
+    - May not account for unexpected shocks or changes in the series trend, both of which can significantly impact data
+    stationarity.
     """
 
     name = "zivot_andrews"
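The per-feature loop described above can be reproduced directly with the `arch` package. A minimal sketch; the synthetic random-walk series is purely illustrative:

```python
import numpy as np
import pandas as pd
from arch.unitroot import ZivotAndrews

rng = np.random.default_rng(0)
df = pd.DataFrame({"feature": rng.normal(size=200).cumsum()})  # non-stationary by construction

for col in df.columns:
    za = ZivotAndrews(df[col].dropna())
    # Statistic, p-value, lags used, and observation count, as listed in the docstring.
    print(col, za.stat, za.pvalue, za.lags, za.nobs)
```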
@@ -19,33 +19,40 @@ from ....vm_models import Figure, Metric, VMDataset
 @dataclass
 class CommonWords(Metric):
     """
-    Identifies and visualizes the 40 most frequent non-stopwords in a specified text column within a dataset.
+    Assesses the most frequent non-stopwords in a text column for identifying prevalent language patterns.
 
-    **Purpose**: The CommonWords metric is used to identify and visualize the most prevalent words within a specified
-    text column of a dataset. This provides insights into the prevalent language patterns and vocabulary, especially
-    useful in Natural Language Processing (NLP) tasks such as text classification and text summarization.
+    ### Purpose
 
-    **Test Mechanism**: The test methodology involves splitting the specified text column's entries into words,
-    collating them into a corpus, and then counting the frequency of each word using the Counter. The forty most
-    frequently occurring non-stopwords are then visualized in a bar chart, where the x-axis represents the words, and
-    the y-axis indicates their frequency of occurrence.
+    The CommonWords metric is used to identify and visualize the most prevalent words within a specified text column of
+    a dataset. This provides insights into the prevalent language patterns and vocabulary, especially useful in Natural
+    Language Processing (NLP) tasks such as text classification and text summarization.
+
+    ### Test Mechanism
+
+    The test methodology involves splitting the specified text column's entries into words, collating them into a
+    corpus, and then counting the frequency of each word using the Counter. The forty most frequently occurring
+    non-stopwords are then visualized in a bar chart, where the x-axis represents the words, and the y-axis indicates
+    their frequency of occurrence.
+
+    ### Signs of High Risk
 
-    **Signs of High Risk**:
     - A lack of distinct words within the list, or the most common words being stopwords.
     - Frequent occurrence of irrelevant or inappropriate words could point out a poorly curated or noisy dataset.
-    - An error returned due to the absence of a valid Dataset object indicates high risk as the metric cannot be
+    - An error returned due to the absence of a valid Dataset object, indicating high risk as the metric cannot be
     effectively implemented without it.
 
-    **Strengths**:
+    ### Strengths
+
     - The metric provides clear insights into the language features – specifically word frequency – of unstructured
     text data.
     - It can reveal prominent vocabulary and language patterns, which prove vital for feature extraction in NLP tasks.
     - The visualization helps in quickly capturing the patterns and understanding the data intuitively.
 
-    **Limitations**:
+    ### Limitations
+
     - The test disregards semantic or context-related information as it solely focuses on word frequency.
-    - It intentionally ignores stopwords which might carry necessary significance in certain scenarios.
-    - The applicability is limited to English language text data as English stopwords are used for filtering, hence
+    - It intentionally ignores stopwords, which might carry necessary significance in certain scenarios.
+    - The applicability is limited to English-language text data as English stopwords are used for filtering, hence
     cannot account for data in other languages.
     - The metric requires a valid Dataset object, indicating a dependency condition that limits its broader
     applicability.
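The Counter-based mechanism described above is a few lines of standard-library and NLTK code. A minimal sketch with hypothetical input texts, keeping the documented top-40 cut-off and English stopword filter:

```python
from collections import Counter

import nltk
import plotly.graph_objects as go

nltk.download("stopwords", quiet=True)
from nltk.corpus import stopwords

texts = ["the model predicts churn", "churn risk rises with churn signals"]

# Split entries into words, collate into a corpus, and drop English stopwords.
corpus = [word for text in texts for word in text.lower().split()]
non_stop = [w for w in corpus if w not in set(stopwords.words("english"))]

# Bar chart of the 40 most frequent non-stopwords: words on the x-axis, counts on the y-axis.
words, freqs = zip(*Counter(non_stop).most_common(40))
go.Figure(go.Bar(x=list(words), y=list(freqs))).show()
```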