validmind 2.9.1__py3-none-any.whl → 2.9.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (60)
  1. validmind/__init__.py +16 -5
  2. validmind/__version__.py +1 -1
  3. validmind/ai/test_descriptions.py +24 -17
  4. validmind/ai/utils.py +2 -2
  5. validmind/api_client.py +0 -2
  6. validmind/datasets/credit_risk/lending_club.py +13 -1
  7. validmind/datasets/nlp/cnn_dailymail.py +15 -1
  8. validmind/tests/data_validation/ChiSquaredFeaturesTable.py +14 -2
  9. validmind/tests/data_validation/DickeyFullerGLS.py +13 -2
  10. validmind/tests/data_validation/PhillipsPerronArch.py +13 -2
  11. validmind/tests/data_validation/ScoreBandDefaultRates.py +1 -1
  12. validmind/tests/data_validation/SeasonalDecompose.py +14 -2
  13. validmind/tests/data_validation/ShapiroWilk.py +14 -1
  14. validmind/tests/data_validation/TimeSeriesDescriptiveStatistics.py +14 -1
  15. validmind/tests/data_validation/WOEBinPlots.py +14 -1
  16. validmind/tests/data_validation/WOEBinTable.py +13 -2
  17. validmind/tests/data_validation/ZivotAndrewsArch.py +13 -2
  18. validmind/tests/data_validation/nlp/CommonWords.py +14 -2
  19. validmind/tests/data_validation/nlp/LanguageDetection.py +14 -1
  20. validmind/tests/data_validation/nlp/PolarityAndSubjectivity.py +13 -1
  21. validmind/tests/data_validation/nlp/Sentiment.py +13 -1
  22. validmind/tests/data_validation/nlp/StopWords.py +14 -2
  23. validmind/tests/data_validation/nlp/TextDescription.py +14 -2
  24. validmind/tests/data_validation/nlp/Toxicity.py +13 -1
  25. validmind/tests/model_validation/BertScore.py +13 -2
  26. validmind/tests/model_validation/BleuScore.py +13 -2
  27. validmind/tests/model_validation/ContextualRecall.py +13 -1
  28. validmind/tests/model_validation/MeteorScore.py +13 -2
  29. validmind/tests/model_validation/ModelPredictionResiduals.py +14 -1
  30. validmind/tests/model_validation/RegardScore.py +13 -2
  31. validmind/tests/model_validation/RougeScore.py +14 -1
  32. validmind/tests/model_validation/TimeSeriesPredictionWithCI.py +14 -1
  33. validmind/tests/model_validation/ToxicityScore.py +13 -1
  34. validmind/tests/model_validation/embeddings/PCAComponentsPairwisePlots.py +1 -1
  35. validmind/tests/model_validation/embeddings/TSNEComponentsPairwisePlots.py +1 -1
  36. validmind/tests/model_validation/sklearn/KMeansClustersOptimization.py +14 -2
  37. validmind/tests/model_validation/sklearn/RobustnessDiagnosis.py +1 -1
  38. validmind/tests/model_validation/sklearn/SHAPGlobalImportance.py +13 -2
  39. validmind/tests/model_validation/statsmodels/CumulativePredictionProbabilities.py +1 -1
  40. validmind/tests/model_validation/statsmodels/PredictionProbabilitiesHistogram.py +1 -1
  41. validmind/tests/model_validation/statsmodels/RegressionCoeffs.py +14 -2
  42. validmind/tests/model_validation/statsmodels/ScorecardHistogram.py +1 -1
  43. validmind/tests/ongoing_monitoring/CalibrationCurveDrift.py +1 -1
  44. validmind/tests/ongoing_monitoring/ClassDiscriminationDrift.py +14 -1
  45. validmind/tests/ongoing_monitoring/PredictionProbabilitiesHistogramDrift.py +15 -2
  46. validmind/tests/ongoing_monitoring/ScoreBandsDrift.py +1 -1
  47. validmind/tests/ongoing_monitoring/ScorecardHistogramDrift.py +15 -2
  48. validmind/tests/ongoing_monitoring/TargetPredictionDistributionPlot.py +14 -1
  49. validmind/tests/plots/BoxPlot.py +2 -2
  50. validmind/tests/plots/HistogramPlot.py +4 -4
  51. validmind/tests/stats/DescriptiveStats.py +2 -2
  52. validmind/vm_models/result/pii_filter.py +202 -0
  53. validmind/vm_models/result/result.py +34 -8
  54. validmind/vm_models/result/utils.py +0 -27
  55. validmind-2.9.3.dist-info/METADATA +848 -0
  56. {validmind-2.9.1.dist-info → validmind-2.9.3.dist-info}/RECORD +59 -58
  57. validmind-2.9.1.dist-info/METADATA +0 -137
  58. {validmind-2.9.1.dist-info → validmind-2.9.3.dist-info}/LICENSE +0 -0
  59. {validmind-2.9.1.dist-info → validmind-2.9.3.dist-info}/WHEEL +0 -0
  60. {validmind-2.9.1.dist-info → validmind-2.9.3.dist-info}/entry_points.txt +0 -0
validmind/__init__.py CHANGED
@@ -32,15 +32,21 @@ After you have pasted the code snippet into your development source code and exe
 """
 import threading
 import warnings
+from importlib import metadata
 
-import pkg_resources
 from IPython.display import HTML, display
 
 # Ignore Numba warnings. We are not requiring this package directly
-from numba.core.errors import NumbaDeprecationWarning, NumbaPendingDeprecationWarning
+try:
+    from numba.core.errors import (
+        NumbaDeprecationWarning,
+        NumbaPendingDeprecationWarning,
+    )
 
-warnings.simplefilter("ignore", category=NumbaDeprecationWarning)
-warnings.simplefilter("ignore", category=NumbaPendingDeprecationWarning)
+    warnings.simplefilter("ignore", category=NumbaDeprecationWarning)
+    warnings.simplefilter("ignore", category=NumbaPendingDeprecationWarning)
+except ImportError:
+    ...
 
 from .__version__ import __version__  # noqa: E402
 from .api_client import init, log_metric, log_text, reload
@@ -81,7 +87,12 @@ def check_version():
     # get the installed vs running version of validmind
     # to make sure we are using the latest installed version
     # in case user has updated the package but forgot to restart the kernel
-    installed = pkg_resources.get_distribution("validmind").version
+    try:
+        installed = metadata.version("validmind")
+    except metadata.PackageNotFoundError:
+        # Package metadata not found, skip version check
+        return
+
     running = __version__
 
     if parse_version(installed) > parse_version(running):
validmind/__version__.py CHANGED
@@ -1 +1 @@
-__version__ = "2.9.1"
+__version__ = "2.9.3"
validmind/ai/test_descriptions.py CHANGED
@@ -14,6 +14,11 @@ from ..logging import get_logger
 from ..utils import NumpyEncoder, md_to_html, test_id_to_name
 from ..vm_models.figure import Figure
 from ..vm_models.result import ResultTable
+from ..vm_models.result.pii_filter import (
+    PIIDetectionMode,
+    get_pii_detection_mode,
+    scan_df,
+)
 from .utils import DescriptionFuture
 
 __executor = ThreadPoolExecutor()
@@ -92,6 +97,13 @@ def generate_description(
     )
 
     if tables:
+        if get_pii_detection_mode() in [
+            PIIDetectionMode.TEST_DESCRIPTIONS,
+            PIIDetectionMode.ALL,
+        ]:
+            for table in tables:
+                scan_df(table.data)
+
         summary = "\n---\n".join(
             [
                 json.dumps(table.serialize(), cls=NumpyEncoder, separators=(",", ":"))
@@ -125,13 +137,16 @@ def background_generate_description(
 ):
     def wrapped():
         try:
-            return generate_description(
-                test_id=test_id,
-                test_description=test_description,
-                tables=tables,
-                figures=figures,
-                metric=metric,
-                title=title,
+            return (
+                generate_description(
+                    test_id=test_id,
+                    test_description=test_description,
+                    tables=tables,
+                    figures=figures,
+                    metric=metric,
+                    title=title,
+                ),
+                True,
             )
         except Exception as e:
             if "maximum context length" in str(e):
@@ -146,7 +161,7 @@ def background_generate_description(
             logger.warning(f"Failed to generate description for {test_id}: {e}")
             logger.warning(f"Using default description for {test_id}")
 
-            return test_description
+            return test_description, False
 
     return DescriptionFuture(__executor.submit(wrapped))
 
@@ -191,15 +206,7 @@
         not in ["0", "false"]
     )
 
-    # TODO: fix circular import
-    from validmind.ai.utils import is_configured
-
-    if (
-        should_generate
-        and (tables or figures)
-        and llm_descriptions_enabled
-        and is_configured()
-    ):
+    if should_generate and (tables or figures) and llm_descriptions_enabled:
         # get description future and set it as the description in the metadata
         # this will lazily retrieved so it can run in the background in parallel
         description = background_generate_description(
validmind/ai/utils.py CHANGED
@@ -35,13 +35,13 @@ class DescriptionFuture:
         self._future = future
 
     def get_description(self):
-        if isinstance(self._future, str):
+        if isinstance(self._future, tuple):
             description = self._future
         else:
             # This will block until the future is completed
             description = self._future.result()
 
-        return md_to_html(description, mathml=True)
+        return md_to_html(description[0], mathml=True), description[1]
 
 
 def get_client_and_model():
validmind/api_client.py CHANGED
@@ -40,8 +40,6 @@ __api_session: Optional[aiohttp.ClientSession] = None
 @atexit.register
 def _close_session():
     """Closes the async client session at exit."""
-    global __api_session
-
     if __api_session and not __api_session.closed:
         try:
             loop = asyncio.get_event_loop()
validmind/datasets/credit_risk/lending_club.py CHANGED
@@ -9,13 +9,25 @@ from typing import Any, Dict, Optional, Tuple
 
 import numpy as np
 import pandas as pd
-import scorecardpy as sc
 import statsmodels.api as sm
 import xgboost as xgb
 from sklearn.ensemble import RandomForestClassifier
 from sklearn.model_selection import train_test_split
 
 import validmind as vm
+from validmind.errors import MissingDependencyError
+
+try:
+    import scorecardpy as sc
+except ImportError as e:
+    if "scorecardpy" in str(e):
+        raise MissingDependencyError(
+            "Missing required package `scorecardpy` for credit risk demos. "
+            "Please run `pip install validmind[credit_risk]` or `pip install scorecardpy`.",
+            required_dependencies=["scorecardpy"],
+            extra="credit_risk",
+        ) from e
+    raise e
 
 current_path = os.path.dirname(os.path.abspath(__file__))
 dataset_path = os.path.join(current_path, "datasets")
validmind/datasets/nlp/cnn_dailymail.py CHANGED
@@ -7,10 +7,11 @@ import textwrap
 from typing import Optional, Tuple
 
 import pandas as pd
-from datasets import load_dataset
 from IPython.display import HTML, display
 from tabulate import tabulate
 
+from validmind.errors import MissingDependencyError
+
 # Define column names
 text_column = "article"
 target_column = "highlights"
@@ -37,6 +38,19 @@ def load_data(
         Tuple containing (train_df, test_df) DataFrames with the loaded data.
     """
     if source == "online":
+        try:
+            from datasets import load_dataset
+        except ImportError as e:
+            if "datasets" in str(e):
+                raise MissingDependencyError(
+                    "Missing required package `datasets` for CNN Daily Mail. "
+                    "Please run `pip install validmind[datasets]` or "
+                    "`pip install datasets` to use CNN Daily Mail dataset",
+                    required_dependencies=["datasets"],
+                    extra="datasets",
+                ) from e
+            raise e
+
         # Load online data without predictions
         cnn_dataset = load_dataset("cnn_dailymail", "3.0.0")
         train_df = cnn_dataset["train"].to_pandas()
validmind/tests/data_validation/ChiSquaredFeaturesTable.py CHANGED
@@ -4,10 +4,22 @@
 
 
 import pandas as pd
-from scipy.stats import chi2_contingency
 
 from validmind import tags, tasks
-from validmind.errors import SkipTestError
+from validmind.errors import MissingDependencyError, SkipTestError
+
+try:
+    from scipy.stats import chi2_contingency
+except ImportError as e:
+    if "scipy" in str(e):
+        raise MissingDependencyError(
+            "Missing required package `scipy` for ChiSquaredFeaturesTable. "
+            "Please run `pip install validmind[stats]` to use statistical tests",
+            required_dependencies=["scipy"],
+            extra="stats",
+        ) from e
+
+    raise e
 
 
 @tags("tabular_data", "categorical_data", "statistical_test")
validmind/tests/data_validation/DickeyFullerGLS.py CHANGED
@@ -5,14 +5,25 @@
 from typing import Any, Dict, Tuple
 
 import pandas as pd
-from arch.unitroot import DFGLS
 from numpy.linalg import LinAlgError
 
 from validmind import RawData, tags, tasks
-from validmind.errors import SkipTestError
+from validmind.errors import MissingDependencyError, SkipTestError
 from validmind.logging import get_logger
 from validmind.vm_models import VMDataset
 
+try:
+    from arch.unitroot import DFGLS
+except ImportError as e:
+    if "arch" in str(e):
+        raise MissingDependencyError(
+            "Missing required package `arch` for DickeyFullerGLS. "
+            "Please run `pip install validmind[stats]` to use statistical tests",
+            required_dependencies=["arch"],
+            extra="stats",
+        ) from e
+    raise e
+
 logger = get_logger(__name__)
 
 
validmind/tests/data_validation/PhillipsPerronArch.py CHANGED
@@ -6,14 +6,25 @@ from typing import Any, Dict
 
 import numpy as np
 import pandas as pd
-from arch.unitroot import PhillipsPerron
 from numpy.linalg import LinAlgError
 
 from validmind import tags, tasks
-from validmind.errors import SkipTestError
+from validmind.errors import MissingDependencyError, SkipTestError
 from validmind.logging import get_logger
 from validmind.vm_models import VMDataset
 
+try:
+    from arch.unitroot import PhillipsPerron
+except ImportError as e:
+    if "arch" in str(e):
+        raise MissingDependencyError(
+            "Missing required package `arch` for PhillipsPerronArch. "
+            "Please run `pip install validmind[stats]` to use statistical tests",
+            required_dependencies=["arch"],
+            extra="stats",
+        ) from e
+    raise e
+
 logger = get_logger(__name__)
 
 
validmind/tests/data_validation/ScoreBandDefaultRates.py CHANGED
@@ -83,7 +83,7 @@ def ScoreBandDefaultRates(
 
     # Create band labels
     band_labels = [
-        f"{score_bands[i]}-{score_bands[i+1]}" for i in range(len(score_bands) - 1)
+        f"{score_bands[i]}-{score_bands[i + 1]}" for i in range(len(score_bands) - 1)
     ]
     band_labels.insert(0, f"<{score_bands[0]}")
     band_labels.append(f">{score_bands[-1]}")
validmind/tests/data_validation/SeasonalDecompose.py CHANGED
@@ -9,11 +9,23 @@ import numpy as np
 import pandas as pd
 import plotly.graph_objects as go
 from plotly.subplots import make_subplots
-from scipy import stats
 from statsmodels.tsa.seasonal import seasonal_decompose
 
 from validmind import RawData, tags, tasks
-from validmind.errors import SkipTestError
+from validmind.errors import MissingDependencyError, SkipTestError
+
+try:
+    from scipy import stats
+except ImportError as e:
+    if "scipy" in str(e):
+        raise MissingDependencyError(
+            "Missing required package `scipy` for SeasonalDecompose. "
+            "Please run `pip install validmind[stats]` to use statistical tests",
+            required_dependencies=["scipy"],
+            extra="stats",
+        ) from e
+
+    raise e
 from validmind.logging import get_logger
 from validmind.vm_models import VMDataset
 
validmind/tests/data_validation/ShapiroWilk.py CHANGED
@@ -6,9 +6,22 @@
 from typing import Tuple
 
 import pandas as pd
-from scipy import stats
 
 from validmind import RawData, tags, tasks
+from validmind.errors import MissingDependencyError
+
+try:
+    from scipy import stats
+except ImportError as e:
+    if "scipy" in str(e):
+        raise MissingDependencyError(
+            "Missing required package `scipy` for ShapiroWilk. "
+            "Please run `pip install validmind[stats]` to use statistical tests",
+            required_dependencies=["scipy"],
+            extra="stats",
+        ) from e
+
+    raise e
 
 
 @tasks("classification", "regression")
validmind/tests/data_validation/TimeSeriesDescriptiveStatistics.py CHANGED
@@ -3,9 +3,22 @@
 # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
 
 import pandas as pd
-from scipy.stats import kurtosis, skew
 
 from validmind import tags, tasks
+from validmind.errors import MissingDependencyError
+
+try:
+    from scipy.stats import kurtosis, skew
+except ImportError as e:
+    if "scipy" in str(e):
+        raise MissingDependencyError(
+            "Missing required package `scipy` for TimeSeriesDescriptiveStatistics. "
+            "Please run `pip install validmind[stats]` to use statistical tests",
+            required_dependencies=["scipy"],
+            extra="stats",
+        ) from e
+
+    raise e
 
 
 @tags("time_series_data", "analysis")
validmind/tests/data_validation/WOEBinPlots.py CHANGED
@@ -9,7 +9,20 @@ import numpy as np
 import pandas as pd
 import plotly.express as px
 import plotly.graph_objects as go
-import scorecardpy as sc
+
+from validmind.errors import MissingDependencyError
+
+try:
+    import scorecardpy as sc
+except ImportError as e:
+    if "scorecardpy" in str(e):
+        raise MissingDependencyError(
+            "Missing required package `scorecardpy` for WOEBinPlots. "
+            "Please run `pip install validmind[credit_risk]` to use these tests",
+            required_dependencies=["scorecardpy"],
+            extra="credit_risk",
+        ) from e
+    raise e
 from plotly.subplots import make_subplots
 
 from validmind import RawData, tags, tasks
validmind/tests/data_validation/WOEBinTable.py CHANGED
@@ -5,12 +5,23 @@
 from typing import Dict, Tuple
 
 import pandas as pd
-import scorecardpy as sc
 
 from validmind import RawData, tags, tasks
-from validmind.errors import SkipTestError
+from validmind.errors import MissingDependencyError, SkipTestError
 from validmind.vm_models import VMDataset
 
+try:
+    import scorecardpy as sc
+except ImportError as e:
+    if "scorecardpy" in str(e):
+        raise MissingDependencyError(
+            "Missing required package `scorecardpy` for WOEBinTable. "
+            "Please run `pip install validmind[credit_risk]` to use these tests",
+            required_dependencies=["scorecardpy"],
+            extra="credit_risk",
+        ) from e
+    raise e
+
 
 @tags("tabular_data", "categorical_data")
 @tasks("classification")
validmind/tests/data_validation/ZivotAndrewsArch.py CHANGED
@@ -5,14 +5,25 @@
 from typing import Any, Dict, Tuple
 
 import pandas as pd
-from arch.unitroot import ZivotAndrews
 from numpy.linalg import LinAlgError
 
 from validmind import RawData, tags, tasks
-from validmind.errors import SkipTestError
+from validmind.errors import MissingDependencyError, SkipTestError
 from validmind.logging import get_logger
 from validmind.vm_models import VMDataset
 
+try:
+    from arch.unitroot import ZivotAndrews
+except ImportError as e:
+    if "arch" in str(e):
+        raise MissingDependencyError(
+            "Missing required package `arch` for ZivotAndrewsArch. "
+            "Please run `pip install validmind[stats]` to use statistical tests",
+            required_dependencies=["arch"],
+            extra="stats",
+        ) from e
+    raise e
+
 logger = get_logger(__name__)
 
 
validmind/tests/data_validation/nlp/CommonWords.py CHANGED
@@ -5,13 +5,25 @@ from collections import Counter
 from collections import Counter
 from typing import Tuple
 
-import nltk
 import plotly.graph_objects as go
-from nltk.corpus import stopwords
 
 from validmind import RawData, tags, tasks
+from validmind.errors import MissingDependencyError
 from validmind.vm_models import VMDataset
 
+try:
+    import nltk
+    from nltk.corpus import stopwords
+except ImportError as e:
+    if "nltk" in str(e).lower():
+        raise MissingDependencyError(
+            "Missing required package `nltk` for CommonWords. "
+            "Please run `pip install validmind[nlp]` to use NLP tests",
+            required_dependencies=["nltk"],
+            extra="nlp",
+        ) from e
+    raise e
+
 
 @tags("nlp", "text_data", "visualization", "frequency_analysis")
 @tasks("text_classification", "text_summarization")
validmind/tests/data_validation/nlp/LanguageDetection.py CHANGED
@@ -6,9 +6,22 @@ from typing import Tuple
 
 import plotly.express as px
 import plotly.graph_objects as go
-from langdetect import LangDetectException, detect
 
 from validmind import RawData, tags, tasks
+from validmind.errors import MissingDependencyError
+
+try:
+    from langdetect import LangDetectException, detect
+except ImportError as e:
+    if "langdetect" in str(e):
+        raise MissingDependencyError(
+            "Missing required package `langdetect` for LanguageDetection. "
+            "Please run `pip install validmind[nlp]` to use NLP tests",
+            required_dependencies=["langdetect"],
+            extra="nlp",
+        ) from e
+
+    raise e
 
 
 @tags("nlp", "text_data", "visualization")
validmind/tests/data_validation/nlp/PolarityAndSubjectivity.py CHANGED
@@ -8,9 +8,21 @@ from typing import Dict, Tuple
 import pandas as pd
 import plotly.express as px
 import plotly.graph_objects as go
-from textblob import TextBlob
 
 from validmind import RawData, tags, tasks
+from validmind.errors import MissingDependencyError
+
+try:
+    from textblob import TextBlob
+except ImportError as e:
+    if "textblob" in str(e).lower():
+        raise MissingDependencyError(
+            "Missing required package `textblob` for PolarityAndSubjectivity. "
+            "Please run `pip install validmind[nlp]` to use NLP tests",
+            required_dependencies=["textblob"],
+            extra="nlp",
+        ) from e
+    raise e
 
 
 @tags("nlp", "text_data", "data_validation")
validmind/tests/data_validation/nlp/Sentiment.py CHANGED
@@ -8,9 +8,21 @@ from typing import Tuple
 import matplotlib.pyplot as plt
 import nltk
 import seaborn as sns
-from nltk.sentiment import SentimentIntensityAnalyzer
 
 from validmind import RawData, tags, tasks
+from validmind.errors import MissingDependencyError
+
+try:
+    from nltk.sentiment import SentimentIntensityAnalyzer
+except ImportError as e:
+    if "nltk" in str(e).lower():
+        raise MissingDependencyError(
+            "Missing required package `nltk` for Sentiment. "
+            "Please run `pip install validmind[nlp]` to use NLP tests",
+            required_dependencies=["nltk"],
+            extra="nlp",
+        ) from e
+    raise e
 
 
 @tags("nlp", "text_data", "data_validation")
validmind/tests/data_validation/nlp/StopWords.py CHANGED
@@ -9,14 +9,26 @@ Threshold based tests
 from collections import defaultdict
 from typing import Dict, Tuple
 
-import nltk
 import pandas as pd
 import plotly.graph_objects as go
-from nltk.corpus import stopwords
 
 from validmind import RawData, tags, tasks
+from validmind.errors import MissingDependencyError
 from validmind.vm_models import VMDataset
 
+try:
+    import nltk
+    from nltk.corpus import stopwords
+except ImportError as e:
+    if "nltk" in str(e).lower():
+        raise MissingDependencyError(
+            "Missing required package `nltk` for StopWords. "
+            "Please run `pip install validmind[nlp]` to use NLP tests",
+            required_dependencies=["nltk"],
+            extra="nlp",
+        ) from e
+    raise e
+
 
 @tags("nlp", "text_data", "frequency_analysis", "visualization")
 @tasks("text_classification", "text_summarization")
validmind/tests/data_validation/nlp/TextDescription.py CHANGED
@@ -5,15 +5,27 @@
 import string
 from typing import Tuple
 
-import nltk
 import pandas as pd
 import plotly.express as px
 import plotly.graph_objects as go
-from nltk.corpus import stopwords
 
 from validmind import RawData, tags, tasks
+from validmind.errors import MissingDependencyError
 from validmind.vm_models import VMDataset
 
+try:
+    import nltk
+    from nltk.corpus import stopwords
+except ImportError as e:
+    if "nltk" in str(e).lower():
+        raise MissingDependencyError(
+            "Missing required package `nltk` for TextDescription. "
+            "Please run `pip install validmind[nlp]` to use NLP tests",
+            required_dependencies=["nltk"],
+            extra="nlp",
+        ) from e
+    raise e
+
 
 def create_metrics_df(df, text_column, unwanted_tokens, lang):
     stop_words = set(word.lower() for word in stopwords.words(lang))
validmind/tests/data_validation/nlp/Toxicity.py CHANGED
@@ -4,11 +4,23 @@
 
 from typing import Tuple
 
-import evaluate
 import matplotlib.pyplot as plt
 import seaborn as sns
 
 from validmind import RawData, tags, tasks
+from validmind.errors import MissingDependencyError
+
+try:
+    import evaluate
+except ImportError as e:
+    if "evaluate" in str(e):
+        raise MissingDependencyError(
+            "Missing required package `evaluate` for Toxicity. "
+            "Please run `pip install validmind[nlp]` to use NLP tests",
+            required_dependencies=["evaluate"],
+            extra="nlp",
+        ) from e
+    raise e
 
 
 @tags("nlp", "text_data", "data_validation")
validmind/tests/model_validation/BertScore.py CHANGED
@@ -4,14 +4,26 @@
 
 from typing import Tuple
 
-import evaluate
 import pandas as pd
 import plotly.graph_objects as go
 
 from validmind import RawData, tags, tasks
+from validmind.errors import MissingDependencyError
 from validmind.tests.utils import validate_prediction
 from validmind.vm_models import VMDataset, VMModel
 
+try:
+    import evaluate
+except ImportError as e:
+    if "evaluate" in str(e):
+        raise MissingDependencyError(
+            "Missing required package `evaluate` for BertScore. "
+            "Please run `pip install validmind[nlp]` to use NLP tests",
+            required_dependencies=["evaluate"],
+            extra="nlp",
+        ) from e
+    raise e
+
 
 @tags("nlp", "text_data", "visualization")
 @tasks("text_classification", "text_summarization")
@@ -75,7 +87,6 @@ def BertScore(
     # Ensure equal lengths and get truncated data if necessary
     y_true, y_pred = validate_prediction(y_true, y_pred)
 
-    # Load the BERT evaluation metric
     bert = evaluate.load("bertscore")
 
     # Compute the BERT score