validmind 2.8.29__py3-none-any.whl → 2.10.0rc1__py3-none-any.whl

This diff compares the contents of two publicly released package versions as they appear in their public registry. It is provided for informational purposes only.
Files changed (71)
  1. validmind/__init__.py +16 -5
  2. validmind/__version__.py +1 -1
  3. validmind/ai/utils.py +4 -24
  4. validmind/api_client.py +6 -17
  5. validmind/datasets/credit_risk/lending_club.py +13 -1
  6. validmind/datasets/nlp/cnn_dailymail.py +15 -1
  7. validmind/logging.py +48 -0
  8. validmind/tests/__init__.py +2 -0
  9. validmind/tests/__types__.py +18 -0
  10. validmind/tests/data_validation/ChiSquaredFeaturesTable.py +14 -2
  11. validmind/tests/data_validation/DickeyFullerGLS.py +13 -2
  12. validmind/tests/data_validation/PhillipsPerronArch.py +13 -2
  13. validmind/tests/data_validation/SeasonalDecompose.py +14 -2
  14. validmind/tests/data_validation/ShapiroWilk.py +14 -1
  15. validmind/tests/data_validation/TimeSeriesDescriptiveStatistics.py +14 -1
  16. validmind/tests/data_validation/WOEBinPlots.py +14 -1
  17. validmind/tests/data_validation/WOEBinTable.py +13 -2
  18. validmind/tests/data_validation/ZivotAndrewsArch.py +13 -2
  19. validmind/tests/data_validation/nlp/CommonWords.py +14 -2
  20. validmind/tests/data_validation/nlp/LanguageDetection.py +14 -1
  21. validmind/tests/data_validation/nlp/PolarityAndSubjectivity.py +13 -1
  22. validmind/tests/data_validation/nlp/Sentiment.py +13 -1
  23. validmind/tests/data_validation/nlp/StopWords.py +14 -2
  24. validmind/tests/data_validation/nlp/TextDescription.py +14 -2
  25. validmind/tests/data_validation/nlp/Toxicity.py +13 -1
  26. validmind/tests/model_validation/BertScore.py +13 -2
  27. validmind/tests/model_validation/BleuScore.py +13 -2
  28. validmind/tests/model_validation/ContextualRecall.py +13 -1
  29. validmind/tests/model_validation/MeteorScore.py +13 -2
  30. validmind/tests/model_validation/ModelPredictionResiduals.py +14 -1
  31. validmind/tests/model_validation/RegardScore.py +13 -2
  32. validmind/tests/model_validation/RougeScore.py +14 -1
  33. validmind/tests/model_validation/TimeSeriesPredictionWithCI.py +14 -1
  34. validmind/tests/model_validation/ToxicityScore.py +13 -1
  35. validmind/tests/model_validation/sklearn/KMeansClustersOptimization.py +14 -2
  36. validmind/tests/model_validation/sklearn/SHAPGlobalImportance.py +13 -2
  37. validmind/tests/model_validation/statsmodels/RegressionCoeffs.py +14 -2
  38. validmind/tests/ongoing_monitoring/ClassDiscriminationDrift.py +14 -1
  39. validmind/tests/ongoing_monitoring/PredictionProbabilitiesHistogramDrift.py +14 -1
  40. validmind/tests/ongoing_monitoring/ScorecardHistogramDrift.py +14 -1
  41. validmind/tests/ongoing_monitoring/TargetPredictionDistributionPlot.py +14 -1
  42. validmind/tests/output.py +9 -2
  43. validmind/tests/plots/BoxPlot.py +260 -0
  44. validmind/tests/plots/CorrelationHeatmap.py +235 -0
  45. validmind/tests/plots/HistogramPlot.py +233 -0
  46. validmind/tests/plots/ViolinPlot.py +125 -0
  47. validmind/tests/plots/__init__.py +0 -0
  48. validmind/tests/stats/CorrelationAnalysis.py +251 -0
  49. validmind/tests/stats/DescriptiveStats.py +197 -0
  50. validmind/tests/stats/NormalityTests.py +147 -0
  51. validmind/tests/stats/OutlierDetection.py +173 -0
  52. validmind/tests/stats/__init__.py +0 -0
  53. validmind/unit_metrics/classification/individual/AbsoluteError.py +42 -0
  54. validmind/unit_metrics/classification/individual/BrierScore.py +56 -0
  55. validmind/unit_metrics/classification/individual/CalibrationError.py +77 -0
  56. validmind/unit_metrics/classification/individual/ClassBalance.py +65 -0
  57. validmind/unit_metrics/classification/individual/Confidence.py +52 -0
  58. validmind/unit_metrics/classification/individual/Correctness.py +41 -0
  59. validmind/unit_metrics/classification/individual/LogLoss.py +61 -0
  60. validmind/unit_metrics/classification/individual/OutlierScore.py +86 -0
  61. validmind/unit_metrics/classification/individual/ProbabilityError.py +54 -0
  62. validmind/unit_metrics/classification/individual/Uncertainty.py +60 -0
  63. validmind/unit_metrics/classification/individual/__init__.py +0 -0
  64. validmind/vm_models/dataset/dataset.py +147 -1
  65. validmind/vm_models/result/result.py +30 -6
  66. validmind-2.10.0rc1.dist-info/METADATA +845 -0
  67. {validmind-2.8.29.dist-info → validmind-2.10.0rc1.dist-info}/RECORD +70 -49
  68. validmind-2.8.29.dist-info/METADATA +0 -137
  69. {validmind-2.8.29.dist-info → validmind-2.10.0rc1.dist-info}/LICENSE +0 -0
  70. {validmind-2.8.29.dist-info → validmind-2.10.0rc1.dist-info}/WHEEL +0 -0
  71. {validmind-2.8.29.dist-info → validmind-2.10.0rc1.dist-info}/entry_points.txt +0 -0
validmind/__init__.py CHANGED
@@ -32,15 +32,21 @@ After you have pasted the code snippet into your development source code and exe
 """
 import threading
 import warnings
+from importlib import metadata
 
-import pkg_resources
 from IPython.display import HTML, display
 
 # Ignore Numba warnings. We are not requiring this package directly
-from numba.core.errors import NumbaDeprecationWarning, NumbaPendingDeprecationWarning
+try:
+    from numba.core.errors import (
+        NumbaDeprecationWarning,
+        NumbaPendingDeprecationWarning,
+    )
 
-warnings.simplefilter("ignore", category=NumbaDeprecationWarning)
-warnings.simplefilter("ignore", category=NumbaPendingDeprecationWarning)
+    warnings.simplefilter("ignore", category=NumbaDeprecationWarning)
+    warnings.simplefilter("ignore", category=NumbaPendingDeprecationWarning)
+except ImportError:
+    ...
 
 from .__version__ import __version__  # noqa: E402
 from .api_client import init, log_metric, log_text, reload
@@ -81,7 +87,12 @@ def check_version():
     # get the installed vs running version of validmind
     # to make sure we are using the latest installed version
     # in case user has updated the package but forgot to restart the kernel
-    installed = pkg_resources.get_distribution("validmind").version
+    try:
+        installed = metadata.version("validmind")
+    except metadata.PackageNotFoundError:
+        # Package metadata not found, skip version check
+        return
+
     running = __version__
 
     if parse_version(installed) > parse_version(running):
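
Note: the version check now reads the installed distribution's metadata via importlib.metadata instead of the deprecated pkg_resources API. A minimal standalone sketch of the same comparison (not the library code verbatim; it assumes the packaging library is available for version parsing):

    from importlib import metadata
    from packaging.version import parse

    import validmind

    try:
        installed = metadata.version("validmind")  # version recorded in the dist metadata
    except metadata.PackageNotFoundError:
        installed = None  # no metadata found; skip the check, mirroring the new early return

    if installed and parse(installed) > parse(validmind.__version__):
        print("A newer validmind is installed; restart the kernel to pick it up.")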
validmind/__version__.py CHANGED
@@ -1 +1 @@
-__version__ = "2.8.28"
+__version__ = "2.10.0-rc.1"
validmind/ai/utils.py CHANGED
@@ -3,9 +3,8 @@
 # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
 
 import os
-from urllib.parse import urljoin
 
-from openai import AzureOpenAI, Client, OpenAI
+from openai import AzureOpenAI, OpenAI
 
 from ..logging import get_logger
 from ..utils import md_to_html
@@ -83,28 +82,9 @@ def get_client_and_model():
         logger.debug(f"Using Azure OpenAI {__model} for generating descriptions")
 
     else:
-        try:
-            # TODO: fix circular import
-            from ..api_client import get_ai_key, get_api_host
-
-            response = get_ai_key()
-            __client = Client(
-                base_url=(
-                    # TODO: improve this to be a bit more dynamic
-                    "http://localhost:4000/genai"
-                    if "localhost" in get_api_host()
-                    else urljoin(get_api_host(), "/genai")
-                ),
-                api_key=response["key"],
-            )
-            __model = "gpt-4o"  # TODO: backend should tell us which model to use
-            logger.debug(f"Using ValidMind {__model} for generating descriptions")
-        except Exception as e:
-            logger.debug(f"Failed to get API key: {e}")
-            raise ValueError(
-                "OPENAI_API_KEY, AZURE_OPENAI_KEY must be set, or your account "
-                "must be setup to use ValidMind's LLM in order to use LLM features"
-            )
+        raise ValueError(
+            "OPENAI_API_KEY, AZURE_OPENAI_KEY must be setup to use LLM features"
+        )
 
     return __client, __model
 
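
Note: with the ValidMind-hosted LLM fallback removed, LLM-backed features now require your own OpenAI or Azure OpenAI credentials before the client is created. A minimal sketch, assuming the plain-OpenAI path (the Azure path also needs its endpoint settings, which are not shown in this diff):

    import os

    # The error message in this diff names OPENAI_API_KEY and AZURE_OPENAI_KEY
    # as the supported credentials; set one before using LLM features.
    os.environ["OPENAI_API_KEY"] = "sk-..."  # placeholder value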
validmind/api_client.py CHANGED
@@ -22,7 +22,7 @@ from ipywidgets import HTML, Accordion
 
 from .client_config import client_config
 from .errors import MissingAPICredentialsError, MissingModelIdError, raise_api_error
-from .logging import get_logger, init_sentry, send_single_error
+from .logging import get_logger, init_sentry, log_api_operation, send_single_error
 from .utils import NumpyEncoder, is_html, md_to_html, run_async
 from .vm_models import Figure
 
@@ -85,7 +85,7 @@ def _get_session() -> aiohttp.ClientSession:
     if not __api_session or __api_session.closed:
         __api_session = aiohttp.ClientSession(
            headers=_get_api_headers(),
-            timeout=aiohttp.ClientTimeout(total=30),
+            timeout=aiohttp.ClientTimeout(total=int(os.getenv("VM_API_TIMEOUT", 30))),
         )
 
     return __api_session
@@ -304,6 +304,10 @@ async def alog_metadata(
         raise e
 
 
+@log_api_operation(
+    operation_name="Sending figure to ValidMind API",
+    extract_key=lambda figure: figure.key,
+)
 async def alog_figure(figure: Figure) -> Dict[str, Any]:
     """Logs a figure.
 
@@ -525,21 +529,6 @@ def log_metric(
     )
 
 
-def get_ai_key() -> Dict[str, Any]:
-    """Calls the API to get an API key for our LLM proxy."""
-    r = requests.get(
-        url=_get_url("ai/key"),
-        headers=_get_api_headers(),
-    )
-
-    if r.status_code != 200:
-        # TODO: improve error handling when there's no Open AI API or AI key available
-        # logger.error("Could not get AI key from ValidMind API")
-        raise_api_error(r.text)
-
-    return r.json()
-
-
 def generate_test_result_description(test_result_data: Dict[str, Any]) -> str:
     r = requests.post(
         url=_get_url("ai/generate/test_result_description"),
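
Note: the HTTP session timeout is no longer hard-coded at 30 seconds; it is read from the VM_API_TIMEOUT environment variable (whole seconds) when the session is first created. A sketch of raising it for slow connections (the vm.init arguments are placeholders, not taken from this diff):

    import os

    os.environ["VM_API_TIMEOUT"] = "120"  # read by _get_session() the first time a session is built

    import validmind as vm

    # vm.init(api_host="...", api_key="...", api_secret="...", model="...")  # credentials elided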
validmind/datasets/credit_risk/lending_club.py CHANGED
@@ -9,13 +9,25 @@ from typing import Any, Dict, Optional, Tuple
 
 import numpy as np
 import pandas as pd
-import scorecardpy as sc
 import statsmodels.api as sm
 import xgboost as xgb
 from sklearn.ensemble import RandomForestClassifier
 from sklearn.model_selection import train_test_split
 
 import validmind as vm
+from validmind.errors import MissingDependencyError
+
+try:
+    import scorecardpy as sc
+except ImportError as e:
+    if "scorecardpy" in str(e):
+        raise MissingDependencyError(
+            "Missing required package `scorecardpy` for credit risk demos. "
+            "Please run `pip install validmind[credit_risk]` or `pip install scorecardpy`.",
+            required_dependencies=["scorecardpy"],
+            extra="credit_risk",
+        ) from e
+    raise e
 
 current_path = os.path.dirname(os.path.abspath(__file__))
 dataset_path = os.path.join(current_path, "datasets")
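
Note: this guarded-import pattern (lazy import, re-raised as MissingDependencyError naming the pip extra that provides the package) repeats across the dataset and test modules below. A sketch of what a caller sees when the optional dependency is absent; the except handling here is illustrative, not part of the library:

    from validmind.errors import MissingDependencyError

    try:
        from validmind.datasets.credit_risk import lending_club
    except MissingDependencyError as err:
        # The message points at the extra to install, e.g.:
        #   pip install "validmind[credit_risk]"   (or: pip install scorecardpy)
        print(err)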
validmind/datasets/nlp/cnn_dailymail.py CHANGED
@@ -7,10 +7,11 @@ import textwrap
 from typing import Optional, Tuple
 
 import pandas as pd
-from datasets import load_dataset
 from IPython.display import HTML, display
 from tabulate import tabulate
 
+from validmind.errors import MissingDependencyError
+
 # Define column names
 text_column = "article"
 target_column = "highlights"
@@ -37,6 +38,19 @@ def load_data(
         Tuple containing (train_df, test_df) DataFrames with the loaded data.
     """
     if source == "online":
+        try:
+            from datasets import load_dataset
+        except ImportError as e:
+            if "datasets" in str(e):
+                raise MissingDependencyError(
+                    "Missing required package `datasets` for CNN Daily Mail. "
+                    "Please run `pip install validmind[datasets]` or "
+                    "`pip install datasets` to use CNN Daily Mail dataset",
+                    required_dependencies=["datasets"],
+                    extra="datasets",
+                ) from e
+            raise e
+
         # Load online data without predictions
         cnn_dataset = load_dataset("cnn_dailymail", "3.0.0")
         train_df = cnn_dataset["train"].to_pandas()
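
Note: because `datasets` is now imported inside load_data, the module imports cleanly without it; the dependency is only needed when pulling the online source. A usage sketch (argument and return names follow the signature and docstring visible above):

    from validmind.datasets.nlp import cnn_dailymail

    # Requires `pip install validmind[datasets]` (or `pip install datasets`);
    # otherwise load_data raises MissingDependencyError as shown in the hunk above.
    train_df, test_df = cnn_dailymail.load_data(source="online")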
validmind/logging.py CHANGED
@@ -170,6 +170,54 @@ async def log_performance_async(
     return wrap
 
 
+def log_api_operation(
+    operation_name: Optional[str] = None,
+    logger: Optional[logging.Logger] = None,
+    extract_key: Optional[Callable] = None,
+    force: bool = False,
+) -> Callable[[F], F]:
+    """Decorator to log API operations like figure uploads.
+
+    Args:
+        operation_name (str, optional): The name of the operation. Defaults to function name.
+        logger (logging.Logger, optional): The logger to use. Defaults to None.
+        extract_key (Callable, optional): Function to extract a key from args for logging.
+        force (bool, optional): Whether to force logging even if env var is off.
+
+    Returns:
+        Callable: The decorated function.
+    """
+
+    def decorator(func: F) -> F:
+        # check if log level is set to debug
+        if _get_log_level() != logging.DEBUG and not force:
+            return func
+
+        nonlocal logger
+        if logger is None:
+            logger = get_logger()
+
+        nonlocal operation_name
+        if operation_name is None:
+            operation_name = func.__name__
+
+        async def wrapped(*args: Any, **kwargs: Any) -> Any:
+            # Try to extract a meaningful identifier from the arguments
+            identifier = ""
+            if extract_key and args:
+                try:
+                    identifier = f": {extract_key(args[0])}"
+                except (AttributeError, IndexError):
+                    pass
+
+            logger.debug(f"{operation_name}{identifier}")
+            return await func(*args, **kwargs)
+
+        return wrapped
+
+    return decorator
+
+
 def send_single_error(error: Exception) -> None:
     """Send a single error to Sentry.
 
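
Note: log_api_operation is a new debug-only decorator for async API calls. Below DEBUG log level (and without force=True) it returns the function untouched; otherwise it logs the operation name plus an identifier pulled from the first positional argument before awaiting the call. A usage sketch with a hypothetical coroutine, for illustration only:

    from validmind.logging import log_api_operation


    @log_api_operation(
        operation_name="Uploading artifact",            # defaults to the function name if omitted
        extract_key=lambda artifact: artifact["id"],    # identifier taken from the first positional arg
    )
    async def upload_artifact(artifact: dict) -> None:  # hypothetical function, not part of the library
        ...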
validmind/tests/__init__.py CHANGED
@@ -43,6 +43,8 @@ __all__ = [
     "data_validation",
     "model_validation",
     "prompt_validation",
+    "plots",
+    "stats",
     "list_tests",
     "load_test",
     "describe_test",
validmind/tests/__types__.py CHANGED
@@ -187,6 +187,10 @@ TestID = Union[
     "validmind.ongoing_monitoring.ScoreBandsDrift",
     "validmind.ongoing_monitoring.ScorecardHistogramDrift",
     "validmind.ongoing_monitoring.TargetPredictionDistributionPlot",
+    "validmind.plots.BoxPlot",
+    "validmind.plots.CorrelationHeatmap",
+    "validmind.plots.HistogramPlot",
+    "validmind.plots.ViolinPlot",
     "validmind.prompt_validation.Bias",
     "validmind.prompt_validation.Clarity",
     "validmind.prompt_validation.Conciseness",
@@ -194,11 +198,25 @@ TestID = Union[
     "validmind.prompt_validation.NegativeInstruction",
     "validmind.prompt_validation.Robustness",
     "validmind.prompt_validation.Specificity",
+    "validmind.stats.CorrelationAnalysis",
+    "validmind.stats.DescriptiveStats",
+    "validmind.stats.NormalityTests",
+    "validmind.stats.OutlierDetection",
     "validmind.unit_metrics.classification.Accuracy",
     "validmind.unit_metrics.classification.F1",
     "validmind.unit_metrics.classification.Precision",
     "validmind.unit_metrics.classification.ROC_AUC",
     "validmind.unit_metrics.classification.Recall",
+    "validmind.unit_metrics.classification.individual.AbsoluteError",
+    "validmind.unit_metrics.classification.individual.BrierScore",
+    "validmind.unit_metrics.classification.individual.CalibrationError",
+    "validmind.unit_metrics.classification.individual.ClassBalance",
+    "validmind.unit_metrics.classification.individual.Confidence",
+    "validmind.unit_metrics.classification.individual.Correctness",
+    "validmind.unit_metrics.classification.individual.LogLoss",
+    "validmind.unit_metrics.classification.individual.OutlierScore",
+    "validmind.unit_metrics.classification.individual.ProbabilityError",
+    "validmind.unit_metrics.classification.individual.Uncertainty",
     "validmind.unit_metrics.regression.AdjustedRSquaredScore",
     "validmind.unit_metrics.regression.GiniCoefficient",
     "validmind.unit_metrics.regression.HuberLoss",
validmind/tests/data_validation/ChiSquaredFeaturesTable.py CHANGED
@@ -4,10 +4,22 @@
 
 
 import pandas as pd
-from scipy.stats import chi2_contingency
 
 from validmind import tags, tasks
-from validmind.errors import SkipTestError
+from validmind.errors import MissingDependencyError, SkipTestError
+
+try:
+    from scipy.stats import chi2_contingency
+except ImportError as e:
+    if "scipy" in str(e):
+        raise MissingDependencyError(
+            "Missing required package `scipy` for ChiSquaredFeaturesTable. "
+            "Please run `pip install validmind[stats]` to use statistical tests",
+            required_dependencies=["scipy"],
+            extra="stats",
+        ) from e
+
+    raise e
 
 
 @tags("tabular_data", "categorical_data", "statistical_test")
validmind/tests/data_validation/DickeyFullerGLS.py CHANGED
@@ -5,14 +5,25 @@
 from typing import Any, Dict, Tuple
 
 import pandas as pd
-from arch.unitroot import DFGLS
 from numpy.linalg import LinAlgError
 
 from validmind import RawData, tags, tasks
-from validmind.errors import SkipTestError
+from validmind.errors import MissingDependencyError, SkipTestError
 from validmind.logging import get_logger
 from validmind.vm_models import VMDataset
 
+try:
+    from arch.unitroot import DFGLS
+except ImportError as e:
+    if "arch" in str(e):
+        raise MissingDependencyError(
+            "Missing required package `arch` for DickeyFullerGLS. "
+            "Please run `pip install validmind[stats]` to use statistical tests",
+            required_dependencies=["arch"],
+            extra="stats",
+        ) from e
+    raise e
+
 logger = get_logger(__name__)
 
 
validmind/tests/data_validation/PhillipsPerronArch.py CHANGED
@@ -6,14 +6,25 @@ from typing import Any, Dict
 
 import numpy as np
 import pandas as pd
-from arch.unitroot import PhillipsPerron
 from numpy.linalg import LinAlgError
 
 from validmind import tags, tasks
-from validmind.errors import SkipTestError
+from validmind.errors import MissingDependencyError, SkipTestError
 from validmind.logging import get_logger
 from validmind.vm_models import VMDataset
 
+try:
+    from arch.unitroot import PhillipsPerron
+except ImportError as e:
+    if "arch" in str(e):
+        raise MissingDependencyError(
+            "Missing required package `arch` for PhillipsPerronArch. "
+            "Please run `pip install validmind[stats]` to use statistical tests",
+            required_dependencies=["arch"],
+            extra="stats",
+        ) from e
+    raise e
+
 logger = get_logger(__name__)
 
 
validmind/tests/data_validation/SeasonalDecompose.py CHANGED
@@ -9,11 +9,23 @@ import numpy as np
 import pandas as pd
 import plotly.graph_objects as go
 from plotly.subplots import make_subplots
-from scipy import stats
 from statsmodels.tsa.seasonal import seasonal_decompose
 
 from validmind import RawData, tags, tasks
-from validmind.errors import SkipTestError
+from validmind.errors import MissingDependencyError, SkipTestError
+
+try:
+    from scipy import stats
+except ImportError as e:
+    if "scipy" in str(e):
+        raise MissingDependencyError(
+            "Missing required package `scipy` for SeasonalDecompose. "
+            "Please run `pip install validmind[stats]` to use statistical tests",
+            required_dependencies=["scipy"],
+            extra="stats",
+        ) from e
+
+    raise e
 from validmind.logging import get_logger
 from validmind.vm_models import VMDataset
 
validmind/tests/data_validation/ShapiroWilk.py CHANGED
@@ -6,9 +6,22 @@
 from typing import Tuple
 
 import pandas as pd
-from scipy import stats
 
 from validmind import RawData, tags, tasks
+from validmind.errors import MissingDependencyError
+
+try:
+    from scipy import stats
+except ImportError as e:
+    if "scipy" in str(e):
+        raise MissingDependencyError(
+            "Missing required package `scipy` for ShapiroWilk. "
+            "Please run `pip install validmind[stats]` to use statistical tests",
+            required_dependencies=["scipy"],
+            extra="stats",
+        ) from e
+
+    raise e
 
 
 @tasks("classification", "regression")
validmind/tests/data_validation/TimeSeriesDescriptiveStatistics.py CHANGED
@@ -3,9 +3,22 @@
 # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
 
 import pandas as pd
-from scipy.stats import kurtosis, skew
 
 from validmind import tags, tasks
+from validmind.errors import MissingDependencyError
+
+try:
+    from scipy.stats import kurtosis, skew
+except ImportError as e:
+    if "scipy" in str(e):
+        raise MissingDependencyError(
+            "Missing required package `scipy` for TimeSeriesDescriptiveStatistics. "
+            "Please run `pip install validmind[stats]` to use statistical tests",
+            required_dependencies=["scipy"],
+            extra="stats",
+        ) from e
+
+    raise e
 
 
 @tags("time_series_data", "analysis")
validmind/tests/data_validation/WOEBinPlots.py CHANGED
@@ -9,7 +9,20 @@ import numpy as np
 import pandas as pd
 import plotly.express as px
 import plotly.graph_objects as go
-import scorecardpy as sc
+
+from validmind.errors import MissingDependencyError
+
+try:
+    import scorecardpy as sc
+except ImportError as e:
+    if "scorecardpy" in str(e):
+        raise MissingDependencyError(
+            "Missing required package `scorecardpy` for WOEBinPlots. "
+            "Please run `pip install validmind[credit_risk]` to use these tests",
+            required_dependencies=["scorecardpy"],
+            extra="credit_risk",
+        ) from e
+    raise e
 from plotly.subplots import make_subplots
 
 from validmind import RawData, tags, tasks
validmind/tests/data_validation/WOEBinTable.py CHANGED
@@ -5,12 +5,23 @@
 from typing import Dict, Tuple
 
 import pandas as pd
-import scorecardpy as sc
 
 from validmind import RawData, tags, tasks
-from validmind.errors import SkipTestError
+from validmind.errors import MissingDependencyError, SkipTestError
 from validmind.vm_models import VMDataset
 
+try:
+    import scorecardpy as sc
+except ImportError as e:
+    if "scorecardpy" in str(e):
+        raise MissingDependencyError(
+            "Missing required package `scorecardpy` for WOEBinTable. "
+            "Please run `pip install validmind[credit_risk]` to use these tests",
+            required_dependencies=["scorecardpy"],
+            extra="credit_risk",
+        ) from e
+    raise e
+
 
 @tags("tabular_data", "categorical_data")
 @tasks("classification")
validmind/tests/data_validation/ZivotAndrewsArch.py CHANGED
@@ -5,14 +5,25 @@
 from typing import Any, Dict, Tuple
 
 import pandas as pd
-from arch.unitroot import ZivotAndrews
 from numpy.linalg import LinAlgError
 
 from validmind import RawData, tags, tasks
-from validmind.errors import SkipTestError
+from validmind.errors import MissingDependencyError, SkipTestError
 from validmind.logging import get_logger
 from validmind.vm_models import VMDataset
 
+try:
+    from arch.unitroot import ZivotAndrews
+except ImportError as e:
+    if "arch" in str(e):
+        raise MissingDependencyError(
+            "Missing required package `arch` for ZivotAndrewsArch. "
+            "Please run `pip install validmind[stats]` to use statistical tests",
+            required_dependencies=["arch"],
+            extra="stats",
+        ) from e
+    raise e
+
 logger = get_logger(__name__)
 
 
validmind/tests/data_validation/nlp/CommonWords.py CHANGED
@@ -5,13 +5,25 @@
 from collections import Counter
 from typing import Tuple
 
-import nltk
 import plotly.graph_objects as go
-from nltk.corpus import stopwords
 
 from validmind import RawData, tags, tasks
+from validmind.errors import MissingDependencyError
 from validmind.vm_models import VMDataset
 
+try:
+    import nltk
+    from nltk.corpus import stopwords
+except ImportError as e:
+    if "nltk" in str(e).lower():
+        raise MissingDependencyError(
+            "Missing required package `nltk` for CommonWords. "
+            "Please run `pip install validmind[nlp]` to use NLP tests",
+            required_dependencies=["nltk"],
+            extra="nlp",
+        ) from e
+    raise e
+
 
 @tags("nlp", "text_data", "visualization", "frequency_analysis")
 @tasks("text_classification", "text_summarization")
validmind/tests/data_validation/nlp/LanguageDetection.py CHANGED
@@ -6,9 +6,22 @@ from typing import Tuple
 
 import plotly.express as px
 import plotly.graph_objects as go
-from langdetect import LangDetectException, detect
 
 from validmind import RawData, tags, tasks
+from validmind.errors import MissingDependencyError
+
+try:
+    from langdetect import LangDetectException, detect
+except ImportError as e:
+    if "langdetect" in str(e):
+        raise MissingDependencyError(
+            "Missing required package `langdetect` for LanguageDetection. "
+            "Please run `pip install validmind[nlp]` to use NLP tests",
+            required_dependencies=["langdetect"],
+            extra="nlp",
+        ) from e
+
+    raise e
 
 
 @tags("nlp", "text_data", "visualization")
validmind/tests/data_validation/nlp/PolarityAndSubjectivity.py CHANGED
@@ -8,9 +8,21 @@ from typing import Dict, Tuple
 import pandas as pd
 import plotly.express as px
 import plotly.graph_objects as go
-from textblob import TextBlob
 
 from validmind import RawData, tags, tasks
+from validmind.errors import MissingDependencyError
+
+try:
+    from textblob import TextBlob
+except ImportError as e:
+    if "textblob" in str(e).lower():
+        raise MissingDependencyError(
+            "Missing required package `textblob` for PolarityAndSubjectivity. "
+            "Please run `pip install validmind[nlp]` to use NLP tests",
+            required_dependencies=["textblob"],
+            extra="nlp",
+        ) from e
+    raise e
 
 
 @tags("nlp", "text_data", "data_validation")
validmind/tests/data_validation/nlp/Sentiment.py CHANGED
@@ -8,9 +8,21 @@ from typing import Tuple
 import matplotlib.pyplot as plt
 import nltk
 import seaborn as sns
-from nltk.sentiment import SentimentIntensityAnalyzer
 
 from validmind import RawData, tags, tasks
+from validmind.errors import MissingDependencyError
+
+try:
+    from nltk.sentiment import SentimentIntensityAnalyzer
+except ImportError as e:
+    if "nltk" in str(e).lower():
+        raise MissingDependencyError(
+            "Missing required package `nltk` for Sentiment. "
+            "Please run `pip install validmind[nlp]` to use NLP tests",
+            required_dependencies=["nltk"],
+            extra="nlp",
+        ) from e
+    raise e
 
 
 @tags("nlp", "text_data", "data_validation")
validmind/tests/data_validation/nlp/StopWords.py CHANGED
@@ -9,14 +9,26 @@ Threshold based tests
 from collections import defaultdict
 from typing import Dict, Tuple
 
-import nltk
 import pandas as pd
 import plotly.graph_objects as go
-from nltk.corpus import stopwords
 
 from validmind import RawData, tags, tasks
+from validmind.errors import MissingDependencyError
 from validmind.vm_models import VMDataset
 
+try:
+    import nltk
+    from nltk.corpus import stopwords
+except ImportError as e:
+    if "nltk" in str(e).lower():
+        raise MissingDependencyError(
+            "Missing required package `nltk` for StopWords. "
+            "Please run `pip install validmind[nlp]` to use NLP tests",
+            required_dependencies=["nltk"],
+            extra="nlp",
+        ) from e
+    raise e
+
 
 @tags("nlp", "text_data", "frequency_analysis", "visualization")
 @tasks("text_classification", "text_summarization")