validmind 2.8.29__py3-none-any.whl → 2.10.0rc1__py3-none-any.whl
This diff compares the contents of two publicly released versions of the package as published to a supported registry. It is provided for informational purposes only and reflects the changes between those versions as they appear in the public registry.
- validmind/__init__.py +16 -5
- validmind/__version__.py +1 -1
- validmind/ai/utils.py +4 -24
- validmind/api_client.py +6 -17
- validmind/datasets/credit_risk/lending_club.py +13 -1
- validmind/datasets/nlp/cnn_dailymail.py +15 -1
- validmind/logging.py +48 -0
- validmind/tests/__init__.py +2 -0
- validmind/tests/__types__.py +18 -0
- validmind/tests/data_validation/ChiSquaredFeaturesTable.py +14 -2
- validmind/tests/data_validation/DickeyFullerGLS.py +13 -2
- validmind/tests/data_validation/PhillipsPerronArch.py +13 -2
- validmind/tests/data_validation/SeasonalDecompose.py +14 -2
- validmind/tests/data_validation/ShapiroWilk.py +14 -1
- validmind/tests/data_validation/TimeSeriesDescriptiveStatistics.py +14 -1
- validmind/tests/data_validation/WOEBinPlots.py +14 -1
- validmind/tests/data_validation/WOEBinTable.py +13 -2
- validmind/tests/data_validation/ZivotAndrewsArch.py +13 -2
- validmind/tests/data_validation/nlp/CommonWords.py +14 -2
- validmind/tests/data_validation/nlp/LanguageDetection.py +14 -1
- validmind/tests/data_validation/nlp/PolarityAndSubjectivity.py +13 -1
- validmind/tests/data_validation/nlp/Sentiment.py +13 -1
- validmind/tests/data_validation/nlp/StopWords.py +14 -2
- validmind/tests/data_validation/nlp/TextDescription.py +14 -2
- validmind/tests/data_validation/nlp/Toxicity.py +13 -1
- validmind/tests/model_validation/BertScore.py +13 -2
- validmind/tests/model_validation/BleuScore.py +13 -2
- validmind/tests/model_validation/ContextualRecall.py +13 -1
- validmind/tests/model_validation/MeteorScore.py +13 -2
- validmind/tests/model_validation/ModelPredictionResiduals.py +14 -1
- validmind/tests/model_validation/RegardScore.py +13 -2
- validmind/tests/model_validation/RougeScore.py +14 -1
- validmind/tests/model_validation/TimeSeriesPredictionWithCI.py +14 -1
- validmind/tests/model_validation/ToxicityScore.py +13 -1
- validmind/tests/model_validation/sklearn/KMeansClustersOptimization.py +14 -2
- validmind/tests/model_validation/sklearn/SHAPGlobalImportance.py +13 -2
- validmind/tests/model_validation/statsmodels/RegressionCoeffs.py +14 -2
- validmind/tests/ongoing_monitoring/ClassDiscriminationDrift.py +14 -1
- validmind/tests/ongoing_monitoring/PredictionProbabilitiesHistogramDrift.py +14 -1
- validmind/tests/ongoing_monitoring/ScorecardHistogramDrift.py +14 -1
- validmind/tests/ongoing_monitoring/TargetPredictionDistributionPlot.py +14 -1
- validmind/tests/output.py +9 -2
- validmind/tests/plots/BoxPlot.py +260 -0
- validmind/tests/plots/CorrelationHeatmap.py +235 -0
- validmind/tests/plots/HistogramPlot.py +233 -0
- validmind/tests/plots/ViolinPlot.py +125 -0
- validmind/tests/plots/__init__.py +0 -0
- validmind/tests/stats/CorrelationAnalysis.py +251 -0
- validmind/tests/stats/DescriptiveStats.py +197 -0
- validmind/tests/stats/NormalityTests.py +147 -0
- validmind/tests/stats/OutlierDetection.py +173 -0
- validmind/tests/stats/__init__.py +0 -0
- validmind/unit_metrics/classification/individual/AbsoluteError.py +42 -0
- validmind/unit_metrics/classification/individual/BrierScore.py +56 -0
- validmind/unit_metrics/classification/individual/CalibrationError.py +77 -0
- validmind/unit_metrics/classification/individual/ClassBalance.py +65 -0
- validmind/unit_metrics/classification/individual/Confidence.py +52 -0
- validmind/unit_metrics/classification/individual/Correctness.py +41 -0
- validmind/unit_metrics/classification/individual/LogLoss.py +61 -0
- validmind/unit_metrics/classification/individual/OutlierScore.py +86 -0
- validmind/unit_metrics/classification/individual/ProbabilityError.py +54 -0
- validmind/unit_metrics/classification/individual/Uncertainty.py +60 -0
- validmind/unit_metrics/classification/individual/__init__.py +0 -0
- validmind/vm_models/dataset/dataset.py +147 -1
- validmind/vm_models/result/result.py +30 -6
- validmind-2.10.0rc1.dist-info/METADATA +845 -0
- {validmind-2.8.29.dist-info → validmind-2.10.0rc1.dist-info}/RECORD +70 -49
- validmind-2.8.29.dist-info/METADATA +0 -137
- {validmind-2.8.29.dist-info → validmind-2.10.0rc1.dist-info}/LICENSE +0 -0
- {validmind-2.8.29.dist-info → validmind-2.10.0rc1.dist-info}/WHEEL +0 -0
- {validmind-2.8.29.dist-info → validmind-2.10.0rc1.dist-info}/entry_points.txt +0 -0
validmind/__init__.py
CHANGED
@@ -32,15 +32,21 @@ After you have pasted the code snippet into your development source code and exe
 """
 import threading
 import warnings
+from importlib import metadata

-import pkg_resources
 from IPython.display import HTML, display

 # Ignore Numba warnings. We are not requiring this package directly
-
+try:
+    from numba.core.errors import (
+        NumbaDeprecationWarning,
+        NumbaPendingDeprecationWarning,
+    )

-warnings.simplefilter("ignore", category=NumbaDeprecationWarning)
-warnings.simplefilter("ignore", category=NumbaPendingDeprecationWarning)
+    warnings.simplefilter("ignore", category=NumbaDeprecationWarning)
+    warnings.simplefilter("ignore", category=NumbaPendingDeprecationWarning)
+except ImportError:
+    ...

 from .__version__ import __version__  # noqa: E402
 from .api_client import init, log_metric, log_text, reload
@@ -81,7 +87,12 @@ def check_version():
     # get the installed vs running version of validmind
     # to make sure we are using the latest installed version
     # in case user has updated the package but forgot to restart the kernel
-
+    try:
+        installed = metadata.version("validmind")
+    except metadata.PackageNotFoundError:
+        # Package metadata not found, skip version check
+        return
+
     running = __version__

     if parse_version(installed) > parse_version(running):
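Note: the hunks above replace the pkg_resources lookup with importlib.metadata. A minimal standalone sketch of the new pattern, assuming parse_version comes from the packaging package (the diff does not show that import):

    from importlib import metadata

    from packaging.version import parse as parse_version

    RUNNING = "2.10.0-rc.1"  # stand-in for validmind.__version__

    def warn_if_stale() -> None:
        # Resolve the installed distribution version; skip silently when the
        # package metadata is unavailable (e.g. a vendored or editable copy).
        try:
            installed = metadata.version("validmind")
        except metadata.PackageNotFoundError:
            return
        if parse_version(installed) > parse_version(RUNNING):
            print("A newer validmind is installed; restart the kernel to pick it up.")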
validmind/__version__.py
CHANGED
@@ -1 +1 @@
-__version__ = "2.8.29"
+__version__ = "2.10.0-rc.1"
validmind/ai/utils.py
CHANGED
@@ -3,9 +3,8 @@
 # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial

 import os
-from urllib.parse import urljoin

-from openai import AzureOpenAI,
+from openai import AzureOpenAI, OpenAI

 from ..logging import get_logger
 from ..utils import md_to_html
@@ -83,28 +82,9 @@ def get_client_and_model():
         logger.debug(f"Using Azure OpenAI {__model} for generating descriptions")

     else:
-
-
-
-
-            response = get_ai_key()
-            __client = Client(
-                base_url=(
-                    # TODO: improve this to be a bit more dynamic
-                    "http://localhost:4000/genai"
-                    if "localhost" in get_api_host()
-                    else urljoin(get_api_host(), "/genai")
-                ),
-                api_key=response["key"],
-            )
-            __model = "gpt-4o"  # TODO: backend should tell us which model to use
-            logger.debug(f"Using ValidMind {__model} for generating descriptions")
-        except Exception as e:
-            logger.debug(f"Failed to get API key: {e}")
-            raise ValueError(
-                "OPENAI_API_KEY, AZURE_OPENAI_KEY must be set, or your account "
-                "must be setup to use ValidMind's LLM in order to use LLM features"
-            )
+        raise ValueError(
+            "OPENAI_API_KEY, AZURE_OPENAI_KEY must be setup to use LLM features"
+        )

     return __client, __model

validmind/api_client.py
CHANGED
@@ -22,7 +22,7 @@ from ipywidgets import HTML, Accordion

 from .client_config import client_config
 from .errors import MissingAPICredentialsError, MissingModelIdError, raise_api_error
-from .logging import get_logger, init_sentry, send_single_error
+from .logging import get_logger, init_sentry, log_api_operation, send_single_error
 from .utils import NumpyEncoder, is_html, md_to_html, run_async
 from .vm_models import Figure

@@ -85,7 +85,7 @@ def _get_session() -> aiohttp.ClientSession:
     if not __api_session or __api_session.closed:
         __api_session = aiohttp.ClientSession(
             headers=_get_api_headers(),
-            timeout=aiohttp.ClientTimeout(total=30),
+            timeout=aiohttp.ClientTimeout(total=int(os.getenv("VM_API_TIMEOUT", 30))),
         )

     return __api_session
@@ -304,6 +304,10 @@ async def alog_metadata(
         raise e


+@log_api_operation(
+    operation_name="Sending figure to ValidMind API",
+    extract_key=lambda figure: figure.key,
+)
 async def alog_figure(figure: Figure) -> Dict[str, Any]:
     """Logs a figure.

@@ -525,21 +529,6 @@ def log_metric(
     )


-def get_ai_key() -> Dict[str, Any]:
-    """Calls the API to get an API key for our LLM proxy."""
-    r = requests.get(
-        url=_get_url("ai/key"),
-        headers=_get_api_headers(),
-    )
-
-    if r.status_code != 200:
-        # TODO: improve error handling when there's no Open AI API or AI key available
-        # logger.error("Could not get AI key from ValidMind API")
-        raise_api_error(r.text)
-
-    return r.json()
-
-
 def generate_test_result_description(test_result_data: Dict[str, Any]) -> str:
     r = requests.post(
         url=_get_url("ai/generate/test_result_description"),
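Note: the session timeout is now read from the VM_API_TIMEOUT environment variable, falling back to 30 seconds. A small usage sketch; the 120-second value is an arbitrary example:

    import os

    # Must be set before the aiohttp session is created, e.g. before calling vm.init().
    os.environ["VM_API_TIMEOUT"] = "120"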
validmind/datasets/credit_risk/lending_club.py
CHANGED
@@ -9,13 +9,25 @@ from typing import Any, Dict, Optional, Tuple

 import numpy as np
 import pandas as pd
-import scorecardpy as sc
 import statsmodels.api as sm
 import xgboost as xgb
 from sklearn.ensemble import RandomForestClassifier
 from sklearn.model_selection import train_test_split

 import validmind as vm
+from validmind.errors import MissingDependencyError
+
+try:
+    import scorecardpy as sc
+except ImportError as e:
+    if "scorecardpy" in str(e):
+        raise MissingDependencyError(
+            "Missing required package `scorecardpy` for credit risk demos. "
+            "Please run `pip install validmind[credit_risk]` or `pip install scorecardpy`.",
+            required_dependencies=["scorecardpy"],
+            extra="credit_risk",
+        ) from e
+    raise e

 current_path = os.path.dirname(os.path.abspath(__file__))
 dataset_path = os.path.join(current_path, "datasets")
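Note: this guarded-import pattern, raising MissingDependencyError with required_dependencies and an extra hint, recurs throughout the data_validation and NLP tests below. A standalone sketch of the idea; the stand-in exception class only approximates the real one in validmind.errors, whose exact signature is not shown in this diff:

    class MissingDependencyError(Exception):
        """Stand-in for validmind.errors.MissingDependencyError (assumed signature)."""

        def __init__(self, message, required_dependencies=None, extra=None):
            super().__init__(message)
            self.required_dependencies = required_dependencies or []
            self.extra = extra

    try:
        import scorecardpy  # noqa: F401
    except ImportError as e:
        # Only translate the error when the optional package itself is missing;
        # unrelated import failures are re-raised untouched.
        if "scorecardpy" in str(e):
            raise MissingDependencyError(
                "Missing required package `scorecardpy`. "
                "Please run `pip install validmind[credit_risk]`.",
                required_dependencies=["scorecardpy"],
                extra="credit_risk",
            ) from e
        raise e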
validmind/datasets/nlp/cnn_dailymail.py
CHANGED
@@ -7,10 +7,11 @@ import textwrap
 from typing import Optional, Tuple

 import pandas as pd
-from datasets import load_dataset
 from IPython.display import HTML, display
 from tabulate import tabulate

+from validmind.errors import MissingDependencyError
+
 # Define column names
 text_column = "article"
 target_column = "highlights"
@@ -37,6 +38,19 @@ def load_data(
         Tuple containing (train_df, test_df) DataFrames with the loaded data.
     """
     if source == "online":
+        try:
+            from datasets import load_dataset
+        except ImportError as e:
+            if "datasets" in str(e):
+                raise MissingDependencyError(
+                    "Missing required package `datasets` for CNN Daily Mail. "
+                    "Please run `pip install validmind[datasets]` or "
+                    "`pip install datasets` to use CNN Daily Mail dataset",
+                    required_dependencies=["datasets"],
+                    extra="datasets",
+                ) from e
+            raise e
+
         # Load online data without predictions
         cnn_dataset = load_dataset("cnn_dailymail", "3.0.0")
         train_df = cnn_dataset["train"].to_pandas()
validmind/logging.py
CHANGED
@@ -170,6 +170,54 @@ async def log_performance_async(
     return wrap


+def log_api_operation(
+    operation_name: Optional[str] = None,
+    logger: Optional[logging.Logger] = None,
+    extract_key: Optional[Callable] = None,
+    force: bool = False,
+) -> Callable[[F], F]:
+    """Decorator to log API operations like figure uploads.
+
+    Args:
+        operation_name (str, optional): The name of the operation. Defaults to function name.
+        logger (logging.Logger, optional): The logger to use. Defaults to None.
+        extract_key (Callable, optional): Function to extract a key from args for logging.
+        force (bool, optional): Whether to force logging even if env var is off.
+
+    Returns:
+        Callable: The decorated function.
+    """
+
+    def decorator(func: F) -> F:
+        # check if log level is set to debug
+        if _get_log_level() != logging.DEBUG and not force:
+            return func
+
+        nonlocal logger
+        if logger is None:
+            logger = get_logger()
+
+        nonlocal operation_name
+        if operation_name is None:
+            operation_name = func.__name__
+
+        async def wrapped(*args: Any, **kwargs: Any) -> Any:
+            # Try to extract a meaningful identifier from the arguments
+            identifier = ""
+            if extract_key and args:
+                try:
+                    identifier = f": {extract_key(args[0])}"
+                except (AttributeError, IndexError):
+                    pass
+
+            logger.debug(f"{operation_name}{identifier}")
+            return await func(*args, **kwargs)
+
+        return wrapped
+
+    return decorator
+
+
 def send_single_error(error: Exception) -> None:
     """Send a single error to Sentry.

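Note: per the hunk above, log_api_operation returns the coroutine unchanged unless the log level is DEBUG or force=True. A hedged usage sketch; upload_report and its argument are invented for illustration:

    import asyncio

    from validmind.logging import log_api_operation

    @log_api_operation(
        operation_name="Uploading report",
        extract_key=lambda report: report["name"],
        force=True,  # wrap and log even when the level is not DEBUG
    )
    async def upload_report(report: dict) -> None:
        ...

    asyncio.run(upload_report({"name": "credit_risk_summary"}))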
validmind/tests/__init__.py
CHANGED
validmind/tests/__types__.py
CHANGED
@@ -187,6 +187,10 @@ TestID = Union[
     "validmind.ongoing_monitoring.ScoreBandsDrift",
     "validmind.ongoing_monitoring.ScorecardHistogramDrift",
     "validmind.ongoing_monitoring.TargetPredictionDistributionPlot",
+    "validmind.plots.BoxPlot",
+    "validmind.plots.CorrelationHeatmap",
+    "validmind.plots.HistogramPlot",
+    "validmind.plots.ViolinPlot",
     "validmind.prompt_validation.Bias",
     "validmind.prompt_validation.Clarity",
     "validmind.prompt_validation.Conciseness",
@@ -194,11 +198,25 @@ TestID = Union[
     "validmind.prompt_validation.NegativeInstruction",
     "validmind.prompt_validation.Robustness",
     "validmind.prompt_validation.Specificity",
+    "validmind.stats.CorrelationAnalysis",
+    "validmind.stats.DescriptiveStats",
+    "validmind.stats.NormalityTests",
+    "validmind.stats.OutlierDetection",
     "validmind.unit_metrics.classification.Accuracy",
     "validmind.unit_metrics.classification.F1",
     "validmind.unit_metrics.classification.Precision",
     "validmind.unit_metrics.classification.ROC_AUC",
     "validmind.unit_metrics.classification.Recall",
+    "validmind.unit_metrics.classification.individual.AbsoluteError",
+    "validmind.unit_metrics.classification.individual.BrierScore",
+    "validmind.unit_metrics.classification.individual.CalibrationError",
+    "validmind.unit_metrics.classification.individual.ClassBalance",
+    "validmind.unit_metrics.classification.individual.Confidence",
+    "validmind.unit_metrics.classification.individual.Correctness",
+    "validmind.unit_metrics.classification.individual.LogLoss",
+    "validmind.unit_metrics.classification.individual.OutlierScore",
+    "validmind.unit_metrics.classification.individual.ProbabilityError",
+    "validmind.unit_metrics.classification.individual.Uncertainty",
     "validmind.unit_metrics.regression.AdjustedRSquaredScore",
     "validmind.unit_metrics.regression.GiniCoefficient",
     "validmind.unit_metrics.regression.HuberLoss",
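Note: these new TestID entries map to the plots, stats, and per-row unit metric modules added in this release. A hedged sketch of invoking one of them through the existing run_test entry point; vm_dataset stands for a VMDataset created earlier with vm.init_dataset(...), and the exact input keys are assumed:

    import validmind as vm

    # Run one of the new statistics tests against a previously registered dataset.
    result = vm.tests.run_test(
        "validmind.stats.DescriptiveStats",
        inputs={"dataset": vm_dataset},
    )
    result.log()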
validmind/tests/data_validation/ChiSquaredFeaturesTable.py
CHANGED
@@ -4,10 +4,22 @@


 import pandas as pd
-from scipy.stats import chi2_contingency

 from validmind import tags, tasks
-from validmind.errors import SkipTestError
+from validmind.errors import MissingDependencyError, SkipTestError
+
+try:
+    from scipy.stats import chi2_contingency
+except ImportError as e:
+    if "scipy" in str(e):
+        raise MissingDependencyError(
+            "Missing required package `scipy` for ChiSquaredFeaturesTable. "
+            "Please run `pip install validmind[stats]` to use statistical tests",
+            required_dependencies=["scipy"],
+            extra="stats",
+        ) from e
+
+    raise e


 @tags("tabular_data", "categorical_data", "statistical_test")
validmind/tests/data_validation/DickeyFullerGLS.py
CHANGED
@@ -5,14 +5,25 @@
 from typing import Any, Dict, Tuple

 import pandas as pd
-from arch.unitroot import DFGLS
 from numpy.linalg import LinAlgError

 from validmind import RawData, tags, tasks
-from validmind.errors import SkipTestError
+from validmind.errors import MissingDependencyError, SkipTestError
 from validmind.logging import get_logger
 from validmind.vm_models import VMDataset

+try:
+    from arch.unitroot import DFGLS
+except ImportError as e:
+    if "arch" in str(e):
+        raise MissingDependencyError(
+            "Missing required package `arch` for DickeyFullerGLS. "
+            "Please run `pip install validmind[stats]` to use statistical tests",
+            required_dependencies=["arch"],
+            extra="stats",
+        ) from e
+    raise e
+
 logger = get_logger(__name__)


validmind/tests/data_validation/PhillipsPerronArch.py
CHANGED
@@ -6,14 +6,25 @@ from typing import Any, Dict

 import numpy as np
 import pandas as pd
-from arch.unitroot import PhillipsPerron
 from numpy.linalg import LinAlgError

 from validmind import tags, tasks
-from validmind.errors import SkipTestError
+from validmind.errors import MissingDependencyError, SkipTestError
 from validmind.logging import get_logger
 from validmind.vm_models import VMDataset

+try:
+    from arch.unitroot import PhillipsPerron
+except ImportError as e:
+    if "arch" in str(e):
+        raise MissingDependencyError(
+            "Missing required package `arch` for PhillipsPerronArch. "
+            "Please run `pip install validmind[stats]` to use statistical tests",
+            required_dependencies=["arch"],
+            extra="stats",
+        ) from e
+    raise e
+
 logger = get_logger(__name__)


validmind/tests/data_validation/SeasonalDecompose.py
CHANGED
@@ -9,11 +9,23 @@ import numpy as np
 import pandas as pd
 import plotly.graph_objects as go
 from plotly.subplots import make_subplots
-from scipy import stats
 from statsmodels.tsa.seasonal import seasonal_decompose

 from validmind import RawData, tags, tasks
-from validmind.errors import SkipTestError
+from validmind.errors import MissingDependencyError, SkipTestError
+
+try:
+    from scipy import stats
+except ImportError as e:
+    if "scipy" in str(e):
+        raise MissingDependencyError(
+            "Missing required package `scipy` for SeasonalDecompose. "
+            "Please run `pip install validmind[stats]` to use statistical tests",
+            required_dependencies=["scipy"],
+            extra="stats",
+        ) from e
+
+    raise e
 from validmind.logging import get_logger
 from validmind.vm_models import VMDataset

validmind/tests/data_validation/ShapiroWilk.py
CHANGED
@@ -6,9 +6,22 @@
 from typing import Tuple

 import pandas as pd
-from scipy import stats

 from validmind import RawData, tags, tasks
+from validmind.errors import MissingDependencyError
+
+try:
+    from scipy import stats
+except ImportError as e:
+    if "scipy" in str(e):
+        raise MissingDependencyError(
+            "Missing required package `scipy` for ShapiroWilk. "
+            "Please run `pip install validmind[stats]` to use statistical tests",
+            required_dependencies=["scipy"],
+            extra="stats",
+        ) from e
+
+    raise e


 @tasks("classification", "regression")
validmind/tests/data_validation/TimeSeriesDescriptiveStatistics.py
CHANGED
@@ -3,9 +3,22 @@
 # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial

 import pandas as pd
-from scipy.stats import kurtosis, skew

 from validmind import tags, tasks
+from validmind.errors import MissingDependencyError
+
+try:
+    from scipy.stats import kurtosis, skew
+except ImportError as e:
+    if "scipy" in str(e):
+        raise MissingDependencyError(
+            "Missing required package `scipy` for TimeSeriesDescriptiveStatistics. "
+            "Please run `pip install validmind[stats]` to use statistical tests",
+            required_dependencies=["scipy"],
+            extra="stats",
+        ) from e
+
+    raise e


 @tags("time_series_data", "analysis")
validmind/tests/data_validation/WOEBinPlots.py
CHANGED
@@ -9,7 +9,20 @@ import numpy as np
 import pandas as pd
 import plotly.express as px
 import plotly.graph_objects as go
-
+
+from validmind.errors import MissingDependencyError
+
+try:
+    import scorecardpy as sc
+except ImportError as e:
+    if "scorecardpy" in str(e):
+        raise MissingDependencyError(
+            "Missing required package `scorecardpy` for WOEBinPlots. "
+            "Please run `pip install validmind[credit_risk]` to use these tests",
+            required_dependencies=["scorecardpy"],
+            extra="credit_risk",
+        ) from e
+    raise e
 from plotly.subplots import make_subplots

 from validmind import RawData, tags, tasks
validmind/tests/data_validation/WOEBinTable.py
CHANGED
@@ -5,12 +5,23 @@
 from typing import Dict, Tuple

 import pandas as pd
-import scorecardpy as sc

 from validmind import RawData, tags, tasks
-from validmind.errors import SkipTestError
+from validmind.errors import MissingDependencyError, SkipTestError
 from validmind.vm_models import VMDataset

+try:
+    import scorecardpy as sc
+except ImportError as e:
+    if "scorecardpy" in str(e):
+        raise MissingDependencyError(
+            "Missing required package `scorecardpy` for WOEBinTable. "
+            "Please run `pip install validmind[credit_risk]` to use these tests",
+            required_dependencies=["scorecardpy"],
+            extra="credit_risk",
+        ) from e
+    raise e
+

 @tags("tabular_data", "categorical_data")
 @tasks("classification")
validmind/tests/data_validation/ZivotAndrewsArch.py
CHANGED
@@ -5,14 +5,25 @@
 from typing import Any, Dict, Tuple

 import pandas as pd
-from arch.unitroot import ZivotAndrews
 from numpy.linalg import LinAlgError

 from validmind import RawData, tags, tasks
-from validmind.errors import SkipTestError
+from validmind.errors import MissingDependencyError, SkipTestError
 from validmind.logging import get_logger
 from validmind.vm_models import VMDataset

+try:
+    from arch.unitroot import ZivotAndrews
+except ImportError as e:
+    if "arch" in str(e):
+        raise MissingDependencyError(
+            "Missing required package `arch` for ZivotAndrewsArch. "
+            "Please run `pip install validmind[stats]` to use statistical tests",
+            required_dependencies=["arch"],
+            extra="stats",
+        ) from e
+    raise e
+
 logger = get_logger(__name__)


validmind/tests/data_validation/nlp/CommonWords.py
CHANGED
@@ -5,13 +5,25 @@
 from collections import Counter
 from typing import Tuple

-import nltk
 import plotly.graph_objects as go
-from nltk.corpus import stopwords

 from validmind import RawData, tags, tasks
+from validmind.errors import MissingDependencyError
 from validmind.vm_models import VMDataset

+try:
+    import nltk
+    from nltk.corpus import stopwords
+except ImportError as e:
+    if "nltk" in str(e).lower():
+        raise MissingDependencyError(
+            "Missing required package `nltk` for CommonWords. "
+            "Please run `pip install validmind[nlp]` to use NLP tests",
+            required_dependencies=["nltk"],
+            extra="nlp",
+        ) from e
+    raise e
+

 @tags("nlp", "text_data", "visualization", "frequency_analysis")
 @tasks("text_classification", "text_summarization")
validmind/tests/data_validation/nlp/LanguageDetection.py
CHANGED
@@ -6,9 +6,22 @@ from typing import Tuple

 import plotly.express as px
 import plotly.graph_objects as go
-from langdetect import LangDetectException, detect

 from validmind import RawData, tags, tasks
+from validmind.errors import MissingDependencyError
+
+try:
+    from langdetect import LangDetectException, detect
+except ImportError as e:
+    if "langdetect" in str(e):
+        raise MissingDependencyError(
+            "Missing required package `langdetect` for LanguageDetection. "
+            "Please run `pip install validmind[nlp]` to use NLP tests",
+            required_dependencies=["langdetect"],
+            extra="nlp",
+        ) from e
+
+    raise e


 @tags("nlp", "text_data", "visualization")
validmind/tests/data_validation/nlp/PolarityAndSubjectivity.py
CHANGED
@@ -8,9 +8,21 @@ from typing import Dict, Tuple
 import pandas as pd
 import plotly.express as px
 import plotly.graph_objects as go
-from textblob import TextBlob

 from validmind import RawData, tags, tasks
+from validmind.errors import MissingDependencyError
+
+try:
+    from textblob import TextBlob
+except ImportError as e:
+    if "textblob" in str(e).lower():
+        raise MissingDependencyError(
+            "Missing required package `textblob` for PolarityAndSubjectivity. "
+            "Please run `pip install validmind[nlp]` to use NLP tests",
+            required_dependencies=["textblob"],
+            extra="nlp",
+        ) from e
+    raise e


 @tags("nlp", "text_data", "data_validation")
validmind/tests/data_validation/nlp/Sentiment.py
CHANGED
@@ -8,9 +8,21 @@ from typing import Tuple
 import matplotlib.pyplot as plt
 import nltk
 import seaborn as sns
-from nltk.sentiment import SentimentIntensityAnalyzer

 from validmind import RawData, tags, tasks
+from validmind.errors import MissingDependencyError
+
+try:
+    from nltk.sentiment import SentimentIntensityAnalyzer
+except ImportError as e:
+    if "nltk" in str(e).lower():
+        raise MissingDependencyError(
+            "Missing required package `nltk` for Sentiment. "
+            "Please run `pip install validmind[nlp]` to use NLP tests",
+            required_dependencies=["nltk"],
+            extra="nlp",
+        ) from e
+    raise e


 @tags("nlp", "text_data", "data_validation")
validmind/tests/data_validation/nlp/StopWords.py
CHANGED
@@ -9,14 +9,26 @@ Threshold based tests
 from collections import defaultdict
 from typing import Dict, Tuple

-import nltk
 import pandas as pd
 import plotly.graph_objects as go
-from nltk.corpus import stopwords

 from validmind import RawData, tags, tasks
+from validmind.errors import MissingDependencyError
 from validmind.vm_models import VMDataset

+try:
+    import nltk
+    from nltk.corpus import stopwords
+except ImportError as e:
+    if "nltk" in str(e).lower():
+        raise MissingDependencyError(
+            "Missing required package `nltk` for StopWords. "
+            "Please run `pip install validmind[nlp]` to use NLP tests",
+            required_dependencies=["nltk"],
+            extra="nlp",
+        ) from e
+    raise e
+

 @tags("nlp", "text_data", "frequency_analysis", "visualization")
 @tasks("text_classification", "text_summarization")