validmind 1.7.0__py3-none-any.whl → 1.8.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- validmind/__init__.py +8 -1
- validmind/{client.pyx → client.py} +48 -41
- validmind/data_validation/{threshold_tests.pyx → threshold_tests.py} +1 -2
- validmind/datasets/__init__.py +0 -0
- validmind/datasets/classification/{customer_churn.pyx → customer_churn.py} +1 -1
- validmind/datasets/classification/datasets/bank_customer_churn.csv +8001 -0
- validmind/datasets/classification/datasets/taiwan_credit.csv +30001 -0
- validmind/datasets/classification/{taiwan_credit.pyx → taiwan_credit.py} +1 -1
- validmind/datasets/regression/__init__.py +55 -1
- validmind/datasets/regression/datasets/fred_loan_rates.csv +3552 -0
- validmind/datasets/regression/datasets/fred_loan_rates_test_1.csv +126 -0
- validmind/datasets/regression/datasets/fred_loan_rates_test_2.csv +126 -0
- validmind/datasets/regression/datasets/fred_loan_rates_test_3.csv +126 -0
- validmind/datasets/regression/datasets/fred_loan_rates_test_4.csv +126 -0
- validmind/datasets/regression/datasets/fred_loan_rates_test_5.csv +126 -0
- validmind/datasets/regression/datasets/lending_club_loan_rates.csv +138 -0
- validmind/datasets/regression/fred.py +132 -0
- validmind/datasets/regression/lending_club.py +70 -0
- validmind/datasets/regression/models/fred_loan_rates_model_1.pkl +0 -0
- validmind/datasets/regression/models/fred_loan_rates_model_2.pkl +0 -0
- validmind/datasets/regression/models/fred_loan_rates_model_3.pkl +0 -0
- validmind/datasets/regression/models/fred_loan_rates_model_4.pkl +0 -0
- validmind/datasets/regression/models/fred_loan_rates_model_5.pkl +0 -0
- validmind/model_validation/sklearn/{threshold_tests.pyx → threshold_tests.py} +9 -9
- validmind/model_validation/statsmodels/{metrics.pyx → metrics.py} +123 -138
- validmind/test_plans/__init__.py +0 -4
- validmind/test_plans/{binary_classifier.pyx → binary_classifier.py} +0 -15
- validmind/test_plans/{statsmodels_timeseries.pyx → statsmodels_timeseries.py} +2 -2
- validmind/test_plans/{tabular_datasets.pyx → tabular_datasets.py} +0 -13
- validmind/test_plans/{time_series.pyx → time_series.py} +3 -3
- validmind/test_suites/__init__.py +73 -0
- validmind/test_suites/test_suites.py +48 -0
- validmind/vm_models/__init__.py +2 -0
- validmind/vm_models/{dataset.pyx → dataset.py} +17 -8
- validmind/vm_models/test_suite.py +57 -0
- {validmind-1.7.0.dist-info → validmind-1.8.1.dist-info}/METADATA +1 -3
- validmind-1.8.1.dist-info/RECORD +63 -0
- validmind/api_client.c +0 -9481
- validmind/api_client.cpython-310-x86_64-linux-gnu.so +0 -0
- validmind/client.c +0 -7198
- validmind/client.cpython-310-x86_64-linux-gnu.so +0 -0
- validmind/datasets/regression/fred.pyx +0 -7
- validmind/datasets/regression/lending_club.pyx +0 -7
- validmind/model_utils.c +0 -9281
- validmind/model_utils.cpython-310-x86_64-linux-gnu.so +0 -0
- validmind/utils.c +0 -10284
- validmind/utils.cpython-310-x86_64-linux-gnu.so +0 -0
- validmind-1.7.0.dist-info/RECORD +0 -53
- /validmind/{api_client.pyx → api_client.py} +0 -0
- /validmind/data_validation/{metrics.pyx → metrics.py} +0 -0
- /validmind/{model_utils.pyx → model_utils.py} +0 -0
- /validmind/model_validation/{model_metadata.pyx → model_metadata.py} +0 -0
- /validmind/model_validation/sklearn/{metrics.pyx → metrics.py} +0 -0
- /validmind/model_validation/statsmodels/{threshold_tests.pyx → threshold_tests.py} +0 -0
- /validmind/model_validation/{utils.pyx → utils.py} +0 -0
- /validmind/{utils.pyx → utils.py} +0 -0
- /validmind/vm_models/{dataset_utils.pyx → dataset_utils.py} +0 -0
- /validmind/vm_models/{figure.pyx → figure.py} +0 -0
- /validmind/vm_models/{metric.pyx → metric.py} +0 -0
- /validmind/vm_models/{metric_result.pyx → metric_result.py} +0 -0
- /validmind/vm_models/{model.pyx → model.py} +0 -0
- /validmind/vm_models/{plot_utils.pyx → plot_utils.py} +0 -0
- /validmind/vm_models/{result_summary.pyx → result_summary.py} +0 -0
- /validmind/vm_models/{test_context.pyx → test_context.py} +0 -0
- /validmind/vm_models/{test_plan.pyx → test_plan.py} +0 -0
- /validmind/vm_models/{test_plan_result.pyx → test_plan_result.py} +0 -0
- /validmind/vm_models/{test_result.pyx → test_result.py} +0 -0
- /validmind/vm_models/{threshold_test.pyx → threshold_test.py} +0 -0
- {validmind-1.7.0.dist-info → validmind-1.8.1.dist-info}/WHEEL +0 -0
@@ -11,7 +11,7 @@ from . import (
|
|
11
11
|
)
|
12
12
|
|
13
13
|
current_path = os.path.dirname(os.path.abspath(__file__))
|
14
|
-
dataset_path = os.path.join(current_path, "
|
14
|
+
dataset_path = os.path.join(current_path, "datasets")
|
15
15
|
|
16
16
|
drop_columns = ["ID"]
|
17
17
|
boolean_columns = ["SEX"]
|
@@ -1,3 +1,57 @@
|
|
1
1
|
"""
|
2
|
-
Entrypoint
|
2
|
+
Entrypoint for regression datasets
|
3
3
|
"""
|
4
|
+
import pandas as pd
|
5
|
+
|
6
|
+
|
7
|
+
def identify_frequencies(df):
    """
    Identify the frequency of each series in the DataFrame.

    Every column is inspected on its own: missing values are dropped and the
    frequency is inferred from the index of the remaining observations.

    :param df: Time-series DataFrame
    :return: DataFrame with two columns: 'Variable' and 'Frequency'.
        'Frequency' is "Monthly", "Quarterly" or "Yearly" for the matching
        calendar aliases, the raw pandas alias for any other detected
        frequency, and None when no frequency could be inferred.
    """
    # pd.infer_freq reports anchored aliases such as "Q-DEC", "QS-OCT" or
    # "A-DEC", and newer pandas releases spell month/quarter/year end as
    # "ME"/"QE"/"YE". Labels are therefore looked up by the base alias (the
    # part before the anchor suffix) instead of comparing the full string.
    friendly_labels = {
        "M": "Monthly",
        "MS": "Monthly",
        "ME": "Monthly",
        "Q": "Quarterly",
        "QS": "Quarterly",
        "QE": "Quarterly",
        "A": "Yearly",
        "AS": "Yearly",
        "Y": "Yearly",
        "YS": "Yearly",
        "YE": "Yearly",
    }

    frequencies = []
    for column in df.columns:
        series = df[column].dropna()

        # pd.infer_freq raises ValueError when given fewer than three
        # dates, so short (or empty) series are reported as "unknown".
        if len(series.index) < 3:
            freq = None
        else:
            freq = pd.infer_freq(series.index)

        if freq is None:
            label = None
        else:
            base_alias = freq.split("-", 1)[0]
            # Unrecognised frequencies keep the raw pandas alias.
            label = friendly_labels.get(base_alias, freq)

        frequencies.append({"Variable": column, "Frequency": label})

    # Naming the columns explicitly keeps the documented layout even when
    # the input has no columns at all.
    return pd.DataFrame(frequencies, columns=["Variable", "Frequency"])
|
35
|
+
|
36
|
+
|
37
|
+
def resample_to_common_frequency(df, common_frequency="MS"):
    """
    Resample every column of a time-series DataFrame to one frequency.

    A column whose non-null observations already follow ``common_frequency``
    is copied over unchanged. Any other column is resampled to
    ``common_frequency`` and the resulting gaps are filled with
    ``interpolate()`` using its default settings.

    The input DataFrame is not modified.

    :param df: Time-series DataFrame; a non-datetime index is converted
        with ``pd.to_datetime``
    :param common_frequency: pandas offset alias to resample to
        (default "MS")
    :return: New DataFrame holding one resampled series per input column
    """
    # Make sure the index is a datetime index. set_axis returns a new
    # object, so the caller's DataFrame keeps its original index.
    if not isinstance(df.index, pd.DatetimeIndex):
        df = df.set_axis(pd.to_datetime(df.index), axis=0)

    resampled_df = pd.DataFrame()

    for column in df.columns:
        observed = df[column].dropna()

        # pd.infer_freq raises ValueError for fewer than three dates; such
        # columns are treated as "frequency unknown" and resampled.
        if len(observed.index) < 3:
            inferred_freq = None
        else:
            inferred_freq = pd.infer_freq(observed.index)

        if inferred_freq == common_frequency:
            # Already at the target frequency: keep the column as it is.
            resampled_series = df[column]
        else:
            resampled_series = df[column].resample(common_frequency).interpolate()

        # NOTE(review): column assignment aligns each series to the index
        # established by the first column - confirm this is intended when
        # columns end up with different date ranges.
        resampled_df[column] = resampled_series

    return resampled_df
|