upgini 1.2.120__tar.gz → 1.2.121__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {upgini-1.2.120 → upgini-1.2.121}/PKG-INFO +1 -1
- upgini-1.2.121/src/upgini/__about__.py +1 -0
- {upgini-1.2.120 → upgini-1.2.121}/src/upgini/data_source/data_source_publisher.py +6 -3
- {upgini-1.2.120 → upgini-1.2.121}/src/upgini/resource_bundle/strings.properties +1 -1
- {upgini-1.2.120 → upgini-1.2.121}/src/upgini/utils/features_validator.py +49 -17
- upgini-1.2.120/src/upgini/__about__.py +0 -1
- {upgini-1.2.120 → upgini-1.2.121}/.gitignore +0 -0
- {upgini-1.2.120 → upgini-1.2.121}/LICENSE +0 -0
- {upgini-1.2.120 → upgini-1.2.121}/README.md +0 -0
- {upgini-1.2.120 → upgini-1.2.121}/pyproject.toml +0 -0
- {upgini-1.2.120 → upgini-1.2.121}/src/upgini/__init__.py +0 -0
- {upgini-1.2.120 → upgini-1.2.121}/src/upgini/ads.py +0 -0
- {upgini-1.2.120 → upgini-1.2.121}/src/upgini/ads_management/__init__.py +0 -0
- {upgini-1.2.120 → upgini-1.2.121}/src/upgini/ads_management/ads_manager.py +0 -0
- {upgini-1.2.120 → upgini-1.2.121}/src/upgini/autofe/__init__.py +0 -0
- {upgini-1.2.120 → upgini-1.2.121}/src/upgini/autofe/all_operators.py +0 -0
- {upgini-1.2.120 → upgini-1.2.121}/src/upgini/autofe/binary.py +0 -0
- {upgini-1.2.120 → upgini-1.2.121}/src/upgini/autofe/date.py +0 -0
- {upgini-1.2.120 → upgini-1.2.121}/src/upgini/autofe/feature.py +0 -0
- {upgini-1.2.120 → upgini-1.2.121}/src/upgini/autofe/groupby.py +0 -0
- {upgini-1.2.120 → upgini-1.2.121}/src/upgini/autofe/operator.py +0 -0
- {upgini-1.2.120 → upgini-1.2.121}/src/upgini/autofe/timeseries/__init__.py +0 -0
- {upgini-1.2.120 → upgini-1.2.121}/src/upgini/autofe/timeseries/base.py +0 -0
- {upgini-1.2.120 → upgini-1.2.121}/src/upgini/autofe/timeseries/cross.py +0 -0
- {upgini-1.2.120 → upgini-1.2.121}/src/upgini/autofe/timeseries/delta.py +0 -0
- {upgini-1.2.120 → upgini-1.2.121}/src/upgini/autofe/timeseries/lag.py +0 -0
- {upgini-1.2.120 → upgini-1.2.121}/src/upgini/autofe/timeseries/roll.py +0 -0
- {upgini-1.2.120 → upgini-1.2.121}/src/upgini/autofe/timeseries/trend.py +0 -0
- {upgini-1.2.120 → upgini-1.2.121}/src/upgini/autofe/timeseries/volatility.py +0 -0
- {upgini-1.2.120 → upgini-1.2.121}/src/upgini/autofe/unary.py +0 -0
- {upgini-1.2.120 → upgini-1.2.121}/src/upgini/autofe/utils.py +0 -0
- {upgini-1.2.120 → upgini-1.2.121}/src/upgini/autofe/vector.py +0 -0
- {upgini-1.2.120 → upgini-1.2.121}/src/upgini/data_source/__init__.py +0 -0
- {upgini-1.2.120 → upgini-1.2.121}/src/upgini/dataset.py +0 -0
- {upgini-1.2.120 → upgini-1.2.121}/src/upgini/errors.py +0 -0
- {upgini-1.2.120 → upgini-1.2.121}/src/upgini/features_enricher.py +0 -0
- {upgini-1.2.120 → upgini-1.2.121}/src/upgini/http.py +0 -0
- {upgini-1.2.120 → upgini-1.2.121}/src/upgini/mdc/__init__.py +0 -0
- {upgini-1.2.120 → upgini-1.2.121}/src/upgini/mdc/context.py +0 -0
- {upgini-1.2.120 → upgini-1.2.121}/src/upgini/metadata.py +0 -0
- {upgini-1.2.120 → upgini-1.2.121}/src/upgini/metrics.py +0 -0
- {upgini-1.2.120 → upgini-1.2.121}/src/upgini/normalizer/__init__.py +0 -0
- {upgini-1.2.120 → upgini-1.2.121}/src/upgini/normalizer/normalize_utils.py +0 -0
- {upgini-1.2.120 → upgini-1.2.121}/src/upgini/resource_bundle/__init__.py +0 -0
- {upgini-1.2.120 → upgini-1.2.121}/src/upgini/resource_bundle/exceptions.py +0 -0
- {upgini-1.2.120 → upgini-1.2.121}/src/upgini/resource_bundle/strings_widget.properties +0 -0
- {upgini-1.2.120 → upgini-1.2.121}/src/upgini/sampler/__init__.py +0 -0
- {upgini-1.2.120 → upgini-1.2.121}/src/upgini/sampler/base.py +0 -0
- {upgini-1.2.120 → upgini-1.2.121}/src/upgini/sampler/random_under_sampler.py +0 -0
- {upgini-1.2.120 → upgini-1.2.121}/src/upgini/sampler/utils.py +0 -0
- {upgini-1.2.120 → upgini-1.2.121}/src/upgini/search_task.py +0 -0
- {upgini-1.2.120 → upgini-1.2.121}/src/upgini/spinner.py +0 -0
- {upgini-1.2.120 → upgini-1.2.121}/src/upgini/utils/Roboto-Regular.ttf +0 -0
- {upgini-1.2.120 → upgini-1.2.121}/src/upgini/utils/__init__.py +0 -0
- {upgini-1.2.120 → upgini-1.2.121}/src/upgini/utils/base_search_key_detector.py +0 -0
- {upgini-1.2.120 → upgini-1.2.121}/src/upgini/utils/blocked_time_series.py +0 -0
- {upgini-1.2.120 → upgini-1.2.121}/src/upgini/utils/config.py +0 -0
- {upgini-1.2.120 → upgini-1.2.121}/src/upgini/utils/country_utils.py +0 -0
- {upgini-1.2.120 → upgini-1.2.121}/src/upgini/utils/custom_loss_utils.py +0 -0
- {upgini-1.2.120 → upgini-1.2.121}/src/upgini/utils/cv_utils.py +0 -0
- {upgini-1.2.120 → upgini-1.2.121}/src/upgini/utils/datetime_utils.py +0 -0
- {upgini-1.2.120 → upgini-1.2.121}/src/upgini/utils/deduplicate_utils.py +0 -0
- {upgini-1.2.120 → upgini-1.2.121}/src/upgini/utils/display_utils.py +0 -0
- {upgini-1.2.120 → upgini-1.2.121}/src/upgini/utils/email_utils.py +0 -0
- {upgini-1.2.120 → upgini-1.2.121}/src/upgini/utils/fallback_progress_bar.py +0 -0
- {upgini-1.2.120 → upgini-1.2.121}/src/upgini/utils/feature_info.py +0 -0
- {upgini-1.2.120 → upgini-1.2.121}/src/upgini/utils/format.py +0 -0
- {upgini-1.2.120 → upgini-1.2.121}/src/upgini/utils/hash_utils.py +0 -0
- {upgini-1.2.120 → upgini-1.2.121}/src/upgini/utils/ip_utils.py +0 -0
- {upgini-1.2.120 → upgini-1.2.121}/src/upgini/utils/mstats.py +0 -0
- {upgini-1.2.120 → upgini-1.2.121}/src/upgini/utils/phone_utils.py +0 -0
- {upgini-1.2.120 → upgini-1.2.121}/src/upgini/utils/postal_code_utils.py +0 -0
- {upgini-1.2.120 → upgini-1.2.121}/src/upgini/utils/progress_bar.py +0 -0
- {upgini-1.2.120 → upgini-1.2.121}/src/upgini/utils/psi.py +0 -0
- {upgini-1.2.120 → upgini-1.2.121}/src/upgini/utils/sample_utils.py +0 -0
- {upgini-1.2.120 → upgini-1.2.121}/src/upgini/utils/sklearn_ext.py +0 -0
- {upgini-1.2.120 → upgini-1.2.121}/src/upgini/utils/sort.py +0 -0
- {upgini-1.2.120 → upgini-1.2.121}/src/upgini/utils/target_utils.py +0 -0
- {upgini-1.2.120 → upgini-1.2.121}/src/upgini/utils/track_info.py +0 -0
- {upgini-1.2.120 → upgini-1.2.121}/src/upgini/utils/ts_utils.py +0 -0
- {upgini-1.2.120 → upgini-1.2.121}/src/upgini/utils/warning_counter.py +0 -0
- {upgini-1.2.120 → upgini-1.2.121}/src/upgini/version_validator.py +0 -0
@@ -0,0 +1 @@
|
|
1
|
+
__version__ = "1.2.121"
|
@@ -519,21 +519,24 @@ class DataSourcePublisher:
|
|
519
519
|
description: str = "",
|
520
520
|
):
|
521
521
|
if model_type is not None and model_type not in ["ONNX", "CATBOOST"]:
|
522
|
-
raise ValueError(f"Invalid model type: {model_type}. Available values: ONNX")
|
522
|
+
raise ValueError(f"Invalid model type: {model_type}. Available values: ONNX, CATBOOST")
|
523
523
|
metadata = {
|
524
524
|
"modelName": name,
|
525
525
|
"inputNames": input_names,
|
526
526
|
"dateColumn": date_column,
|
527
527
|
"scoreName": score_name,
|
528
528
|
"searchTaskId": search_id,
|
529
|
-
"modelType": model_type or "ONNX",
|
529
|
+
"modelType": model_type or "CATBOOST",
|
530
530
|
"description": description,
|
531
531
|
}
|
532
532
|
|
533
533
|
trace_id = str(uuid.uuid4())
|
534
534
|
with MDC(trace_id=trace_id):
|
535
535
|
try:
|
536
|
-
self._rest_client.upload_autofe_model(file_path, metadata, trace_id)
|
536
|
+
result = self._rest_client.upload_autofe_model(file_path, metadata, trace_id)
|
537
|
+
if "ERROR" in result:
|
538
|
+
raise Exception(result)
|
539
|
+
print(result)
|
537
540
|
except Exception:
|
538
541
|
self.logger.exception("Failed to upload autofe model")
|
539
542
|
raise
|
@@ -155,7 +155,7 @@ target_outliers_warning=We detected {} outliers in your sample.\nExamples of out
|
|
155
155
|
# features validation
|
156
156
|
empty_or_contant_features=Columns {} has value with frequency more than 99%, removed from X
|
157
157
|
high_cardinality_features=Columns {} has high cardinality (>90% unique values), removed from X
|
158
|
-
|
158
|
+
one_hot_encoded_features=One hot encoded features detected. Use int encoding for correct results of fit.\n{}
|
159
159
|
|
160
160
|
# Dataset validation
|
161
161
|
dataset_too_few_rows=X size should be at least {} rows after validation
|
@@ -24,7 +24,7 @@ class FeaturesValidator:
|
|
24
24
|
features_for_generate: Optional[List[str]] = None,
|
25
25
|
columns_renaming: Optional[Dict[str, str]] = None,
|
26
26
|
) -> Tuple[List[str], List[str]]:
|
27
|
-
|
27
|
+
one_hot_encoded_features = []
|
28
28
|
empty_or_constant_features = []
|
29
29
|
high_cardinality_features = []
|
30
30
|
warnings = []
|
@@ -36,23 +36,17 @@ class FeaturesValidator:
|
|
36
36
|
value_counts = column.value_counts(dropna=False, normalize=True)
|
37
37
|
most_frequent_percent = value_counts.iloc[0]
|
38
38
|
|
39
|
-
if
|
39
|
+
if len(value_counts) == 1:
|
40
40
|
empty_or_constant_features.append(f)
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
# empty_or_constant_features.append(f)
|
51
|
-
# continue
|
52
|
-
|
53
|
-
# if one_hot_encoded_features:
|
54
|
-
# msg = bundle.get("one_hot_encoded_features").format(one_hot_encoded_features)
|
55
|
-
# warnings.append(msg)
|
41
|
+
elif most_frequent_percent >= 0.99:
|
42
|
+
if self.is_one_hot_encoded(column):
|
43
|
+
one_hot_encoded_features.append(f)
|
44
|
+
else:
|
45
|
+
empty_or_constant_features.append(f)
|
46
|
+
|
47
|
+
if one_hot_encoded_features:
|
48
|
+
msg = bundle.get("one_hot_encoded_features").format(one_hot_encoded_features)
|
49
|
+
warnings.append(msg)
|
56
50
|
|
57
51
|
columns_renaming = columns_renaming or {}
|
58
52
|
|
@@ -102,3 +96,41 @@ class FeaturesValidator:
|
|
102
96
|
@staticmethod
|
103
97
|
def find_constant_features(df: pd.DataFrame) -> List[str]:
|
104
98
|
return [i for i in df if df[i].nunique() <= 1]
|
99
|
+
|
100
|
+
@staticmethod
|
101
|
+
def is_one_hot_encoded(series: pd.Series) -> bool:
|
102
|
+
try:
|
103
|
+
# All rows should be the same type
|
104
|
+
if series.apply(lambda x: type(x)).nunique() != 1:
|
105
|
+
return False
|
106
|
+
|
107
|
+
# First, handle string representations of True/False
|
108
|
+
series_copy = series.copy()
|
109
|
+
if series_copy.dtype == "object" or series_copy.dtype == "string":
|
110
|
+
# Convert string representations of boolean values to numeric
|
111
|
+
series_copy = series_copy.astype(str).str.strip().str.lower()
|
112
|
+
series_copy = series_copy.replace({"true": "1", "false": "0"})
|
113
|
+
|
114
|
+
# Column contains only 0 and 1 (as strings or numbers or booleans)
|
115
|
+
series_copy = series_copy.astype(float)
|
116
|
+
if set(series_copy.unique()) != {0.0, 1.0}:
|
117
|
+
return False
|
118
|
+
|
119
|
+
series_copy = series_copy.astype(int)
|
120
|
+
|
121
|
+
# Column doesn't contain any NaN, np.NaN, space, null, etc.
|
122
|
+
if not (series_copy.isin([0, 1])).all():
|
123
|
+
return False
|
124
|
+
|
125
|
+
vc = series_copy.value_counts()
|
126
|
+
# Column should contain both 0 and 1
|
127
|
+
if len(vc) != 2:
|
128
|
+
return False
|
129
|
+
|
130
|
+
# Minority class is 1
|
131
|
+
if vc[1] >= vc[0]:
|
132
|
+
return False
|
133
|
+
|
134
|
+
return True
|
135
|
+
except ValueError:
|
136
|
+
return False
|
@@ -1 +0,0 @@
|
|
1
|
-
__version__ = "1.2.120"
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|