upgini 1.1.277__tar.gz → 1.1.278a2__tar.gz
This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of upgini might be problematic. Click here for more details.
- {upgini-1.1.277/src/upgini.egg-info → upgini-1.1.278a2}/PKG-INFO +1 -1
- {upgini-1.1.277 → upgini-1.1.278a2}/setup.py +1 -1
- {upgini-1.1.277 → upgini-1.1.278a2}/src/upgini/dataset.py +11 -2
- {upgini-1.1.277 → upgini-1.1.278a2}/src/upgini/features_enricher.py +213 -100
- {upgini-1.1.277 → upgini-1.1.278a2}/src/upgini/metadata.py +10 -2
- {upgini-1.1.277 → upgini-1.1.278a2}/src/upgini/metrics.py +1 -1
- {upgini-1.1.277 → upgini-1.1.278a2}/src/upgini/resource_bundle/strings.properties +1 -0
- upgini-1.1.278a2/src/upgini/utils/base_search_key_detector.py +27 -0
- {upgini-1.1.277 → upgini-1.1.278a2}/src/upgini/utils/datetime_utils.py +2 -2
- {upgini-1.1.277 → upgini-1.1.278a2}/src/upgini/utils/deduplicate_utils.py +11 -1
- {upgini-1.1.277 → upgini-1.1.278a2}/src/upgini/utils/email_utils.py +5 -0
- {upgini-1.1.277 → upgini-1.1.278a2/src/upgini.egg-info}/PKG-INFO +1 -1
- {upgini-1.1.277 → upgini-1.1.278a2}/src/upgini.egg-info/SOURCES.txt +0 -1
- {upgini-1.1.277 → upgini-1.1.278a2}/tests/test_country_utils.py +4 -4
- {upgini-1.1.277 → upgini-1.1.278a2}/tests/test_email_utils.py +8 -10
- {upgini-1.1.277 → upgini-1.1.278a2}/tests/test_etalon_validation.py +21 -2
- {upgini-1.1.277 → upgini-1.1.278a2}/tests/test_features_enricher.py +11 -2
- {upgini-1.1.277 → upgini-1.1.278a2}/tests/test_phone_utils.py +6 -6
- {upgini-1.1.277 → upgini-1.1.278a2}/tests/test_postal_code_utils.py +6 -6
- upgini-1.1.277/src/upgini/fingerprint.js +0 -8
- upgini-1.1.277/src/upgini/utils/base_search_key_detector.py +0 -25
- {upgini-1.1.277 → upgini-1.1.278a2}/LICENSE +0 -0
- {upgini-1.1.277 → upgini-1.1.278a2}/README.md +0 -0
- {upgini-1.1.277 → upgini-1.1.278a2}/pyproject.toml +0 -0
- {upgini-1.1.277 → upgini-1.1.278a2}/setup.cfg +0 -0
- {upgini-1.1.277 → upgini-1.1.278a2}/src/upgini/__init__.py +0 -0
- {upgini-1.1.277 → upgini-1.1.278a2}/src/upgini/ads.py +0 -0
- {upgini-1.1.277 → upgini-1.1.278a2}/src/upgini/ads_management/__init__.py +0 -0
- {upgini-1.1.277 → upgini-1.1.278a2}/src/upgini/ads_management/ads_manager.py +0 -0
- {upgini-1.1.277 → upgini-1.1.278a2}/src/upgini/autofe/__init__.py +0 -0
- {upgini-1.1.277 → upgini-1.1.278a2}/src/upgini/autofe/all_operands.py +0 -0
- {upgini-1.1.277 → upgini-1.1.278a2}/src/upgini/autofe/binary.py +0 -0
- {upgini-1.1.277 → upgini-1.1.278a2}/src/upgini/autofe/date.py +0 -0
- {upgini-1.1.277 → upgini-1.1.278a2}/src/upgini/autofe/feature.py +0 -0
- {upgini-1.1.277 → upgini-1.1.278a2}/src/upgini/autofe/groupby.py +0 -0
- {upgini-1.1.277 → upgini-1.1.278a2}/src/upgini/autofe/operand.py +0 -0
- {upgini-1.1.277 → upgini-1.1.278a2}/src/upgini/autofe/unary.py +0 -0
- {upgini-1.1.277 → upgini-1.1.278a2}/src/upgini/autofe/vector.py +0 -0
- {upgini-1.1.277 → upgini-1.1.278a2}/src/upgini/data_source/__init__.py +0 -0
- {upgini-1.1.277 → upgini-1.1.278a2}/src/upgini/data_source/data_source_publisher.py +0 -0
- {upgini-1.1.277 → upgini-1.1.278a2}/src/upgini/errors.py +0 -0
- {upgini-1.1.277 → upgini-1.1.278a2}/src/upgini/http.py +0 -0
- {upgini-1.1.277 → upgini-1.1.278a2}/src/upgini/mdc/__init__.py +0 -0
- {upgini-1.1.277 → upgini-1.1.278a2}/src/upgini/mdc/context.py +0 -0
- {upgini-1.1.277 → upgini-1.1.278a2}/src/upgini/normalizer/__init__.py +0 -0
- {upgini-1.1.277 → upgini-1.1.278a2}/src/upgini/normalizer/phone_normalizer.py +0 -0
- {upgini-1.1.277 → upgini-1.1.278a2}/src/upgini/resource_bundle/__init__.py +0 -0
- {upgini-1.1.277 → upgini-1.1.278a2}/src/upgini/resource_bundle/exceptions.py +0 -0
- {upgini-1.1.277 → upgini-1.1.278a2}/src/upgini/resource_bundle/strings_widget.properties +0 -0
- {upgini-1.1.277 → upgini-1.1.278a2}/src/upgini/sampler/__init__.py +0 -0
- {upgini-1.1.277 → upgini-1.1.278a2}/src/upgini/sampler/base.py +0 -0
- {upgini-1.1.277 → upgini-1.1.278a2}/src/upgini/sampler/random_under_sampler.py +0 -0
- {upgini-1.1.277 → upgini-1.1.278a2}/src/upgini/sampler/utils.py +0 -0
- {upgini-1.1.277 → upgini-1.1.278a2}/src/upgini/search_task.py +0 -0
- {upgini-1.1.277 → upgini-1.1.278a2}/src/upgini/spinner.py +0 -0
- {upgini-1.1.277 → upgini-1.1.278a2}/src/upgini/utils/__init__.py +0 -0
- {upgini-1.1.277 → upgini-1.1.278a2}/src/upgini/utils/blocked_time_series.py +0 -0
- {upgini-1.1.277 → upgini-1.1.278a2}/src/upgini/utils/country_utils.py +0 -0
- {upgini-1.1.277 → upgini-1.1.278a2}/src/upgini/utils/custom_loss_utils.py +0 -0
- {upgini-1.1.277 → upgini-1.1.278a2}/src/upgini/utils/cv_utils.py +0 -0
- {upgini-1.1.277 → upgini-1.1.278a2}/src/upgini/utils/display_utils.py +0 -0
- {upgini-1.1.277 → upgini-1.1.278a2}/src/upgini/utils/fallback_progress_bar.py +0 -0
- {upgini-1.1.277 → upgini-1.1.278a2}/src/upgini/utils/features_validator.py +0 -0
- {upgini-1.1.277 → upgini-1.1.278a2}/src/upgini/utils/format.py +0 -0
- {upgini-1.1.277 → upgini-1.1.278a2}/src/upgini/utils/ip_utils.py +0 -0
- {upgini-1.1.277 → upgini-1.1.278a2}/src/upgini/utils/phone_utils.py +0 -0
- {upgini-1.1.277 → upgini-1.1.278a2}/src/upgini/utils/postal_code_utils.py +0 -0
- {upgini-1.1.277 → upgini-1.1.278a2}/src/upgini/utils/progress_bar.py +0 -0
- {upgini-1.1.277 → upgini-1.1.278a2}/src/upgini/utils/sklearn_ext.py +0 -0
- {upgini-1.1.277 → upgini-1.1.278a2}/src/upgini/utils/target_utils.py +0 -0
- {upgini-1.1.277 → upgini-1.1.278a2}/src/upgini/utils/track_info.py +0 -0
- {upgini-1.1.277 → upgini-1.1.278a2}/src/upgini/utils/warning_counter.py +0 -0
- {upgini-1.1.277 → upgini-1.1.278a2}/src/upgini/version_validator.py +0 -0
- {upgini-1.1.277 → upgini-1.1.278a2}/src/upgini.egg-info/dependency_links.txt +0 -0
- {upgini-1.1.277 → upgini-1.1.278a2}/src/upgini.egg-info/requires.txt +0 -0
- {upgini-1.1.277 → upgini-1.1.278a2}/src/upgini.egg-info/top_level.txt +0 -0
- {upgini-1.1.277 → upgini-1.1.278a2}/tests/test_autofe_operands.py +0 -0
- {upgini-1.1.277 → upgini-1.1.278a2}/tests/test_binary_dataset.py +0 -0
- {upgini-1.1.277 → upgini-1.1.278a2}/tests/test_blocked_time_series.py +0 -0
- {upgini-1.1.277 → upgini-1.1.278a2}/tests/test_categorical_dataset.py +0 -0
- {upgini-1.1.277 → upgini-1.1.278a2}/tests/test_continuous_dataset.py +0 -0
- {upgini-1.1.277 → upgini-1.1.278a2}/tests/test_custom_loss_utils.py +0 -0
- {upgini-1.1.277 → upgini-1.1.278a2}/tests/test_datetime_utils.py +0 -0
- {upgini-1.1.277 → upgini-1.1.278a2}/tests/test_metrics.py +0 -0
- {upgini-1.1.277 → upgini-1.1.278a2}/tests/test_target_utils.py +0 -0
- {upgini-1.1.277 → upgini-1.1.278a2}/tests/test_widget.py +0 -0
|
@@ -23,7 +23,9 @@ from pandas.api.types import (
|
|
|
23
23
|
from upgini.errors import ValidationError
|
|
24
24
|
from upgini.http import ProgressStage, SearchProgress, _RestClient
|
|
25
25
|
from upgini.metadata import (
|
|
26
|
+
ENTITY_SYSTEM_RECORD_ID,
|
|
26
27
|
EVAL_SET_INDEX,
|
|
28
|
+
SEARCH_KEY_UNNEST,
|
|
27
29
|
SYSTEM_COLUMNS,
|
|
28
30
|
SYSTEM_RECORD_ID,
|
|
29
31
|
TARGET,
|
|
@@ -79,6 +81,7 @@ class Dataset: # (pd.DataFrame):
|
|
|
79
81
|
path: Optional[str] = None,
|
|
80
82
|
meaning_types: Optional[Dict[str, FileColumnMeaningType]] = None,
|
|
81
83
|
search_keys: Optional[List[Tuple[str, ...]]] = None,
|
|
84
|
+
unnest_search_keys: Optional[Dict[str, str]] = None,
|
|
82
85
|
model_task_type: Optional[ModelTaskType] = None,
|
|
83
86
|
random_state: Optional[int] = None,
|
|
84
87
|
rest_client: Optional[_RestClient] = None,
|
|
@@ -113,6 +116,7 @@ class Dataset: # (pd.DataFrame):
|
|
|
113
116
|
self.description = description
|
|
114
117
|
self.meaning_types = meaning_types
|
|
115
118
|
self.search_keys = search_keys
|
|
119
|
+
self.unnest_search_keys = unnest_search_keys
|
|
116
120
|
self.ignore_columns = []
|
|
117
121
|
self.hierarchical_group_keys = []
|
|
118
122
|
self.hierarchical_subgroup_keys = []
|
|
@@ -172,7 +176,7 @@ class Dataset: # (pd.DataFrame):
|
|
|
172
176
|
new_columns = []
|
|
173
177
|
dup_counter = 0
|
|
174
178
|
for column in self.data.columns:
|
|
175
|
-
if column in [TARGET, EVAL_SET_INDEX, SYSTEM_RECORD_ID]:
|
|
179
|
+
if column in [TARGET, EVAL_SET_INDEX, SYSTEM_RECORD_ID, ENTITY_SYSTEM_RECORD_ID, SEARCH_KEY_UNNEST]:
|
|
176
180
|
self.columns_renaming[column] = column
|
|
177
181
|
new_columns.append(column)
|
|
178
182
|
continue
|
|
@@ -353,7 +357,9 @@ class Dataset: # (pd.DataFrame):
|
|
|
353
357
|
|
|
354
358
|
if is_string_dtype(self.data[postal_code]) or is_object_dtype(self.data[postal_code]):
|
|
355
359
|
try:
|
|
356
|
-
self.data[postal_code] = …  [right-hand side of this removed line was lost in extraction]
|
|
360
|
+
self.data[postal_code] = (
|
|
361
|
+
self.data[postal_code].astype("string").astype("Float64").astype("Int64").astype("string")
|
|
362
|
+
)
|
|
357
363
|
except Exception:
|
|
358
364
|
pass
|
|
359
365
|
elif is_float_dtype(self.data[postal_code]):
|
|
@@ -803,6 +809,9 @@ class Dataset: # (pd.DataFrame):
|
|
|
803
809
|
meaningType=meaning_type,
|
|
804
810
|
minMaxValues=min_max_values,
|
|
805
811
|
)
|
|
812
|
+
if self.unnest_search_keys and column_meta.originalName in self.unnest_search_keys:
|
|
813
|
+
column_meta.isUnnest = True
|
|
814
|
+
column_meta.unnestKeyNames = self.unnest_search_keys[column_meta.originalName]
|
|
806
815
|
|
|
807
816
|
columns.append(column_meta)
|
|
808
817
|
|