upgini 1.1.278a2__tar.gz → 1.1.279a1__tar.gz
This diff shows the differences between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of upgini might be problematic. Click here for more details.
- {upgini-1.1.278a2/src/upgini.egg-info → upgini-1.1.279a1}/PKG-INFO +1 -1
- {upgini-1.1.278a2 → upgini-1.1.279a1}/setup.py +1 -1
- {upgini-1.1.278a2 → upgini-1.1.279a1}/src/upgini/dataset.py +2 -11
- {upgini-1.1.278a2 → upgini-1.1.279a1}/src/upgini/features_enricher.py +100 -213
- upgini-1.1.279a1/src/upgini/fingerprint.js +8 -0
- {upgini-1.1.278a2 → upgini-1.1.279a1}/src/upgini/metadata.py +2 -10
- {upgini-1.1.278a2 → upgini-1.1.279a1}/src/upgini/metrics.py +1 -1
- {upgini-1.1.278a2 → upgini-1.1.279a1}/src/upgini/resource_bundle/strings.properties +0 -1
- upgini-1.1.279a1/src/upgini/utils/base_search_key_detector.py +25 -0
- {upgini-1.1.278a2 → upgini-1.1.279a1}/src/upgini/utils/datetime_utils.py +9 -10
- {upgini-1.1.278a2 → upgini-1.1.279a1}/src/upgini/utils/deduplicate_utils.py +1 -11
- {upgini-1.1.278a2 → upgini-1.1.279a1}/src/upgini/utils/email_utils.py +0 -5
- {upgini-1.1.278a2 → upgini-1.1.279a1/src/upgini.egg-info}/PKG-INFO +1 -1
- {upgini-1.1.278a2 → upgini-1.1.279a1}/src/upgini.egg-info/SOURCES.txt +1 -0
- {upgini-1.1.278a2 → upgini-1.1.279a1}/tests/test_country_utils.py +4 -4
- {upgini-1.1.278a2 → upgini-1.1.279a1}/tests/test_email_utils.py +10 -8
- {upgini-1.1.278a2 → upgini-1.1.279a1}/tests/test_etalon_validation.py +2 -21
- {upgini-1.1.278a2 → upgini-1.1.279a1}/tests/test_features_enricher.py +2 -11
- {upgini-1.1.278a2 → upgini-1.1.279a1}/tests/test_phone_utils.py +6 -6
- {upgini-1.1.278a2 → upgini-1.1.279a1}/tests/test_postal_code_utils.py +6 -6
- upgini-1.1.278a2/src/upgini/utils/base_search_key_detector.py +0 -27
- {upgini-1.1.278a2 → upgini-1.1.279a1}/LICENSE +0 -0
- {upgini-1.1.278a2 → upgini-1.1.279a1}/README.md +0 -0
- {upgini-1.1.278a2 → upgini-1.1.279a1}/pyproject.toml +0 -0
- {upgini-1.1.278a2 → upgini-1.1.279a1}/setup.cfg +0 -0
- {upgini-1.1.278a2 → upgini-1.1.279a1}/src/upgini/__init__.py +0 -0
- {upgini-1.1.278a2 → upgini-1.1.279a1}/src/upgini/ads.py +0 -0
- {upgini-1.1.278a2 → upgini-1.1.279a1}/src/upgini/ads_management/__init__.py +0 -0
- {upgini-1.1.278a2 → upgini-1.1.279a1}/src/upgini/ads_management/ads_manager.py +0 -0
- {upgini-1.1.278a2 → upgini-1.1.279a1}/src/upgini/autofe/__init__.py +0 -0
- {upgini-1.1.278a2 → upgini-1.1.279a1}/src/upgini/autofe/all_operands.py +0 -0
- {upgini-1.1.278a2 → upgini-1.1.279a1}/src/upgini/autofe/binary.py +0 -0
- {upgini-1.1.278a2 → upgini-1.1.279a1}/src/upgini/autofe/date.py +0 -0
- {upgini-1.1.278a2 → upgini-1.1.279a1}/src/upgini/autofe/feature.py +0 -0
- {upgini-1.1.278a2 → upgini-1.1.279a1}/src/upgini/autofe/groupby.py +0 -0
- {upgini-1.1.278a2 → upgini-1.1.279a1}/src/upgini/autofe/operand.py +0 -0
- {upgini-1.1.278a2 → upgini-1.1.279a1}/src/upgini/autofe/unary.py +0 -0
- {upgini-1.1.278a2 → upgini-1.1.279a1}/src/upgini/autofe/vector.py +0 -0
- {upgini-1.1.278a2 → upgini-1.1.279a1}/src/upgini/data_source/__init__.py +0 -0
- {upgini-1.1.278a2 → upgini-1.1.279a1}/src/upgini/data_source/data_source_publisher.py +0 -0
- {upgini-1.1.278a2 → upgini-1.1.279a1}/src/upgini/errors.py +0 -0
- {upgini-1.1.278a2 → upgini-1.1.279a1}/src/upgini/http.py +0 -0
- {upgini-1.1.278a2 → upgini-1.1.279a1}/src/upgini/mdc/__init__.py +0 -0
- {upgini-1.1.278a2 → upgini-1.1.279a1}/src/upgini/mdc/context.py +0 -0
- {upgini-1.1.278a2 → upgini-1.1.279a1}/src/upgini/normalizer/__init__.py +0 -0
- {upgini-1.1.278a2 → upgini-1.1.279a1}/src/upgini/normalizer/phone_normalizer.py +0 -0
- {upgini-1.1.278a2 → upgini-1.1.279a1}/src/upgini/resource_bundle/__init__.py +0 -0
- {upgini-1.1.278a2 → upgini-1.1.279a1}/src/upgini/resource_bundle/exceptions.py +0 -0
- {upgini-1.1.278a2 → upgini-1.1.279a1}/src/upgini/resource_bundle/strings_widget.properties +0 -0
- {upgini-1.1.278a2 → upgini-1.1.279a1}/src/upgini/sampler/__init__.py +0 -0
- {upgini-1.1.278a2 → upgini-1.1.279a1}/src/upgini/sampler/base.py +0 -0
- {upgini-1.1.278a2 → upgini-1.1.279a1}/src/upgini/sampler/random_under_sampler.py +0 -0
- {upgini-1.1.278a2 → upgini-1.1.279a1}/src/upgini/sampler/utils.py +0 -0
- {upgini-1.1.278a2 → upgini-1.1.279a1}/src/upgini/search_task.py +0 -0
- {upgini-1.1.278a2 → upgini-1.1.279a1}/src/upgini/spinner.py +0 -0
- {upgini-1.1.278a2 → upgini-1.1.279a1}/src/upgini/utils/__init__.py +0 -0
- {upgini-1.1.278a2 → upgini-1.1.279a1}/src/upgini/utils/blocked_time_series.py +0 -0
- {upgini-1.1.278a2 → upgini-1.1.279a1}/src/upgini/utils/country_utils.py +0 -0
- {upgini-1.1.278a2 → upgini-1.1.279a1}/src/upgini/utils/custom_loss_utils.py +0 -0
- {upgini-1.1.278a2 → upgini-1.1.279a1}/src/upgini/utils/cv_utils.py +0 -0
- {upgini-1.1.278a2 → upgini-1.1.279a1}/src/upgini/utils/display_utils.py +0 -0
- {upgini-1.1.278a2 → upgini-1.1.279a1}/src/upgini/utils/fallback_progress_bar.py +0 -0
- {upgini-1.1.278a2 → upgini-1.1.279a1}/src/upgini/utils/features_validator.py +0 -0
- {upgini-1.1.278a2 → upgini-1.1.279a1}/src/upgini/utils/format.py +0 -0
- {upgini-1.1.278a2 → upgini-1.1.279a1}/src/upgini/utils/ip_utils.py +0 -0
- {upgini-1.1.278a2 → upgini-1.1.279a1}/src/upgini/utils/phone_utils.py +0 -0
- {upgini-1.1.278a2 → upgini-1.1.279a1}/src/upgini/utils/postal_code_utils.py +0 -0
- {upgini-1.1.278a2 → upgini-1.1.279a1}/src/upgini/utils/progress_bar.py +0 -0
- {upgini-1.1.278a2 → upgini-1.1.279a1}/src/upgini/utils/sklearn_ext.py +0 -0
- {upgini-1.1.278a2 → upgini-1.1.279a1}/src/upgini/utils/target_utils.py +0 -0
- {upgini-1.1.278a2 → upgini-1.1.279a1}/src/upgini/utils/track_info.py +0 -0
- {upgini-1.1.278a2 → upgini-1.1.279a1}/src/upgini/utils/warning_counter.py +0 -0
- {upgini-1.1.278a2 → upgini-1.1.279a1}/src/upgini/version_validator.py +0 -0
- {upgini-1.1.278a2 → upgini-1.1.279a1}/src/upgini.egg-info/dependency_links.txt +0 -0
- {upgini-1.1.278a2 → upgini-1.1.279a1}/src/upgini.egg-info/requires.txt +0 -0
- {upgini-1.1.278a2 → upgini-1.1.279a1}/src/upgini.egg-info/top_level.txt +0 -0
- {upgini-1.1.278a2 → upgini-1.1.279a1}/tests/test_autofe_operands.py +0 -0
- {upgini-1.1.278a2 → upgini-1.1.279a1}/tests/test_binary_dataset.py +0 -0
- {upgini-1.1.278a2 → upgini-1.1.279a1}/tests/test_blocked_time_series.py +0 -0
- {upgini-1.1.278a2 → upgini-1.1.279a1}/tests/test_categorical_dataset.py +0 -0
- {upgini-1.1.278a2 → upgini-1.1.279a1}/tests/test_continuous_dataset.py +0 -0
- {upgini-1.1.278a2 → upgini-1.1.279a1}/tests/test_custom_loss_utils.py +0 -0
- {upgini-1.1.278a2 → upgini-1.1.279a1}/tests/test_datetime_utils.py +0 -0
- {upgini-1.1.278a2 → upgini-1.1.279a1}/tests/test_metrics.py +0 -0
- {upgini-1.1.278a2 → upgini-1.1.279a1}/tests/test_target_utils.py +0 -0
- {upgini-1.1.278a2 → upgini-1.1.279a1}/tests/test_widget.py +0 -0
|
@@ -23,9 +23,7 @@ from pandas.api.types import (
|
|
|
23
23
|
from upgini.errors import ValidationError
|
|
24
24
|
from upgini.http import ProgressStage, SearchProgress, _RestClient
|
|
25
25
|
from upgini.metadata import (
|
|
26
|
-
ENTITY_SYSTEM_RECORD_ID,
|
|
27
26
|
EVAL_SET_INDEX,
|
|
28
|
-
SEARCH_KEY_UNNEST,
|
|
29
27
|
SYSTEM_COLUMNS,
|
|
30
28
|
SYSTEM_RECORD_ID,
|
|
31
29
|
TARGET,
|
|
@@ -81,7 +79,6 @@ class Dataset: # (pd.DataFrame):
|
|
|
81
79
|
path: Optional[str] = None,
|
|
82
80
|
meaning_types: Optional[Dict[str, FileColumnMeaningType]] = None,
|
|
83
81
|
search_keys: Optional[List[Tuple[str, ...]]] = None,
|
|
84
|
-
unnest_search_keys: Optional[Dict[str, str]] = None,
|
|
85
82
|
model_task_type: Optional[ModelTaskType] = None,
|
|
86
83
|
random_state: Optional[int] = None,
|
|
87
84
|
rest_client: Optional[_RestClient] = None,
|
|
@@ -116,7 +113,6 @@ class Dataset: # (pd.DataFrame):
|
|
|
116
113
|
self.description = description
|
|
117
114
|
self.meaning_types = meaning_types
|
|
118
115
|
self.search_keys = search_keys
|
|
119
|
-
self.unnest_search_keys = unnest_search_keys
|
|
120
116
|
self.ignore_columns = []
|
|
121
117
|
self.hierarchical_group_keys = []
|
|
122
118
|
self.hierarchical_subgroup_keys = []
|
|
@@ -176,7 +172,7 @@ class Dataset: # (pd.DataFrame):
|
|
|
176
172
|
new_columns = []
|
|
177
173
|
dup_counter = 0
|
|
178
174
|
for column in self.data.columns:
|
|
179
|
-
if column in [TARGET, EVAL_SET_INDEX, SYSTEM_RECORD_ID
|
|
175
|
+
if column in [TARGET, EVAL_SET_INDEX, SYSTEM_RECORD_ID]:
|
|
180
176
|
self.columns_renaming[column] = column
|
|
181
177
|
new_columns.append(column)
|
|
182
178
|
continue
|
|
@@ -357,9 +353,7 @@ class Dataset: # (pd.DataFrame):
|
|
|
357
353
|
|
|
358
354
|
if is_string_dtype(self.data[postal_code]) or is_object_dtype(self.data[postal_code]):
|
|
359
355
|
try:
|
|
360
|
-
self.data[postal_code] = (
|
|
361
|
-
self.data[postal_code].astype("string").astype("Float64").astype("Int64").astype("string")
|
|
362
|
-
)
|
|
356
|
+
self.data[postal_code] = self.data[postal_code].astype("float64").astype("Int64").astype("string")
|
|
363
357
|
except Exception:
|
|
364
358
|
pass
|
|
365
359
|
elif is_float_dtype(self.data[postal_code]):
|
|
@@ -809,9 +803,6 @@ class Dataset: # (pd.DataFrame):
|
|
|
809
803
|
meaningType=meaning_type,
|
|
810
804
|
minMaxValues=min_max_values,
|
|
811
805
|
)
|
|
812
|
-
if self.unnest_search_keys and column_meta.originalName in self.unnest_search_keys:
|
|
813
|
-
column_meta.isUnnest = True
|
|
814
|
-
column_meta.unnestKeyNames = self.unnest_search_keys[column_meta.originalName]
|
|
815
806
|
|
|
816
807
|
columns.append(column_meta)
|
|
817
808
|
|