upgini 1.1.274a2__tar.gz → 1.1.274a4__tar.gz
This diff shows the changes between two publicly available versions of the package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
Potentially problematic release.
This version of upgini might be problematic. Click here for more details.
- {upgini-1.1.274a2/src/upgini.egg-info → upgini-1.1.274a4}/PKG-INFO +1 -1
- {upgini-1.1.274a2 → upgini-1.1.274a4}/setup.py +1 -1
- {upgini-1.1.274a2 → upgini-1.1.274a4}/src/upgini/features_enricher.py +20 -9
- {upgini-1.1.274a2 → upgini-1.1.274a4}/src/upgini/resource_bundle/strings.properties +1 -0
- {upgini-1.1.274a2 → upgini-1.1.274a4/src/upgini.egg-info}/PKG-INFO +1 -1
- {upgini-1.1.274a2 → upgini-1.1.274a4}/tests/test_features_enricher.py +13 -7
- {upgini-1.1.274a2 → upgini-1.1.274a4}/LICENSE +0 -0
- {upgini-1.1.274a2 → upgini-1.1.274a4}/README.md +0 -0
- {upgini-1.1.274a2 → upgini-1.1.274a4}/pyproject.toml +0 -0
- {upgini-1.1.274a2 → upgini-1.1.274a4}/setup.cfg +0 -0
- {upgini-1.1.274a2 → upgini-1.1.274a4}/src/upgini/__init__.py +0 -0
- {upgini-1.1.274a2 → upgini-1.1.274a4}/src/upgini/ads.py +0 -0
- {upgini-1.1.274a2 → upgini-1.1.274a4}/src/upgini/ads_management/__init__.py +0 -0
- {upgini-1.1.274a2 → upgini-1.1.274a4}/src/upgini/ads_management/ads_manager.py +0 -0
- {upgini-1.1.274a2 → upgini-1.1.274a4}/src/upgini/autofe/__init__.py +0 -0
- {upgini-1.1.274a2 → upgini-1.1.274a4}/src/upgini/autofe/all_operands.py +0 -0
- {upgini-1.1.274a2 → upgini-1.1.274a4}/src/upgini/autofe/binary.py +0 -0
- {upgini-1.1.274a2 → upgini-1.1.274a4}/src/upgini/autofe/date.py +0 -0
- {upgini-1.1.274a2 → upgini-1.1.274a4}/src/upgini/autofe/feature.py +0 -0
- {upgini-1.1.274a2 → upgini-1.1.274a4}/src/upgini/autofe/groupby.py +0 -0
- {upgini-1.1.274a2 → upgini-1.1.274a4}/src/upgini/autofe/operand.py +0 -0
- {upgini-1.1.274a2 → upgini-1.1.274a4}/src/upgini/autofe/unary.py +0 -0
- {upgini-1.1.274a2 → upgini-1.1.274a4}/src/upgini/autofe/vector.py +0 -0
- {upgini-1.1.274a2 → upgini-1.1.274a4}/src/upgini/data_source/__init__.py +0 -0
- {upgini-1.1.274a2 → upgini-1.1.274a4}/src/upgini/data_source/data_source_publisher.py +0 -0
- {upgini-1.1.274a2 → upgini-1.1.274a4}/src/upgini/dataset.py +0 -0
- {upgini-1.1.274a2 → upgini-1.1.274a4}/src/upgini/errors.py +0 -0
- {upgini-1.1.274a2 → upgini-1.1.274a4}/src/upgini/fingerprint.js +0 -0
- {upgini-1.1.274a2 → upgini-1.1.274a4}/src/upgini/http.py +0 -0
- {upgini-1.1.274a2 → upgini-1.1.274a4}/src/upgini/mdc/__init__.py +0 -0
- {upgini-1.1.274a2 → upgini-1.1.274a4}/src/upgini/mdc/context.py +0 -0
- {upgini-1.1.274a2 → upgini-1.1.274a4}/src/upgini/metadata.py +0 -0
- {upgini-1.1.274a2 → upgini-1.1.274a4}/src/upgini/metrics.py +0 -0
- {upgini-1.1.274a2 → upgini-1.1.274a4}/src/upgini/normalizer/__init__.py +0 -0
- {upgini-1.1.274a2 → upgini-1.1.274a4}/src/upgini/normalizer/phone_normalizer.py +0 -0
- {upgini-1.1.274a2 → upgini-1.1.274a4}/src/upgini/resource_bundle/__init__.py +0 -0
- {upgini-1.1.274a2 → upgini-1.1.274a4}/src/upgini/resource_bundle/exceptions.py +0 -0
- {upgini-1.1.274a2 → upgini-1.1.274a4}/src/upgini/resource_bundle/strings_widget.properties +0 -0
- {upgini-1.1.274a2 → upgini-1.1.274a4}/src/upgini/sampler/__init__.py +0 -0
- {upgini-1.1.274a2 → upgini-1.1.274a4}/src/upgini/sampler/base.py +0 -0
- {upgini-1.1.274a2 → upgini-1.1.274a4}/src/upgini/sampler/random_under_sampler.py +0 -0
- {upgini-1.1.274a2 → upgini-1.1.274a4}/src/upgini/sampler/utils.py +0 -0
- {upgini-1.1.274a2 → upgini-1.1.274a4}/src/upgini/search_task.py +0 -0
- {upgini-1.1.274a2 → upgini-1.1.274a4}/src/upgini/spinner.py +0 -0
- {upgini-1.1.274a2 → upgini-1.1.274a4}/src/upgini/utils/__init__.py +0 -0
- {upgini-1.1.274a2 → upgini-1.1.274a4}/src/upgini/utils/base_search_key_detector.py +0 -0
- {upgini-1.1.274a2 → upgini-1.1.274a4}/src/upgini/utils/blocked_time_series.py +0 -0
- {upgini-1.1.274a2 → upgini-1.1.274a4}/src/upgini/utils/country_utils.py +0 -0
- {upgini-1.1.274a2 → upgini-1.1.274a4}/src/upgini/utils/custom_loss_utils.py +0 -0
- {upgini-1.1.274a2 → upgini-1.1.274a4}/src/upgini/utils/cv_utils.py +0 -0
- {upgini-1.1.274a2 → upgini-1.1.274a4}/src/upgini/utils/datetime_utils.py +0 -0
- {upgini-1.1.274a2 → upgini-1.1.274a4}/src/upgini/utils/deduplicate_utils.py +0 -0
- {upgini-1.1.274a2 → upgini-1.1.274a4}/src/upgini/utils/display_utils.py +0 -0
- {upgini-1.1.274a2 → upgini-1.1.274a4}/src/upgini/utils/email_utils.py +0 -0
- {upgini-1.1.274a2 → upgini-1.1.274a4}/src/upgini/utils/fallback_progress_bar.py +0 -0
- {upgini-1.1.274a2 → upgini-1.1.274a4}/src/upgini/utils/features_validator.py +0 -0
- {upgini-1.1.274a2 → upgini-1.1.274a4}/src/upgini/utils/format.py +0 -0
- {upgini-1.1.274a2 → upgini-1.1.274a4}/src/upgini/utils/ip_utils.py +0 -0
- {upgini-1.1.274a2 → upgini-1.1.274a4}/src/upgini/utils/phone_utils.py +0 -0
- {upgini-1.1.274a2 → upgini-1.1.274a4}/src/upgini/utils/postal_code_utils.py +0 -0
- {upgini-1.1.274a2 → upgini-1.1.274a4}/src/upgini/utils/progress_bar.py +0 -0
- {upgini-1.1.274a2 → upgini-1.1.274a4}/src/upgini/utils/sklearn_ext.py +0 -0
- {upgini-1.1.274a2 → upgini-1.1.274a4}/src/upgini/utils/target_utils.py +0 -0
- {upgini-1.1.274a2 → upgini-1.1.274a4}/src/upgini/utils/track_info.py +0 -0
- {upgini-1.1.274a2 → upgini-1.1.274a4}/src/upgini/utils/warning_counter.py +0 -0
- {upgini-1.1.274a2 → upgini-1.1.274a4}/src/upgini/version_validator.py +0 -0
- {upgini-1.1.274a2 → upgini-1.1.274a4}/src/upgini.egg-info/SOURCES.txt +0 -0
- {upgini-1.1.274a2 → upgini-1.1.274a4}/src/upgini.egg-info/dependency_links.txt +0 -0
- {upgini-1.1.274a2 → upgini-1.1.274a4}/src/upgini.egg-info/requires.txt +0 -0
- {upgini-1.1.274a2 → upgini-1.1.274a4}/src/upgini.egg-info/top_level.txt +0 -0
- {upgini-1.1.274a2 → upgini-1.1.274a4}/tests/test_autofe_operands.py +0 -0
- {upgini-1.1.274a2 → upgini-1.1.274a4}/tests/test_binary_dataset.py +0 -0
- {upgini-1.1.274a2 → upgini-1.1.274a4}/tests/test_blocked_time_series.py +0 -0
- {upgini-1.1.274a2 → upgini-1.1.274a4}/tests/test_categorical_dataset.py +0 -0
- {upgini-1.1.274a2 → upgini-1.1.274a4}/tests/test_continuous_dataset.py +0 -0
- {upgini-1.1.274a2 → upgini-1.1.274a4}/tests/test_country_utils.py +0 -0
- {upgini-1.1.274a2 → upgini-1.1.274a4}/tests/test_custom_loss_utils.py +0 -0
- {upgini-1.1.274a2 → upgini-1.1.274a4}/tests/test_datetime_utils.py +0 -0
- {upgini-1.1.274a2 → upgini-1.1.274a4}/tests/test_email_utils.py +0 -0
- {upgini-1.1.274a2 → upgini-1.1.274a4}/tests/test_etalon_validation.py +0 -0
- {upgini-1.1.274a2 → upgini-1.1.274a4}/tests/test_metrics.py +0 -0
- {upgini-1.1.274a2 → upgini-1.1.274a4}/tests/test_phone_utils.py +0 -0
- {upgini-1.1.274a2 → upgini-1.1.274a4}/tests/test_postal_code_utils.py +0 -0
- {upgini-1.1.274a2 → upgini-1.1.274a4}/tests/test_target_utils.py +0 -0
- {upgini-1.1.274a2 → upgini-1.1.274a4}/tests/test_widget.py +0 -0
|
@@ -1809,12 +1809,10 @@ class FeaturesEnricher(TransformerMixin):
|
|
|
1809
1809
|
features_section = ""
|
|
1810
1810
|
|
|
1811
1811
|
search_id = self._search_task.search_task_id
|
|
1812
|
-
api_example =
|
|
1813
|
-
f"""curl 'https://search.upgini.com/online/api/http_inference_trigger?search_id={search_id}' \\
|
|
1812
|
+
api_example = f"""curl 'https://search.upgini.com/online/api/http_inference_trigger?search_id={search_id}' \\
|
|
1814
1813
|
-H 'Authorization: {self.api_key}' \\
|
|
1815
1814
|
-H 'Content-Type: application/json' \\
|
|
1816
1815
|
-d '{{"search_keys": {keys}{features_section}}}'"""
|
|
1817
|
-
)
|
|
1818
1816
|
return api_example
|
|
1819
1817
|
|
|
1820
1818
|
def _get_copy_of_runtime_parameters(self) -> RuntimeParameters:
|
|
@@ -1910,8 +1908,7 @@ class FeaturesEnricher(TransformerMixin):
|
|
|
1910
1908
|
else:
|
|
1911
1909
|
self.logger.info("Input dataset hasn't date column")
|
|
1912
1910
|
if self.add_date_if_missing:
|
|
1913
|
-
df = self._add_current_date_as_key(df)
|
|
1914
|
-
search_keys[self.CURRENT_DATE] = SearchKey.DATE
|
|
1911
|
+
df = self._add_current_date_as_key(df, search_keys, self.logger, self.bundle)
|
|
1915
1912
|
email_column = self._get_email_column(search_keys)
|
|
1916
1913
|
hem_column = self._get_hem_column(search_keys)
|
|
1917
1914
|
email_converted_to_hem = False
|
|
@@ -2282,8 +2279,7 @@ class FeaturesEnricher(TransformerMixin):
|
|
|
2282
2279
|
else:
|
|
2283
2280
|
self.logger.info("Input dataset hasn't date column")
|
|
2284
2281
|
if self.add_date_if_missing:
|
|
2285
|
-
df = self._add_current_date_as_key(df)
|
|
2286
|
-
self.fit_search_keys[self.CURRENT_DATE] = SearchKey.DATE
|
|
2282
|
+
df = self._add_current_date_as_key(df, self.fit_search_keys, self.logger, self.bundle)
|
|
2287
2283
|
email_column = self._get_email_column(self.fit_search_keys)
|
|
2288
2284
|
hem_column = self._get_hem_column(self.fit_search_keys)
|
|
2289
2285
|
email_converted_to_hem = False
|
|
@@ -2865,8 +2861,23 @@ class FeaturesEnricher(TransformerMixin):
|
|
|
2865
2861
|
return col
|
|
2866
2862
|
|
|
2867
2863
|
@staticmethod
|
|
2868
|
-
def _add_current_date_as_key(
|
|
2869
|
-
df[
|
|
2864
|
+
def _add_current_date_as_key(
|
|
2865
|
+
df: pd.DataFrame, search_keys: Dict[str, SearchKey], logger: logging.Logger, bundle: ResourceBundle
|
|
2866
|
+
) -> pd.DataFrame:
|
|
2867
|
+
if (
|
|
2868
|
+
set(search_keys.values()) == {SearchKey.PHONE}
|
|
2869
|
+
or set(search_keys.values()) == {SearchKey.EMAIL}
|
|
2870
|
+
or set(search_keys.values()) == {SearchKey.HEM}
|
|
2871
|
+
or set(search_keys.values()) == {SearchKey.COUNTRY, SearchKey.POSTAL_CODE}
|
|
2872
|
+
):
|
|
2873
|
+
msg = bundle.get("current_date_added")
|
|
2874
|
+
print(msg)
|
|
2875
|
+
logger.warning(msg)
|
|
2876
|
+
df[FeaturesEnricher.CURRENT_DATE] = datetime.date.today()
|
|
2877
|
+
search_keys[FeaturesEnricher.CURRENT_DATE] = SearchKey.DATE
|
|
2878
|
+
converter = DateTimeSearchKeyConverter(FeaturesEnricher.CURRENT_DATE, None, logger, bundle)
|
|
2879
|
+
df = converter.convert(df)
|
|
2880
|
+
return df
|
|
2870
2881
|
|
|
2871
2882
|
@staticmethod
|
|
2872
2883
|
def _get_group_columns(df: pd.DataFrame, search_keys: Dict[str, SearchKey]) -> List[str]:
|
|
@@ -38,6 +38,7 @@ loss_selection_warn=\nWARNING: Loss `{0}` is not supported for feature selection
|
|
|
38
38
|
loss_calc_metrics_warn=\nWARNING: Loss `{0}` is not supported for metrics calculation with {1}
|
|
39
39
|
multivariate_timeseries_detected=\nWARNING: Multivariate TimeSeries detected. Blocked time series cross-validation split selected.\nMore details: https://github.com/upgini/upgini#-time-series-prediction-support
|
|
40
40
|
group_k_fold_in_classification=\nWARNING: Using group K-fold cross-validation split for classification task.
|
|
41
|
+
current_date_added=\nWARNING: No date/datetime column was detected in X to be used as a search key. The current date will be used to match the latest version of data sources
|
|
41
42
|
|
|
42
43
|
# Errors
|
|
43
44
|
failed_search_by_task_id=Failed to retrieve the specified search results
|
|
@@ -487,10 +487,7 @@ def test_saved_features_enricher(requests_mock: Mocker):
|
|
|
487
487
|
train_random_indices = random.choice(train_target.index, size=9000, replace=False)
|
|
488
488
|
train_target.loc[train_random_indices] = 0
|
|
489
489
|
|
|
490
|
-
metrics = enricher.calculate_metrics(
|
|
491
|
-
train_features,
|
|
492
|
-
train_target
|
|
493
|
-
)
|
|
490
|
+
metrics = enricher.calculate_metrics(train_features, train_target)
|
|
494
491
|
expected_metrics = pd.DataFrame(
|
|
495
492
|
{
|
|
496
493
|
segment_header: [train_segment],
|
|
@@ -2230,8 +2227,9 @@ def test_email_search_key(requests_mock: Mocker):
|
|
|
2230
2227
|
"hashed_email_64ff8c",
|
|
2231
2228
|
"email_one_domain_3b0a68",
|
|
2232
2229
|
"email_domain_10c73f",
|
|
2230
|
+
"current_date_b993c4",
|
|
2233
2231
|
}
|
|
2234
|
-
assert {"hashed_email_64ff8c", "email_one_domain_3b0a68"} == {
|
|
2232
|
+
assert {"hashed_email_64ff8c", "email_one_domain_3b0a68", "current_date_b993c4"} == {
|
|
2235
2233
|
sk for sublist in self.search_keys for sk in sublist
|
|
2236
2234
|
}
|
|
2237
2235
|
raise TestException()
|
|
@@ -2276,10 +2274,18 @@ def test_composit_index_search_key(requests_mock: Mocker):
|
|
|
2276
2274
|
**kwargs,
|
|
2277
2275
|
):
|
|
2278
2276
|
self.validate()
|
|
2279
|
-
assert set(self.columns.to_list()) == {
|
|
2277
|
+
assert set(self.columns.to_list()) == {
|
|
2278
|
+
"system_record_id",
|
|
2279
|
+
"country_aff64e",
|
|
2280
|
+
"postal_code_13534a",
|
|
2281
|
+
"current_date_b993c4",
|
|
2282
|
+
"target",
|
|
2283
|
+
}
|
|
2280
2284
|
assert "country_aff64e" in self.columns
|
|
2281
2285
|
assert "postal_code_13534a"
|
|
2282
|
-
assert {"country_aff64e", "postal_code_13534a"} == {
|
|
2286
|
+
assert {"country_aff64e", "postal_code_13534a", "current_date_b993c4"} == {
|
|
2287
|
+
sk for sublist in self.search_keys for sk in sublist
|
|
2288
|
+
}
|
|
2283
2289
|
# assert "country_fake_a" in self.columns
|
|
2284
2290
|
# assert "postal_code_fake_a" in self.columns
|
|
2285
2291
|
# assert {"country_fake_a", "postal_code_fake_a"} == {sk for sublist in self.search_keys for sk in sublist}
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|