upgini 1.1.307__tar.gz → 1.1.309__tar.gz
This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of upgini might be problematic. Click here for more details.
- {upgini-1.1.307 → upgini-1.1.309}/PKG-INFO +1 -1
- upgini-1.1.309/src/upgini/__about__.py +1 -0
- {upgini-1.1.307 → upgini-1.1.309}/src/upgini/data_source/data_source_publisher.py +21 -5
- {upgini-1.1.307 → upgini-1.1.309}/src/upgini/features_enricher.py +11 -34
- upgini-1.1.307/src/upgini/__about__.py +0 -1
- {upgini-1.1.307 → upgini-1.1.309}/.gitignore +0 -0
- {upgini-1.1.307 → upgini-1.1.309}/LICENSE +0 -0
- {upgini-1.1.307 → upgini-1.1.309}/README.md +0 -0
- {upgini-1.1.307 → upgini-1.1.309}/pyproject.toml +0 -0
- {upgini-1.1.307 → upgini-1.1.309}/src/upgini/__init__.py +0 -0
- {upgini-1.1.307 → upgini-1.1.309}/src/upgini/ads.py +0 -0
- {upgini-1.1.307 → upgini-1.1.309}/src/upgini/ads_management/__init__.py +0 -0
- {upgini-1.1.307 → upgini-1.1.309}/src/upgini/ads_management/ads_manager.py +0 -0
- {upgini-1.1.307 → upgini-1.1.309}/src/upgini/autofe/__init__.py +0 -0
- {upgini-1.1.307 → upgini-1.1.309}/src/upgini/autofe/all_operands.py +0 -0
- {upgini-1.1.307 → upgini-1.1.309}/src/upgini/autofe/binary.py +0 -0
- {upgini-1.1.307 → upgini-1.1.309}/src/upgini/autofe/date.py +0 -0
- {upgini-1.1.307 → upgini-1.1.309}/src/upgini/autofe/feature.py +0 -0
- {upgini-1.1.307 → upgini-1.1.309}/src/upgini/autofe/groupby.py +0 -0
- {upgini-1.1.307 → upgini-1.1.309}/src/upgini/autofe/operand.py +0 -0
- {upgini-1.1.307 → upgini-1.1.309}/src/upgini/autofe/unary.py +0 -0
- {upgini-1.1.307 → upgini-1.1.309}/src/upgini/autofe/vector.py +0 -0
- {upgini-1.1.307 → upgini-1.1.309}/src/upgini/data_source/__init__.py +0 -0
- {upgini-1.1.307 → upgini-1.1.309}/src/upgini/dataset.py +0 -0
- {upgini-1.1.307 → upgini-1.1.309}/src/upgini/errors.py +0 -0
- {upgini-1.1.307 → upgini-1.1.309}/src/upgini/http.py +0 -0
- {upgini-1.1.307 → upgini-1.1.309}/src/upgini/lazy_import.py +0 -0
- {upgini-1.1.307 → upgini-1.1.309}/src/upgini/mdc/__init__.py +0 -0
- {upgini-1.1.307 → upgini-1.1.309}/src/upgini/mdc/context.py +0 -0
- {upgini-1.1.307 → upgini-1.1.309}/src/upgini/metadata.py +0 -0
- {upgini-1.1.307 → upgini-1.1.309}/src/upgini/metrics.py +0 -0
- {upgini-1.1.307 → upgini-1.1.309}/src/upgini/normalizer/__init__.py +0 -0
- {upgini-1.1.307 → upgini-1.1.309}/src/upgini/normalizer/phone_normalizer.py +0 -0
- {upgini-1.1.307 → upgini-1.1.309}/src/upgini/resource_bundle/__init__.py +0 -0
- {upgini-1.1.307 → upgini-1.1.309}/src/upgini/resource_bundle/exceptions.py +0 -0
- {upgini-1.1.307 → upgini-1.1.309}/src/upgini/resource_bundle/strings.properties +0 -0
- {upgini-1.1.307 → upgini-1.1.309}/src/upgini/resource_bundle/strings_widget.properties +0 -0
- {upgini-1.1.307 → upgini-1.1.309}/src/upgini/sampler/__init__.py +0 -0
- {upgini-1.1.307 → upgini-1.1.309}/src/upgini/sampler/base.py +0 -0
- {upgini-1.1.307 → upgini-1.1.309}/src/upgini/sampler/random_under_sampler.py +0 -0
- {upgini-1.1.307 → upgini-1.1.309}/src/upgini/sampler/utils.py +0 -0
- {upgini-1.1.307 → upgini-1.1.309}/src/upgini/search_task.py +0 -0
- {upgini-1.1.307 → upgini-1.1.309}/src/upgini/spinner.py +0 -0
- {upgini-1.1.307 → upgini-1.1.309}/src/upgini/utils/__init__.py +0 -0
- {upgini-1.1.307 → upgini-1.1.309}/src/upgini/utils/base_search_key_detector.py +0 -0
- {upgini-1.1.307 → upgini-1.1.309}/src/upgini/utils/blocked_time_series.py +0 -0
- {upgini-1.1.307 → upgini-1.1.309}/src/upgini/utils/country_utils.py +0 -0
- {upgini-1.1.307 → upgini-1.1.309}/src/upgini/utils/custom_loss_utils.py +0 -0
- {upgini-1.1.307 → upgini-1.1.309}/src/upgini/utils/cv_utils.py +0 -0
- {upgini-1.1.307 → upgini-1.1.309}/src/upgini/utils/datetime_utils.py +0 -0
- {upgini-1.1.307 → upgini-1.1.309}/src/upgini/utils/deduplicate_utils.py +0 -0
- {upgini-1.1.307 → upgini-1.1.309}/src/upgini/utils/display_utils.py +0 -0
- {upgini-1.1.307 → upgini-1.1.309}/src/upgini/utils/email_utils.py +0 -0
- {upgini-1.1.307 → upgini-1.1.309}/src/upgini/utils/fallback_progress_bar.py +0 -0
- {upgini-1.1.307 → upgini-1.1.309}/src/upgini/utils/features_validator.py +0 -0
- {upgini-1.1.307 → upgini-1.1.309}/src/upgini/utils/format.py +0 -0
- {upgini-1.1.307 → upgini-1.1.309}/src/upgini/utils/ip_utils.py +0 -0
- {upgini-1.1.307 → upgini-1.1.309}/src/upgini/utils/phone_utils.py +0 -0
- {upgini-1.1.307 → upgini-1.1.309}/src/upgini/utils/postal_code_utils.py +0 -0
- {upgini-1.1.307 → upgini-1.1.309}/src/upgini/utils/progress_bar.py +0 -0
- {upgini-1.1.307 → upgini-1.1.309}/src/upgini/utils/sklearn_ext.py +0 -0
- {upgini-1.1.307 → upgini-1.1.309}/src/upgini/utils/target_utils.py +0 -0
- {upgini-1.1.307 → upgini-1.1.309}/src/upgini/utils/track_info.py +0 -0
- {upgini-1.1.307 → upgini-1.1.309}/src/upgini/utils/warning_counter.py +0 -0
- {upgini-1.1.307 → upgini-1.1.309}/src/upgini/version_validator.py +0 -0
|
@@ -0,0 +1 @@
|
|
|
1
|
+
__version__ = "1.1.309"
|
|
@@ -3,7 +3,7 @@ import time
|
|
|
3
3
|
import uuid
|
|
4
4
|
from datetime import datetime
|
|
5
5
|
from enum import Enum
|
|
6
|
-
from typing import Dict, List, Optional, Union
|
|
6
|
+
from typing import Dict, List, Literal, Optional, Union
|
|
7
7
|
|
|
8
8
|
from upgini.errors import HttpError, ValidationError
|
|
9
9
|
from upgini.http import LoggerFactory, get_rest_client
|
|
@@ -47,7 +47,9 @@ class DataSourcePublisher:
|
|
|
47
47
|
self,
|
|
48
48
|
data_table_uri: str,
|
|
49
49
|
search_keys: Dict[str, SearchKey],
|
|
50
|
-
update_frequency:
|
|
50
|
+
update_frequency: (
|
|
51
|
+
Literal["Daily"] | Literal["Weekly"] | Literal["Monthly"] | Literal["Quarterly"] | Literal["Annually"]
|
|
52
|
+
),
|
|
51
53
|
exclude_from_autofe_generation: Optional[List[str]],
|
|
52
54
|
secondary_search_keys: Optional[Dict[str, SearchKey]] = None,
|
|
53
55
|
sort_column: Optional[str] = None,
|
|
@@ -233,11 +235,17 @@ class DataSourcePublisher:
|
|
|
233
235
|
self.logger.exception("Failed to register data table")
|
|
234
236
|
raise
|
|
235
237
|
|
|
236
|
-
def remove(self, data_table_ids: List[str]):
|
|
238
|
+
def remove(self, data_table_ids: List[str] | str):
|
|
237
239
|
trace_id = str(uuid.uuid4())
|
|
238
240
|
with MDC(trace_id=trace_id):
|
|
239
241
|
try:
|
|
240
|
-
if
|
|
242
|
+
if not data_table_ids:
|
|
243
|
+
raise ValidationError("Empty data table ids")
|
|
244
|
+
if isinstance(data_table_ids, str):
|
|
245
|
+
data_table_ids = [data_table_ids]
|
|
246
|
+
if not isinstance(data_table_ids, list):
|
|
247
|
+
raise ValidationError("Invalid format of data_table_ids argument")
|
|
248
|
+
if len(data_table_ids) == 0:
|
|
241
249
|
raise ValidationError("Empty data table ids")
|
|
242
250
|
|
|
243
251
|
for data_table_id in data_table_ids:
|
|
@@ -266,16 +274,20 @@ class DataSourcePublisher:
|
|
|
266
274
|
source_link: Optional[str] = None,
|
|
267
275
|
update_frequency: Optional[str] = None,
|
|
268
276
|
client_emails: Optional[List[str]] = None,
|
|
277
|
+
date_features: Optional[List[str]] = None,
|
|
278
|
+
date_vector_features: Optional[List[str]] = None,
|
|
269
279
|
):
|
|
270
280
|
trace_id = str(uuid.uuid4())
|
|
271
281
|
with MDC(trace_id=trace_id):
|
|
272
282
|
try:
|
|
273
|
-
if data_table_ids is None
|
|
283
|
+
if data_table_ids is None:
|
|
274
284
|
raise ValidationError("Empty data table ids")
|
|
275
285
|
if isinstance(data_table_ids, str):
|
|
276
286
|
data_table_ids = [data_table_ids]
|
|
277
287
|
if not isinstance(data_table_ids, list):
|
|
278
288
|
raise ValidationError("data_table_ids should be string or list of strings")
|
|
289
|
+
if len(data_table_ids) == 0:
|
|
290
|
+
raise ValidationError("Empty data table ids")
|
|
279
291
|
if update_frequency is not None and update_frequency not in self.ACCEPTABLE_UPDATE_FREQUENCIES:
|
|
280
292
|
raise ValidationError(
|
|
281
293
|
f"Invalid update frequency: {update_frequency}. "
|
|
@@ -311,6 +323,10 @@ class DataSourcePublisher:
|
|
|
311
323
|
request["updateFrequency"] = update_frequency
|
|
312
324
|
if client_emails is not None:
|
|
313
325
|
request["clientEmails"] = client_emails
|
|
326
|
+
if date_features is not None:
|
|
327
|
+
request["dateFeatures"] = date_features
|
|
328
|
+
if date_vector_features is not None:
|
|
329
|
+
request["dateVectorFeatures"] = date_vector_features
|
|
314
330
|
self.logger.info(f"Activating data tables with request {request}")
|
|
315
331
|
|
|
316
332
|
self._rest_client.activate_datatables(request, trace_id)
|
|
@@ -90,7 +90,6 @@ from upgini.utils.display_utils import (
|
|
|
90
90
|
from upgini.utils.email_utils import EmailSearchKeyConverter, EmailSearchKeyDetector
|
|
91
91
|
from upgini.utils.features_validator import FeaturesValidator
|
|
92
92
|
from upgini.utils.format import Format
|
|
93
|
-
from upgini.utils.ip_utils import IpToCountrySearchKeyConverter
|
|
94
93
|
from upgini.utils.phone_utils import PhoneSearchKeyDetector
|
|
95
94
|
from upgini.utils.postal_code_utils import PostalCodeSearchKeyDetector
|
|
96
95
|
|
|
@@ -866,13 +865,6 @@ class FeaturesEnricher(TransformerMixin):
|
|
|
866
865
|
if X is not None and y is None:
|
|
867
866
|
raise ValidationError("X passed without y")
|
|
868
867
|
|
|
869
|
-
if self.X is None:
|
|
870
|
-
self.X = X
|
|
871
|
-
if self.y is None:
|
|
872
|
-
self.y = y
|
|
873
|
-
if self.eval_set is None:
|
|
874
|
-
self.eval_set = effective_eval_set
|
|
875
|
-
|
|
876
868
|
validate_scoring_argument(scoring)
|
|
877
869
|
|
|
878
870
|
self._validate_baseline_score(effective_X, effective_eval_set)
|
|
@@ -889,9 +881,9 @@ class FeaturesEnricher(TransformerMixin):
|
|
|
889
881
|
|
|
890
882
|
prepared_data = self._prepare_data_for_metrics(
|
|
891
883
|
trace_id=trace_id,
|
|
892
|
-
X=
|
|
893
|
-
y=
|
|
894
|
-
eval_set=
|
|
884
|
+
X=X,
|
|
885
|
+
y=y,
|
|
886
|
+
eval_set=eval_set,
|
|
895
887
|
exclude_features_sources=exclude_features_sources,
|
|
896
888
|
importance_threshold=importance_threshold,
|
|
897
889
|
max_features=max_features,
|
|
@@ -1136,6 +1128,13 @@ class FeaturesEnricher(TransformerMixin):
|
|
|
1136
1128
|
elif uplift_col in metrics_df.columns and (metrics_df[uplift_col] < 0).any():
|
|
1137
1129
|
self.logger.warning("Uplift is negative")
|
|
1138
1130
|
|
|
1131
|
+
if self.X is None:
|
|
1132
|
+
self.X = X
|
|
1133
|
+
if self.y is None:
|
|
1134
|
+
self.y = y
|
|
1135
|
+
if self.eval_set is None:
|
|
1136
|
+
self.eval_set = effective_eval_set
|
|
1137
|
+
|
|
1139
1138
|
return metrics_df
|
|
1140
1139
|
except Exception as e:
|
|
1141
1140
|
error_message = "Failed to calculate metrics" + (
|
|
@@ -1213,13 +1212,6 @@ class FeaturesEnricher(TransformerMixin):
|
|
|
1213
1212
|
converter = EmailSearchKeyConverter(email_column, hem_column, search_keys, [], self.logger)
|
|
1214
1213
|
extended_X = converter.convert(extended_X)
|
|
1215
1214
|
generated_features.extend(converter.generated_features)
|
|
1216
|
-
if (
|
|
1217
|
-
self.detect_missing_search_keys
|
|
1218
|
-
and list(search_keys.values()) == [SearchKey.DATE]
|
|
1219
|
-
and self.country_code is None
|
|
1220
|
-
):
|
|
1221
|
-
converter = IpToCountrySearchKeyConverter(search_keys, self.logger)
|
|
1222
|
-
extended_X = converter.convert(extended_X)
|
|
1223
1215
|
generated_features = [f for f in generated_features if f in self.fit_generated_features]
|
|
1224
1216
|
|
|
1225
1217
|
return extended_X, search_keys
|
|
@@ -1987,13 +1979,6 @@ class FeaturesEnricher(TransformerMixin):
|
|
|
1987
1979
|
df = converter.convert(df)
|
|
1988
1980
|
generated_features.extend(converter.generated_features)
|
|
1989
1981
|
email_converted_to_hem = converter.email_converted_to_hem
|
|
1990
|
-
if (
|
|
1991
|
-
self.detect_missing_search_keys
|
|
1992
|
-
and list(search_keys.values()) == [SearchKey.DATE]
|
|
1993
|
-
and self.country_code is None
|
|
1994
|
-
):
|
|
1995
|
-
converter = IpToCountrySearchKeyConverter(search_keys, self.logger)
|
|
1996
|
-
df = converter.convert(df)
|
|
1997
1982
|
generated_features = [f for f in generated_features if f in self.fit_generated_features]
|
|
1998
1983
|
|
|
1999
1984
|
meaning_types = {col: key.value for col, key in search_keys.items()}
|
|
@@ -2342,7 +2327,7 @@ class FeaturesEnricher(TransformerMixin):
|
|
|
2342
2327
|
df = self._add_current_date_as_key(df, self.fit_search_keys, self.logger, self.bundle)
|
|
2343
2328
|
|
|
2344
2329
|
# Checks that need validated date
|
|
2345
|
-
validate_dates_distribution(
|
|
2330
|
+
validate_dates_distribution(df, self.fit_search_keys, self.logger, self.bundle, self.warning_counter)
|
|
2346
2331
|
|
|
2347
2332
|
if is_numeric_dtype(df[self.TARGET_NAME]) and has_date:
|
|
2348
2333
|
self._validate_PSI(df.sort_values(by=maybe_date_column))
|
|
@@ -2356,14 +2341,6 @@ class FeaturesEnricher(TransformerMixin):
|
|
|
2356
2341
|
)
|
|
2357
2342
|
df = clean_full_duplicates(df, self.logger, bundle=self.bundle)
|
|
2358
2343
|
|
|
2359
|
-
if (
|
|
2360
|
-
self.detect_missing_search_keys
|
|
2361
|
-
and list(self.fit_search_keys.values()) == [SearchKey.DATE]
|
|
2362
|
-
and self.country_code is None
|
|
2363
|
-
):
|
|
2364
|
-
converter = IpToCountrySearchKeyConverter(self.fit_search_keys, self.logger)
|
|
2365
|
-
df = converter.convert(df)
|
|
2366
|
-
|
|
2367
2344
|
# Explode multiple search keys
|
|
2368
2345
|
non_feature_columns = [self.TARGET_NAME, EVAL_SET_INDEX] + list(self.fit_search_keys.keys())
|
|
2369
2346
|
meaning_types = {
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
__version__ = "1.1.307"
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|