upgini 1.1.307__py3-none-any.whl → 1.1.309__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of upgini might be problematic. Click here for more details.

upgini/__about__.py CHANGED
@@ -1 +1 @@
1
- __version__ = "1.1.307"
1
+ __version__ = "1.1.309"
upgini/data_source/data_source_publisher.py CHANGED
@@ -3,7 +3,7 @@ import time
3
3
  import uuid
4
4
  from datetime import datetime
5
5
  from enum import Enum
6
- from typing import Dict, List, Optional, Union
6
+ from typing import Dict, List, Literal, Optional, Union
7
7
 
8
8
  from upgini.errors import HttpError, ValidationError
9
9
  from upgini.http import LoggerFactory, get_rest_client
@@ -47,7 +47,9 @@ class DataSourcePublisher:
47
47
  self,
48
48
  data_table_uri: str,
49
49
  search_keys: Dict[str, SearchKey],
50
- update_frequency: str,
50
+ update_frequency: (
51
+ Literal["Daily"] | Literal["Weekly"] | Literal["Monthly"] | Literal["Quarterly"] | Literal["Annually"]
52
+ ),
51
53
  exclude_from_autofe_generation: Optional[List[str]],
52
54
  secondary_search_keys: Optional[Dict[str, SearchKey]] = None,
53
55
  sort_column: Optional[str] = None,
@@ -233,11 +235,17 @@ class DataSourcePublisher:
233
235
  self.logger.exception("Failed to register data table")
234
236
  raise
235
237
 
236
- def remove(self, data_table_ids: List[str]):
238
+ def remove(self, data_table_ids: List[str] | str):
237
239
  trace_id = str(uuid.uuid4())
238
240
  with MDC(trace_id=trace_id):
239
241
  try:
240
- if data_table_ids is None or len(data_table_ids) == 0:
242
+ if not data_table_ids:
243
+ raise ValidationError("Empty data table ids")
244
+ if isinstance(data_table_ids, str):
245
+ data_table_ids = [data_table_ids]
246
+ if not isinstance(data_table_ids, list):
247
+ raise ValidationError("Invalid format of data_table_ids argument")
248
+ if len(data_table_ids) == 0:
241
249
  raise ValidationError("Empty data table ids")
242
250
 
243
251
  for data_table_id in data_table_ids:
@@ -266,16 +274,20 @@ class DataSourcePublisher:
266
274
  source_link: Optional[str] = None,
267
275
  update_frequency: Optional[str] = None,
268
276
  client_emails: Optional[List[str]] = None,
277
+ date_features: Optional[List[str]] = None,
278
+ date_vector_features: Optional[List[str]] = None,
269
279
  ):
270
280
  trace_id = str(uuid.uuid4())
271
281
  with MDC(trace_id=trace_id):
272
282
  try:
273
- if data_table_ids is None or len(data_table_ids) == 0:
283
+ if data_table_ids is None:
274
284
  raise ValidationError("Empty data table ids")
275
285
  if isinstance(data_table_ids, str):
276
286
  data_table_ids = [data_table_ids]
277
287
  if not isinstance(data_table_ids, list):
278
288
  raise ValidationError("data_table_ids should be string or list of strings")
289
+ if len(data_table_ids) == 0:
290
+ raise ValidationError("Empty data table ids")
279
291
  if update_frequency is not None and update_frequency not in self.ACCEPTABLE_UPDATE_FREQUENCIES:
280
292
  raise ValidationError(
281
293
  f"Invalid update frequency: {update_frequency}. "
@@ -311,6 +323,10 @@ class DataSourcePublisher:
311
323
  request["updateFrequency"] = update_frequency
312
324
  if client_emails is not None:
313
325
  request["clientEmails"] = client_emails
326
+ if date_features is not None:
327
+ request["dateFeatures"] = date_features
328
+ if date_vector_features is not None:
329
+ request["dateVectorFeatures"] = date_vector_features
314
330
  self.logger.info(f"Activating data tables with request {request}")
315
331
 
316
332
  self._rest_client.activate_datatables(request, trace_id)
upgini/features_enricher.py CHANGED
@@ -90,7 +90,6 @@ from upgini.utils.display_utils import (
90
90
  from upgini.utils.email_utils import EmailSearchKeyConverter, EmailSearchKeyDetector
91
91
  from upgini.utils.features_validator import FeaturesValidator
92
92
  from upgini.utils.format import Format
93
- from upgini.utils.ip_utils import IpToCountrySearchKeyConverter
94
93
  from upgini.utils.phone_utils import PhoneSearchKeyDetector
95
94
  from upgini.utils.postal_code_utils import PostalCodeSearchKeyDetector
96
95
 
@@ -866,13 +865,6 @@ class FeaturesEnricher(TransformerMixin):
866
865
  if X is not None and y is None:
867
866
  raise ValidationError("X passed without y")
868
867
 
869
- if self.X is None:
870
- self.X = X
871
- if self.y is None:
872
- self.y = y
873
- if self.eval_set is None:
874
- self.eval_set = effective_eval_set
875
-
876
868
  validate_scoring_argument(scoring)
877
869
 
878
870
  self._validate_baseline_score(effective_X, effective_eval_set)
@@ -889,9 +881,9 @@ class FeaturesEnricher(TransformerMixin):
889
881
 
890
882
  prepared_data = self._prepare_data_for_metrics(
891
883
  trace_id=trace_id,
892
- X=effective_X,
893
- y=effective_y,
894
- eval_set=effective_eval_set,
884
+ X=X,
885
+ y=y,
886
+ eval_set=eval_set,
895
887
  exclude_features_sources=exclude_features_sources,
896
888
  importance_threshold=importance_threshold,
897
889
  max_features=max_features,
@@ -1136,6 +1128,13 @@ class FeaturesEnricher(TransformerMixin):
1136
1128
  elif uplift_col in metrics_df.columns and (metrics_df[uplift_col] < 0).any():
1137
1129
  self.logger.warning("Uplift is negative")
1138
1130
 
1131
+ if self.X is None:
1132
+ self.X = X
1133
+ if self.y is None:
1134
+ self.y = y
1135
+ if self.eval_set is None:
1136
+ self.eval_set = effective_eval_set
1137
+
1139
1138
  return metrics_df
1140
1139
  except Exception as e:
1141
1140
  error_message = "Failed to calculate metrics" + (
@@ -1213,13 +1212,6 @@ class FeaturesEnricher(TransformerMixin):
1213
1212
  converter = EmailSearchKeyConverter(email_column, hem_column, search_keys, [], self.logger)
1214
1213
  extended_X = converter.convert(extended_X)
1215
1214
  generated_features.extend(converter.generated_features)
1216
- if (
1217
- self.detect_missing_search_keys
1218
- and list(search_keys.values()) == [SearchKey.DATE]
1219
- and self.country_code is None
1220
- ):
1221
- converter = IpToCountrySearchKeyConverter(search_keys, self.logger)
1222
- extended_X = converter.convert(extended_X)
1223
1215
  generated_features = [f for f in generated_features if f in self.fit_generated_features]
1224
1216
 
1225
1217
  return extended_X, search_keys
@@ -1987,13 +1979,6 @@ class FeaturesEnricher(TransformerMixin):
1987
1979
  df = converter.convert(df)
1988
1980
  generated_features.extend(converter.generated_features)
1989
1981
  email_converted_to_hem = converter.email_converted_to_hem
1990
- if (
1991
- self.detect_missing_search_keys
1992
- and list(search_keys.values()) == [SearchKey.DATE]
1993
- and self.country_code is None
1994
- ):
1995
- converter = IpToCountrySearchKeyConverter(search_keys, self.logger)
1996
- df = converter.convert(df)
1997
1982
  generated_features = [f for f in generated_features if f in self.fit_generated_features]
1998
1983
 
1999
1984
  meaning_types = {col: key.value for col, key in search_keys.items()}
@@ -2342,7 +2327,7 @@ class FeaturesEnricher(TransformerMixin):
2342
2327
  df = self._add_current_date_as_key(df, self.fit_search_keys, self.logger, self.bundle)
2343
2328
 
2344
2329
  # Checks that need validated date
2345
- validate_dates_distribution(validated_X, self.fit_search_keys, self.logger, self.bundle, self.warning_counter)
2330
+ validate_dates_distribution(df, self.fit_search_keys, self.logger, self.bundle, self.warning_counter)
2346
2331
 
2347
2332
  if is_numeric_dtype(df[self.TARGET_NAME]) and has_date:
2348
2333
  self._validate_PSI(df.sort_values(by=maybe_date_column))
@@ -2356,14 +2341,6 @@ class FeaturesEnricher(TransformerMixin):
2356
2341
  )
2357
2342
  df = clean_full_duplicates(df, self.logger, bundle=self.bundle)
2358
2343
 
2359
- if (
2360
- self.detect_missing_search_keys
2361
- and list(self.fit_search_keys.values()) == [SearchKey.DATE]
2362
- and self.country_code is None
2363
- ):
2364
- converter = IpToCountrySearchKeyConverter(self.fit_search_keys, self.logger)
2365
- df = converter.convert(df)
2366
-
2367
2344
  # Explode multiple search keys
2368
2345
  non_feature_columns = [self.TARGET_NAME, EVAL_SET_INDEX] + list(self.fit_search_keys.keys())
2369
2346
  meaning_types = {
{upgini-1.1.307.dist-info → upgini-1.1.309.dist-info}/METADATA CHANGED
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: upgini
3
- Version: 1.1.307
3
+ Version: 1.1.309
4
4
  Summary: Intelligent data search & enrichment for Machine Learning
5
5
  Project-URL: Bug Reports, https://github.com/upgini/upgini/issues
6
6
  Project-URL: Homepage, https://upgini.com/
{upgini-1.1.307.dist-info → upgini-1.1.309.dist-info}/RECORD CHANGED
@@ -1,9 +1,9 @@
1
- upgini/__about__.py,sha256=Sn0l4v1zhcKcI7IkU-4gpLRv1k03kr7gu6biIIfnoLM,24
1
+ upgini/__about__.py,sha256=fVCCA8eA4k2v8Wxf_32xlQw7p7cPzdG8xF2zFSPzI0E,24
2
2
  upgini/__init__.py,sha256=Xs0YFVBu1KUdtZzbStGRPQtLt3YLzJnjx5nIUBlX8BE,415
3
3
  upgini/ads.py,sha256=nvuRxRx5MHDMgPr9SiU-fsqRdFaBv8p4_v1oqiysKpc,2714
4
4
  upgini/dataset.py,sha256=MOzBVsvzlHLxNfPWtMaXC_jIPeW7_gUvbSGeXnsPgNI,46158
5
5
  upgini/errors.py,sha256=2b_Wbo0OYhLUbrZqdLIx5jBnAsiD1Mcenh-VjR4HCTw,950
6
- upgini/features_enricher.py,sha256=87gU9dwSjdqJ5ebOnDPk9gKvoLj6XZvpO5mkIbnaWNE,183601
6
+ upgini/features_enricher.py,sha256=JzSnwqxUKuYqBC4DHgPcG4MxQzvnCfKuOgihTllwRis,182583
7
7
  upgini/http.py,sha256=a4Epc9YLIJBuYk4t8E_2-QDLBtJFqKO35jn2SnYQZCg,42920
8
8
  upgini/lazy_import.py,sha256=EwoM0msNGbSmWBhGbrLDny1DSnOlvTxCjmMKPxYlDms,610
9
9
  upgini/metadata.py,sha256=E5WWZ_MkjGyYNQh_LnwMIBHyqPx1fxk-qhEfQIJnzq8,10209
@@ -23,7 +23,7 @@ upgini/autofe/operand.py,sha256=MKEsl3zxpWzRDpTkE0sNJxTu62U20sWOvEKhPjUWS6s,2915
23
23
  upgini/autofe/unary.py,sha256=ZWjLd-CUkNt_PpM8YuWLLipW1v_RdBlsl4JxXIVo9aM,3652
24
24
  upgini/autofe/vector.py,sha256=dLxfAstJs-gw_OQ1xxoxcM6pVzORlV0HVzdzt7cLXVQ,606
25
25
  upgini/data_source/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
26
- upgini/data_source/data_source_publisher.py,sha256=y0tYFp7N3bSI7BwQ5SRF8r0bRaI3z6Zc1fsZezVg7hE,20552
26
+ upgini/data_source/data_source_publisher.py,sha256=c6fkdkV5teoqHkOJRjtUhEoeu2JRKBvEAXcKRja2pHk,21443
27
27
  upgini/mdc/__init__.py,sha256=aM08nIWFc2gWdWUa3_IuEnNND0cQPkBGnYpRMnfFN8k,1019
28
28
  upgini/mdc/context.py,sha256=3u1B-jXt7tXEvNcV3qmR9SDCseudnY7KYsLclBdwVLk,1405
29
29
  upgini/normalizer/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -57,7 +57,7 @@ upgini/utils/sklearn_ext.py,sha256=13jQS_k7v0aUtudXV6nGUEWjttPQzAW9AFYL5wgEz9k,4
57
57
  upgini/utils/target_utils.py,sha256=Y96_PJ5cC-WsEbeqg20v9uqywDQobLoTb-xoP7S3o4E,7807
58
58
  upgini/utils/track_info.py,sha256=G5Lu1xxakg2_TQjKZk4b5SvrHsATTXNVV3NbvWtT8k8,5663
59
59
  upgini/utils/warning_counter.py,sha256=dIWBB4dI5XRRJZudvIlqlIYKEiwLLPcXarsZuYRt338,227
60
- upgini-1.1.307.dist-info/METADATA,sha256=-HQpKKy8UA3w6J6Oi6WNYgqmaqCOWQmlVGaV7B5VBZc,48153
61
- upgini-1.1.307.dist-info/WHEEL,sha256=1yFddiXMmvYK7QYTqtRNtX66WJ0Mz8PYEiEUoOUUxRY,87
62
- upgini-1.1.307.dist-info/licenses/LICENSE,sha256=5RRzgvdJUu3BUDfv4bzVU6FqKgwHlIay63pPCSmSgzw,1514
63
- upgini-1.1.307.dist-info/RECORD,,
60
+ upgini-1.1.309.dist-info/METADATA,sha256=B4T0sL1tSW7jKZYB87qxnx86bQPQadEOtiAhMFgtjPs,48153
61
+ upgini-1.1.309.dist-info/WHEEL,sha256=1yFddiXMmvYK7QYTqtRNtX66WJ0Mz8PYEiEUoOUUxRY,87
62
+ upgini-1.1.309.dist-info/licenses/LICENSE,sha256=5RRzgvdJUu3BUDfv4bzVU6FqKgwHlIay63pPCSmSgzw,1514
63
+ upgini-1.1.309.dist-info/RECORD,,