upgini 1.1.293__py3-none-any.whl → 1.1.296__py3-none-any.whl

This diff shows the changes between two publicly available versions of the package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents exactly as they appear in their respective public registries.

Potentially problematic release.


This version of upgini might be problematic. Click here for more details.

upgini/__about__.py CHANGED
@@ -1 +1 @@
1
- __version__ = "1.1.293"
1
+ __version__ = "1.1.296"
upgini/data_source/data_source_publisher.py CHANGED
@@ -59,9 +59,35 @@ class DataSourcePublisher:
59
59
  features_for_embeddings: Optional[List[str]] = DEFAULT_GENERATE_EMBEDDINGS,
60
60
  data_table_id_to_replace: Optional[str] = None,
61
61
  keep_features: Optional[List[str]] = None,
62
+ date_features: Optional[List[str]] = None,
63
+ date_vector_features: Optional[List[str]] = None,
62
64
  _force_generation=False,
63
65
  _silent=False,
64
66
  ) -> str:
67
+ """Register new ADS
68
+
69
+ Parameters
70
+ ----------
71
+ data_table_uri - str - table name in format {project_id}.{datasource_name}.{table_name}
72
+
73
+ search_keys - dict with column names as keys and SearchKey as value
74
+
75
+ update_frequency - str - (Monthly, Weekly, Daily, Annually, Quarterly)
76
+
77
+ exclude_from_autofe_generation - optional list of features that should be excluded from AutoFE
78
+
79
+ secondary_search_keys - optional dict of secondary search keys
80
+
81
+ sort_column - optional str - name of unique column that could be used for sort
82
+
83
+ date_format - optional str - format of date if it is present in search keys
84
+
85
+ ...
86
+
87
+ data_table_id_to_replace - optional str - id of registered ADS that should be replaced by new table
88
+
89
+ keep_features - optional list - features that should not be removed from ADS (even if they are personal)
90
+ """
65
91
  trace_id = str(uuid.uuid4())
66
92
 
67
93
  with MDC(trace_id=trace_id):
@@ -95,6 +121,11 @@ class DataSourcePublisher:
95
121
  "With MSISDN and DATE keys one of the snapshot_frequency_days or"
96
122
  " join_date_abs_limit_days parameters is required"
97
123
  )
124
+ if (
125
+ set(search_keys.values()) == {SearchKey.PHONE, SearchKey.DATE}
126
+ or set(search_keys.values()) == {SearchKey.HEM, SearchKey.DATE}
127
+ ) and not date_format:
128
+ raise ValidationError("date_format argument is required for PHONE+DATE and HEM+DATE search keys")
98
129
 
99
130
  request = {
100
131
  "dataTableUri": data_table_uri,
@@ -119,6 +150,14 @@ class DataSourcePublisher:
119
150
  request["excludeFromGeneration"] = exclude_from_autofe_generation
120
151
  if keep_features is not None:
121
152
  request["keepFeatures"] = keep_features
153
+ if date_features is not None:
154
+ if date_format is None:
155
+ raise ValidationError("date_format should be presented if you use date features")
156
+ request["dateFeatures"] = date_features
157
+ if date_vector_features is not None:
158
+ if date_format is None:
159
+ raise ValidationError("date_format should be presented if you use date vector features")
160
+ request["dateVectorFeatures"] = date_vector_features
122
161
  self.logger.info(f"Start registering data table {request}")
123
162
 
124
163
  task_id = self._rest_client.register_ads(request, trace_id)
@@ -176,6 +215,9 @@ class DataSourcePublisher:
176
215
  msg = f"Data table successfully registered with id: {data_table_id}"
177
216
  self.logger.info(msg)
178
217
  print(msg)
218
+ if "warnings" in status_response and status_response["warnings"]:
219
+ self.logger.warning(status_response["warnings"])
220
+ print(status_response["warnings"])
179
221
  return data_table_id
180
222
  except KeyboardInterrupt:
181
223
  if task_id is not None:
upgini/features_enricher.py CHANGED
@@ -2557,9 +2557,6 @@ class FeaturesEnricher(TransformerMixin):
2557
2557
  return [c for c, v in search_keys_with_autodetection.items() if v.value.value in keys]
2558
2558
 
2559
2559
  def _validate_X(self, X, is_transform=False) -> pd.DataFrame:
2560
- if _num_samples(X) == 0:
2561
- raise ValidationError(self.bundle.get("x_is_empty"))
2562
-
2563
2560
  if isinstance(X, pd.DataFrame):
2564
2561
  if isinstance(X.columns, pd.MultiIndex) or isinstance(X.index, pd.MultiIndex):
2565
2562
  raise ValidationError(self.bundle.get("x_multiindex_unsupported"))
@@ -2573,6 +2570,9 @@ class FeaturesEnricher(TransformerMixin):
2573
2570
  else:
2574
2571
  raise ValidationError(self.bundle.get("unsupported_x_type").format(type(X)))
2575
2572
 
2573
+ if _num_samples(X) == 0:
2574
+ raise ValidationError(self.bundle.get("x_is_empty"))
2575
+
2576
2576
  if len(set(validated_X.columns)) != len(validated_X.columns):
2577
2577
  raise ValidationError(self.bundle.get("x_contains_dup_columns"))
2578
2578
  if not is_transform and not validated_X.index.is_unique:
@@ -2870,7 +2870,7 @@ class FeaturesEnricher(TransformerMixin):
2870
2870
  self.logger.info(f"Dates interval is ({min_date}, {max_date})")
2871
2871
 
2872
2872
  except Exception:
2873
- self.logger.exception("Failed to log debug information")
2873
+ self.logger.warning("Failed to log debug information", exc_info=True)
2874
2874
 
2875
2875
  def __handle_index_search_keys(self, df: pd.DataFrame, search_keys: Dict[str, SearchKey]) -> pd.DataFrame:
2876
2876
  index_names = df.index.names if df.index.names != [None] else [DEFAULT_INDEX]
upgini/metrics.py CHANGED
@@ -679,6 +679,11 @@ def validate_scoring_argument(scoring: Union[Callable, str, None]):
679
679
  raise ValidationError(
680
680
  f"Invalid scoring function passed {scoring}. It should accept 3 input arguments: estimator, x, y"
681
681
  )
682
+ elif scoring is not None:
683
+ raise ValidationError(
684
+ f"Invalid scoring argument passed {scoring}. It should be string with scoring name or function"
685
+ " that accepts 3 input arguments: estimator, x, y"
686
+ )
682
687
 
683
688
 
684
689
  def _get_scorer_by_name(scoring: str) -> Tuple[Callable, str, int]:
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: upgini
3
- Version: 1.1.293
3
+ Version: 1.1.296
4
4
  Summary: Intelligent data search & enrichment for Machine Learning
5
5
  Project-URL: Bug Reports, https://github.com/upgini/upgini/issues
6
6
  Project-URL: Homepage, https://upgini.com/
@@ -1,13 +1,13 @@
1
- upgini/__about__.py,sha256=qZMAUJW6Js0WNRcvGFbeSyGFbiVxGPjSbHngvQV4JrY,24
1
+ upgini/__about__.py,sha256=VYB3k4Dn9TdkyE_1qiCFNhgHJH5heaX4R3m8VNi495g,24
2
2
  upgini/__init__.py,sha256=ObEtjFkIssl83qeKNMLpIQygfwK8TzztwiI43YTsAP0,353
3
3
  upgini/ads.py,sha256=nvuRxRx5MHDMgPr9SiU-fsqRdFaBv8p4_v1oqiysKpc,2714
4
4
  upgini/dataset.py,sha256=7TLVVhGtjgx_9yaiaIUK3kZSe_R9wg5dY0d4F5qCGM4,45636
5
5
  upgini/errors.py,sha256=2b_Wbo0OYhLUbrZqdLIx5jBnAsiD1Mcenh-VjR4HCTw,950
6
- upgini/features_enricher.py,sha256=Bfqbzqj3h1Ox3s6hBA4UYXkG7hQDqGPB4JK1YXtxXxw,177530
6
+ upgini/features_enricher.py,sha256=HQFLw3VyEsZfAt4xFnIYOnp3fzQSHAsyHzIm0gTJpOI,177543
7
7
  upgini/http.py,sha256=bp6jWl422Icy3AhHMdCcJv5NjExE45gSMmzMTPJjPuk,42600
8
8
  upgini/lazy_import.py,sha256=EwoM0msNGbSmWBhGbrLDny1DSnOlvTxCjmMKPxYlDms,610
9
9
  upgini/metadata.py,sha256=qDAIO7NLSSQp_XiXCv3U4XJTLO0KH3YuQ8lvCLYPqzs,9781
10
- upgini/metrics.py,sha256=DLvA2YLV4f7lnzBCcfZ5T4NkqAv3pbstbjTepavuT7U,30688
10
+ upgini/metrics.py,sha256=DiDgdFvYu64ArlPEgjppZShK6yybWtIEbdPAhI3yO1I,30930
11
11
  upgini/search_task.py,sha256=LtRJ9bCPjMo1gJ-sUDKERhDwGcWKImrzwVFHjkMSQHQ,17071
12
12
  upgini/spinner.py,sha256=4iMd-eIe_BnkqFEMIliULTbj6rNI2HkN_VJ4qYe0cUc,1118
13
13
  upgini/version_validator.py,sha256=ddSKUK_-eGJB3NgrqOMoWJU-OxQ253WsNLp8aqJkaIM,1389
@@ -23,7 +23,7 @@ upgini/autofe/operand.py,sha256=MKEsl3zxpWzRDpTkE0sNJxTu62U20sWOvEKhPjUWS6s,2915
23
23
  upgini/autofe/unary.py,sha256=ZWjLd-CUkNt_PpM8YuWLLipW1v_RdBlsl4JxXIVo9aM,3652
24
24
  upgini/autofe/vector.py,sha256=dLxfAstJs-gw_OQ1xxoxcM6pVzORlV0HVzdzt7cLXVQ,606
25
25
  upgini/data_source/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
26
- upgini/data_source/data_source_publisher.py,sha256=2BCFcglyZC1T5WYerqd_GGC6dg22o2Mh-ce53wOIOjQ,17465
26
+ upgini/data_source/data_source_publisher.py,sha256=1cQZrK630VztwGGDp41ec9gqIeUtkefaqSSQEitVWiM,19581
27
27
  upgini/mdc/__init__.py,sha256=aM08nIWFc2gWdWUa3_IuEnNND0cQPkBGnYpRMnfFN8k,1019
28
28
  upgini/mdc/context.py,sha256=3u1B-jXt7tXEvNcV3qmR9SDCseudnY7KYsLclBdwVLk,1405
29
29
  upgini/normalizer/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -57,7 +57,7 @@ upgini/utils/sklearn_ext.py,sha256=13jQS_k7v0aUtudXV6nGUEWjttPQzAW9AFYL5wgEz9k,4
57
57
  upgini/utils/target_utils.py,sha256=Y96_PJ5cC-WsEbeqg20v9uqywDQobLoTb-xoP7S3o4E,7807
58
58
  upgini/utils/track_info.py,sha256=G5Lu1xxakg2_TQjKZk4b5SvrHsATTXNVV3NbvWtT8k8,5663
59
59
  upgini/utils/warning_counter.py,sha256=dIWBB4dI5XRRJZudvIlqlIYKEiwLLPcXarsZuYRt338,227
60
- upgini-1.1.293.dist-info/METADATA,sha256=xxqpODSdTGvAAXew_U-IjsfNzYFALrNqOS2BK81cgy8,48117
61
- upgini-1.1.293.dist-info/WHEEL,sha256=zEMcRr9Kr03x1ozGwg5v9NQBKn3kndp6LSoSlVg-jhU,87
62
- upgini-1.1.293.dist-info/licenses/LICENSE,sha256=5RRzgvdJUu3BUDfv4bzVU6FqKgwHlIay63pPCSmSgzw,1514
63
- upgini-1.1.293.dist-info/RECORD,,
60
+ upgini-1.1.296.dist-info/METADATA,sha256=4eJQUEcmU8LjaLKmvkZ4OPJBat7dPkBI0GwnNKpBZcc,48117
61
+ upgini-1.1.296.dist-info/WHEEL,sha256=1yFddiXMmvYK7QYTqtRNtX66WJ0Mz8PYEiEUoOUUxRY,87
62
+ upgini-1.1.296.dist-info/licenses/LICENSE,sha256=5RRzgvdJUu3BUDfv4bzVU6FqKgwHlIay63pPCSmSgzw,1514
63
+ upgini-1.1.296.dist-info/RECORD,,
@@ -1,4 +1,4 @@
1
1
  Wheel-Version: 1.0
2
- Generator: hatchling 1.24.2
2
+ Generator: hatchling 1.25.0
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any