upgini 1.2.38a3769.dev2__py3-none-any.whl → 1.2.38a3769.dev4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of upgini might be problematic. Click here for more details.

upgini/__about__.py CHANGED
@@ -1 +1 @@
1
- __version__ = "1.2.38a3769.dev2"
1
+ __version__ = "1.2.38a3769.dev4"
upgini/features_enricher.py CHANGED
@@ -932,6 +932,7 @@ class FeaturesEnricher(TransformerMixin):
932
932
  cat_features, search_keys_for_metrics = self._get_client_cat_features(
933
933
  estimator, validated_X, self.search_keys
934
934
  )
935
+ search_keys_for_metrics.extend([c for c in self.id_columns if c not in search_keys_for_metrics])
935
936
 
936
937
  prepared_data = self._prepare_data_for_metrics(
937
938
  trace_id=trace_id,
@@ -2656,7 +2657,10 @@ class FeaturesEnricher(TransformerMixin):
2656
2657
  self.__adjust_cv(df)
2657
2658
 
2658
2659
  if self.id_columns is not None and self.cv is not None and self.cv.is_time_series():
2659
- self.search_keys.update({col: SearchKey.CUSTOM_KEY for col in self.id_columns})
2660
+ reverse_renaming = {v: k for k, v in self.fit_columns_renaming.items()}
2661
+ id_columns = [reverse_renaming[col] for col in self.id_columns if col in reverse_renaming]
2662
+ self.fit_search_keys.update({col: SearchKey.CUSTOM_KEY for col in id_columns})
2663
+ self.runtime_parameters.properties["id_columns"] = ",".join(id_columns)
2660
2664
 
2661
2665
  df, fintech_warnings = remove_fintech_duplicates(
2662
2666
  df, self.fit_search_keys, date_format=self.date_format, logger=self.logger, bundle=self.bundle
upgini/utils/target_utils.py CHANGED
@@ -295,6 +295,8 @@ def balance_undersample_time_series(
295
295
 
296
296
  random_state = np.random.RandomState(random_state)
297
297
 
298
+ if not id_columns:
299
+ id_columns = [date_column]
298
300
  ids_sort = df.groupby(id_columns)[date_column].aggregate(["max", "count"]).T.to_dict()
299
301
  ids_sort = {
300
302
  ensure_tuple(k): (
@@ -306,7 +308,7 @@ def balance_undersample_time_series(
306
308
  id_counts.index = [ensure_tuple(i) for i in id_counts.index]
307
309
  id_counts = id_counts.sort_index(key=lambda x: [ids_sort[y] for y in x], ascending=False).cumsum()
308
310
  id_counts = id_counts[id_counts <= sample_size]
309
- min_different_ids = int(len(df[id_columns].drop_duplicates()) * min_different_ids_ratio)
311
+ min_different_ids = max(int(len(df[id_columns].drop_duplicates()) * min_different_ids_ratio), 1)
310
312
 
311
313
  def id_mask(sample_index: pd.Index) -> pd.Index:
312
314
  if isinstance(sample_index, pd.MultiIndex):
@@ -317,10 +319,10 @@ def balance_undersample_time_series(
317
319
  if len(id_counts) < min_different_ids:
318
320
  if logger is not None:
319
321
  logger.info(
320
- f"Different ids count {len(id_counts)} is less than min different ids {min_different_ids}, sampling time window"
322
+ f"Different ids count {len(id_counts)} for sample size {sample_size} is less than min different ids {min_different_ids}, sampling time window"
321
323
  )
322
324
  date_counts = df.groupby(id_columns)[date_column].nunique().sort_values(ascending=False)
323
- ids_to_sample = date_counts.index[:min_different_ids]
325
+ ids_to_sample = date_counts.index[:min_different_ids] if len(id_counts) > 0 else date_counts.index
324
326
  mask = id_mask(ids_to_sample)
325
327
  df = df[mask]
326
328
  sample_date_counts = df[date_column].value_counts().sort_index(ascending=False).cumsum()
upgini-1.2.38a3769.dev4.dist-info/METADATA CHANGED
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: upgini
3
- Version: 1.2.38a3769.dev2
3
+ Version: 1.2.38a3769.dev4
4
4
  Summary: Intelligent data search & enrichment for Machine Learning
5
5
  Project-URL: Bug Reports, https://github.com/upgini/upgini/issues
6
6
  Project-URL: Homepage, https://upgini.com/
upgini-1.2.38a3769.dev4.dist-info/RECORD CHANGED
@@ -1,9 +1,9 @@
1
- upgini/__about__.py,sha256=Pi7t905kMODW13sjgsQdvH0UCQF5An8AOCl5peng2eA,33
1
+ upgini/__about__.py,sha256=LD7zYM0Dg1LG9nVQ26_22PqIAzyHE6_ydgUMfXgIB6o,33
2
2
  upgini/__init__.py,sha256=LXSfTNU0HnlOkE69VCxkgIKDhWP-JFo_eBQ71OxTr5Y,261
3
3
  upgini/ads.py,sha256=nvuRxRx5MHDMgPr9SiU-fsqRdFaBv8p4_v1oqiysKpc,2714
4
4
  upgini/dataset.py,sha256=zYPSQ73ch6k5EWxZlh1KrjL0gMkmAwl7Nkgrz6zxywY,33161
5
5
  upgini/errors.py,sha256=2b_Wbo0OYhLUbrZqdLIx5jBnAsiD1Mcenh-VjR4HCTw,950
6
- upgini/features_enricher.py,sha256=m7z3iWSEj0ORUVnp65I0b_427SITjNnBvn8hdebS_xE,195541
6
+ upgini/features_enricher.py,sha256=ia40Oq2WZMVCfsTPmsjtMgsz0j8zOdoqlQxyW_-5NPA,195924
7
7
  upgini/http.py,sha256=plZGTGoi1h2edd8Cnjt4eYB8t4NbBGnZz7DtPTByiNc,42885
8
8
  upgini/lazy_import.py,sha256=74gQ8JuA48BGRLxAo7lNHNKY2D2emMxrUxKGdxVGhuY,1012
9
9
  upgini/metadata.py,sha256=-ibqiNjD7dTagqg53FoEJNEqvAYbwgfyn9PGTRQ_YKU,12054
@@ -56,10 +56,10 @@ upgini/utils/phone_utils.py,sha256=IrbztLuOJBiePqqxllfABWfYlfAjYevPhXKipl95wUI,1
56
56
  upgini/utils/postal_code_utils.py,sha256=5M0sUqH2DAr33kARWCTXR-ACyzWbjDq_-0mmEml6ZcU,1716
57
57
  upgini/utils/progress_bar.py,sha256=N-Sfdah2Hg8lXP_fV9EfUTXz_PyRt4lo9fAHoUDOoLc,1550
58
58
  upgini/utils/sklearn_ext.py,sha256=13jQS_k7v0aUtudXV6nGUEWjttPQzAW9AFYL5wgEz9k,44511
59
- upgini/utils/target_utils.py,sha256=AosrXCtfbNEUbc7fzyt1udKIZ-vvt-ngwx9SkOclWWQ,14200
59
+ upgini/utils/target_utils.py,sha256=RlpKGss9kMibVSlA8iZuO_qxmyeplqzn7X8g6hiGGGs,14341
60
60
  upgini/utils/track_info.py,sha256=G5Lu1xxakg2_TQjKZk4b5SvrHsATTXNVV3NbvWtT8k8,5663
61
61
  upgini/utils/warning_counter.py,sha256=-GRY8EUggEBKODPSuXAkHn9KnEQwAORC0mmz_tim-PM,254
62
- upgini-1.2.38a3769.dev2.dist-info/METADATA,sha256=CifOyMPTP6xdb1WljZK3BuIb5FomL6xgRGbrq1sC_04,48604
63
- upgini-1.2.38a3769.dev2.dist-info/WHEEL,sha256=zEMcRr9Kr03x1ozGwg5v9NQBKn3kndp6LSoSlVg-jhU,87
64
- upgini-1.2.38a3769.dev2.dist-info/licenses/LICENSE,sha256=5RRzgvdJUu3BUDfv4bzVU6FqKgwHlIay63pPCSmSgzw,1514
65
- upgini-1.2.38a3769.dev2.dist-info/RECORD,,
62
+ upgini-1.2.38a3769.dev4.dist-info/METADATA,sha256=eJSDG0irBysdfvDWt44W70ePfymnLAdMnxbUam-IcBQ,48604
63
+ upgini-1.2.38a3769.dev4.dist-info/WHEEL,sha256=zEMcRr9Kr03x1ozGwg5v9NQBKn3kndp6LSoSlVg-jhU,87
64
+ upgini-1.2.38a3769.dev4.dist-info/licenses/LICENSE,sha256=5RRzgvdJUu3BUDfv4bzVU6FqKgwHlIay63pPCSmSgzw,1514
65
+ upgini-1.2.38a3769.dev4.dist-info/RECORD,,