upgini 1.2.38a3769.dev3__py3-none-any.whl → 1.2.38a3769.dev5__py3-none-any.whl

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the differences between the package versions as they appear in their respective public registries.

Potentially problematic release.


This version of upgini might be problematic. Click here for more details.

upgini/__about__.py CHANGED
@@ -1 +1 @@
1
- __version__ = "1.2.38a3769.dev3"
1
+ __version__ = "1.2.38a3769.dev5"
upgini/dataset.py CHANGED
@@ -77,6 +77,7 @@ class Dataset: # (pd.DataFrame):
77
77
  unnest_search_keys: Optional[Dict[str, str]] = None,
78
78
  model_task_type: Optional[ModelTaskType] = None,
79
79
  cv_type: Optional[CVType] = None,
80
+ id_columns: Optional[List[str]] = None,
80
81
  random_state: Optional[int] = None,
81
82
  rest_client: Optional[_RestClient] = None,
82
83
  logger: Optional[logging.Logger] = None,
@@ -120,6 +121,7 @@ class Dataset: # (pd.DataFrame):
120
121
  self.random_state = random_state
121
122
  self.columns_renaming: Dict[str, str] = {}
122
123
  self.imbalanced: bool = False
124
+ self.id_columns = id_columns
123
125
  if logger is not None:
124
126
  self.logger = logger
125
127
  else:
@@ -230,6 +232,7 @@ class Dataset: # (pd.DataFrame):
230
232
  target_column=target_column,
231
233
  task_type=self.task_type,
232
234
  cv_type=self.cv_type,
235
+ id_columns=self.id_columns,
233
236
  random_state=self.random_state,
234
237
  sample_size=self.FORCE_SAMPLE_SIZE,
235
238
  logger=self.logger,
@@ -305,7 +308,7 @@ class Dataset: # (pd.DataFrame):
305
308
  if self.cv_type is not None and self.cv_type.is_time_series():
306
309
  resampled_data = balance_undersample_time_series(
307
310
  df=self.data,
308
- id_columns=[k for k, v in self.meaning_types.items() if v == FileColumnMeaningType.CUSTOM_KEY],
311
+ id_columns=self.id_columns,
309
312
  date_column=next(
310
313
  k
311
314
  for k, v in self.meaning_types.items()
@@ -932,6 +932,7 @@ class FeaturesEnricher(TransformerMixin):
932
932
  cat_features, search_keys_for_metrics = self._get_client_cat_features(
933
933
  estimator, validated_X, self.search_keys
934
934
  )
935
+ search_keys_for_metrics.extend([c for c in self.id_columns or [] if c not in search_keys_for_metrics])
935
936
 
936
937
  prepared_data = self._prepare_data_for_metrics(
937
938
  trace_id=trace_id,
@@ -2299,6 +2300,7 @@ class FeaturesEnricher(TransformerMixin):
2299
2300
  meaning_types=meaning_types,
2300
2301
  search_keys=combined_search_keys,
2301
2302
  unnest_search_keys=unnest_search_keys,
2303
+ id_columns=self.id_columns,
2302
2304
  date_format=self.date_format,
2303
2305
  rest_client=self.rest_client,
2304
2306
  logger=self.logger,
@@ -2656,7 +2658,10 @@ class FeaturesEnricher(TransformerMixin):
2656
2658
  self.__adjust_cv(df)
2657
2659
 
2658
2660
  if self.id_columns is not None and self.cv is not None and self.cv.is_time_series():
2659
- self.search_keys.update({col: SearchKey.CUSTOM_KEY for col in self.id_columns})
2661
+ reverse_renaming = {v: k for k, v in self.fit_columns_renaming.items()}
2662
+ id_columns = [reverse_renaming[col] for col in self.id_columns if col in reverse_renaming]
2663
+ self.fit_search_keys.update({col: SearchKey.CUSTOM_KEY for col in id_columns})
2664
+ self.runtime_parameters.properties["id_columns"] = ",".join(id_columns)
2660
2665
 
2661
2666
  df, fintech_warnings = remove_fintech_duplicates(
2662
2667
  df, self.fit_search_keys, date_format=self.date_format, logger=self.logger, bundle=self.bundle
@@ -2780,6 +2785,7 @@ class FeaturesEnricher(TransformerMixin):
2780
2785
  unnest_search_keys=unnest_search_keys,
2781
2786
  model_task_type=self.model_task_type,
2782
2787
  cv_type=self.cv,
2788
+ id_columns=self.id_columns,
2783
2789
  date_format=self.date_format,
2784
2790
  random_state=self.random_state,
2785
2791
  rest_client=self.rest_client,
@@ -295,6 +295,8 @@ def balance_undersample_time_series(
295
295
 
296
296
  random_state = np.random.RandomState(random_state)
297
297
 
298
+ if not id_columns:
299
+ id_columns = [date_column]
298
300
  ids_sort = df.groupby(id_columns)[date_column].aggregate(["max", "count"]).T.to_dict()
299
301
  ids_sort = {
300
302
  ensure_tuple(k): (
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: upgini
3
- Version: 1.2.38a3769.dev3
3
+ Version: 1.2.38a3769.dev5
4
4
  Summary: Intelligent data search & enrichment for Machine Learning
5
5
  Project-URL: Bug Reports, https://github.com/upgini/upgini/issues
6
6
  Project-URL: Homepage, https://upgini.com/
@@ -1,9 +1,9 @@
1
- upgini/__about__.py,sha256=sQ7NNr0lfG3UfxCnX2sMNRntUVR0zW-NHhIgizLV7ls,33
1
+ upgini/__about__.py,sha256=bj9nQpQPQBgyZ975N_D4PWwEYJJKbsfpt1gs-e4tMio,33
2
2
  upgini/__init__.py,sha256=LXSfTNU0HnlOkE69VCxkgIKDhWP-JFo_eBQ71OxTr5Y,261
3
3
  upgini/ads.py,sha256=nvuRxRx5MHDMgPr9SiU-fsqRdFaBv8p4_v1oqiysKpc,2714
4
- upgini/dataset.py,sha256=zYPSQ73ch6k5EWxZlh1KrjL0gMkmAwl7Nkgrz6zxywY,33161
4
+ upgini/dataset.py,sha256=-3FeDMADnHxGb70rKFY_U96NCQO-TEUAXFicFl25CtY,33222
5
5
  upgini/errors.py,sha256=2b_Wbo0OYhLUbrZqdLIx5jBnAsiD1Mcenh-VjR4HCTw,950
6
- upgini/features_enricher.py,sha256=m7z3iWSEj0ORUVnp65I0b_427SITjNnBvn8hdebS_xE,195541
6
+ upgini/features_enricher.py,sha256=usNAM9eNFa19OeyPuMaaCp_4HMLeuYrkG4gBU6MwANg,196014
7
7
  upgini/http.py,sha256=plZGTGoi1h2edd8Cnjt4eYB8t4NbBGnZz7DtPTByiNc,42885
8
8
  upgini/lazy_import.py,sha256=74gQ8JuA48BGRLxAo7lNHNKY2D2emMxrUxKGdxVGhuY,1012
9
9
  upgini/metadata.py,sha256=-ibqiNjD7dTagqg53FoEJNEqvAYbwgfyn9PGTRQ_YKU,12054
@@ -56,10 +56,10 @@ upgini/utils/phone_utils.py,sha256=IrbztLuOJBiePqqxllfABWfYlfAjYevPhXKipl95wUI,1
56
56
  upgini/utils/postal_code_utils.py,sha256=5M0sUqH2DAr33kARWCTXR-ACyzWbjDq_-0mmEml6ZcU,1716
57
57
  upgini/utils/progress_bar.py,sha256=N-Sfdah2Hg8lXP_fV9EfUTXz_PyRt4lo9fAHoUDOoLc,1550
58
58
  upgini/utils/sklearn_ext.py,sha256=13jQS_k7v0aUtudXV6nGUEWjttPQzAW9AFYL5wgEz9k,44511
59
- upgini/utils/target_utils.py,sha256=i_EsluRZG3LKrqv9NmhvEha9Uwp8JQjRUmokeo240Is,14283
59
+ upgini/utils/target_utils.py,sha256=RlpKGss9kMibVSlA8iZuO_qxmyeplqzn7X8g6hiGGGs,14341
60
60
  upgini/utils/track_info.py,sha256=G5Lu1xxakg2_TQjKZk4b5SvrHsATTXNVV3NbvWtT8k8,5663
61
61
  upgini/utils/warning_counter.py,sha256=-GRY8EUggEBKODPSuXAkHn9KnEQwAORC0mmz_tim-PM,254
62
- upgini-1.2.38a3769.dev3.dist-info/METADATA,sha256=AeaVPfRIc-RCuzozwXSgurTpHXE21yR_tpsBjCra3KA,48604
63
- upgini-1.2.38a3769.dev3.dist-info/WHEEL,sha256=zEMcRr9Kr03x1ozGwg5v9NQBKn3kndp6LSoSlVg-jhU,87
64
- upgini-1.2.38a3769.dev3.dist-info/licenses/LICENSE,sha256=5RRzgvdJUu3BUDfv4bzVU6FqKgwHlIay63pPCSmSgzw,1514
65
- upgini-1.2.38a3769.dev3.dist-info/RECORD,,
62
+ upgini-1.2.38a3769.dev5.dist-info/METADATA,sha256=ldrVhkIorzNJE1GYHBfBQkXGi8upTruz5sa9s-DTld4,48604
63
+ upgini-1.2.38a3769.dev5.dist-info/WHEEL,sha256=zEMcRr9Kr03x1ozGwg5v9NQBKn3kndp6LSoSlVg-jhU,87
64
+ upgini-1.2.38a3769.dev5.dist-info/licenses/LICENSE,sha256=5RRzgvdJUu3BUDfv4bzVU6FqKgwHlIay63pPCSmSgzw,1514
65
+ upgini-1.2.38a3769.dev5.dist-info/RECORD,,