upgini 1.2.38a3769.dev2__tar.gz → 1.2.38a3769.dev4__tar.gz

This diff compares the contents of two publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between these package versions as they appear in their respective public registries.
Files changed (67)
  1. {upgini-1.2.38a3769.dev2 → upgini-1.2.38a3769.dev4}/PKG-INFO +1 -1
  2. upgini-1.2.38a3769.dev4/src/upgini/__about__.py +1 -0
  3. {upgini-1.2.38a3769.dev2 → upgini-1.2.38a3769.dev4}/src/upgini/features_enricher.py +5 -1
  4. {upgini-1.2.38a3769.dev2 → upgini-1.2.38a3769.dev4}/src/upgini/utils/target_utils.py +5 -3
  5. upgini-1.2.38a3769.dev2/src/upgini/__about__.py +0 -1
  6. {upgini-1.2.38a3769.dev2 → upgini-1.2.38a3769.dev4}/.gitignore +0 -0
  7. {upgini-1.2.38a3769.dev2 → upgini-1.2.38a3769.dev4}/LICENSE +0 -0
  8. {upgini-1.2.38a3769.dev2 → upgini-1.2.38a3769.dev4}/README.md +0 -0
  9. {upgini-1.2.38a3769.dev2 → upgini-1.2.38a3769.dev4}/pyproject.toml +0 -0
  10. {upgini-1.2.38a3769.dev2 → upgini-1.2.38a3769.dev4}/src/upgini/__init__.py +0 -0
  11. {upgini-1.2.38a3769.dev2 → upgini-1.2.38a3769.dev4}/src/upgini/ads.py +0 -0
  12. {upgini-1.2.38a3769.dev2 → upgini-1.2.38a3769.dev4}/src/upgini/ads_management/__init__.py +0 -0
  13. {upgini-1.2.38a3769.dev2 → upgini-1.2.38a3769.dev4}/src/upgini/ads_management/ads_manager.py +0 -0
  14. {upgini-1.2.38a3769.dev2 → upgini-1.2.38a3769.dev4}/src/upgini/autofe/__init__.py +0 -0
  15. {upgini-1.2.38a3769.dev2 → upgini-1.2.38a3769.dev4}/src/upgini/autofe/all_operands.py +0 -0
  16. {upgini-1.2.38a3769.dev2 → upgini-1.2.38a3769.dev4}/src/upgini/autofe/binary.py +0 -0
  17. {upgini-1.2.38a3769.dev2 → upgini-1.2.38a3769.dev4}/src/upgini/autofe/date.py +0 -0
  18. {upgini-1.2.38a3769.dev2 → upgini-1.2.38a3769.dev4}/src/upgini/autofe/feature.py +0 -0
  19. {upgini-1.2.38a3769.dev2 → upgini-1.2.38a3769.dev4}/src/upgini/autofe/groupby.py +0 -0
  20. {upgini-1.2.38a3769.dev2 → upgini-1.2.38a3769.dev4}/src/upgini/autofe/operand.py +0 -0
  21. {upgini-1.2.38a3769.dev2 → upgini-1.2.38a3769.dev4}/src/upgini/autofe/unary.py +0 -0
  22. {upgini-1.2.38a3769.dev2 → upgini-1.2.38a3769.dev4}/src/upgini/autofe/vector.py +0 -0
  23. {upgini-1.2.38a3769.dev2 → upgini-1.2.38a3769.dev4}/src/upgini/data_source/__init__.py +0 -0
  24. {upgini-1.2.38a3769.dev2 → upgini-1.2.38a3769.dev4}/src/upgini/data_source/data_source_publisher.py +0 -0
  25. {upgini-1.2.38a3769.dev2 → upgini-1.2.38a3769.dev4}/src/upgini/dataset.py +0 -0
  26. {upgini-1.2.38a3769.dev2 → upgini-1.2.38a3769.dev4}/src/upgini/errors.py +0 -0
  27. {upgini-1.2.38a3769.dev2 → upgini-1.2.38a3769.dev4}/src/upgini/http.py +0 -0
  28. {upgini-1.2.38a3769.dev2 → upgini-1.2.38a3769.dev4}/src/upgini/lazy_import.py +0 -0
  29. {upgini-1.2.38a3769.dev2 → upgini-1.2.38a3769.dev4}/src/upgini/mdc/__init__.py +0 -0
  30. {upgini-1.2.38a3769.dev2 → upgini-1.2.38a3769.dev4}/src/upgini/mdc/context.py +0 -0
  31. {upgini-1.2.38a3769.dev2 → upgini-1.2.38a3769.dev4}/src/upgini/metadata.py +0 -0
  32. {upgini-1.2.38a3769.dev2 → upgini-1.2.38a3769.dev4}/src/upgini/metrics.py +0 -0
  33. {upgini-1.2.38a3769.dev2 → upgini-1.2.38a3769.dev4}/src/upgini/normalizer/__init__.py +0 -0
  34. {upgini-1.2.38a3769.dev2 → upgini-1.2.38a3769.dev4}/src/upgini/normalizer/normalize_utils.py +0 -0
  35. {upgini-1.2.38a3769.dev2 → upgini-1.2.38a3769.dev4}/src/upgini/resource_bundle/__init__.py +0 -0
  36. {upgini-1.2.38a3769.dev2 → upgini-1.2.38a3769.dev4}/src/upgini/resource_bundle/exceptions.py +0 -0
  37. {upgini-1.2.38a3769.dev2 → upgini-1.2.38a3769.dev4}/src/upgini/resource_bundle/strings.properties +0 -0
  38. {upgini-1.2.38a3769.dev2 → upgini-1.2.38a3769.dev4}/src/upgini/resource_bundle/strings_widget.properties +0 -0
  39. {upgini-1.2.38a3769.dev2 → upgini-1.2.38a3769.dev4}/src/upgini/sampler/__init__.py +0 -0
  40. {upgini-1.2.38a3769.dev2 → upgini-1.2.38a3769.dev4}/src/upgini/sampler/base.py +0 -0
  41. {upgini-1.2.38a3769.dev2 → upgini-1.2.38a3769.dev4}/src/upgini/sampler/random_under_sampler.py +0 -0
  42. {upgini-1.2.38a3769.dev2 → upgini-1.2.38a3769.dev4}/src/upgini/sampler/utils.py +0 -0
  43. {upgini-1.2.38a3769.dev2 → upgini-1.2.38a3769.dev4}/src/upgini/search_task.py +0 -0
  44. {upgini-1.2.38a3769.dev2 → upgini-1.2.38a3769.dev4}/src/upgini/spinner.py +0 -0
  45. {upgini-1.2.38a3769.dev2 → upgini-1.2.38a3769.dev4}/src/upgini/utils/Roboto-Regular.ttf +0 -0
  46. {upgini-1.2.38a3769.dev2 → upgini-1.2.38a3769.dev4}/src/upgini/utils/__init__.py +0 -0
  47. {upgini-1.2.38a3769.dev2 → upgini-1.2.38a3769.dev4}/src/upgini/utils/base_search_key_detector.py +0 -0
  48. {upgini-1.2.38a3769.dev2 → upgini-1.2.38a3769.dev4}/src/upgini/utils/blocked_time_series.py +0 -0
  49. {upgini-1.2.38a3769.dev2 → upgini-1.2.38a3769.dev4}/src/upgini/utils/country_utils.py +0 -0
  50. {upgini-1.2.38a3769.dev2 → upgini-1.2.38a3769.dev4}/src/upgini/utils/custom_loss_utils.py +0 -0
  51. {upgini-1.2.38a3769.dev2 → upgini-1.2.38a3769.dev4}/src/upgini/utils/cv_utils.py +0 -0
  52. {upgini-1.2.38a3769.dev2 → upgini-1.2.38a3769.dev4}/src/upgini/utils/datetime_utils.py +0 -0
  53. {upgini-1.2.38a3769.dev2 → upgini-1.2.38a3769.dev4}/src/upgini/utils/deduplicate_utils.py +0 -0
  54. {upgini-1.2.38a3769.dev2 → upgini-1.2.38a3769.dev4}/src/upgini/utils/display_utils.py +0 -0
  55. {upgini-1.2.38a3769.dev2 → upgini-1.2.38a3769.dev4}/src/upgini/utils/email_utils.py +0 -0
  56. {upgini-1.2.38a3769.dev2 → upgini-1.2.38a3769.dev4}/src/upgini/utils/fallback_progress_bar.py +0 -0
  57. {upgini-1.2.38a3769.dev2 → upgini-1.2.38a3769.dev4}/src/upgini/utils/feature_info.py +0 -0
  58. {upgini-1.2.38a3769.dev2 → upgini-1.2.38a3769.dev4}/src/upgini/utils/features_validator.py +0 -0
  59. {upgini-1.2.38a3769.dev2 → upgini-1.2.38a3769.dev4}/src/upgini/utils/format.py +0 -0
  60. {upgini-1.2.38a3769.dev2 → upgini-1.2.38a3769.dev4}/src/upgini/utils/ip_utils.py +0 -0
  61. {upgini-1.2.38a3769.dev2 → upgini-1.2.38a3769.dev4}/src/upgini/utils/phone_utils.py +0 -0
  62. {upgini-1.2.38a3769.dev2 → upgini-1.2.38a3769.dev4}/src/upgini/utils/postal_code_utils.py +0 -0
  63. {upgini-1.2.38a3769.dev2 → upgini-1.2.38a3769.dev4}/src/upgini/utils/progress_bar.py +0 -0
  64. {upgini-1.2.38a3769.dev2 → upgini-1.2.38a3769.dev4}/src/upgini/utils/sklearn_ext.py +0 -0
  65. {upgini-1.2.38a3769.dev2 → upgini-1.2.38a3769.dev4}/src/upgini/utils/track_info.py +0 -0
  66. {upgini-1.2.38a3769.dev2 → upgini-1.2.38a3769.dev4}/src/upgini/utils/warning_counter.py +0 -0
  67. {upgini-1.2.38a3769.dev2 → upgini-1.2.38a3769.dev4}/src/upgini/version_validator.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: upgini
3
- Version: 1.2.38a3769.dev2
3
+ Version: 1.2.38a3769.dev4
4
4
  Summary: Intelligent data search & enrichment for Machine Learning
5
5
  Project-URL: Bug Reports, https://github.com/upgini/upgini/issues
6
6
  Project-URL: Homepage, https://upgini.com/
@@ -0,0 +1 @@
1
+ __version__ = "1.2.38a3769.dev4"
@@ -932,6 +932,7 @@ class FeaturesEnricher(TransformerMixin):
932
932
  cat_features, search_keys_for_metrics = self._get_client_cat_features(
933
933
  estimator, validated_X, self.search_keys
934
934
  )
935
+ search_keys_for_metrics.extend([c for c in self.id_columns if c not in search_keys_for_metrics])
935
936
 
936
937
  prepared_data = self._prepare_data_for_metrics(
937
938
  trace_id=trace_id,
@@ -2656,7 +2657,10 @@ class FeaturesEnricher(TransformerMixin):
2656
2657
  self.__adjust_cv(df)
2657
2658
 
2658
2659
  if self.id_columns is not None and self.cv is not None and self.cv.is_time_series():
2659
- self.search_keys.update({col: SearchKey.CUSTOM_KEY for col in self.id_columns})
2660
+ reverse_renaming = {v: k for k, v in self.fit_columns_renaming.items()}
2661
+ id_columns = [reverse_renaming[col] for col in self.id_columns if col in reverse_renaming]
2662
+ self.fit_search_keys.update({col: SearchKey.CUSTOM_KEY for col in id_columns})
2663
+ self.runtime_parameters.properties["id_columns"] = ",".join(id_columns)
2660
2664
 
2661
2665
  df, fintech_warnings = remove_fintech_duplicates(
2662
2666
  df, self.fit_search_keys, date_format=self.date_format, logger=self.logger, bundle=self.bundle
@@ -295,6 +295,8 @@ def balance_undersample_time_series(
295
295
 
296
296
  random_state = np.random.RandomState(random_state)
297
297
 
298
+ if not id_columns:
299
+ id_columns = [date_column]
298
300
  ids_sort = df.groupby(id_columns)[date_column].aggregate(["max", "count"]).T.to_dict()
299
301
  ids_sort = {
300
302
  ensure_tuple(k): (
@@ -306,7 +308,7 @@ def balance_undersample_time_series(
306
308
  id_counts.index = [ensure_tuple(i) for i in id_counts.index]
307
309
  id_counts = id_counts.sort_index(key=lambda x: [ids_sort[y] for y in x], ascending=False).cumsum()
308
310
  id_counts = id_counts[id_counts <= sample_size]
309
- min_different_ids = int(len(df[id_columns].drop_duplicates()) * min_different_ids_ratio)
311
+ min_different_ids = max(int(len(df[id_columns].drop_duplicates()) * min_different_ids_ratio), 1)
310
312
 
311
313
  def id_mask(sample_index: pd.Index) -> pd.Index:
312
314
  if isinstance(sample_index, pd.MultiIndex):
@@ -317,10 +319,10 @@ def balance_undersample_time_series(
317
319
  if len(id_counts) < min_different_ids:
318
320
  if logger is not None:
319
321
  logger.info(
320
- f"Different ids count {len(id_counts)} is less than min different ids {min_different_ids}, sampling time window"
322
+ f"Different ids count {len(id_counts)} for sample size {sample_size} is less than min different ids {min_different_ids}, sampling time window"
321
323
  )
322
324
  date_counts = df.groupby(id_columns)[date_column].nunique().sort_values(ascending=False)
323
- ids_to_sample = date_counts.index[:min_different_ids]
325
+ ids_to_sample = date_counts.index[:min_different_ids] if len(id_counts) > 0 else date_counts.index
324
326
  mask = id_mask(ids_to_sample)
325
327
  df = df[mask]
326
328
  sample_date_counts = df[date_column].value_counts().sort_index(ascending=False).cumsum()
@@ -1 +0,0 @@
1
- __version__ = "1.2.38a3769.dev2"