upgini 1.2.38a3769.dev4__tar.gz → 1.2.38a3769.dev6__tar.gz

This diff shows the changes between two publicly available versions of a package that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.

Potentially problematic release.


This version of upgini might be problematic. Click here for more details.

Files changed (67)
  1. {upgini-1.2.38a3769.dev4 → upgini-1.2.38a3769.dev6}/PKG-INFO +1 -1
  2. upgini-1.2.38a3769.dev6/src/upgini/__about__.py +1 -0
  3. {upgini-1.2.38a3769.dev4 → upgini-1.2.38a3769.dev6}/src/upgini/dataset.py +4 -1
  4. {upgini-1.2.38a3769.dev4 → upgini-1.2.38a3769.dev6}/src/upgini/features_enricher.py +8 -3
  5. upgini-1.2.38a3769.dev4/src/upgini/__about__.py +0 -1
  6. {upgini-1.2.38a3769.dev4 → upgini-1.2.38a3769.dev6}/.gitignore +0 -0
  7. {upgini-1.2.38a3769.dev4 → upgini-1.2.38a3769.dev6}/LICENSE +0 -0
  8. {upgini-1.2.38a3769.dev4 → upgini-1.2.38a3769.dev6}/README.md +0 -0
  9. {upgini-1.2.38a3769.dev4 → upgini-1.2.38a3769.dev6}/pyproject.toml +0 -0
  10. {upgini-1.2.38a3769.dev4 → upgini-1.2.38a3769.dev6}/src/upgini/__init__.py +0 -0
  11. {upgini-1.2.38a3769.dev4 → upgini-1.2.38a3769.dev6}/src/upgini/ads.py +0 -0
  12. {upgini-1.2.38a3769.dev4 → upgini-1.2.38a3769.dev6}/src/upgini/ads_management/__init__.py +0 -0
  13. {upgini-1.2.38a3769.dev4 → upgini-1.2.38a3769.dev6}/src/upgini/ads_management/ads_manager.py +0 -0
  14. {upgini-1.2.38a3769.dev4 → upgini-1.2.38a3769.dev6}/src/upgini/autofe/__init__.py +0 -0
  15. {upgini-1.2.38a3769.dev4 → upgini-1.2.38a3769.dev6}/src/upgini/autofe/all_operands.py +0 -0
  16. {upgini-1.2.38a3769.dev4 → upgini-1.2.38a3769.dev6}/src/upgini/autofe/binary.py +0 -0
  17. {upgini-1.2.38a3769.dev4 → upgini-1.2.38a3769.dev6}/src/upgini/autofe/date.py +0 -0
  18. {upgini-1.2.38a3769.dev4 → upgini-1.2.38a3769.dev6}/src/upgini/autofe/feature.py +0 -0
  19. {upgini-1.2.38a3769.dev4 → upgini-1.2.38a3769.dev6}/src/upgini/autofe/groupby.py +0 -0
  20. {upgini-1.2.38a3769.dev4 → upgini-1.2.38a3769.dev6}/src/upgini/autofe/operand.py +0 -0
  21. {upgini-1.2.38a3769.dev4 → upgini-1.2.38a3769.dev6}/src/upgini/autofe/unary.py +0 -0
  22. {upgini-1.2.38a3769.dev4 → upgini-1.2.38a3769.dev6}/src/upgini/autofe/vector.py +0 -0
  23. {upgini-1.2.38a3769.dev4 → upgini-1.2.38a3769.dev6}/src/upgini/data_source/__init__.py +0 -0
  24. {upgini-1.2.38a3769.dev4 → upgini-1.2.38a3769.dev6}/src/upgini/data_source/data_source_publisher.py +0 -0
  25. {upgini-1.2.38a3769.dev4 → upgini-1.2.38a3769.dev6}/src/upgini/errors.py +0 -0
  26. {upgini-1.2.38a3769.dev4 → upgini-1.2.38a3769.dev6}/src/upgini/http.py +0 -0
  27. {upgini-1.2.38a3769.dev4 → upgini-1.2.38a3769.dev6}/src/upgini/lazy_import.py +0 -0
  28. {upgini-1.2.38a3769.dev4 → upgini-1.2.38a3769.dev6}/src/upgini/mdc/__init__.py +0 -0
  29. {upgini-1.2.38a3769.dev4 → upgini-1.2.38a3769.dev6}/src/upgini/mdc/context.py +0 -0
  30. {upgini-1.2.38a3769.dev4 → upgini-1.2.38a3769.dev6}/src/upgini/metadata.py +0 -0
  31. {upgini-1.2.38a3769.dev4 → upgini-1.2.38a3769.dev6}/src/upgini/metrics.py +0 -0
  32. {upgini-1.2.38a3769.dev4 → upgini-1.2.38a3769.dev6}/src/upgini/normalizer/__init__.py +0 -0
  33. {upgini-1.2.38a3769.dev4 → upgini-1.2.38a3769.dev6}/src/upgini/normalizer/normalize_utils.py +0 -0
  34. {upgini-1.2.38a3769.dev4 → upgini-1.2.38a3769.dev6}/src/upgini/resource_bundle/__init__.py +0 -0
  35. {upgini-1.2.38a3769.dev4 → upgini-1.2.38a3769.dev6}/src/upgini/resource_bundle/exceptions.py +0 -0
  36. {upgini-1.2.38a3769.dev4 → upgini-1.2.38a3769.dev6}/src/upgini/resource_bundle/strings.properties +0 -0
  37. {upgini-1.2.38a3769.dev4 → upgini-1.2.38a3769.dev6}/src/upgini/resource_bundle/strings_widget.properties +0 -0
  38. {upgini-1.2.38a3769.dev4 → upgini-1.2.38a3769.dev6}/src/upgini/sampler/__init__.py +0 -0
  39. {upgini-1.2.38a3769.dev4 → upgini-1.2.38a3769.dev6}/src/upgini/sampler/base.py +0 -0
  40. {upgini-1.2.38a3769.dev4 → upgini-1.2.38a3769.dev6}/src/upgini/sampler/random_under_sampler.py +0 -0
  41. {upgini-1.2.38a3769.dev4 → upgini-1.2.38a3769.dev6}/src/upgini/sampler/utils.py +0 -0
  42. {upgini-1.2.38a3769.dev4 → upgini-1.2.38a3769.dev6}/src/upgini/search_task.py +0 -0
  43. {upgini-1.2.38a3769.dev4 → upgini-1.2.38a3769.dev6}/src/upgini/spinner.py +0 -0
  44. {upgini-1.2.38a3769.dev4 → upgini-1.2.38a3769.dev6}/src/upgini/utils/Roboto-Regular.ttf +0 -0
  45. {upgini-1.2.38a3769.dev4 → upgini-1.2.38a3769.dev6}/src/upgini/utils/__init__.py +0 -0
  46. {upgini-1.2.38a3769.dev4 → upgini-1.2.38a3769.dev6}/src/upgini/utils/base_search_key_detector.py +0 -0
  47. {upgini-1.2.38a3769.dev4 → upgini-1.2.38a3769.dev6}/src/upgini/utils/blocked_time_series.py +0 -0
  48. {upgini-1.2.38a3769.dev4 → upgini-1.2.38a3769.dev6}/src/upgini/utils/country_utils.py +0 -0
  49. {upgini-1.2.38a3769.dev4 → upgini-1.2.38a3769.dev6}/src/upgini/utils/custom_loss_utils.py +0 -0
  50. {upgini-1.2.38a3769.dev4 → upgini-1.2.38a3769.dev6}/src/upgini/utils/cv_utils.py +0 -0
  51. {upgini-1.2.38a3769.dev4 → upgini-1.2.38a3769.dev6}/src/upgini/utils/datetime_utils.py +0 -0
  52. {upgini-1.2.38a3769.dev4 → upgini-1.2.38a3769.dev6}/src/upgini/utils/deduplicate_utils.py +0 -0
  53. {upgini-1.2.38a3769.dev4 → upgini-1.2.38a3769.dev6}/src/upgini/utils/display_utils.py +0 -0
  54. {upgini-1.2.38a3769.dev4 → upgini-1.2.38a3769.dev6}/src/upgini/utils/email_utils.py +0 -0
  55. {upgini-1.2.38a3769.dev4 → upgini-1.2.38a3769.dev6}/src/upgini/utils/fallback_progress_bar.py +0 -0
  56. {upgini-1.2.38a3769.dev4 → upgini-1.2.38a3769.dev6}/src/upgini/utils/feature_info.py +0 -0
  57. {upgini-1.2.38a3769.dev4 → upgini-1.2.38a3769.dev6}/src/upgini/utils/features_validator.py +0 -0
  58. {upgini-1.2.38a3769.dev4 → upgini-1.2.38a3769.dev6}/src/upgini/utils/format.py +0 -0
  59. {upgini-1.2.38a3769.dev4 → upgini-1.2.38a3769.dev6}/src/upgini/utils/ip_utils.py +0 -0
  60. {upgini-1.2.38a3769.dev4 → upgini-1.2.38a3769.dev6}/src/upgini/utils/phone_utils.py +0 -0
  61. {upgini-1.2.38a3769.dev4 → upgini-1.2.38a3769.dev6}/src/upgini/utils/postal_code_utils.py +0 -0
  62. {upgini-1.2.38a3769.dev4 → upgini-1.2.38a3769.dev6}/src/upgini/utils/progress_bar.py +0 -0
  63. {upgini-1.2.38a3769.dev4 → upgini-1.2.38a3769.dev6}/src/upgini/utils/sklearn_ext.py +0 -0
  64. {upgini-1.2.38a3769.dev4 → upgini-1.2.38a3769.dev6}/src/upgini/utils/target_utils.py +0 -0
  65. {upgini-1.2.38a3769.dev4 → upgini-1.2.38a3769.dev6}/src/upgini/utils/track_info.py +0 -0
  66. {upgini-1.2.38a3769.dev4 → upgini-1.2.38a3769.dev6}/src/upgini/utils/warning_counter.py +0 -0
  67. {upgini-1.2.38a3769.dev4 → upgini-1.2.38a3769.dev6}/src/upgini/version_validator.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: upgini
3
- Version: 1.2.38a3769.dev4
3
+ Version: 1.2.38a3769.dev6
4
4
  Summary: Intelligent data search & enrichment for Machine Learning
5
5
  Project-URL: Bug Reports, https://github.com/upgini/upgini/issues
6
6
  Project-URL: Homepage, https://upgini.com/
@@ -0,0 +1 @@
1
+ __version__ = "1.2.38a3769.dev6"
@@ -77,6 +77,7 @@ class Dataset: # (pd.DataFrame):
77
77
  unnest_search_keys: Optional[Dict[str, str]] = None,
78
78
  model_task_type: Optional[ModelTaskType] = None,
79
79
  cv_type: Optional[CVType] = None,
80
+ id_columns: Optional[List[str]] = None,
80
81
  random_state: Optional[int] = None,
81
82
  rest_client: Optional[_RestClient] = None,
82
83
  logger: Optional[logging.Logger] = None,
@@ -120,6 +121,7 @@ class Dataset: # (pd.DataFrame):
120
121
  self.random_state = random_state
121
122
  self.columns_renaming: Dict[str, str] = {}
122
123
  self.imbalanced: bool = False
124
+ self.id_columns = id_columns
123
125
  if logger is not None:
124
126
  self.logger = logger
125
127
  else:
@@ -230,6 +232,7 @@ class Dataset: # (pd.DataFrame):
230
232
  target_column=target_column,
231
233
  task_type=self.task_type,
232
234
  cv_type=self.cv_type,
235
+ id_columns=self.id_columns,
233
236
  random_state=self.random_state,
234
237
  sample_size=self.FORCE_SAMPLE_SIZE,
235
238
  logger=self.logger,
@@ -305,7 +308,7 @@ class Dataset: # (pd.DataFrame):
305
308
  if self.cv_type is not None and self.cv_type.is_time_series():
306
309
  resampled_data = balance_undersample_time_series(
307
310
  df=self.data,
308
- id_columns=[k for k, v in self.meaning_types.items() if v == FileColumnMeaningType.CUSTOM_KEY],
311
+ id_columns=self.id_columns,
309
312
  date_column=next(
310
313
  k
311
314
  for k, v in self.meaning_types.items()
@@ -932,7 +932,7 @@ class FeaturesEnricher(TransformerMixin):
932
932
  cat_features, search_keys_for_metrics = self._get_client_cat_features(
933
933
  estimator, validated_X, self.search_keys
934
934
  )
935
- search_keys_for_metrics.extend([c for c in self.id_columns if c not in search_keys_for_metrics])
935
+ search_keys_for_metrics.extend([c for c in self.id_columns or [] if c not in search_keys_for_metrics])
936
936
 
937
937
  prepared_data = self._prepare_data_for_metrics(
938
938
  trace_id=trace_id,
@@ -2300,6 +2300,7 @@ class FeaturesEnricher(TransformerMixin):
2300
2300
  meaning_types=meaning_types,
2301
2301
  search_keys=combined_search_keys,
2302
2302
  unnest_search_keys=unnest_search_keys,
2303
+ id_columns=self.__get_renamed_id_columns(),
2303
2304
  date_format=self.date_format,
2304
2305
  rest_client=self.rest_client,
2305
2306
  logger=self.logger,
@@ -2657,8 +2658,7 @@ class FeaturesEnricher(TransformerMixin):
2657
2658
  self.__adjust_cv(df)
2658
2659
 
2659
2660
  if self.id_columns is not None and self.cv is not None and self.cv.is_time_series():
2660
- reverse_renaming = {v: k for k, v in self.fit_columns_renaming.items()}
2661
- id_columns = [reverse_renaming[col] for col in self.id_columns if col in reverse_renaming]
2661
+ id_columns = self.__get_renamed_id_columns()
2662
2662
  self.fit_search_keys.update({col: SearchKey.CUSTOM_KEY for col in id_columns})
2663
2663
  self.runtime_parameters.properties["id_columns"] = ",".join(id_columns)
2664
2664
 
@@ -2784,6 +2784,7 @@ class FeaturesEnricher(TransformerMixin):
2784
2784
  unnest_search_keys=unnest_search_keys,
2785
2785
  model_task_type=self.model_task_type,
2786
2786
  cv_type=self.cv,
2787
+ id_columns=self.__get_renamed_id_columns(),
2787
2788
  date_format=self.date_format,
2788
2789
  random_state=self.random_state,
2789
2790
  rest_client=self.rest_client,
@@ -2943,6 +2944,10 @@ class FeaturesEnricher(TransformerMixin):
2943
2944
  def __should_add_date_column(self):
2944
2945
  return self.add_date_if_missing or (self.cv is not None and self.cv.is_time_series())
2945
2946
 
2947
+ def __get_renamed_id_columns(self):
2948
+ reverse_renaming = {v: k for k, v in self.fit_columns_renaming.items()}
2949
+ return [reverse_renaming.get(c) or c for c in self.id_columns]
2950
+
2946
2951
  def __adjust_cv(self, df: pd.DataFrame):
2947
2952
  date_column = SearchKey.find_key(self.fit_search_keys, [SearchKey.DATE, SearchKey.DATETIME])
2948
2953
  # Check Multivariate time series
@@ -1 +0,0 @@
1
- __version__ = "1.2.38a3769.dev4"