upgini 1.1.152__tar.gz → 1.1.154a1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of upgini might be problematic. Click here for more details.

Files changed (68)
  1. {upgini-1.1.152/src/upgini.egg-info → upgini-1.1.154a1}/PKG-INFO +1 -1
  2. {upgini-1.1.152 → upgini-1.1.154a1}/setup.py +1 -1
  3. {upgini-1.1.152 → upgini-1.1.154a1}/src/upgini/features_enricher.py +60 -50
  4. {upgini-1.1.152 → upgini-1.1.154a1/src/upgini.egg-info}/PKG-INFO +1 -1
  5. {upgini-1.1.152 → upgini-1.1.154a1}/LICENSE +0 -0
  6. {upgini-1.1.152 → upgini-1.1.154a1}/README.md +0 -0
  7. {upgini-1.1.152 → upgini-1.1.154a1}/pyproject.toml +0 -0
  8. {upgini-1.1.152 → upgini-1.1.154a1}/setup.cfg +0 -0
  9. {upgini-1.1.152 → upgini-1.1.154a1}/src/upgini/__init__.py +0 -0
  10. {upgini-1.1.152 → upgini-1.1.154a1}/src/upgini/ads.py +0 -0
  11. {upgini-1.1.152 → upgini-1.1.154a1}/src/upgini/ads_management/__init__.py +0 -0
  12. {upgini-1.1.152 → upgini-1.1.154a1}/src/upgini/ads_management/ads_manager.py +0 -0
  13. {upgini-1.1.152 → upgini-1.1.154a1}/src/upgini/data_source/__init__.py +0 -0
  14. {upgini-1.1.152 → upgini-1.1.154a1}/src/upgini/data_source/data_source_publisher.py +0 -0
  15. {upgini-1.1.152 → upgini-1.1.154a1}/src/upgini/dataset.py +0 -0
  16. {upgini-1.1.152 → upgini-1.1.154a1}/src/upgini/errors.py +0 -0
  17. {upgini-1.1.152 → upgini-1.1.154a1}/src/upgini/fingerprint.js +0 -0
  18. {upgini-1.1.152 → upgini-1.1.154a1}/src/upgini/http.py +0 -0
  19. {upgini-1.1.152 → upgini-1.1.154a1}/src/upgini/mdc/__init__.py +0 -0
  20. {upgini-1.1.152 → upgini-1.1.154a1}/src/upgini/mdc/context.py +0 -0
  21. {upgini-1.1.152 → upgini-1.1.154a1}/src/upgini/metadata.py +0 -0
  22. {upgini-1.1.152 → upgini-1.1.154a1}/src/upgini/metrics.py +0 -0
  23. {upgini-1.1.152 → upgini-1.1.154a1}/src/upgini/normalizer/__init__.py +0 -0
  24. {upgini-1.1.152 → upgini-1.1.154a1}/src/upgini/normalizer/phone_normalizer.py +0 -0
  25. {upgini-1.1.152 → upgini-1.1.154a1}/src/upgini/resource_bundle/__init__.py +0 -0
  26. {upgini-1.1.152 → upgini-1.1.154a1}/src/upgini/resource_bundle/exceptions.py +0 -0
  27. {upgini-1.1.152 → upgini-1.1.154a1}/src/upgini/resource_bundle/strings.properties +0 -0
  28. {upgini-1.1.152 → upgini-1.1.154a1}/src/upgini/sampler/__init__.py +0 -0
  29. {upgini-1.1.152 → upgini-1.1.154a1}/src/upgini/sampler/base.py +0 -0
  30. {upgini-1.1.152 → upgini-1.1.154a1}/src/upgini/sampler/random_under_sampler.py +0 -0
  31. {upgini-1.1.152 → upgini-1.1.154a1}/src/upgini/sampler/utils.py +0 -0
  32. {upgini-1.1.152 → upgini-1.1.154a1}/src/upgini/search_task.py +0 -0
  33. {upgini-1.1.152 → upgini-1.1.154a1}/src/upgini/spinner.py +0 -0
  34. {upgini-1.1.152 → upgini-1.1.154a1}/src/upgini/utils/__init__.py +0 -0
  35. {upgini-1.1.152 → upgini-1.1.154a1}/src/upgini/utils/base_search_key_detector.py +0 -0
  36. {upgini-1.1.152 → upgini-1.1.154a1}/src/upgini/utils/blocked_time_series.py +0 -0
  37. {upgini-1.1.152 → upgini-1.1.154a1}/src/upgini/utils/country_utils.py +0 -0
  38. {upgini-1.1.152 → upgini-1.1.154a1}/src/upgini/utils/custom_loss_utils.py +0 -0
  39. {upgini-1.1.152 → upgini-1.1.154a1}/src/upgini/utils/cv_utils.py +0 -0
  40. {upgini-1.1.152 → upgini-1.1.154a1}/src/upgini/utils/datetime_utils.py +0 -0
  41. {upgini-1.1.152 → upgini-1.1.154a1}/src/upgini/utils/display_utils.py +0 -0
  42. {upgini-1.1.152 → upgini-1.1.154a1}/src/upgini/utils/email_utils.py +0 -0
  43. {upgini-1.1.152 → upgini-1.1.154a1}/src/upgini/utils/features_validator.py +0 -0
  44. {upgini-1.1.152 → upgini-1.1.154a1}/src/upgini/utils/format.py +0 -0
  45. {upgini-1.1.152 → upgini-1.1.154a1}/src/upgini/utils/ip_utils.py +0 -0
  46. {upgini-1.1.152 → upgini-1.1.154a1}/src/upgini/utils/phone_utils.py +0 -0
  47. {upgini-1.1.152 → upgini-1.1.154a1}/src/upgini/utils/postal_code_utils.py +0 -0
  48. {upgini-1.1.152 → upgini-1.1.154a1}/src/upgini/utils/target_utils.py +0 -0
  49. {upgini-1.1.152 → upgini-1.1.154a1}/src/upgini/utils/track_info.py +0 -0
  50. {upgini-1.1.152 → upgini-1.1.154a1}/src/upgini/utils/warning_counter.py +0 -0
  51. {upgini-1.1.152 → upgini-1.1.154a1}/src/upgini/version_validator.py +0 -0
  52. {upgini-1.1.152 → upgini-1.1.154a1}/src/upgini.egg-info/SOURCES.txt +0 -0
  53. {upgini-1.1.152 → upgini-1.1.154a1}/src/upgini.egg-info/dependency_links.txt +0 -0
  54. {upgini-1.1.152 → upgini-1.1.154a1}/src/upgini.egg-info/requires.txt +0 -0
  55. {upgini-1.1.152 → upgini-1.1.154a1}/src/upgini.egg-info/top_level.txt +0 -0
  56. {upgini-1.1.152 → upgini-1.1.154a1}/tests/test_binary_dataset.py +0 -0
  57. {upgini-1.1.152 → upgini-1.1.154a1}/tests/test_blocked_time_series.py +0 -0
  58. {upgini-1.1.152 → upgini-1.1.154a1}/tests/test_categorical_dataset.py +0 -0
  59. {upgini-1.1.152 → upgini-1.1.154a1}/tests/test_continuous_dataset.py +0 -0
  60. {upgini-1.1.152 → upgini-1.1.154a1}/tests/test_country_utils.py +0 -0
  61. {upgini-1.1.152 → upgini-1.1.154a1}/tests/test_custom_loss_utils.py +0 -0
  62. {upgini-1.1.152 → upgini-1.1.154a1}/tests/test_datetime_utils.py +0 -0
  63. {upgini-1.1.152 → upgini-1.1.154a1}/tests/test_email_utils.py +0 -0
  64. {upgini-1.1.152 → upgini-1.1.154a1}/tests/test_etalon_validation.py +0 -0
  65. {upgini-1.1.152 → upgini-1.1.154a1}/tests/test_features_enricher.py +0 -0
  66. {upgini-1.1.152 → upgini-1.1.154a1}/tests/test_metrics.py +0 -0
  67. {upgini-1.1.152 → upgini-1.1.154a1}/tests/test_phone_utils.py +0 -0
  68. {upgini-1.1.152 → upgini-1.1.154a1}/tests/test_postal_code_utils.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: upgini
3
- Version: 1.1.152
3
+ Version: 1.1.154a1
4
4
  Summary: Intelligent data search & enrichment for Machine Learning
5
5
  Home-page: https://upgini.com/
6
6
  Author: Upgini Developers
@@ -35,7 +35,7 @@ def send_log(msg: str):
35
35
 
36
36
 
37
37
  here = Path(__file__).parent.resolve()
38
- version = "1.1.152"
38
+ version = "1.1.154a1"
39
39
  try:
40
40
  send_log(f"Start setup PyLib version {version}")
41
41
  setup(
@@ -142,6 +142,7 @@ class FeaturesEnricher(TransformerMixin):
142
142
  round_embeddings: Optional[int] = None,
143
143
  logs_enabled: bool = True,
144
144
  raise_validation_error: bool = False,
145
+ exclude_columns: Optional[List[str]] = None,
145
146
  **kwargs,
146
147
  ):
147
148
  self._api_key = api_key or os.environ.get(UPGINI_API_KEY)
@@ -207,21 +208,19 @@ class FeaturesEnricher(TransformerMixin):
207
208
  self.shared_datasets = shared_datasets
208
209
  if shared_datasets is not None:
209
210
  self.runtime_parameters.properties["shared_datasets"] = ",".join(shared_datasets)
210
- self.generate_features: Optional[List[str]] = None
211
+ self.generate_features = generate_features
212
+ self.round_embeddings = round_embeddings
211
213
  if generate_features is not None:
212
214
  if len(generate_features) > 2:
213
215
  msg = bundle.get("too_many_generate_features")
214
216
  self.logger.error(msg)
215
217
  raise ValidationError(msg)
216
- self.generate_features = generate_features
217
218
  self.runtime_parameters.properties["generate_features"] = ",".join(generate_features)
218
- self.round_embeddings: Optional[int] = None
219
219
  if round_embeddings is not None:
220
220
  if not isinstance(round_embeddings, int) or round_embeddings < 0:
221
221
  msg = bundle.get("invalid_round_embeddings")
222
222
  self.logger.error(msg)
223
223
  raise ValidationError(msg)
224
- self.round_embeddings = round_embeddings
225
224
  self.runtime_parameters.properties["round_embeddings"] = round_embeddings
226
225
 
227
226
  self.passed_features: List[str] = []
@@ -238,6 +237,7 @@ class FeaturesEnricher(TransformerMixin):
238
237
  self.imbalanced = False
239
238
  self.__cached_sampled_datasets: Optional[Tuple[pd.DataFrame, pd.DataFrame, pd.Series, Dict, Dict]] = None
240
239
  self.raise_validation_error = raise_validation_error
240
+ self.exclude_columns = exclude_columns
241
241
 
242
242
  def _get_api_key(self):
243
243
  return self._api_key
@@ -678,11 +678,11 @@ class FeaturesEnricher(TransformerMixin):
678
678
 
679
679
  try:
680
680
  self.__log_debug_information(
681
- X,
682
- y,
683
- eval_set,
681
+ X if X is not None else self.X,
682
+ y if y is not None else self.y,
683
+ eval_set if eval_set is not None else self.eval_set,
684
684
  exclude_features_sources=exclude_features_sources,
685
- cv=cv,
685
+ cv=cv if cv is not None else self.cv,
686
686
  importance_threshold=importance_threshold,
687
687
  max_features=max_features,
688
688
  scoring=scoring,
@@ -1801,6 +1801,9 @@ class FeaturesEnricher(TransformerMixin):
1801
1801
  if not is_transform and not validated_X.index.is_unique:
1802
1802
  raise ValidationError(bundle.get("x_non_unique_index"))
1803
1803
 
1804
+ if self.exclude_columns is not None:
1805
+ validated_X = drop_existing_columns(validated_X, self.exclude_columns)
1806
+
1804
1807
  if TARGET in validated_X.columns:
1805
1808
  raise ValidationError(bundle.get("x_contains_reserved_column_name").format(TARGET))
1806
1809
  if not is_transform and EVAL_SET_INDEX in validated_X.columns:
@@ -1970,51 +1973,58 @@ class FeaturesEnricher(TransformerMixin):
1970
1973
  estimator: Optional[Any] = None,
1971
1974
  remove_outliers_calc_metrics: Optional[bool] = None,
1972
1975
  ):
1973
- resolved_api_key = self.api_key or os.environ.get(UPGINI_API_KEY)
1974
- self.logger.info(
1975
- f"Search keys: {self.search_keys}\n"
1976
- f"Country code: {self.country_code}\n"
1977
- f"Model task type: {self.model_task_type}\n"
1978
- f"Api key presented?: {resolved_api_key is not None and resolved_api_key != ''}\n"
1979
- f"Endpoint: {self.endpoint}\n"
1980
- f"Runtime parameters: {self.runtime_parameters}\n"
1981
- f"Date format: {self.date_format}\n"
1982
- f"CV: {cv}\n"
1983
- f"importance_threshold: {importance_threshold}\n"
1984
- f"max_features: {max_features}"
1985
- f"Shared datasets: {self.shared_datasets}\n"
1986
- f"Random state: {self.random_state}\n"
1987
- f"Generate features: {self.generate_features}\n"
1988
- f"Round embeddings: {self.round_embeddings}\n"
1989
- f"Detect missing search keys: {self.detect_missing_search_keys}\n"
1990
- f"Exclude features sources: {exclude_features_sources}\n"
1991
- f"Calculate metrics: {calculate_metrics}\n"
1992
- f"Scoring: {scoring}\n"
1993
- f"Estimator: {estimator}\n"
1994
- f"Remove target outliers: {remove_outliers_calc_metrics}\n"
1995
- f"Search id: {self.search_id}\n"
1996
- )
1976
+ try:
1977
+ resolved_api_key = self.api_key or os.environ.get(UPGINI_API_KEY)
1978
+ self.logger.info(
1979
+ f"Search keys: {self.search_keys}\n"
1980
+ f"Country code: {self.country_code}\n"
1981
+ f"Model task type: {self.model_task_type}\n"
1982
+ f"Api key presented?: {resolved_api_key is not None and resolved_api_key != ''}\n"
1983
+ f"Endpoint: {self.endpoint}\n"
1984
+ f"Runtime parameters: {self.runtime_parameters}\n"
1985
+ f"Date format: {self.date_format}\n"
1986
+ f"CV: {cv}\n"
1987
+ f"importance_threshold: {importance_threshold}\n"
1988
+ f"max_features: {max_features}\n"
1989
+ f"Shared datasets: {self.shared_datasets}\n"
1990
+ f"Random state: {self.random_state}\n"
1991
+ f"Generate features: {self.generate_features}\n"
1992
+ f"Round embeddings: {self.round_embeddings}\n"
1993
+ f"Detect missing search keys: {self.detect_missing_search_keys}\n"
1994
+ f"Exclude features sources: {exclude_features_sources}\n"
1995
+ f"Calculate metrics: {calculate_metrics}\n"
1996
+ f"Scoring: {scoring}\n"
1997
+ f"Estimator: {estimator}\n"
1998
+ f"Remove target outliers: {remove_outliers_calc_metrics}\n"
1999
+ f"Exclude columns: {self.exclude_columns}\n"
2000
+ f"Search id: {self.search_id}\n"
2001
+ )
1997
2002
 
1998
- def sample(df):
1999
- if isinstance(df, pd.Series) or isinstance(df, pd.DataFrame):
2000
- return df.head(10)
2001
- else:
2002
- return df[:10]
2003
+ def sample(df):
2004
+ if isinstance(df, pd.Series) or isinstance(df, pd.DataFrame):
2005
+ return df.head(10)
2006
+ else:
2007
+ return df[:10]
2003
2008
 
2004
- def print_datasets_sample():
2005
- self.logger.info(f"First 10 rows of the X with shape {X.shape}:\n{sample(X)}")
2006
- if y is not None:
2007
- self.logger.info(f"First 10 rows of the y with shape {_num_samples(y)}:\n{sample(y)}")
2008
- if eval_set is not None:
2009
- for idx, eval_pair in enumerate(eval_set):
2010
- eval_X: pd.DataFrame = eval_pair[0]
2011
- eval_y = eval_pair[1]
2012
- self.logger.info(f"First 10 rows of the eval_X_{idx} with shape {eval_X.shape}:\n{sample(eval_X)}")
2013
- self.logger.info(
2014
- f"First 10 rows of the eval_y_{idx} with shape {_num_samples(eval_y)}:\n{sample(eval_y)}"
2015
- )
2009
+ def print_datasets_sample():
2010
+ if X is not None:
2011
+ self.logger.info(f"First 10 rows of the X with shape {X.shape}:\n{sample(X)}")
2012
+ if y is not None:
2013
+ self.logger.info(f"First 10 rows of the y with shape {_num_samples(y)}:\n{sample(y)}")
2014
+ if eval_set is not None:
2015
+ for idx, eval_pair in enumerate(eval_set):
2016
+ eval_X: pd.DataFrame = eval_pair[0]
2017
+ eval_y = eval_pair[1]
2018
+ self.logger.info(
2019
+ f"First 10 rows of the eval_X_{idx} with shape {eval_X.shape}:\n{sample(eval_X)}"
2020
+ )
2021
+ self.logger.info(
2022
+ f"First 10 rows of the eval_y_{idx} with shape {_num_samples(eval_y)}:\n{sample(eval_y)}"
2023
+ )
2016
2024
 
2017
- do_without_pandas_limits(print_datasets_sample)
2025
+ do_without_pandas_limits(print_datasets_sample)
2026
+ except Exception:
2027
+ self.logger.exception("Failed to log debug information")
2018
2028
 
2019
2029
  def __handle_index_search_keys(self, df: pd.DataFrame, search_keys: Dict[str, SearchKey]) -> pd.DataFrame:
2020
2030
  index_names = df.index.names if df.index.names != [None] else [DEFAULT_INDEX]
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: upgini
3
- Version: 1.1.152
3
+ Version: 1.1.154a1
4
4
  Summary: Intelligent data search & enrichment for Machine Learning
5
5
  Home-page: https://upgini.com/
6
6
  Author: Upgini Developers
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes