upgini 1.2.29a5__tar.gz → 1.2.29a6__tar.gz

This diff shows the changes between publicly available package versions as released to one of the supported registries. It is provided for informational purposes only and reflects the packages as they appear in their respective public registries.

Potentially problematic release.


This version of upgini might be problematic. Click here for more details.

Files changed (67)
  1. {upgini-1.2.29a5 → upgini-1.2.29a6}/PKG-INFO +1 -1
  2. upgini-1.2.29a6/src/upgini/__about__.py +1 -0
  3. {upgini-1.2.29a5 → upgini-1.2.29a6}/src/upgini/features_enricher.py +26 -14
  4. upgini-1.2.29a5/src/upgini/__about__.py +0 -1
  5. {upgini-1.2.29a5 → upgini-1.2.29a6}/.gitignore +0 -0
  6. {upgini-1.2.29a5 → upgini-1.2.29a6}/LICENSE +0 -0
  7. {upgini-1.2.29a5 → upgini-1.2.29a6}/README.md +0 -0
  8. {upgini-1.2.29a5 → upgini-1.2.29a6}/pyproject.toml +0 -0
  9. {upgini-1.2.29a5 → upgini-1.2.29a6}/src/upgini/__init__.py +0 -0
  10. {upgini-1.2.29a5 → upgini-1.2.29a6}/src/upgini/ads.py +0 -0
  11. {upgini-1.2.29a5 → upgini-1.2.29a6}/src/upgini/ads_management/__init__.py +0 -0
  12. {upgini-1.2.29a5 → upgini-1.2.29a6}/src/upgini/ads_management/ads_manager.py +0 -0
  13. {upgini-1.2.29a5 → upgini-1.2.29a6}/src/upgini/autofe/__init__.py +0 -0
  14. {upgini-1.2.29a5 → upgini-1.2.29a6}/src/upgini/autofe/all_operands.py +0 -0
  15. {upgini-1.2.29a5 → upgini-1.2.29a6}/src/upgini/autofe/binary.py +0 -0
  16. {upgini-1.2.29a5 → upgini-1.2.29a6}/src/upgini/autofe/date.py +0 -0
  17. {upgini-1.2.29a5 → upgini-1.2.29a6}/src/upgini/autofe/feature.py +0 -0
  18. {upgini-1.2.29a5 → upgini-1.2.29a6}/src/upgini/autofe/groupby.py +0 -0
  19. {upgini-1.2.29a5 → upgini-1.2.29a6}/src/upgini/autofe/operand.py +0 -0
  20. {upgini-1.2.29a5 → upgini-1.2.29a6}/src/upgini/autofe/unary.py +0 -0
  21. {upgini-1.2.29a5 → upgini-1.2.29a6}/src/upgini/autofe/vector.py +0 -0
  22. {upgini-1.2.29a5 → upgini-1.2.29a6}/src/upgini/data_source/__init__.py +0 -0
  23. {upgini-1.2.29a5 → upgini-1.2.29a6}/src/upgini/data_source/data_source_publisher.py +0 -0
  24. {upgini-1.2.29a5 → upgini-1.2.29a6}/src/upgini/dataset.py +0 -0
  25. {upgini-1.2.29a5 → upgini-1.2.29a6}/src/upgini/errors.py +0 -0
  26. {upgini-1.2.29a5 → upgini-1.2.29a6}/src/upgini/http.py +0 -0
  27. {upgini-1.2.29a5 → upgini-1.2.29a6}/src/upgini/lazy_import.py +0 -0
  28. {upgini-1.2.29a5 → upgini-1.2.29a6}/src/upgini/mdc/__init__.py +0 -0
  29. {upgini-1.2.29a5 → upgini-1.2.29a6}/src/upgini/mdc/context.py +0 -0
  30. {upgini-1.2.29a5 → upgini-1.2.29a6}/src/upgini/metadata.py +0 -0
  31. {upgini-1.2.29a5 → upgini-1.2.29a6}/src/upgini/metrics.py +0 -0
  32. {upgini-1.2.29a5 → upgini-1.2.29a6}/src/upgini/normalizer/__init__.py +0 -0
  33. {upgini-1.2.29a5 → upgini-1.2.29a6}/src/upgini/normalizer/normalize_utils.py +0 -0
  34. {upgini-1.2.29a5 → upgini-1.2.29a6}/src/upgini/resource_bundle/__init__.py +0 -0
  35. {upgini-1.2.29a5 → upgini-1.2.29a6}/src/upgini/resource_bundle/exceptions.py +0 -0
  36. {upgini-1.2.29a5 → upgini-1.2.29a6}/src/upgini/resource_bundle/strings.properties +0 -0
  37. {upgini-1.2.29a5 → upgini-1.2.29a6}/src/upgini/resource_bundle/strings_widget.properties +0 -0
  38. {upgini-1.2.29a5 → upgini-1.2.29a6}/src/upgini/sampler/__init__.py +0 -0
  39. {upgini-1.2.29a5 → upgini-1.2.29a6}/src/upgini/sampler/base.py +0 -0
  40. {upgini-1.2.29a5 → upgini-1.2.29a6}/src/upgini/sampler/random_under_sampler.py +0 -0
  41. {upgini-1.2.29a5 → upgini-1.2.29a6}/src/upgini/sampler/utils.py +0 -0
  42. {upgini-1.2.29a5 → upgini-1.2.29a6}/src/upgini/search_task.py +0 -0
  43. {upgini-1.2.29a5 → upgini-1.2.29a6}/src/upgini/spinner.py +0 -0
  44. {upgini-1.2.29a5 → upgini-1.2.29a6}/src/upgini/utils/Roboto-Regular.ttf +0 -0
  45. {upgini-1.2.29a5 → upgini-1.2.29a6}/src/upgini/utils/__init__.py +0 -0
  46. {upgini-1.2.29a5 → upgini-1.2.29a6}/src/upgini/utils/base_search_key_detector.py +0 -0
  47. {upgini-1.2.29a5 → upgini-1.2.29a6}/src/upgini/utils/blocked_time_series.py +0 -0
  48. {upgini-1.2.29a5 → upgini-1.2.29a6}/src/upgini/utils/country_utils.py +0 -0
  49. {upgini-1.2.29a5 → upgini-1.2.29a6}/src/upgini/utils/custom_loss_utils.py +0 -0
  50. {upgini-1.2.29a5 → upgini-1.2.29a6}/src/upgini/utils/cv_utils.py +0 -0
  51. {upgini-1.2.29a5 → upgini-1.2.29a6}/src/upgini/utils/datetime_utils.py +0 -0
  52. {upgini-1.2.29a5 → upgini-1.2.29a6}/src/upgini/utils/deduplicate_utils.py +0 -0
  53. {upgini-1.2.29a5 → upgini-1.2.29a6}/src/upgini/utils/display_utils.py +0 -0
  54. {upgini-1.2.29a5 → upgini-1.2.29a6}/src/upgini/utils/email_utils.py +0 -0
  55. {upgini-1.2.29a5 → upgini-1.2.29a6}/src/upgini/utils/fallback_progress_bar.py +0 -0
  56. {upgini-1.2.29a5 → upgini-1.2.29a6}/src/upgini/utils/feature_info.py +0 -0
  57. {upgini-1.2.29a5 → upgini-1.2.29a6}/src/upgini/utils/features_validator.py +0 -0
  58. {upgini-1.2.29a5 → upgini-1.2.29a6}/src/upgini/utils/format.py +0 -0
  59. {upgini-1.2.29a5 → upgini-1.2.29a6}/src/upgini/utils/ip_utils.py +0 -0
  60. {upgini-1.2.29a5 → upgini-1.2.29a6}/src/upgini/utils/phone_utils.py +0 -0
  61. {upgini-1.2.29a5 → upgini-1.2.29a6}/src/upgini/utils/postal_code_utils.py +0 -0
  62. {upgini-1.2.29a5 → upgini-1.2.29a6}/src/upgini/utils/progress_bar.py +0 -0
  63. {upgini-1.2.29a5 → upgini-1.2.29a6}/src/upgini/utils/sklearn_ext.py +0 -0
  64. {upgini-1.2.29a5 → upgini-1.2.29a6}/src/upgini/utils/target_utils.py +0 -0
  65. {upgini-1.2.29a5 → upgini-1.2.29a6}/src/upgini/utils/track_info.py +0 -0
  66. {upgini-1.2.29a5 → upgini-1.2.29a6}/src/upgini/utils/warning_counter.py +0 -0
  67. {upgini-1.2.29a5 → upgini-1.2.29a6}/src/upgini/version_validator.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: upgini
3
- Version: 1.2.29a5
3
+ Version: 1.2.29a6
4
4
  Summary: Intelligent data search & enrichment for Machine Learning
5
5
  Project-URL: Bug Reports, https://github.com/upgini/upgini/issues
6
6
  Project-URL: Homepage, https://upgini.com/
@@ -0,0 +1 @@
1
+ __version__ = "1.2.29a6"
@@ -2,6 +2,7 @@ import dataclasses
2
2
  import datetime
3
3
  import gc
4
4
  import hashlib
5
+ import itertools
5
6
  import logging
6
7
  import numbers
7
8
  import os
@@ -159,6 +160,10 @@ class FeaturesEnricher(TransformerMixin):
159
160
 
160
161
  shared_datasets: list of str, optional (default=None)
161
162
  List of private shared dataset ids for custom search
163
+
164
+ select_features: bool, optional (default=False)
165
+ If True, return only selected features both from input and data sources.
166
+ Otherwise, return all features from input and only selected features from data sources.
162
167
  """
163
168
 
164
169
  TARGET_NAME = "target"
@@ -279,7 +284,7 @@ class FeaturesEnricher(TransformerMixin):
279
284
  self._relevant_data_sources_wo_links: pd.DataFrame = self.EMPTY_DATA_SOURCES
280
285
  self.metrics: Optional[pd.DataFrame] = None
281
286
  self.feature_names_ = []
282
- self.client_feature_names_ = []
287
+ self.dropped_client_feature_names_ = []
283
288
  self.feature_importances_ = []
284
289
  self.search_id = search_id
285
290
  self.select_features = select_features
@@ -2071,7 +2076,7 @@ class FeaturesEnricher(TransformerMixin):
2071
2076
  is_demo_dataset = hash_input(validated_X) in DEMO_DATASET_HASHES
2072
2077
 
2073
2078
  columns_to_drop = [
2074
- c for c in validated_X.columns if c in self.feature_names_ and c not in self.client_feature_names_
2079
+ c for c in validated_X.columns if c in self.feature_names_ and c in self.dropped_client_feature_names_
2075
2080
  ]
2076
2081
  if len(columns_to_drop) > 0:
2077
2082
  msg = self.bundle.get("x_contains_enriching_columns").format(columns_to_drop)
@@ -2328,11 +2333,13 @@ class FeaturesEnricher(TransformerMixin):
2328
2333
  else:
2329
2334
  result = enrich()
2330
2335
 
2331
- filtered_columns = self.__filtered_enriched_features(importance_threshold, max_features)
2332
- existing_filtered_columns = [
2333
- c for c in filtered_columns if c in result.columns and c not in validated_X.columns
2336
+ selecting_columns = [
2337
+ c
2338
+ for c in itertools.chain(validated_X.columns.tolist(), generated_features)
2339
+ if c not in self.dropped_client_feature_names_
2334
2340
  ]
2335
- selecting_columns = validated_X.columns.tolist() + generated_features + existing_filtered_columns
2341
+ filtered_columns = self.__filtered_enriched_features(importance_threshold, max_features)
2342
+ selecting_columns.extend(c for c in filtered_columns if c in result.columns and c not in validated_X.columns)
2336
2343
  if add_fit_system_record_id:
2337
2344
  selecting_columns.append(SORT_ID)
2338
2345
 
@@ -3510,7 +3517,7 @@ class FeaturesEnricher(TransformerMixin):
3510
3517
  features_df = self._search_task.get_all_initial_raw_features(trace_id, metrics_calculation=True)
3511
3518
 
3512
3519
  self.feature_names_ = []
3513
- self.client_feature_names_ = []
3520
+ self.dropped_client_feature_names_ = []
3514
3521
  self.feature_importances_ = []
3515
3522
  features_info = []
3516
3523
  features_info_without_links = []
@@ -3520,18 +3527,23 @@ class FeaturesEnricher(TransformerMixin):
3520
3527
  for feature_meta in features_meta:
3521
3528
  if feature_meta.name in original_names_dict.keys():
3522
3529
  feature_meta.name = original_names_dict[feature_meta.name]
3530
+
3531
+ is_client_feature = feature_meta.name in x_columns
3532
+
3533
+ if feature_meta.shap_value == 0.0:
3534
+ if self.select_features:
3535
+ self.dropped_client_feature_names_.append(feature_meta.name)
3536
+ continue
3537
+
3523
3538
  # Use only important features
3524
3539
  if (
3525
- (feature_meta.shap_value == 0.0)
3526
- or (feature_meta.name in self.fit_generated_features)
3527
- or (feature_meta.name == COUNTRY)
3540
+ feature_meta.name in self.fit_generated_features
3541
+ or feature_meta.name == COUNTRY
3542
+ # In select_features mode we select also from etalon features and need to show them
3543
+ or (not self.select_features and is_client_feature)
3528
3544
  ):
3529
3545
  continue
3530
3546
 
3531
- is_client_feature = feature_meta.name in x_columns
3532
- # In select_features mode we select also from etalon features and need to show them
3533
- if not self.select_features and is_client_feature:
3534
- continue
3535
3547
 
3536
3548
  self.feature_names_.append(feature_meta.name)
3537
3549
  self.feature_importances_.append(_round_shap_value(feature_meta.shap_value))
@@ -1 +0,0 @@
1
- __version__ = "1.2.29a5"
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes