upgini 1.2.115a1__tar.gz → 1.2.117a1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (82) hide show
  1. {upgini-1.2.115a1 → upgini-1.2.117a1}/PKG-INFO +4 -3
  2. {upgini-1.2.115a1 → upgini-1.2.117a1}/README.md +1 -1
  3. {upgini-1.2.115a1 → upgini-1.2.117a1}/pyproject.toml +3 -2
  4. upgini-1.2.117a1/src/upgini/__about__.py +1 -0
  5. {upgini-1.2.115a1 → upgini-1.2.117a1}/src/upgini/features_enricher.py +15 -13
  6. {upgini-1.2.115a1 → upgini-1.2.117a1}/src/upgini/metrics.py +3 -3
  7. upgini-1.2.115a1/src/upgini/__about__.py +0 -1
  8. {upgini-1.2.115a1 → upgini-1.2.117a1}/.gitignore +0 -0
  9. {upgini-1.2.115a1 → upgini-1.2.117a1}/LICENSE +0 -0
  10. {upgini-1.2.115a1 → upgini-1.2.117a1}/src/upgini/__init__.py +0 -0
  11. {upgini-1.2.115a1 → upgini-1.2.117a1}/src/upgini/ads.py +0 -0
  12. {upgini-1.2.115a1 → upgini-1.2.117a1}/src/upgini/ads_management/__init__.py +0 -0
  13. {upgini-1.2.115a1 → upgini-1.2.117a1}/src/upgini/ads_management/ads_manager.py +0 -0
  14. {upgini-1.2.115a1 → upgini-1.2.117a1}/src/upgini/autofe/__init__.py +0 -0
  15. {upgini-1.2.115a1 → upgini-1.2.117a1}/src/upgini/autofe/all_operators.py +0 -0
  16. {upgini-1.2.115a1 → upgini-1.2.117a1}/src/upgini/autofe/binary.py +0 -0
  17. {upgini-1.2.115a1 → upgini-1.2.117a1}/src/upgini/autofe/date.py +0 -0
  18. {upgini-1.2.115a1 → upgini-1.2.117a1}/src/upgini/autofe/feature.py +0 -0
  19. {upgini-1.2.115a1 → upgini-1.2.117a1}/src/upgini/autofe/groupby.py +0 -0
  20. {upgini-1.2.115a1 → upgini-1.2.117a1}/src/upgini/autofe/operator.py +0 -0
  21. {upgini-1.2.115a1 → upgini-1.2.117a1}/src/upgini/autofe/timeseries/__init__.py +0 -0
  22. {upgini-1.2.115a1 → upgini-1.2.117a1}/src/upgini/autofe/timeseries/base.py +0 -0
  23. {upgini-1.2.115a1 → upgini-1.2.117a1}/src/upgini/autofe/timeseries/cross.py +0 -0
  24. {upgini-1.2.115a1 → upgini-1.2.117a1}/src/upgini/autofe/timeseries/delta.py +0 -0
  25. {upgini-1.2.115a1 → upgini-1.2.117a1}/src/upgini/autofe/timeseries/lag.py +0 -0
  26. {upgini-1.2.115a1 → upgini-1.2.117a1}/src/upgini/autofe/timeseries/roll.py +0 -0
  27. {upgini-1.2.115a1 → upgini-1.2.117a1}/src/upgini/autofe/timeseries/trend.py +0 -0
  28. {upgini-1.2.115a1 → upgini-1.2.117a1}/src/upgini/autofe/timeseries/volatility.py +0 -0
  29. {upgini-1.2.115a1 → upgini-1.2.117a1}/src/upgini/autofe/unary.py +0 -0
  30. {upgini-1.2.115a1 → upgini-1.2.117a1}/src/upgini/autofe/utils.py +0 -0
  31. {upgini-1.2.115a1 → upgini-1.2.117a1}/src/upgini/autofe/vector.py +0 -0
  32. {upgini-1.2.115a1 → upgini-1.2.117a1}/src/upgini/data_source/__init__.py +0 -0
  33. {upgini-1.2.115a1 → upgini-1.2.117a1}/src/upgini/data_source/data_source_publisher.py +0 -0
  34. {upgini-1.2.115a1 → upgini-1.2.117a1}/src/upgini/dataset.py +0 -0
  35. {upgini-1.2.115a1 → upgini-1.2.117a1}/src/upgini/errors.py +0 -0
  36. {upgini-1.2.115a1 → upgini-1.2.117a1}/src/upgini/http.py +0 -0
  37. {upgini-1.2.115a1 → upgini-1.2.117a1}/src/upgini/mdc/__init__.py +0 -0
  38. {upgini-1.2.115a1 → upgini-1.2.117a1}/src/upgini/mdc/context.py +0 -0
  39. {upgini-1.2.115a1 → upgini-1.2.117a1}/src/upgini/metadata.py +0 -0
  40. {upgini-1.2.115a1 → upgini-1.2.117a1}/src/upgini/normalizer/__init__.py +0 -0
  41. {upgini-1.2.115a1 → upgini-1.2.117a1}/src/upgini/normalizer/normalize_utils.py +0 -0
  42. {upgini-1.2.115a1 → upgini-1.2.117a1}/src/upgini/resource_bundle/__init__.py +0 -0
  43. {upgini-1.2.115a1 → upgini-1.2.117a1}/src/upgini/resource_bundle/exceptions.py +0 -0
  44. {upgini-1.2.115a1 → upgini-1.2.117a1}/src/upgini/resource_bundle/strings.properties +0 -0
  45. {upgini-1.2.115a1 → upgini-1.2.117a1}/src/upgini/resource_bundle/strings_widget.properties +0 -0
  46. {upgini-1.2.115a1 → upgini-1.2.117a1}/src/upgini/sampler/__init__.py +0 -0
  47. {upgini-1.2.115a1 → upgini-1.2.117a1}/src/upgini/sampler/base.py +0 -0
  48. {upgini-1.2.115a1 → upgini-1.2.117a1}/src/upgini/sampler/random_under_sampler.py +0 -0
  49. {upgini-1.2.115a1 → upgini-1.2.117a1}/src/upgini/sampler/utils.py +0 -0
  50. {upgini-1.2.115a1 → upgini-1.2.117a1}/src/upgini/search_task.py +0 -0
  51. {upgini-1.2.115a1 → upgini-1.2.117a1}/src/upgini/spinner.py +0 -0
  52. {upgini-1.2.115a1 → upgini-1.2.117a1}/src/upgini/utils/Roboto-Regular.ttf +0 -0
  53. {upgini-1.2.115a1 → upgini-1.2.117a1}/src/upgini/utils/__init__.py +0 -0
  54. {upgini-1.2.115a1 → upgini-1.2.117a1}/src/upgini/utils/base_search_key_detector.py +0 -0
  55. {upgini-1.2.115a1 → upgini-1.2.117a1}/src/upgini/utils/blocked_time_series.py +0 -0
  56. {upgini-1.2.115a1 → upgini-1.2.117a1}/src/upgini/utils/config.py +0 -0
  57. {upgini-1.2.115a1 → upgini-1.2.117a1}/src/upgini/utils/country_utils.py +0 -0
  58. {upgini-1.2.115a1 → upgini-1.2.117a1}/src/upgini/utils/custom_loss_utils.py +0 -0
  59. {upgini-1.2.115a1 → upgini-1.2.117a1}/src/upgini/utils/cv_utils.py +0 -0
  60. {upgini-1.2.115a1 → upgini-1.2.117a1}/src/upgini/utils/datetime_utils.py +0 -0
  61. {upgini-1.2.115a1 → upgini-1.2.117a1}/src/upgini/utils/deduplicate_utils.py +0 -0
  62. {upgini-1.2.115a1 → upgini-1.2.117a1}/src/upgini/utils/display_utils.py +0 -0
  63. {upgini-1.2.115a1 → upgini-1.2.117a1}/src/upgini/utils/email_utils.py +0 -0
  64. {upgini-1.2.115a1 → upgini-1.2.117a1}/src/upgini/utils/fallback_progress_bar.py +0 -0
  65. {upgini-1.2.115a1 → upgini-1.2.117a1}/src/upgini/utils/feature_info.py +0 -0
  66. {upgini-1.2.115a1 → upgini-1.2.117a1}/src/upgini/utils/features_validator.py +0 -0
  67. {upgini-1.2.115a1 → upgini-1.2.117a1}/src/upgini/utils/format.py +0 -0
  68. {upgini-1.2.115a1 → upgini-1.2.117a1}/src/upgini/utils/hash_utils.py +0 -0
  69. {upgini-1.2.115a1 → upgini-1.2.117a1}/src/upgini/utils/ip_utils.py +0 -0
  70. {upgini-1.2.115a1 → upgini-1.2.117a1}/src/upgini/utils/mstats.py +0 -0
  71. {upgini-1.2.115a1 → upgini-1.2.117a1}/src/upgini/utils/phone_utils.py +0 -0
  72. {upgini-1.2.115a1 → upgini-1.2.117a1}/src/upgini/utils/postal_code_utils.py +0 -0
  73. {upgini-1.2.115a1 → upgini-1.2.117a1}/src/upgini/utils/progress_bar.py +0 -0
  74. {upgini-1.2.115a1 → upgini-1.2.117a1}/src/upgini/utils/psi.py +0 -0
  75. {upgini-1.2.115a1 → upgini-1.2.117a1}/src/upgini/utils/sample_utils.py +0 -0
  76. {upgini-1.2.115a1 → upgini-1.2.117a1}/src/upgini/utils/sklearn_ext.py +0 -0
  77. {upgini-1.2.115a1 → upgini-1.2.117a1}/src/upgini/utils/sort.py +0 -0
  78. {upgini-1.2.115a1 → upgini-1.2.117a1}/src/upgini/utils/target_utils.py +0 -0
  79. {upgini-1.2.115a1 → upgini-1.2.117a1}/src/upgini/utils/track_info.py +0 -0
  80. {upgini-1.2.115a1 → upgini-1.2.117a1}/src/upgini/utils/ts_utils.py +0 -0
  81. {upgini-1.2.115a1 → upgini-1.2.117a1}/src/upgini/utils/warning_counter.py +0 -0
  82. {upgini-1.2.115a1 → upgini-1.2.117a1}/src/upgini/version_validator.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: upgini
3
- Version: 1.2.115a1
3
+ Version: 1.2.117a1
4
4
  Summary: Intelligent data search & enrichment for Machine Learning
5
5
  Project-URL: Bug Reports, https://github.com/upgini/upgini/issues
6
6
  Project-URL: Homepage, https://upgini.com/
@@ -19,9 +19,10 @@ Classifier: License :: OSI Approved :: BSD License
19
19
  Classifier: Operating System :: OS Independent
20
20
  Classifier: Programming Language :: Python :: 3.10
21
21
  Classifier: Programming Language :: Python :: 3.11
22
+ Classifier: Programming Language :: Python :: 3.12
22
23
  Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
23
24
  Classifier: Topic :: Scientific/Engineering :: Information Analysis
24
- Requires-Python: <3.12,>=3.10
25
+ Requires-Python: <3.13,>=3.10
25
26
  Requires-Dist: catboost>=1.0.3
26
27
  Requires-Dist: category-encoders>=2.8.1
27
28
  Requires-Dist: fastparquet>=0.8.1
@@ -904,7 +905,7 @@ Some convenient ways to start contributing are:
904
905
  ⚙️ **Gitpod** [![Gitpod Ready-to-Code](https://img.shields.io/badge/Gitpod-Ready--to--Code-blue?logo=gitpod)](https://gitpod.io/#https://github.com/upgini/upgini) You can use Gitpod to launch a fully functional development environment right in your browser.
905
906
 
906
907
  ## 🔗 Useful links
907
- - [Simple sales predictions as a template notebook](#-simple-sales-predictions-use-as-a-template)
908
+ - [Simple sales predictions as a template notebook](#-simple-sales-prediction-for-retail-stores)
908
909
  - [Full list of Kaggle Guides & Examples](https://www.kaggle.com/romaupgini/code)
909
910
  - [Project on PyPI](https://pypi.org/project/upgini)
910
911
  - [More perks for registered users](https://profile.upgini.com)
@@ -858,7 +858,7 @@ Some convenient ways to start contributing are:
858
858
  ⚙️ **Gitpod** [![Gitpod Ready-to-Code](https://img.shields.io/badge/Gitpod-Ready--to--Code-blue?logo=gitpod)](https://gitpod.io/#https://github.com/upgini/upgini) You can use Gitpod to launch a fully functional development environment right in your browser.
859
859
 
860
860
  ## 🔗 Useful links
861
- - [Simple sales predictions as a template notebook](#-simple-sales-predictions-use-as-a-template)
861
+ - [Simple sales predictions as a template notebook](#-simple-sales-prediction-for-retail-stores)
862
862
  - [Full list of Kaggle Guides & Examples](https://www.kaggle.com/romaupgini/code)
863
863
  - [Project on PyPI](https://pypi.org/project/upgini)
864
864
  - [More perks for registered users](https://profile.upgini.com)
@@ -7,7 +7,7 @@ name = "upgini"
7
7
  dynamic = ["version"]
8
8
  description = "Intelligent data search & enrichment for Machine Learning"
9
9
  readme = "README.md"
10
- requires-python = ">=3.10,<3.12"
10
+ requires-python = ">=3.10,<3.13"
11
11
  authors = [
12
12
  { name = "Upgini Developers", email = "madewithlove@upgini.com" },
13
13
  ]
@@ -30,6 +30,7 @@ classifiers = [
30
30
  "Operating System :: OS Independent",
31
31
  "Programming Language :: Python :: 3.10",
32
32
  "Programming Language :: Python :: 3.11",
33
+ "Programming Language :: Python :: 3.12",
33
34
  "Topic :: Scientific/Engineering :: Artificial Intelligence",
34
35
  "Topic :: Scientific/Engineering :: Information Analysis",
35
36
  ]
@@ -90,7 +91,7 @@ lint = "ruff check {args}"
90
91
  test_all = 'pytest -s -vv tests'
91
92
 
92
93
  [[tool.hatch.envs.test.matrix]]
93
- python = ["3.10", "3.11"]
94
+ python = ["3.10", "3.11", "3.12"]
94
95
  pandas = ["1.2.0", "1.3.0", "1.4.0", "1.5.0", "2.0.0", "2.1.0", "2.2.0"]
95
96
 
96
97
  # from versions: 0.1, 0.2, 0.3.0, 0.4.0, 0.4.1, 0.4.2, 0.4.3, 0.5.0, 0.6.0, 0.6.1, 0.7.0, 0.7.1, 0.7.2, 0.7.3, 0.8.0, 0.8.1, 0.9.0, 0.9.1, 0.10.0, 0.10.1, 0.11.0, 0.12.0, 0.13.0, 0.13.1, 0.14.0, 0.14.1, 0.15.0, 0.15.1, 0.15.2, 0.16.0, 0.16.1, 0.16.2, 0.17.0, 0.17.1, 0.18.0, 0.18.1, 0.19.0, 0.19.1, 0.19.2, 0.20.0, 0.20.1, 0.20.2, 0.20.3, 0.21.0, 0.21.1, 0.22.0, 0.23.0, 0.23.1, 0.23.2, 0.23.3, 0.23.4, 0.24.0, 0.24.1, 0.24.2, 0.25.0, 0.25.1, 0.25.2, 0.25.3, 1.0.0, 1.0.1, 1.0.2, 1.0.3, 1.0.4, 1.0.5, 1.1.0, 1.1.1, 1.1.2, 1.1.3, 1.1.4, 1.1.5, 1.2.0, 1.2.1, 1.2.2, 1.2.3, 1.2.4, 1.2.5, 1.3.0, 1.3.1, 1.3.2, 1.3.3, 1.3.4, 1.3.5, 1.4.0rc0, 1.4.0, 1.4.1, 1.4.2, 1.4.3, 1.4.4, 1.5.0rc0, 1.5.0, 1.5.1, 1.5.2, 1.5.3, 2.0.0rc0, 2.0.0rc1, 2.0.0, 2.0.1, 2.0.2, 2.0.3
@@ -0,0 +1 @@
1
+ __version__ = "1.2.117a1"
@@ -584,7 +584,7 @@ class FeaturesEnricher(TransformerMixin):
584
584
  eval_set: list[tuple] | tuple | None = None,
585
585
  *args,
586
586
  exclude_features_sources: list[str] | None | None = None,
587
- keep_input: bool | None = None,
587
+ keep_input: bool = True,
588
588
  calculate_metrics: bool | None = None,
589
589
  scoring: Callable | str | None = None,
590
590
  estimator: Any | None = None,
@@ -612,10 +612,9 @@ class FeaturesEnricher(TransformerMixin):
612
612
  list of pairs (X, y) for validation.
613
613
 
614
614
  keep_input: bool, optional (default=True)
615
- keep_input: bool, optional (default=None)
616
- If True, copy original input columns to the output dataframe.
615
+ keep_input: bool, optional (default=True)
616
+ If True, then all search keys, ID columns, selected client features and enriched columns will be returned.
617
617
  If False, then only enriched columns are returned.
618
- If None, then all search keys, ID columns, selected client features and enriched columns will be returned.
619
618
 
620
619
  estimator: sklearn-compatible estimator, optional (default=None)
621
620
  Custom estimator for metrics calculation.
@@ -751,7 +750,7 @@ class FeaturesEnricher(TransformerMixin):
751
750
  *args,
752
751
  y: pd.Series | None = None,
753
752
  exclude_features_sources: list[str] | None = None,
754
- keep_input: bool | None = None,
753
+ keep_input: bool = True,
755
754
  trace_id: str | None = None,
756
755
  metrics_calculation: bool = False,
757
756
  silent_mode=False,
@@ -768,10 +767,11 @@ class FeaturesEnricher(TransformerMixin):
768
767
  X: pandas.DataFrame of shape (n_samples, n_features)
769
768
  Input samples.
770
769
 
771
- keep_input: bool, optional (default=None)
772
- If True, copy original input columns to the output dataframe.
770
+ keep_input: bool, optional (default=True)
771
+ keep_input: bool, optional (default=True)
772
+ If True, then all search keys, ID columns, selected client features, enriched columns and input columns
773
+ that were not present on fit will be returned.
773
774
  If False, then only enriched columns are returned.
774
- If None, then all search keys, ID columns, selected client features and enriched columns will be returned.
775
775
 
776
776
  Returns
777
777
  -------
@@ -2178,7 +2178,7 @@ class FeaturesEnricher(TransformerMixin):
2178
2178
  df = self.__combine_train_and_eval_sets(validated_X, validated_y, eval_set)
2179
2179
 
2180
2180
  # Exclude OOT eval sets from transform because they are not used for metrics calculation
2181
- if not is_for_metrics and EVAL_SET_INDEX in df.columns:
2181
+ if is_for_metrics and EVAL_SET_INDEX in df.columns:
2182
2182
  for eval_index in df[EVAL_SET_INDEX].unique():
2183
2183
  if eval_index == 0:
2184
2184
  continue
@@ -2482,7 +2482,7 @@ if response.status_code == 200:
2482
2482
  progress_bar: ProgressBar | None = None,
2483
2483
  progress_callback: Callable[[SearchProgress], Any] | None = None,
2484
2484
  add_fit_system_record_id: bool = False,
2485
- keep_input: bool | None = None,
2485
+ keep_input: bool = True,
2486
2486
  ) -> tuple[pd.DataFrame, dict[str, str], list[str], dict[str, SearchKey]]:
2487
2487
  if self._search_task is None:
2488
2488
  raise NotFittedError(self.bundle.get("transform_unfitted_enricher"))
@@ -2831,21 +2831,23 @@ if response.status_code == 200:
2831
2831
  how="left",
2832
2832
  )
2833
2833
 
2834
+ fit_input_columns = [c.originalName for c in self._search_task.get_file_metadata(trace_id).columns]
2835
+ new_columns_on_transform = [c for c in validated_Xy.columns if c not in fit_input_columns]
2836
+
2834
2837
  selected_generated_features = [
2835
2838
  c for c in generated_features if not self.fit_select_features or c in self.feature_names_
2836
2839
  ]
2837
- if keep_input is None:
2840
+ if keep_input is True:
2838
2841
  selected_input_columns = [
2839
2842
  c
2840
2843
  for c in validated_Xy.columns
2841
2844
  if not self.fit_select_features
2842
2845
  or c in self.feature_names_
2846
+ or c in new_columns_on_transform
2843
2847
  or c in self.search_keys
2844
2848
  or c in (self.id_columns or [])
2845
2849
  or c in [EVAL_SET_INDEX, TARGET] # transform for metrics calculation
2846
2850
  ]
2847
- elif keep_input is True:
2848
- selected_input_columns = validated_Xy.columns.to_list()
2849
2851
  else:
2850
2852
  selected_input_columns = []
2851
2853
 
@@ -815,9 +815,9 @@ class CatBoostWrapper(EstimatorWrapper):
815
815
  encoded = cat_encoder.transform(x[self.cat_features]).astype(int)
816
816
  else:
817
817
  encoded = cat_encoder.transform(x[self.cat_features])
818
- cat_features = encoded.columns.to_list()
819
- x.drop(columns=encoded.columns, inplace=True, errors="ignore")
820
- x[encoded.columns] = encoded
818
+ cat_features = self.cat_features
819
+ x = x.drop(columns=self.cat_features, errors="ignore")
820
+ x[self.cat_features] = encoded
821
821
  else:
822
822
  cat_features = self.cat_features
823
823
 
@@ -1 +0,0 @@
1
- __version__ = "1.2.115a1"
File without changes
File without changes
File without changes