upgini 1.2.115a1__tar.gz → 1.2.117a1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {upgini-1.2.115a1 → upgini-1.2.117a1}/PKG-INFO +4 -3
- {upgini-1.2.115a1 → upgini-1.2.117a1}/README.md +1 -1
- {upgini-1.2.115a1 → upgini-1.2.117a1}/pyproject.toml +3 -2
- upgini-1.2.117a1/src/upgini/__about__.py +1 -0
- {upgini-1.2.115a1 → upgini-1.2.117a1}/src/upgini/features_enricher.py +15 -13
- {upgini-1.2.115a1 → upgini-1.2.117a1}/src/upgini/metrics.py +3 -3
- upgini-1.2.115a1/src/upgini/__about__.py +0 -1
- {upgini-1.2.115a1 → upgini-1.2.117a1}/.gitignore +0 -0
- {upgini-1.2.115a1 → upgini-1.2.117a1}/LICENSE +0 -0
- {upgini-1.2.115a1 → upgini-1.2.117a1}/src/upgini/__init__.py +0 -0
- {upgini-1.2.115a1 → upgini-1.2.117a1}/src/upgini/ads.py +0 -0
- {upgini-1.2.115a1 → upgini-1.2.117a1}/src/upgini/ads_management/__init__.py +0 -0
- {upgini-1.2.115a1 → upgini-1.2.117a1}/src/upgini/ads_management/ads_manager.py +0 -0
- {upgini-1.2.115a1 → upgini-1.2.117a1}/src/upgini/autofe/__init__.py +0 -0
- {upgini-1.2.115a1 → upgini-1.2.117a1}/src/upgini/autofe/all_operators.py +0 -0
- {upgini-1.2.115a1 → upgini-1.2.117a1}/src/upgini/autofe/binary.py +0 -0
- {upgini-1.2.115a1 → upgini-1.2.117a1}/src/upgini/autofe/date.py +0 -0
- {upgini-1.2.115a1 → upgini-1.2.117a1}/src/upgini/autofe/feature.py +0 -0
- {upgini-1.2.115a1 → upgini-1.2.117a1}/src/upgini/autofe/groupby.py +0 -0
- {upgini-1.2.115a1 → upgini-1.2.117a1}/src/upgini/autofe/operator.py +0 -0
- {upgini-1.2.115a1 → upgini-1.2.117a1}/src/upgini/autofe/timeseries/__init__.py +0 -0
- {upgini-1.2.115a1 → upgini-1.2.117a1}/src/upgini/autofe/timeseries/base.py +0 -0
- {upgini-1.2.115a1 → upgini-1.2.117a1}/src/upgini/autofe/timeseries/cross.py +0 -0
- {upgini-1.2.115a1 → upgini-1.2.117a1}/src/upgini/autofe/timeseries/delta.py +0 -0
- {upgini-1.2.115a1 → upgini-1.2.117a1}/src/upgini/autofe/timeseries/lag.py +0 -0
- {upgini-1.2.115a1 → upgini-1.2.117a1}/src/upgini/autofe/timeseries/roll.py +0 -0
- {upgini-1.2.115a1 → upgini-1.2.117a1}/src/upgini/autofe/timeseries/trend.py +0 -0
- {upgini-1.2.115a1 → upgini-1.2.117a1}/src/upgini/autofe/timeseries/volatility.py +0 -0
- {upgini-1.2.115a1 → upgini-1.2.117a1}/src/upgini/autofe/unary.py +0 -0
- {upgini-1.2.115a1 → upgini-1.2.117a1}/src/upgini/autofe/utils.py +0 -0
- {upgini-1.2.115a1 → upgini-1.2.117a1}/src/upgini/autofe/vector.py +0 -0
- {upgini-1.2.115a1 → upgini-1.2.117a1}/src/upgini/data_source/__init__.py +0 -0
- {upgini-1.2.115a1 → upgini-1.2.117a1}/src/upgini/data_source/data_source_publisher.py +0 -0
- {upgini-1.2.115a1 → upgini-1.2.117a1}/src/upgini/dataset.py +0 -0
- {upgini-1.2.115a1 → upgini-1.2.117a1}/src/upgini/errors.py +0 -0
- {upgini-1.2.115a1 → upgini-1.2.117a1}/src/upgini/http.py +0 -0
- {upgini-1.2.115a1 → upgini-1.2.117a1}/src/upgini/mdc/__init__.py +0 -0
- {upgini-1.2.115a1 → upgini-1.2.117a1}/src/upgini/mdc/context.py +0 -0
- {upgini-1.2.115a1 → upgini-1.2.117a1}/src/upgini/metadata.py +0 -0
- {upgini-1.2.115a1 → upgini-1.2.117a1}/src/upgini/normalizer/__init__.py +0 -0
- {upgini-1.2.115a1 → upgini-1.2.117a1}/src/upgini/normalizer/normalize_utils.py +0 -0
- {upgini-1.2.115a1 → upgini-1.2.117a1}/src/upgini/resource_bundle/__init__.py +0 -0
- {upgini-1.2.115a1 → upgini-1.2.117a1}/src/upgini/resource_bundle/exceptions.py +0 -0
- {upgini-1.2.115a1 → upgini-1.2.117a1}/src/upgini/resource_bundle/strings.properties +0 -0
- {upgini-1.2.115a1 → upgini-1.2.117a1}/src/upgini/resource_bundle/strings_widget.properties +0 -0
- {upgini-1.2.115a1 → upgini-1.2.117a1}/src/upgini/sampler/__init__.py +0 -0
- {upgini-1.2.115a1 → upgini-1.2.117a1}/src/upgini/sampler/base.py +0 -0
- {upgini-1.2.115a1 → upgini-1.2.117a1}/src/upgini/sampler/random_under_sampler.py +0 -0
- {upgini-1.2.115a1 → upgini-1.2.117a1}/src/upgini/sampler/utils.py +0 -0
- {upgini-1.2.115a1 → upgini-1.2.117a1}/src/upgini/search_task.py +0 -0
- {upgini-1.2.115a1 → upgini-1.2.117a1}/src/upgini/spinner.py +0 -0
- {upgini-1.2.115a1 → upgini-1.2.117a1}/src/upgini/utils/Roboto-Regular.ttf +0 -0
- {upgini-1.2.115a1 → upgini-1.2.117a1}/src/upgini/utils/__init__.py +0 -0
- {upgini-1.2.115a1 → upgini-1.2.117a1}/src/upgini/utils/base_search_key_detector.py +0 -0
- {upgini-1.2.115a1 → upgini-1.2.117a1}/src/upgini/utils/blocked_time_series.py +0 -0
- {upgini-1.2.115a1 → upgini-1.2.117a1}/src/upgini/utils/config.py +0 -0
- {upgini-1.2.115a1 → upgini-1.2.117a1}/src/upgini/utils/country_utils.py +0 -0
- {upgini-1.2.115a1 → upgini-1.2.117a1}/src/upgini/utils/custom_loss_utils.py +0 -0
- {upgini-1.2.115a1 → upgini-1.2.117a1}/src/upgini/utils/cv_utils.py +0 -0
- {upgini-1.2.115a1 → upgini-1.2.117a1}/src/upgini/utils/datetime_utils.py +0 -0
- {upgini-1.2.115a1 → upgini-1.2.117a1}/src/upgini/utils/deduplicate_utils.py +0 -0
- {upgini-1.2.115a1 → upgini-1.2.117a1}/src/upgini/utils/display_utils.py +0 -0
- {upgini-1.2.115a1 → upgini-1.2.117a1}/src/upgini/utils/email_utils.py +0 -0
- {upgini-1.2.115a1 → upgini-1.2.117a1}/src/upgini/utils/fallback_progress_bar.py +0 -0
- {upgini-1.2.115a1 → upgini-1.2.117a1}/src/upgini/utils/feature_info.py +0 -0
- {upgini-1.2.115a1 → upgini-1.2.117a1}/src/upgini/utils/features_validator.py +0 -0
- {upgini-1.2.115a1 → upgini-1.2.117a1}/src/upgini/utils/format.py +0 -0
- {upgini-1.2.115a1 → upgini-1.2.117a1}/src/upgini/utils/hash_utils.py +0 -0
- {upgini-1.2.115a1 → upgini-1.2.117a1}/src/upgini/utils/ip_utils.py +0 -0
- {upgini-1.2.115a1 → upgini-1.2.117a1}/src/upgini/utils/mstats.py +0 -0
- {upgini-1.2.115a1 → upgini-1.2.117a1}/src/upgini/utils/phone_utils.py +0 -0
- {upgini-1.2.115a1 → upgini-1.2.117a1}/src/upgini/utils/postal_code_utils.py +0 -0
- {upgini-1.2.115a1 → upgini-1.2.117a1}/src/upgini/utils/progress_bar.py +0 -0
- {upgini-1.2.115a1 → upgini-1.2.117a1}/src/upgini/utils/psi.py +0 -0
- {upgini-1.2.115a1 → upgini-1.2.117a1}/src/upgini/utils/sample_utils.py +0 -0
- {upgini-1.2.115a1 → upgini-1.2.117a1}/src/upgini/utils/sklearn_ext.py +0 -0
- {upgini-1.2.115a1 → upgini-1.2.117a1}/src/upgini/utils/sort.py +0 -0
- {upgini-1.2.115a1 → upgini-1.2.117a1}/src/upgini/utils/target_utils.py +0 -0
- {upgini-1.2.115a1 → upgini-1.2.117a1}/src/upgini/utils/track_info.py +0 -0
- {upgini-1.2.115a1 → upgini-1.2.117a1}/src/upgini/utils/ts_utils.py +0 -0
- {upgini-1.2.115a1 → upgini-1.2.117a1}/src/upgini/utils/warning_counter.py +0 -0
- {upgini-1.2.115a1 → upgini-1.2.117a1}/src/upgini/version_validator.py +0 -0
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.3
|
2
2
|
Name: upgini
|
3
|
-
Version: 1.2.
|
3
|
+
Version: 1.2.117a1
|
4
4
|
Summary: Intelligent data search & enrichment for Machine Learning
|
5
5
|
Project-URL: Bug Reports, https://github.com/upgini/upgini/issues
|
6
6
|
Project-URL: Homepage, https://upgini.com/
|
@@ -19,9 +19,10 @@ Classifier: License :: OSI Approved :: BSD License
|
|
19
19
|
Classifier: Operating System :: OS Independent
|
20
20
|
Classifier: Programming Language :: Python :: 3.10
|
21
21
|
Classifier: Programming Language :: Python :: 3.11
|
22
|
+
Classifier: Programming Language :: Python :: 3.12
|
22
23
|
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
23
24
|
Classifier: Topic :: Scientific/Engineering :: Information Analysis
|
24
|
-
Requires-Python: <3.
|
25
|
+
Requires-Python: <3.13,>=3.10
|
25
26
|
Requires-Dist: catboost>=1.0.3
|
26
27
|
Requires-Dist: category-encoders>=2.8.1
|
27
28
|
Requires-Dist: fastparquet>=0.8.1
|
@@ -904,7 +905,7 @@ Some convenient ways to start contributing are:
|
|
904
905
|
⚙️ **Gitpod** [Open in Gitpod](https://gitpod.io/#https://github.com/upgini/upgini) You can use Gitpod to launch a fully functional development environment right in your browser.
|
905
906
|
|
906
907
|
## 🔗 Useful links
|
907
|
-
- [Simple sales predictions as a template notebook](#-simple-sales-
|
908
|
+
- [Simple sales predictions as a template notebook](#-simple-sales-prediction-for-retail-stores)
|
908
909
|
- [Full list of Kaggle Guides & Examples](https://www.kaggle.com/romaupgini/code)
|
909
910
|
- [Project on PyPI](https://pypi.org/project/upgini)
|
910
911
|
- [More perks for registered users](https://profile.upgini.com)
|
@@ -858,7 +858,7 @@ Some convenient ways to start contributing are:
|
|
858
858
|
⚙️ **Gitpod** [Open in Gitpod](https://gitpod.io/#https://github.com/upgini/upgini) You can use Gitpod to launch a fully functional development environment right in your browser.
|
859
859
|
|
860
860
|
## 🔗 Useful links
|
861
|
-
- [Simple sales predictions as a template notebook](#-simple-sales-
|
861
|
+
- [Simple sales predictions as a template notebook](#-simple-sales-prediction-for-retail-stores)
|
862
862
|
- [Full list of Kaggle Guides & Examples](https://www.kaggle.com/romaupgini/code)
|
863
863
|
- [Project on PyPI](https://pypi.org/project/upgini)
|
864
864
|
- [More perks for registered users](https://profile.upgini.com)
|
@@ -7,7 +7,7 @@ name = "upgini"
|
|
7
7
|
dynamic = ["version"]
|
8
8
|
description = "Intelligent data search & enrichment for Machine Learning"
|
9
9
|
readme = "README.md"
|
10
|
-
requires-python = ">=3.10,<3.
|
10
|
+
requires-python = ">=3.10,<3.13"
|
11
11
|
authors = [
|
12
12
|
{ name = "Upgini Developers", email = "madewithlove@upgini.com" },
|
13
13
|
]
|
@@ -30,6 +30,7 @@ classifiers = [
|
|
30
30
|
"Operating System :: OS Independent",
|
31
31
|
"Programming Language :: Python :: 3.10",
|
32
32
|
"Programming Language :: Python :: 3.11",
|
33
|
+
"Programming Language :: Python :: 3.12",
|
33
34
|
"Topic :: Scientific/Engineering :: Artificial Intelligence",
|
34
35
|
"Topic :: Scientific/Engineering :: Information Analysis",
|
35
36
|
]
|
@@ -90,7 +91,7 @@ lint = "ruff check {args}"
|
|
90
91
|
test_all = 'pytest -s -vv tests'
|
91
92
|
|
92
93
|
[[tool.hatch.envs.test.matrix]]
|
93
|
-
python = ["3.10", "3.11"]
|
94
|
+
python = ["3.10", "3.11", "3.12"]
|
94
95
|
pandas = ["1.2.0", "1.3.0", "1.4.0", "1.5.0", "2.0.0", "2.1.0", "2.2.0"]
|
95
96
|
|
96
97
|
# from versions: 0.1, 0.2, 0.3.0, 0.4.0, 0.4.1, 0.4.2, 0.4.3, 0.5.0, 0.6.0, 0.6.1, 0.7.0, 0.7.1, 0.7.2, 0.7.3, 0.8.0, 0.8.1, 0.9.0, 0.9.1, 0.10.0, 0.10.1, 0.11.0, 0.12.0, 0.13.0, 0.13.1, 0.14.0, 0.14.1, 0.15.0, 0.15.1, 0.15.2, 0.16.0, 0.16.1, 0.16.2, 0.17.0, 0.17.1, 0.18.0, 0.18.1, 0.19.0, 0.19.1, 0.19.2, 0.20.0, 0.20.1, 0.20.2, 0.20.3, 0.21.0, 0.21.1, 0.22.0, 0.23.0, 0.23.1, 0.23.2, 0.23.3, 0.23.4, 0.24.0, 0.24.1, 0.24.2, 0.25.0, 0.25.1, 0.25.2, 0.25.3, 1.0.0, 1.0.1, 1.0.2, 1.0.3, 1.0.4, 1.0.5, 1.1.0, 1.1.1, 1.1.2, 1.1.3, 1.1.4, 1.1.5, 1.2.0, 1.2.1, 1.2.2, 1.2.3, 1.2.4, 1.2.5, 1.3.0, 1.3.1, 1.3.2, 1.3.3, 1.3.4, 1.3.5, 1.4.0rc0, 1.4.0, 1.4.1, 1.4.2, 1.4.3, 1.4.4, 1.5.0rc0, 1.5.0, 1.5.1, 1.5.2, 1.5.3, 2.0.0rc0, 2.0.0rc1, 2.0.0, 2.0.1, 2.0.2, 2.0.3
|
@@ -0,0 +1 @@
|
|
1
|
+
__version__ = "1.2.117a1"
|
@@ -584,7 +584,7 @@ class FeaturesEnricher(TransformerMixin):
|
|
584
584
|
eval_set: list[tuple] | tuple | None = None,
|
585
585
|
*args,
|
586
586
|
exclude_features_sources: list[str] | None | None = None,
|
587
|
-
keep_input: bool
|
587
|
+
keep_input: bool = True,
|
588
588
|
calculate_metrics: bool | None = None,
|
589
589
|
scoring: Callable | str | None = None,
|
590
590
|
estimator: Any | None = None,
|
@@ -612,10 +612,9 @@ class FeaturesEnricher(TransformerMixin):
|
|
612
612
|
list of pairs (X, y) for validation.
|
613
613
|
|
614
614
|
keep_input: bool, optional (default=True)
|
615
|
-
keep_input: bool, optional (default=
|
616
|
-
If True,
|
615
|
+
keep_input: bool, optional (default=True)
|
616
|
+
If True, then all search keys, ID columns, selected client features and enriched columns will be returned.
|
617
617
|
If False, then only enriched columns are returned.
|
618
|
-
If None, then all search keys, ID columns, selected client features and enriched columns will be returned.
|
619
618
|
|
620
619
|
estimator: sklearn-compatible estimator, optional (default=None)
|
621
620
|
Custom estimator for metrics calculation.
|
@@ -751,7 +750,7 @@ class FeaturesEnricher(TransformerMixin):
|
|
751
750
|
*args,
|
752
751
|
y: pd.Series | None = None,
|
753
752
|
exclude_features_sources: list[str] | None = None,
|
754
|
-
keep_input: bool
|
753
|
+
keep_input: bool = True,
|
755
754
|
trace_id: str | None = None,
|
756
755
|
metrics_calculation: bool = False,
|
757
756
|
silent_mode=False,
|
@@ -768,10 +767,11 @@ class FeaturesEnricher(TransformerMixin):
|
|
768
767
|
X: pandas.DataFrame of shape (n_samples, n_features)
|
769
768
|
Input samples.
|
770
769
|
|
771
|
-
keep_input: bool, optional (default=
|
772
|
-
|
770
|
+
keep_input: bool, optional (default=True)
|
771
|
+
keep_input: bool, optional (default=True)
|
772
|
+
If True, then all search keys, ID columns, selected client features, enriched columns and input columns
|
773
|
+
that were not present on fit will be returned.
|
773
774
|
If False, then only enriched columns are returned.
|
774
|
-
If None, then all search keys, ID columns, selected client features and enriched columns will be returned.
|
775
775
|
|
776
776
|
Returns
|
777
777
|
-------
|
@@ -2178,7 +2178,7 @@ class FeaturesEnricher(TransformerMixin):
|
|
2178
2178
|
df = self.__combine_train_and_eval_sets(validated_X, validated_y, eval_set)
|
2179
2179
|
|
2180
2180
|
# Exclude OOT eval sets from transform because they are not used for metrics calculation
|
2181
|
-
if
|
2181
|
+
if is_for_metrics and EVAL_SET_INDEX in df.columns:
|
2182
2182
|
for eval_index in df[EVAL_SET_INDEX].unique():
|
2183
2183
|
if eval_index == 0:
|
2184
2184
|
continue
|
@@ -2482,7 +2482,7 @@ if response.status_code == 200:
|
|
2482
2482
|
progress_bar: ProgressBar | None = None,
|
2483
2483
|
progress_callback: Callable[[SearchProgress], Any] | None = None,
|
2484
2484
|
add_fit_system_record_id: bool = False,
|
2485
|
-
keep_input: bool
|
2485
|
+
keep_input: bool = True,
|
2486
2486
|
) -> tuple[pd.DataFrame, dict[str, str], list[str], dict[str, SearchKey]]:
|
2487
2487
|
if self._search_task is None:
|
2488
2488
|
raise NotFittedError(self.bundle.get("transform_unfitted_enricher"))
|
@@ -2831,21 +2831,23 @@ if response.status_code == 200:
|
|
2831
2831
|
how="left",
|
2832
2832
|
)
|
2833
2833
|
|
2834
|
+
fit_input_columns = [c.originalName for c in self._search_task.get_file_metadata(trace_id).columns]
|
2835
|
+
new_columns_on_transform = [c for c in validated_Xy.columns if c not in fit_input_columns]
|
2836
|
+
|
2834
2837
|
selected_generated_features = [
|
2835
2838
|
c for c in generated_features if not self.fit_select_features or c in self.feature_names_
|
2836
2839
|
]
|
2837
|
-
if keep_input is
|
2840
|
+
if keep_input is True:
|
2838
2841
|
selected_input_columns = [
|
2839
2842
|
c
|
2840
2843
|
for c in validated_Xy.columns
|
2841
2844
|
if not self.fit_select_features
|
2842
2845
|
or c in self.feature_names_
|
2846
|
+
or c in new_columns_on_transform
|
2843
2847
|
or c in self.search_keys
|
2844
2848
|
or c in (self.id_columns or [])
|
2845
2849
|
or c in [EVAL_SET_INDEX, TARGET] # transform for metrics calculation
|
2846
2850
|
]
|
2847
|
-
elif keep_input is True:
|
2848
|
-
selected_input_columns = validated_Xy.columns.to_list()
|
2849
2851
|
else:
|
2850
2852
|
selected_input_columns = []
|
2851
2853
|
|
@@ -815,9 +815,9 @@ class CatBoostWrapper(EstimatorWrapper):
|
|
815
815
|
encoded = cat_encoder.transform(x[self.cat_features]).astype(int)
|
816
816
|
else:
|
817
817
|
encoded = cat_encoder.transform(x[self.cat_features])
|
818
|
-
cat_features =
|
819
|
-
x.drop(columns=
|
820
|
-
x[
|
818
|
+
cat_features = self.cat_features
|
819
|
+
x = x.drop(columns=self.cat_features, errors="ignore")
|
820
|
+
x[self.cat_features] = encoded
|
821
821
|
else:
|
822
822
|
cat_features = self.cat_features
|
823
823
|
|
@@ -1 +0,0 @@
|
|
1
|
-
__version__ = "1.2.115a1"
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|