upgini 1.2.39a3769.dev1__py3-none-any.whl → 1.2.40__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of upgini might be problematic. Click here for more details.
- upgini/__about__.py +1 -1
- upgini/dataset.py +1 -0
- upgini/features_enricher.py +19 -6
- upgini/resource_bundle/strings.properties +1 -0
- {upgini-1.2.39a3769.dev1.dist-info → upgini-1.2.40.dist-info}/METADATA +15 -3
- {upgini-1.2.39a3769.dev1.dist-info → upgini-1.2.40.dist-info}/RECORD +8 -8
- {upgini-1.2.39a3769.dev1.dist-info → upgini-1.2.40.dist-info}/WHEEL +1 -1
- {upgini-1.2.39a3769.dev1.dist-info → upgini-1.2.40.dist-info}/licenses/LICENSE +0 -0
upgini/__about__.py
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
__version__ = "1.2.
|
|
1
|
+
__version__ = "1.2.40"
|
upgini/dataset.py
CHANGED
upgini/features_enricher.py
CHANGED
|
@@ -2008,7 +2008,7 @@ class FeaturesEnricher(TransformerMixin):
|
|
|
2008
2008
|
trace_id = trace_id or uuid.uuid4()
|
|
2009
2009
|
return search_task.get_progress(trace_id)
|
|
2010
2010
|
|
|
2011
|
-
def get_transactional_transform_api(self):
|
|
2011
|
+
def get_transactional_transform_api(self, only_online_sources=False):
|
|
2012
2012
|
if self.api_key is None:
|
|
2013
2013
|
raise ValidationError(self.bundle.get("transactional_transform_unregistered"))
|
|
2014
2014
|
if self._search_task is None:
|
|
@@ -2066,7 +2066,7 @@ class FeaturesEnricher(TransformerMixin):
|
|
|
2066
2066
|
api_example = f"""curl 'https://search.upgini.com/online/api/http_inference_trigger?search_id={search_id}' \\
|
|
2067
2067
|
-H 'Authorization: {self.api_key}' \\
|
|
2068
2068
|
-H 'Content-Type: application/json' \\
|
|
2069
|
-
-d '{{"search_keys": {keys}{features_section}}}'"""
|
|
2069
|
+
-d '{{"search_keys": {keys}{features_section}, "only_online_sources": {str(only_online_sources).lower()}}}'"""
|
|
2070
2070
|
return api_example
|
|
2071
2071
|
|
|
2072
2072
|
def _get_copy_of_runtime_parameters(self) -> RuntimeParameters:
|
|
@@ -2110,13 +2110,15 @@ class FeaturesEnricher(TransformerMixin):
|
|
|
2110
2110
|
return None, {c: c for c in X.columns}, []
|
|
2111
2111
|
|
|
2112
2112
|
features_meta = self._search_task.get_all_features_metadata_v2()
|
|
2113
|
-
online_api_features = [fm.name for fm in features_meta if fm.from_online_api]
|
|
2113
|
+
online_api_features = [fm.name for fm in features_meta if fm.from_online_api and fm.shap_value > 0]
|
|
2114
2114
|
if len(online_api_features) > 0:
|
|
2115
2115
|
self.logger.warning(
|
|
2116
2116
|
f"There are important features for transform, that generated by online API: {online_api_features}"
|
|
2117
2117
|
)
|
|
2118
|
-
|
|
2119
|
-
|
|
2118
|
+
msg = self.bundle.get("online_api_features_transform").format(online_api_features)
|
|
2119
|
+
self.logger.warning(msg)
|
|
2120
|
+
print(msg)
|
|
2121
|
+
print(self.get_transactional_transform_api(only_online_sources=True))
|
|
2120
2122
|
|
|
2121
2123
|
if not metrics_calculation:
|
|
2122
2124
|
transform_usage = self.rest_client.get_current_transform_usage(trace_id)
|
|
@@ -2702,6 +2704,7 @@ class FeaturesEnricher(TransformerMixin):
|
|
|
2702
2704
|
self.fit_search_keys,
|
|
2703
2705
|
self.fit_columns_renaming,
|
|
2704
2706
|
list(unnest_search_keys.keys()),
|
|
2707
|
+
self.bundle,
|
|
2705
2708
|
self.logger,
|
|
2706
2709
|
)
|
|
2707
2710
|
df = converter.convert(df)
|
|
@@ -3269,6 +3272,7 @@ class FeaturesEnricher(TransformerMixin):
|
|
|
3269
3272
|
f"Generate features: {self.generate_features}\n"
|
|
3270
3273
|
f"Round embeddings: {self.round_embeddings}\n"
|
|
3271
3274
|
f"Detect missing search keys: {self.detect_missing_search_keys}\n"
|
|
3275
|
+
f"Exclude columns: {self.exclude_columns}\n"
|
|
3272
3276
|
f"Exclude features sources: {exclude_features_sources}\n"
|
|
3273
3277
|
f"Calculate metrics: {calculate_metrics}\n"
|
|
3274
3278
|
f"Scoring: {scoring}\n"
|
|
@@ -3276,6 +3280,15 @@ class FeaturesEnricher(TransformerMixin):
|
|
|
3276
3280
|
f"Remove target outliers: {remove_outliers_calc_metrics}\n"
|
|
3277
3281
|
f"Exclude columns: {self.exclude_columns}\n"
|
|
3278
3282
|
f"Search id: {self.search_id}\n"
|
|
3283
|
+
f"Custom loss: {self.loss}\n"
|
|
3284
|
+
f"Logs enabled: {self.logs_enabled}\n"
|
|
3285
|
+
f"Raise validation error: {self.raise_validation_error}\n"
|
|
3286
|
+
f"Baseline score column: {self.baseline_score_column}\n"
|
|
3287
|
+
f"Client ip: {self.client_ip}\n"
|
|
3288
|
+
f"Client visitorId: {self.client_visitorid}\n"
|
|
3289
|
+
f"Add date if missing: {self.add_date_if_missing}\n"
|
|
3290
|
+
f"Select features: {self.select_features}\n"
|
|
3291
|
+
f"Disable force downsampling: {self.disable_force_downsampling}\n"
|
|
3279
3292
|
)
|
|
3280
3293
|
|
|
3281
3294
|
def sample(df):
|
|
@@ -3959,7 +3972,7 @@ class FeaturesEnricher(TransformerMixin):
|
|
|
3959
3972
|
display_html_dataframe(self.metrics, self.metrics, msg)
|
|
3960
3973
|
|
|
3961
3974
|
def __show_selected_features(self, search_keys: Dict[str, SearchKey]):
|
|
3962
|
-
search_key_names = search_keys.
|
|
3975
|
+
search_key_names = [col for col, tpe in search_keys.items() if tpe != SearchKey.CUSTOM_KEY]
|
|
3963
3976
|
if self.fit_columns_renaming:
|
|
3964
3977
|
search_key_names = [self.fit_columns_renaming.get(col, col) for col in search_key_names]
|
|
3965
3978
|
msg = self.bundle.get("features_info_header").format(len(self.feature_names_), search_key_names)
|
|
@@ -216,6 +216,7 @@ imbalanced_target=\nTarget is imbalanced and will be undersampled. Frequency of
|
|
|
216
216
|
loss_selection_info=Using loss `{}` for feature selection
|
|
217
217
|
loss_calc_metrics_info=Using loss `{}` for metrics calculation with default estimator
|
|
218
218
|
forced_balance_undersample=For quick data retrieval, your dataset has been sampled. To use data search without data sampling please contact support (sales@upgini.com)
|
|
219
|
+
online_api_features_transform=Please note that some of the selected features {} are provided through a slow enrichment interface and are not available via transformation. However, they can be accessed via the API:
|
|
219
220
|
|
|
220
221
|
# Validation table
|
|
221
222
|
validation_column_name_header=Column name
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.3
|
|
2
2
|
Name: upgini
|
|
3
|
-
Version: 1.2.39a3769.dev1
|
|
3
|
+
Version: 1.2.40
|
|
4
4
|
Summary: Intelligent data search & enrichment for Machine Learning
|
|
5
5
|
Project-URL: Bug Reports, https://github.com/upgini/upgini/issues
|
|
6
6
|
Project-URL: Homepage, https://upgini.com/
|
|
@@ -382,6 +382,7 @@ enricher = FeaturesEnricher(
|
|
|
382
382
|
date_format = "%Y-%d-%m"
|
|
383
383
|
)
|
|
384
384
|
```
|
|
385
|
+
|
|
385
386
|
### 4. 🔍 Start your first feature search!
|
|
386
387
|
The main abstraction you interact with is `FeaturesEnricher`, a Scikit-learn compatible estimator. You can easily add it into your existing ML pipelines.
|
|
387
388
|
Create instance of the `FeaturesEnricher` class and call:
|
|
@@ -412,7 +413,7 @@ enricher = FeaturesEnricher(
|
|
|
412
413
|
enricher.fit(X, y)
|
|
413
414
|
```
|
|
414
415
|
|
|
415
|
-
That's all
|
|
416
|
+
That's all! We've fit `FeaturesEnricher`.
|
|
416
417
|
### 5. 📈 Evaluate feature importances (SHAP values) from the search result
|
|
417
418
|
|
|
418
419
|
`FeaturesEnricher` class has two properties for feature importances, which will be filled after fit - `feature_names_` and `feature_importances_`:
|
|
@@ -464,7 +465,7 @@ enricher = FeaturesEnricher(
|
|
|
464
465
|
)
|
|
465
466
|
```
|
|
466
467
|
|
|
467
|
-
## 💻 How it
|
|
468
|
+
## 💻 How does it work?
|
|
468
469
|
|
|
469
470
|
### 🧹 Search dataset validation
|
|
470
471
|
We validate and clean the search initialization dataset under the hood:
|
|
@@ -506,6 +507,17 @@ enricher = FeaturesEnricher(
|
|
|
506
507
|
cv=CVType.time_series
|
|
507
508
|
)
|
|
508
509
|
```
|
|
510
|
+
|
|
511
|
+
If you're working with multivariate time series, you should specify id columns of individual univariate series in `FeaturesEnricher`. For example, if you have a dataset predicting sales for different stores and products, you should specify store and product id columns as follows:
|
|
512
|
+
```python
|
|
513
|
+
enricher = FeaturesEnricher(
|
|
514
|
+
search_keys={
|
|
515
|
+
"sales_date": SearchKey.DATE,
|
|
516
|
+
},
|
|
517
|
+
id_columns=["store_id", "product_id"],
|
|
518
|
+
cv=CVType.time_series
|
|
519
|
+
)
|
|
520
|
+
```
|
|
509
521
|
⚠️ **Pre-process search dataset** in case of time series prediction:
|
|
510
522
|
sort rows in dataset according to observation order, in most cases - ascending order by date/datetime.
|
|
511
523
|
|
|
@@ -1,9 +1,9 @@
|
|
|
1
|
-
upgini/__about__.py,sha256=
|
|
1
|
+
upgini/__about__.py,sha256=iSJlF17_nAoTrcNmK9Ggvp4uHaLT4lGvRjnsq0x_83c,23
|
|
2
2
|
upgini/__init__.py,sha256=LXSfTNU0HnlOkE69VCxkgIKDhWP-JFo_eBQ71OxTr5Y,261
|
|
3
3
|
upgini/ads.py,sha256=nvuRxRx5MHDMgPr9SiU-fsqRdFaBv8p4_v1oqiysKpc,2714
|
|
4
|
-
upgini/dataset.py,sha256=
|
|
4
|
+
upgini/dataset.py,sha256=d9VlOs9hTf6eL8TX_9bO400HQj3y_jVGthABvQJqONs,33350
|
|
5
5
|
upgini/errors.py,sha256=2b_Wbo0OYhLUbrZqdLIx5jBnAsiD1Mcenh-VjR4HCTw,950
|
|
6
|
-
upgini/features_enricher.py,sha256=
|
|
6
|
+
upgini/features_enricher.py,sha256=e1psLi5mv6Ml8CG6x_R8SN8hnyfDH0VsZjhFnoswoEY,197918
|
|
7
7
|
upgini/http.py,sha256=plZGTGoi1h2edd8Cnjt4eYB8t4NbBGnZz7DtPTByiNc,42885
|
|
8
8
|
upgini/lazy_import.py,sha256=74gQ8JuA48BGRLxAo7lNHNKY2D2emMxrUxKGdxVGhuY,1012
|
|
9
9
|
upgini/metadata.py,sha256=-ibqiNjD7dTagqg53FoEJNEqvAYbwgfyn9PGTRQ_YKU,12054
|
|
@@ -30,7 +30,7 @@ upgini/normalizer/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU
|
|
|
30
30
|
upgini/normalizer/normalize_utils.py,sha256=Ft2MwSgVoBilXAORAOYAuwPD79GOLfwn4qQE3IUFzzg,7218
|
|
31
31
|
upgini/resource_bundle/__init__.py,sha256=S5F2G47pnJd2LDpmFsjDqEwiKkP8Hm-hcseDbMka6Ko,8345
|
|
32
32
|
upgini/resource_bundle/exceptions.py,sha256=5fRvx0_vWdE1-7HcSgF0tckB4A9AKyf5RiinZkInTsI,621
|
|
33
|
-
upgini/resource_bundle/strings.properties,sha256=
|
|
33
|
+
upgini/resource_bundle/strings.properties,sha256=uQWmbcd9TJh-xE0QpmHpHYKw-20utvXeHwFA-U_iTLw,27302
|
|
34
34
|
upgini/resource_bundle/strings_widget.properties,sha256=gOdqvZWntP2LCza_tyVk1_yRYcG4c04K9sQOAVhF_gw,1577
|
|
35
35
|
upgini/sampler/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
36
36
|
upgini/sampler/base.py,sha256=7GpjYqjOp58vYcJLiX__1R5wjUlyQbxvHJ2klFnup_M,6389
|
|
@@ -59,7 +59,7 @@ upgini/utils/sklearn_ext.py,sha256=13jQS_k7v0aUtudXV6nGUEWjttPQzAW9AFYL5wgEz9k,4
|
|
|
59
59
|
upgini/utils/target_utils.py,sha256=RlpKGss9kMibVSlA8iZuO_qxmyeplqzn7X8g6hiGGGs,14341
|
|
60
60
|
upgini/utils/track_info.py,sha256=G5Lu1xxakg2_TQjKZk4b5SvrHsATTXNVV3NbvWtT8k8,5663
|
|
61
61
|
upgini/utils/warning_counter.py,sha256=-GRY8EUggEBKODPSuXAkHn9KnEQwAORC0mmz_tim-PM,254
|
|
62
|
-
upgini-1.2.
|
|
63
|
-
upgini-1.2.
|
|
64
|
-
upgini-1.2.
|
|
65
|
-
upgini-1.2.
|
|
62
|
+
upgini-1.2.40.dist-info/METADATA,sha256=_UmnR2uPQq6LIgUN2-Z9B_QzrgC3sn8GflT4upbc4fg,49054
|
|
63
|
+
upgini-1.2.40.dist-info/WHEEL,sha256=1yFddiXMmvYK7QYTqtRNtX66WJ0Mz8PYEiEUoOUUxRY,87
|
|
64
|
+
upgini-1.2.40.dist-info/licenses/LICENSE,sha256=5RRzgvdJUu3BUDfv4bzVU6FqKgwHlIay63pPCSmSgzw,1514
|
|
65
|
+
upgini-1.2.40.dist-info/RECORD,,
|
|
File without changes
|