upgini 1.2.39a3769.dev2__py3-none-any.whl → 1.2.41__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of upgini might be problematic. Click here for more details.

upgini/__about__.py CHANGED
@@ -1 +1 @@
1
- __version__ = "1.2.39a3769.dev2"
1
+ __version__ = "1.2.41"
@@ -2008,7 +2008,7 @@ class FeaturesEnricher(TransformerMixin):
2008
2008
  trace_id = trace_id or uuid.uuid4()
2009
2009
  return search_task.get_progress(trace_id)
2010
2010
 
2011
- def get_transactional_transform_api(self):
2011
+ def get_transactional_transform_api(self, only_online_sources=False):
2012
2012
  if self.api_key is None:
2013
2013
  raise ValidationError(self.bundle.get("transactional_transform_unregistered"))
2014
2014
  if self._search_task is None:
@@ -2036,14 +2036,13 @@ class FeaturesEnricher(TransformerMixin):
2036
2036
  file_metadata = self._search_task.get_file_metadata(str(uuid.uuid4()))
2037
2037
  search_keys = file_metadata.search_types()
2038
2038
  if SearchKey.IPV6_ADDRESS in search_keys:
2039
- # search_keys.remove(SearchKey.IPV6_ADDRESS)
2040
2039
  search_keys.pop(SearchKey.IPV6_ADDRESS, None)
2041
-
2040
+ original_names = {c.name: c.originalName for c in file_metadata.columns}
2042
2041
  keys = (
2043
2042
  "{"
2044
2043
  + ", ".join(
2045
2044
  [
2046
- f'"{key.name}": {{"name": "{name}", "value": "{key_example(key)}"}}'
2045
+ f'"{key.name}": {{"name": "{original_names.get(name, name)}", "value": "{key_example(key)}"}}'
2047
2046
  for key, name in search_keys.items()
2048
2047
  ]
2049
2048
  )
@@ -2063,10 +2062,27 @@ class FeaturesEnricher(TransformerMixin):
2063
2062
  features_section = ""
2064
2063
 
2065
2064
  search_id = self._search_task.search_task_id
2066
- api_example = f"""curl 'https://search.upgini.com/online/api/http_inference_trigger?search_id={search_id}' \\
2065
+ api_example = f"""
2066
+ {Format.BOLD}Shell{Format.END}:
2067
+
2068
+ curl 'https://search.upgini.com/online/api/http_inference_trigger?search_id={search_id}' \\
2067
2069
  -H 'Authorization: {self.api_key}' \\
2068
2070
  -H 'Content-Type: application/json' \\
2069
- -d '{{"search_keys": {keys}{features_section}}}'"""
2071
+ -d '{{"search_keys": {keys}{features_section}, "only_online_sources": {str(only_online_sources).lower()}}}'
2072
+
2073
+ {Format.BOLD}Python{Format.END}:
2074
+
2075
+ import requests
2076
+
2077
+ response = requests.post(
2078
+ url='https://search.upgini.com/online/api/http_inference_trigger?search_id={search_id}',
2079
+ headers={{'Authorization': '{self.api_key}'}},
2080
+ json={{"search_keys": {keys}{features_section}, "only_online_sources": {only_online_sources}}}
2081
+ )
2082
+ if response.status_code == 200:
2083
+ print(response.json())
2084
+ """
2085
+
2070
2086
  return api_example
2071
2087
 
2072
2088
  def _get_copy_of_runtime_parameters(self) -> RuntimeParameters:
@@ -2110,13 +2126,15 @@ class FeaturesEnricher(TransformerMixin):
2110
2126
  return None, {c: c for c in X.columns}, []
2111
2127
 
2112
2128
  features_meta = self._search_task.get_all_features_metadata_v2()
2113
- online_api_features = [fm.name for fm in features_meta if fm.from_online_api]
2129
+ online_api_features = [fm.name for fm in features_meta if fm.from_online_api and fm.shap_value > 0]
2114
2130
  if len(online_api_features) > 0:
2115
2131
  self.logger.warning(
2116
2132
  f"There are important features for transform, that generated by online API: {online_api_features}"
2117
2133
  )
2118
- # TODO
2119
- raise Exception("There are features selected that are paid. Contact support (sales@upgini.com)")
2134
+ msg = self.bundle.get("online_api_features_transform").format(online_api_features)
2135
+ self.logger.warning(msg)
2136
+ print(msg)
2137
+ print(self.get_transactional_transform_api(only_online_sources=True))
2120
2138
 
2121
2139
  if not metrics_calculation:
2122
2140
  transform_usage = self.rest_client.get_current_transform_usage(trace_id)
@@ -2509,9 +2527,11 @@ class FeaturesEnricher(TransformerMixin):
2509
2527
  def __is_registered(self) -> bool:
2510
2528
  return self.api_key is not None and self.api_key != ""
2511
2529
 
2512
- def __log_warning(self, message: str, show_support_link: bool = False):
2530
+ def __log_warning(self, message: str, show_support_link: bool = False, is_red=False):
2513
2531
  warning_num = self.warning_counter.increment()
2514
2532
  formatted_message = f"WARNING #{warning_num}: {message}\n"
2533
+ if is_red:
2534
+ formatted_message = Format.RED + formatted_message + Format.END
2515
2535
  if show_support_link:
2516
2536
  self.__display_support_link(formatted_message)
2517
2537
  else:
@@ -2702,6 +2722,7 @@ class FeaturesEnricher(TransformerMixin):
2702
2722
  self.fit_search_keys,
2703
2723
  self.fit_columns_renaming,
2704
2724
  list(unnest_search_keys.keys()),
2725
+ self.bundle,
2705
2726
  self.logger,
2706
2727
  )
2707
2728
  df = converter.convert(df)
@@ -3269,6 +3290,7 @@ class FeaturesEnricher(TransformerMixin):
3269
3290
  f"Generate features: {self.generate_features}\n"
3270
3291
  f"Round embeddings: {self.round_embeddings}\n"
3271
3292
  f"Detect missing search keys: {self.detect_missing_search_keys}\n"
3293
+ f"Exclude columns: {self.exclude_columns}\n"
3272
3294
  f"Exclude features sources: {exclude_features_sources}\n"
3273
3295
  f"Calculate metrics: {calculate_metrics}\n"
3274
3296
  f"Scoring: {scoring}\n"
@@ -3276,6 +3298,15 @@ class FeaturesEnricher(TransformerMixin):
3276
3298
  f"Remove target outliers: {remove_outliers_calc_metrics}\n"
3277
3299
  f"Exclude columns: {self.exclude_columns}\n"
3278
3300
  f"Search id: {self.search_id}\n"
3301
+ f"Custom loss: {self.loss}\n"
3302
+ f"Logs enabled: {self.logs_enabled}\n"
3303
+ f"Raise validation error: {self.raise_validation_error}\n"
3304
+ f"Baseline score column: {self.baseline_score_column}\n"
3305
+ f"Client ip: {self.client_ip}\n"
3306
+ f"Client visitorId: {self.client_visitorid}\n"
3307
+ f"Add date if missing: {self.add_date_if_missing}\n"
3308
+ f"Select features: {self.select_features}\n"
3309
+ f"Disable force downsampling: {self.disable_force_downsampling}\n"
3279
3310
  )
3280
3311
 
3281
3312
  def sample(df):
@@ -3959,7 +3990,7 @@ class FeaturesEnricher(TransformerMixin):
3959
3990
  display_html_dataframe(self.metrics, self.metrics, msg)
3960
3991
 
3961
3992
  def __show_selected_features(self, search_keys: Dict[str, SearchKey]):
3962
- search_key_names = search_keys.keys()
3993
+ search_key_names = [col for col, tpe in search_keys.items() if tpe != SearchKey.CUSTOM_KEY]
3963
3994
  if self.fit_columns_renaming:
3964
3995
  search_key_names = [self.fit_columns_renaming.get(col, col) for col in search_key_names]
3965
3996
  msg = self.bundle.get("features_info_header").format(len(self.feature_names_), search_key_names)
@@ -216,6 +216,7 @@ imbalanced_target=\nTarget is imbalanced and will be undersampled. Frequency of
216
216
  loss_selection_info=Using loss `{}` for feature selection
217
217
  loss_calc_metrics_info=Using loss `{}` for metrics calculation with default estimator
218
218
  forced_balance_undersample=For quick data retrieval, your dataset has been sampled. To use data search without data sampling please contact support (sales@upgini.com)
219
+ online_api_features_transform=Please note that some of the selected features {} are provided through a slow enrichment interface and are not available via transformation. \nHowever, they can be accessed via the API:
219
220
 
220
221
  # Validation table
221
222
  validation_column_name_header=Column name
@@ -39,7 +39,7 @@ def validate_version(logger: logging.Logger, warning_function: Optional[Callable
39
39
  if current_version < latest_version:
40
40
  msg = bundle.get("version_warning").format(current_version, latest_version)
41
41
  if warning_function:
42
- warning_function(msg)
42
+ warning_function(msg, is_red=True)
43
43
  else:
44
44
  logger.warning(msg)
45
45
  print(msg)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: upgini
3
- Version: 1.2.39a3769.dev2
3
+ Version: 1.2.41
4
4
  Summary: Intelligent data search & enrichment for Machine Learning
5
5
  Project-URL: Bug Reports, https://github.com/upgini/upgini/issues
6
6
  Project-URL: Homepage, https://upgini.com/
@@ -164,11 +164,12 @@ Run [Feature search & generation notebook](https://github.com/upgini/upgini/blob
164
164
 
165
165
  * The goal is to **predict future sales of different goods in stores** based on a 5-year history of sales.
166
166
  * The Kaggle competition [Store Item Demand Forecasting Challenge](https://www.kaggle.com/c/demand-forecasting-kernels-only) is a product sales forecasting task. The evaluation metric is [SMAPE](https://en.wikipedia.org/wiki/Symmetric_mean_absolute_percentage_error).
167
- <!--
167
+
168
168
  Run [Simple sales prediction for retail stores](https://github.com/upgini/upgini/blob/main/notebooks/kaggle_example.ipynb) inside your browser:
169
169
 
170
170
  [![Open example in Google Colab](https://img.shields.io/badge/run_example_in-colab-blue?style=for-the-badge&logo=googlecolab)](https://colab.research.google.com/github/upgini/upgini/blob/main/notebooks/kaggle_example.ipynb)
171
171
  &nbsp;
172
+ <!--
172
173
  [![Open in Binder](https://img.shields.io/badge/run_example_in-mybinder-red.svg?style=for-the-badge&logo=)](https://mybinder.org/v2/gh/upgini/upgini/main?urlpath=notebooks%2Fnotebooks%2Fkaggle_example.ipynb)
173
174
  &nbsp;
174
175
  [![Open example in Gitpod](https://img.shields.io/badge/run_example_in-gitpod-orange?style=for-the-badge&logo=gitpod)](https://gitpod.io/#/github.com/upgini/upgini)
@@ -382,6 +383,7 @@ enricher = FeaturesEnricher(
382
383
  date_format = "%Y-%d-%m"
383
384
  )
384
385
  ```
386
+
385
387
  ### 4. 🔍 Start your first feature search!
386
388
  The main abstraction you interact with is `FeaturesEnricher`, a Scikit-learn compatible estimator. You can easily add it into your existing ML pipelines.
387
389
  Create instance of the `FeaturesEnricher` class and call:
@@ -412,7 +414,7 @@ enricher = FeaturesEnricher(
412
414
  enricher.fit(X, y)
413
415
  ```
414
416
 
415
- That's all). We've fitted `FeaturesEnricher`.
417
+ That's all! We've fit `FeaturesEnricher`.
416
418
  ### 5. 📈 Evaluate feature importances (SHAP values) from the search result
417
419
 
418
420
  `FeaturesEnricher` class has two properties for feature importances, which will be filled after fit - `feature_names_` and `feature_importances_`:
@@ -464,7 +466,7 @@ enricher = FeaturesEnricher(
464
466
  )
465
467
  ```
466
468
 
467
- ## 💻 How it works?
469
+ ## 💻 How does it work?
468
470
 
469
471
  ### 🧹 Search dataset validation
470
472
  We validate and clean the search initialization dataset under the hood:
@@ -506,6 +508,17 @@ enricher = FeaturesEnricher(
506
508
  cv=CVType.time_series
507
509
  )
508
510
  ```
511
+
512
+ If you're working with multivariate time series, you should specify id columns of individual univariate series in `FeaturesEnricher`. For example, if you have a dataset predicting sales for different stores and products, you should specify store and product id columns as follows:
513
+ ```python
514
+ enricher = FeaturesEnricher(
515
+ search_keys={
516
+ "sales_date": SearchKey.DATE,
517
+ },
518
+ id_columns=["store_id", "product_id"],
519
+ cv=CVType.time_series
520
+ )
521
+ ```
509
522
  ⚠️ **Pre-process search dataset** in case of time series prediction:
510
523
  sort rows in dataset according to observation order, in most cases - ascending order by date/datetime.
511
524
 
@@ -1,16 +1,16 @@
1
- upgini/__about__.py,sha256=2ilnzZVy_WdaVJ8AG6XQ1dEDOf4Mo3p6WiWCjIzOxF8,33
1
+ upgini/__about__.py,sha256=4Ai8ZOYwlbREMRamBAcMFBQlvDiu4t_ciaxtb7__HP0,23
2
2
  upgini/__init__.py,sha256=LXSfTNU0HnlOkE69VCxkgIKDhWP-JFo_eBQ71OxTr5Y,261
3
3
  upgini/ads.py,sha256=nvuRxRx5MHDMgPr9SiU-fsqRdFaBv8p4_v1oqiysKpc,2714
4
4
  upgini/dataset.py,sha256=d9VlOs9hTf6eL8TX_9bO400HQj3y_jVGthABvQJqONs,33350
5
5
  upgini/errors.py,sha256=2b_Wbo0OYhLUbrZqdLIx5jBnAsiD1Mcenh-VjR4HCTw,950
6
- upgini/features_enricher.py,sha256=HY7FBC-ioH5hNg2NVMLMV_YAqu4rThgrJoK0JT8cdhU,196975
6
+ upgini/features_enricher.py,sha256=AiAaMc6f1EFufMcxh3In1LSe3Qia8y0La-p3pzJt3Es,198494
7
7
  upgini/http.py,sha256=plZGTGoi1h2edd8Cnjt4eYB8t4NbBGnZz7DtPTByiNc,42885
8
8
  upgini/lazy_import.py,sha256=74gQ8JuA48BGRLxAo7lNHNKY2D2emMxrUxKGdxVGhuY,1012
9
9
  upgini/metadata.py,sha256=-ibqiNjD7dTagqg53FoEJNEqvAYbwgfyn9PGTRQ_YKU,12054
10
10
  upgini/metrics.py,sha256=hr7UwLphbZ_FEglLuO2lzr_pFgxOJ4c3WBeg7H-fNqY,35521
11
11
  upgini/search_task.py,sha256=qxUxAD-bed-FpZYmTB_4orW7YJsW_O6a1TcgnZIRFr4,17307
12
12
  upgini/spinner.py,sha256=4iMd-eIe_BnkqFEMIliULTbj6rNI2HkN_VJ4qYe0cUc,1118
13
- upgini/version_validator.py,sha256=h1GViOWzULy5vf6M4dpTJuIk-4V38UCrTY1sb9yLa5I,1594
13
+ upgini/version_validator.py,sha256=KnmBeEqHMxBDCDT_muCx-cevxesg5YwD15NHHy7d0RE,1607
14
14
  upgini/ads_management/__init__.py,sha256=qzyisOToVRP-tquAJD1PblZhNtMrOB8FiyF9JvfkvgE,50
15
15
  upgini/ads_management/ads_manager.py,sha256=igVbN2jz80Umb2BUJixmJVj-zx8unoKpecVo-R-nGdw,2648
16
16
  upgini/autofe/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -30,7 +30,7 @@ upgini/normalizer/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU
30
30
  upgini/normalizer/normalize_utils.py,sha256=Ft2MwSgVoBilXAORAOYAuwPD79GOLfwn4qQE3IUFzzg,7218
31
31
  upgini/resource_bundle/__init__.py,sha256=S5F2G47pnJd2LDpmFsjDqEwiKkP8Hm-hcseDbMka6Ko,8345
32
32
  upgini/resource_bundle/exceptions.py,sha256=5fRvx0_vWdE1-7HcSgF0tckB4A9AKyf5RiinZkInTsI,621
33
- upgini/resource_bundle/strings.properties,sha256=TiYWmFnuhOq0R3aVg2nbA3F5AWLgjrgh68Yj6MhG-x8,27088
33
+ upgini/resource_bundle/strings.properties,sha256=0jZC0HjyQHeqFCHt6nn1kz7vV0oq92AYQJvy-soAwe4,27304
34
34
  upgini/resource_bundle/strings_widget.properties,sha256=gOdqvZWntP2LCza_tyVk1_yRYcG4c04K9sQOAVhF_gw,1577
35
35
  upgini/sampler/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
36
36
  upgini/sampler/base.py,sha256=7GpjYqjOp58vYcJLiX__1R5wjUlyQbxvHJ2klFnup_M,6389
@@ -59,7 +59,7 @@ upgini/utils/sklearn_ext.py,sha256=13jQS_k7v0aUtudXV6nGUEWjttPQzAW9AFYL5wgEz9k,4
59
59
  upgini/utils/target_utils.py,sha256=RlpKGss9kMibVSlA8iZuO_qxmyeplqzn7X8g6hiGGGs,14341
60
60
  upgini/utils/track_info.py,sha256=G5Lu1xxakg2_TQjKZk4b5SvrHsATTXNVV3NbvWtT8k8,5663
61
61
  upgini/utils/warning_counter.py,sha256=-GRY8EUggEBKODPSuXAkHn9KnEQwAORC0mmz_tim-PM,254
62
- upgini-1.2.39a3769.dev2.dist-info/METADATA,sha256=Vh1Rr3q2Osl1_Ee7uetOp8LROY2nVUb_kvZwyxEDcHc,48604
63
- upgini-1.2.39a3769.dev2.dist-info/WHEEL,sha256=zEMcRr9Kr03x1ozGwg5v9NQBKn3kndp6LSoSlVg-jhU,87
64
- upgini-1.2.39a3769.dev2.dist-info/licenses/LICENSE,sha256=5RRzgvdJUu3BUDfv4bzVU6FqKgwHlIay63pPCSmSgzw,1514
65
- upgini-1.2.39a3769.dev2.dist-info/RECORD,,
62
+ upgini-1.2.41.dist-info/METADATA,sha256=CWIylu0OAI4oDPGpz_GqxQchiSHqtrA7iPsRQeqnI8M,49055
63
+ upgini-1.2.41.dist-info/WHEEL,sha256=1yFddiXMmvYK7QYTqtRNtX66WJ0Mz8PYEiEUoOUUxRY,87
64
+ upgini-1.2.41.dist-info/licenses/LICENSE,sha256=5RRzgvdJUu3BUDfv4bzVU6FqKgwHlIay63pPCSmSgzw,1514
65
+ upgini-1.2.41.dist-info/RECORD,,
@@ -1,4 +1,4 @@
1
1
  Wheel-Version: 1.0
2
- Generator: hatchling 1.24.2
2
+ Generator: hatchling 1.25.0
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any