PyPI - upgini - Versions diffs - 1.2.39a3769.dev2__py3-none-any.whl → 1.2.41a3758.dev1__py3-none-any.whl - Mend

upgini 1.2.39a3769.dev2__py3-none-any.whl → 1.2.41a3758.dev1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of upgini might be problematic. Click here for more details.

Files changed (7) hide show

upgini/__about__.py CHANGED Viewed

	@@ -1 +1 @@
1	- __version__ = "1.2.39a3769.dev2"
1	+ __version__ = "1.2.41a3758.dev1"

upgini/features_enricher.py CHANGED Viewed

@@ -165,10 +165,6 @@ class FeaturesEnricher(TransformerMixin):
     shared_datasets: list of str, optional (default=None)
         List of private shared dataset ids for custom search
-    select_features: bool, optional (default=False)
-        If True, return only selected features both from input and data sources.
-        Otherwise, return all features from input and only selected features from data sources.
     """
     TARGET_NAME = "target"
@@ -235,7 +231,6 @@ class FeaturesEnricher(TransformerMixin):
         client_visitorid: Optional[str] = None,
         custom_bundle_config: Optional[str] = None,
         add_date_if_missing: bool = True,
-        select_features: bool = False,
         disable_force_downsampling: bool = False,
         id_columns: Optional[List[str]] = None,
         **kwargs,
@@ -297,7 +292,6 @@ class FeaturesEnricher(TransformerMixin):
         self.dropped_client_feature_names_ = []
         self.feature_importances_ = []
         self.search_id = search_id
-        self.select_features = select_features
         self.disable_force_downsampling = disable_force_downsampling
         if search_id:
@@ -405,6 +399,7 @@ class FeaturesEnricher(TransformerMixin):
         remove_outliers_calc_metrics: Optional[bool] = None,
         progress_callback: Optional[Callable[[SearchProgress], Any]] = None,
         search_id_callback: Optional[Callable[[str], Any]] = None,
+        select_features: bool = False,
         **kwargs,
     ):
         """Fit to data.
@@ -440,6 +435,10 @@ class FeaturesEnricher(TransformerMixin):
         remove_outliers_calc_metrics, optional (default=True)
             If True then rows with target ouliers will be dropped on metrics calculation
+        select_features: bool, optional (default=False)
+            If True, return only selected features both from input and data sources.
+            Otherwise, return all features from input and only selected features from data sources.
         """
         trace_id = str(uuid.uuid4())
         start_time = time.time()
@@ -474,6 +473,7 @@ class FeaturesEnricher(TransformerMixin):
                 self.y = y
                 self.eval_set = self._check_eval_set(eval_set, X, self.bundle)
                 self.dump_input(trace_id, X, y, self.eval_set)
+                self.__set_select_features(select_features)
                 self.__inner_fit(
                     trace_id,
                     X,
@@ -523,6 +523,10 @@ class FeaturesEnricher(TransformerMixin):
             finally:
                 self.logger.info(f"Fit elapsed time: {time.time() - start_time}")
+    def __set_select_features(self, select_features: bool):
+        self.fit_select_features = select_features
+        self.runtime_parameters.properties["select_features"] = select_features
     def fit_transform(
         self,
         X: Union[pd.DataFrame, pd.Series, np.ndarray],
@@ -538,6 +542,7 @@ class FeaturesEnricher(TransformerMixin):
         estimator: Optional[Any] = None,
         remove_outliers_calc_metrics: Optional[bool] = None,
         progress_callback: Optional[Callable[[SearchProgress], Any]] = None,
+        select_features: bool = False,
         **kwargs,
     ) -> pd.DataFrame:
         """Fit to data, then transform it.
@@ -578,6 +583,10 @@ class FeaturesEnricher(TransformerMixin):
         remove_outliers_calc_metrics, optional (default=True)
             If True then rows with target ouliers will be dropped on metrics calculation
+        select_features: bool, optional (default=False)
+            If True, return only selected features both from input and data sources.
+            Otherwise, return all features from input and only selected features from data sources.
         Returns
         -------
         X_new: pandas.DataFrame of shape (n_samples, n_features_new)
@@ -612,6 +621,7 @@ class FeaturesEnricher(TransformerMixin):
                 self.X = X
                 self.y = y
                 self.eval_set = self._check_eval_set(eval_set, X, self.bundle)
+                self.__set_select_features(select_features)
                 self.dump_input(trace_id, X, y, self.eval_set)
                 if _num_samples(drop_duplicates(X)) > Dataset.MAX_ROWS:
@@ -1231,8 +1241,11 @@ class FeaturesEnricher(TransformerMixin):
                 self.logger.info(f"Calculating metrics elapsed time: {time.time() - start_time}")
     def _update_shap_values(self, trace_id: str, x_columns: List[str], new_shaps: Dict[str, float]):
+        renaming = self.fit_columns_renaming or {}
         new_shaps = {
-            feature: _round_shap_value(shap) for feature, shap in new_shaps.items() if feature in self.feature_names_
+            renaming.get(feature, feature): _round_shap_value(shap)
+            for feature, shap in new_shaps.items()
+            if feature in self.feature_names_ or renaming.get(feature, feature) in self.feature_names_
         }
         self.__prepare_feature_importances(trace_id, x_columns, new_shaps, silent=True)
@@ -1461,7 +1474,7 @@ class FeaturesEnricher(TransformerMixin):
             c
             for c in X_sampled.columns.to_list()
             if (
-                not self.select_features
+                not self.fit_select_features
                 or c in self.feature_names_
                 or (self.fit_columns_renaming is not None and self.fit_columns_renaming.get(c) in self.feature_names_)
             )
@@ -2008,7 +2021,7 @@ class FeaturesEnricher(TransformerMixin):
             trace_id = trace_id or uuid.uuid4()
             return search_task.get_progress(trace_id)
-    def get_transactional_transform_api(self):
+    def get_transactional_transform_api(self, only_online_sources=False):
         if self.api_key is None:
             raise ValidationError(self.bundle.get("transactional_transform_unregistered"))
         if self._search_task is None:
@@ -2066,7 +2079,7 @@ class FeaturesEnricher(TransformerMixin):
         api_example = f"""curl 'https://search.upgini.com/online/api/http_inference_trigger?search_id={search_id}' \\
     -H 'Authorization: {self.api_key}' \\
     -H 'Content-Type: application/json' \\
-    -d '{{"search_keys": {keys}{features_section}}}'"""
+    -d '{{"search_keys": {keys}{features_section}, "only_online_sources": {str(only_online_sources).lower()}}}'"""
         return api_example
     def _get_copy_of_runtime_parameters(self) -> RuntimeParameters:
@@ -2110,13 +2123,15 @@ class FeaturesEnricher(TransformerMixin):
                 return None, {c: c for c in X.columns}, []
             features_meta = self._search_task.get_all_features_metadata_v2()
-            online_api_features = [fm.name for fm in features_meta if fm.from_online_api]
+            online_api_features = [fm.name for fm in features_meta if fm.from_online_api and fm.shap_value > 0]
             if len(online_api_features) > 0:
                 self.logger.warning(
                     f"There are important features for transform, that generated by online API: {online_api_features}"
                 )
-                # TODO
-                raise Exception("There are features selected that are paid. Contact support (sales@upgini.com)")
+                msg = self.bundle.get("online_api_features_transform").format(online_api_features)
+                self.logger.warning(msg)
+                print(msg)
+                print(self.get_transactional_transform_api(only_online_sources=True))
             if not metrics_calculation:
                 transform_usage = self.rest_client.get_current_transform_usage(trace_id)
@@ -2702,6 +2717,7 @@ class FeaturesEnricher(TransformerMixin):
                 self.fit_search_keys,
                 self.fit_columns_renaming,
                 list(unnest_search_keys.keys()),
+                self.bundle,
                 self.logger,
             )
             df = converter.convert(df)
@@ -3269,6 +3285,7 @@ class FeaturesEnricher(TransformerMixin):
                 f"Generate features: {self.generate_features}\n"
                 f"Round embeddings: {self.round_embeddings}\n"
                 f"Detect missing search keys: {self.detect_missing_search_keys}\n"
+                f"Exclude columns: {self.exclude_columns}\n"
                 f"Exclude features sources: {exclude_features_sources}\n"
                 f"Calculate metrics: {calculate_metrics}\n"
                 f"Scoring: {scoring}\n"
@@ -3276,6 +3293,15 @@ class FeaturesEnricher(TransformerMixin):
                 f"Remove target outliers: {remove_outliers_calc_metrics}\n"
                 f"Exclude columns: {self.exclude_columns}\n"
                 f"Search id: {self.search_id}\n"
+                f"Custom loss: {self.loss}\n"
+                f"Logs enabled: {self.logs_enabled}\n"
+                f"Raise validation error: {self.raise_validation_error}\n"
+                f"Baseline score column: {self.baseline_score_column}\n"
+                f"Client ip: {self.client_ip}\n"
+                f"Client visitorId: {self.client_visitorid}\n"
+                f"Add date if missing: {self.add_date_if_missing}\n"
+                f"Disable force downsampling: {self.disable_force_downsampling}\n"
+                f"Id columns: {self.id_columns}\n"
             )
             def sample(df):
@@ -3662,7 +3688,7 @@ class FeaturesEnricher(TransformerMixin):
             is_client_feature = feature_meta.name in x_columns
             if feature_meta.shap_value == 0.0:
-                if self.select_features:
+                if self.fit_select_features:
                     self.dropped_client_feature_names_.append(feature_meta.name)
                 continue
@@ -3671,7 +3697,7 @@ class FeaturesEnricher(TransformerMixin):
                 feature_meta.name in self.fit_generated_features
                 or feature_meta.name == COUNTRY
                 # In select_features mode we select also from etalon features and need to show them
-                or (not self.select_features and is_client_feature)
+                or (not self.fit_select_features and is_client_feature)
             ):
                 continue
@@ -3959,7 +3985,7 @@ class FeaturesEnricher(TransformerMixin):
             display_html_dataframe(self.metrics, self.metrics, msg)
     def __show_selected_features(self, search_keys: Dict[str, SearchKey]):
-        search_key_names = search_keys.keys()
+        search_key_names = [col for col, tpe in search_keys.items() if tpe != SearchKey.CUSTOM_KEY]
         if self.fit_columns_renaming:
             search_key_names = [self.fit_columns_renaming.get(col, col) for col in search_key_names]
         msg = self.bundle.get("features_info_header").format(len(self.feature_names_), search_key_names)

upgini/resource_bundle/strings.properties CHANGED Viewed

@@ -216,6 +216,7 @@ imbalanced_target=\nTarget is imbalanced and will be undersampled. Frequency of
 loss_selection_info=Using loss `{}` for feature selection
 loss_calc_metrics_info=Using loss `{}` for metrics calculation with default estimator
 forced_balance_undersample=For quick data retrieval, your dataset has been sampled. To use data search without data sampling please contact support (sales@upgini.com)
+online_api_features_transform=Please note that some of the selected features {} are provided through a slow enrichment interface and are not available via transformation. However, they can be accessed via the API:
 # Validation table
 validation_column_name_header=Column name

{upgini-1.2.39a3769.dev2.dist-info → upgini-1.2.41a3758.dev1.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.3
 Name: upgini
-Version: 1.2.39a3769.dev2
+Version: 1.2.41a3758.dev1
 Summary: Intelligent data search & enrichment for Machine Learning
 Project-URL: Bug Reports, https://github.com/upgini/upgini/issues
 Project-URL: Homepage, https://upgini.com/
@@ -382,6 +382,7 @@ enricher = FeaturesEnricher(
 	date_format = "%Y-%d-%m"
 )
 ```
 ### 4. 🔍 Start your first feature search!
 The main abstraction you interact is `FeaturesEnricher`, a Scikit-learn compatible estimator.  You can easily add it into your existing ML pipelines.
 Create instance of the `FeaturesEnricher` class and call:
@@ -412,7 +413,7 @@ enricher = FeaturesEnricher(
 enricher.fit(X, y)
 ```
-That's all). We've fitted `FeaturesEnricher`.
+That's all! We've fit `FeaturesEnricher`.
 ### 5. 📈 Evaluate feature importances (SHAP values) from the search result
 `FeaturesEnricher` class has two properties for feature importances, which will be filled after fit - `feature_names_` and `feature_importances_`:
@@ -464,7 +465,7 @@ enricher = FeaturesEnricher(
 )
 ```
-## 💻 How it works?
+## 💻 How does it work?
 ### 🧹 Search dataset validation
 We validate and clean search initialization dataset under the hood:
@@ -506,6 +507,17 @@ enricher = FeaturesEnricher(
 	cv=CVType.time_series
 )
 ```
+If you're working with multivariate time series, you should specify id columns of individual univariate series in `FeaturesEnricher`. For example, if you have a dataset predicting sales for different stores and products, you should specify store and product id columns as follows:
+```python
+enricher = FeaturesEnricher(
+    search_keys={
+        "sales_date": SearchKey.DATE,
+    },
+    id_columns=["store_id", "product_id"],
+    cv=CVType.time_series
+)
+```
 ⚠️ **Pre-process search dataset** in case of time series prediction:
 sort rows in dataset according to observation order, in most cases - ascending order by date/datetime.

{upgini-1.2.39a3769.dev2.dist-info → upgini-1.2.41a3758.dev1.dist-info}/RECORD RENAMED Viewed

@@ -1,9 +1,9 @@
-upgini/__about__.py,sha256=2ilnzZVy_WdaVJ8AG6XQ1dEDOf4Mo3p6WiWCjIzOxF8,33
+upgini/__about__.py,sha256=KQ5_UqUf1j9QhJsdY2vLVTEcHPCYbzp5HHMntbtpDpE,33
 upgini/__init__.py,sha256=LXSfTNU0HnlOkE69VCxkgIKDhWP-JFo_eBQ71OxTr5Y,261
 upgini/ads.py,sha256=nvuRxRx5MHDMgPr9SiU-fsqRdFaBv8p4_v1oqiysKpc,2714
 upgini/dataset.py,sha256=d9VlOs9hTf6eL8TX_9bO400HQj3y_jVGthABvQJqONs,33350
 upgini/errors.py,sha256=2b_Wbo0OYhLUbrZqdLIx5jBnAsiD1Mcenh-VjR4HCTw,950
-upgini/features_enricher.py,sha256=HY7FBC-ioH5hNg2NVMLMV_YAqu4rThgrJoK0JT8cdhU,196975
+upgini/features_enricher.py,sha256=c-NKv3UfMGqcyHb4KZjuCzLj6hW19_1ysi0IWDXYstI,198633
 upgini/http.py,sha256=plZGTGoi1h2edd8Cnjt4eYB8t4NbBGnZz7DtPTByiNc,42885
 upgini/lazy_import.py,sha256=74gQ8JuA48BGRLxAo7lNHNKY2D2emMxrUxKGdxVGhuY,1012
 upgini/metadata.py,sha256=-ibqiNjD7dTagqg53FoEJNEqvAYbwgfyn9PGTRQ_YKU,12054
@@ -30,7 +30,7 @@ upgini/normalizer/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU
 upgini/normalizer/normalize_utils.py,sha256=Ft2MwSgVoBilXAORAOYAuwPD79GOLfwn4qQE3IUFzzg,7218
 upgini/resource_bundle/__init__.py,sha256=S5F2G47pnJd2LDpmFsjDqEwiKkP8Hm-hcseDbMka6Ko,8345
 upgini/resource_bundle/exceptions.py,sha256=5fRvx0_vWdE1-7HcSgF0tckB4A9AKyf5RiinZkInTsI,621
-upgini/resource_bundle/strings.properties,sha256=TiYWmFnuhOq0R3aVg2nbA3F5AWLgjrgh68Yj6MhG-x8,27088
+upgini/resource_bundle/strings.properties,sha256=uQWmbcd9TJh-xE0QpmHpHYKw-20utvXeHwFA-U_iTLw,27302
 upgini/resource_bundle/strings_widget.properties,sha256=gOdqvZWntP2LCza_tyVk1_yRYcG4c04K9sQOAVhF_gw,1577
 upgini/sampler/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 upgini/sampler/base.py,sha256=7GpjYqjOp58vYcJLiX__1R5wjUlyQbxvHJ2klFnup_M,6389
@@ -59,7 +59,7 @@ upgini/utils/sklearn_ext.py,sha256=13jQS_k7v0aUtudXV6nGUEWjttPQzAW9AFYL5wgEz9k,4
 upgini/utils/target_utils.py,sha256=RlpKGss9kMibVSlA8iZuO_qxmyeplqzn7X8g6hiGGGs,14341
 upgini/utils/track_info.py,sha256=G5Lu1xxakg2_TQjKZk4b5SvrHsATTXNVV3NbvWtT8k8,5663
 upgini/utils/warning_counter.py,sha256=-GRY8EUggEBKODPSuXAkHn9KnEQwAORC0mmz_tim-PM,254
-upgini-1.2.39a3769.dev2.dist-info/METADATA,sha256=Vh1Rr3q2Osl1_Ee7uetOp8LROY2nVUb_kvZwyxEDcHc,48604
-upgini-1.2.39a3769.dev2.dist-info/WHEEL,sha256=zEMcRr9Kr03x1ozGwg5v9NQBKn3kndp6LSoSlVg-jhU,87
-upgini-1.2.39a3769.dev2.dist-info/licenses/LICENSE,sha256=5RRzgvdJUu3BUDfv4bzVU6FqKgwHlIay63pPCSmSgzw,1514
-upgini-1.2.39a3769.dev2.dist-info/RECORD,,
+upgini-1.2.41a3758.dev1.dist-info/METADATA,sha256=gfveQriK3BlEZTWtxNrMlApMona-ghB5CzCN0HRVGMs,49064
+upgini-1.2.41a3758.dev1.dist-info/WHEEL,sha256=zEMcRr9Kr03x1ozGwg5v9NQBKn3kndp6LSoSlVg-jhU,87
+upgini-1.2.41a3758.dev1.dist-info/licenses/LICENSE,sha256=5RRzgvdJUu3BUDfv4bzVU6FqKgwHlIay63pPCSmSgzw,1514
+upgini-1.2.41a3758.dev1.dist-info/RECORD,,

{upgini-1.2.39a3769.dev2.dist-info → upgini-1.2.41a3758.dev1.dist-info}/WHEEL RENAMED Viewed

File without changes

{upgini-1.2.39a3769.dev2.dist-info → upgini-1.2.41a3758.dev1.dist-info}/licenses/LICENSE RENAMED Viewed

File without changes

upgini 1.2.39a3769.dev2__py3-none-any.whl → 1.2.41a3758.dev1__py3-none-any.whl

Potentially problematic release.

upgini 1.2.39a3769.dev2__py3-none-any.whl → 1.2.41a3758.dev1__py3-none-any.whl