PyPI - upgini - Versions diffs - 1.1.152__py3-none-any.whl → 1.1.154a1__py3-none-any.whl - Mend

upgini 1.1.152__py3-none-any.whl → 1.1.154a1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of upgini might be problematic. Click here for more details.

Files changed (6) hide show

upgini/features_enricher.py CHANGED Viewed

@@ -142,6 +142,7 @@ class FeaturesEnricher(TransformerMixin):
         round_embeddings: Optional[int] = None,
         logs_enabled: bool = True,
         raise_validation_error: bool = False,
+        exclude_columns: Optional[List[str]] = None,
         **kwargs,
     ):
         self._api_key = api_key or os.environ.get(UPGINI_API_KEY)
@@ -207,21 +208,19 @@ class FeaturesEnricher(TransformerMixin):
         self.shared_datasets = shared_datasets
         if shared_datasets is not None:
             self.runtime_parameters.properties["shared_datasets"] = ",".join(shared_datasets)
-        self.generate_features: Optional[List[str]] = None
+        self.generate_features = generate_features
+        self.round_embeddings = round_embeddings
         if generate_features is not None:
             if len(generate_features) > 2:
                 msg = bundle.get("too_many_generate_features")
                 self.logger.error(msg)
                 raise ValidationError(msg)
-            self.generate_features = generate_features
             self.runtime_parameters.properties["generate_features"] = ",".join(generate_features)
-            self.round_embeddings: Optional[int] = None
             if round_embeddings is not None:
                 if not isinstance(round_embeddings, int) or round_embeddings < 0:
                     msg = bundle.get("invalid_round_embeddings")
                     self.logger.error(msg)
                     raise ValidationError(msg)
-                self.round_embeddings = round_embeddings
                 self.runtime_parameters.properties["round_embeddings"] = round_embeddings
         self.passed_features: List[str] = []
@@ -238,6 +237,7 @@ class FeaturesEnricher(TransformerMixin):
         self.imbalanced = False
         self.__cached_sampled_datasets: Optional[Tuple[pd.DataFrame, pd.DataFrame, pd.Series, Dict, Dict]] = None
         self.raise_validation_error = raise_validation_error
+        self.exclude_columns = exclude_columns
     def _get_api_key(self):
         return self._api_key
@@ -678,11 +678,11 @@ class FeaturesEnricher(TransformerMixin):
             try:
                 self.__log_debug_information(
-                    X,
-                    y,
-                    eval_set,
+                    X if X is not None else self.X,
+                    y if y is not None else self.y,
+                    eval_set if eval_set is not None else self.eval_set,
                     exclude_features_sources=exclude_features_sources,
-                    cv=cv,
+                    cv=cv if cv is not None else self.cv,
                     importance_threshold=importance_threshold,
                     max_features=max_features,
                     scoring=scoring,
@@ -1801,6 +1801,9 @@ class FeaturesEnricher(TransformerMixin):
         if not is_transform and not validated_X.index.is_unique:
             raise ValidationError(bundle.get("x_non_unique_index"))
+        if self.exclude_columns is not None:
+            validated_X = drop_existing_columns(validated_X, self.exclude_columns)
         if TARGET in validated_X.columns:
             raise ValidationError(bundle.get("x_contains_reserved_column_name").format(TARGET))
         if not is_transform and EVAL_SET_INDEX in validated_X.columns:
@@ -1970,51 +1973,58 @@ class FeaturesEnricher(TransformerMixin):
         estimator: Optional[Any] = None,
         remove_outliers_calc_metrics: Optional[bool] = None,
     ):
-        resolved_api_key = self.api_key or os.environ.get(UPGINI_API_KEY)
-        self.logger.info(
-            f"Search keys: {self.search_keys}\n"
-            f"Country code: {self.country_code}\n"
-            f"Model task type: {self.model_task_type}\n"
-            f"Api key presented?: {resolved_api_key is not None and resolved_api_key != ''}\n"
-            f"Endpoint: {self.endpoint}\n"
-            f"Runtime parameters: {self.runtime_parameters}\n"
-            f"Date format: {self.date_format}\n"
-            f"CV: {cv}\n"
-            f"importance_threshold: {importance_threshold}\n"
-            f"max_features: {max_features}"
-            f"Shared datasets: {self.shared_datasets}\n"
-            f"Random state: {self.random_state}\n"
-            f"Generate features: {self.generate_features}\n"
-            f"Round embeddings: {self.round_embeddings}\n"
-            f"Detect missing search keys: {self.detect_missing_search_keys}\n"
-            f"Exclude features sources: {exclude_features_sources}\n"
-            f"Calculate metrics: {calculate_metrics}\n"
-            f"Scoring: {scoring}\n"
-            f"Estimator: {estimator}\n"
-            f"Remove target outliers: {remove_outliers_calc_metrics}\n"
-            f"Search id: {self.search_id}\n"
-        )
+        try:
+            resolved_api_key = self.api_key or os.environ.get(UPGINI_API_KEY)
+            self.logger.info(
+                f"Search keys: {self.search_keys}\n"
+                f"Country code: {self.country_code}\n"
+                f"Model task type: {self.model_task_type}\n"
+                f"Api key presented?: {resolved_api_key is not None and resolved_api_key != ''}\n"
+                f"Endpoint: {self.endpoint}\n"
+                f"Runtime parameters: {self.runtime_parameters}\n"
+                f"Date format: {self.date_format}\n"
+                f"CV: {cv}\n"
+                f"importance_threshold: {importance_threshold}\n"
+                f"max_features: {max_features}\n"
+                f"Shared datasets: {self.shared_datasets}\n"
+                f"Random state: {self.random_state}\n"
+                f"Generate features: {self.generate_features}\n"
+                f"Round embeddings: {self.round_embeddings}\n"
+                f"Detect missing search keys: {self.detect_missing_search_keys}\n"
+                f"Exclude features sources: {exclude_features_sources}\n"
+                f"Calculate metrics: {calculate_metrics}\n"
+                f"Scoring: {scoring}\n"
+                f"Estimator: {estimator}\n"
+                f"Remove target outliers: {remove_outliers_calc_metrics}\n"
+                f"Exclude columns: {self.exclude_columns}\n"
+                f"Search id: {self.search_id}\n"
+            )
-        def sample(df):
-            if isinstance(df, pd.Series) or isinstance(df, pd.DataFrame):
-                return df.head(10)
-            else:
-                return df[:10]
+            def sample(df):
+                if isinstance(df, pd.Series) or isinstance(df, pd.DataFrame):
+                    return df.head(10)
+                else:
+                    return df[:10]
-        def print_datasets_sample():
-            self.logger.info(f"First 10 rows of the X with shape {X.shape}:\n{sample(X)}")
-            if y is not None:
-                self.logger.info(f"First 10 rows of the y with shape {_num_samples(y)}:\n{sample(y)}")
-            if eval_set is not None:
-                for idx, eval_pair in enumerate(eval_set):
-                    eval_X: pd.DataFrame = eval_pair[0]
-                    eval_y = eval_pair[1]
-                    self.logger.info(f"First 10 rows of the eval_X_{idx} with shape {eval_X.shape}:\n{sample(eval_X)}")
-                    self.logger.info(
-                        f"First 10 rows of the eval_y_{idx} with shape {_num_samples(eval_y)}:\n{sample(eval_y)}"
-                    )
+            def print_datasets_sample():
+                if X is not None:
+                    self.logger.info(f"First 10 rows of the X with shape {X.shape}:\n{sample(X)}")
+                if y is not None:
+                    self.logger.info(f"First 10 rows of the y with shape {_num_samples(y)}:\n{sample(y)}")
+                if eval_set is not None:
+                    for idx, eval_pair in enumerate(eval_set):
+                        eval_X: pd.DataFrame = eval_pair[0]
+                        eval_y = eval_pair[1]
+                        self.logger.info(
+                            f"First 10 rows of the eval_X_{idx} with shape {eval_X.shape}:\n{sample(eval_X)}"
+                        )
+                        self.logger.info(
+                            f"First 10 rows of the eval_y_{idx} with shape {_num_samples(eval_y)}:\n{sample(eval_y)}"
+                        )
-        do_without_pandas_limits(print_datasets_sample)
+            do_without_pandas_limits(print_datasets_sample)
+        except Exception:
+            self.logger.exception("Failed to log debug information")
     def __handle_index_search_keys(self, df: pd.DataFrame, search_keys: Dict[str, SearchKey]) -> pd.DataFrame:
         index_names = df.index.names if df.index.names != [None] else [DEFAULT_INDEX]

{upgini-1.1.152.dist-info → upgini-1.1.154a1.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: upgini
-Version: 1.1.152
+Version: 1.1.154a1
 Summary: Intelligent data search & enrichment for Machine Learning
 Home-page: https://upgini.com/
 Author: Upgini Developers

{upgini-1.1.152.dist-info → upgini-1.1.154a1.dist-info}/RECORD RENAMED Viewed

@@ -2,7 +2,7 @@ upgini/__init__.py,sha256=asENHgEVHQBIkV-e_0IhE_ZWqkCG6398U3ZLrNzAH6k,407
 upgini/ads.py,sha256=mre6xn44wcC_fg63iLT_kTh4mViZqR9AKRJZAtpQz8Y,2592
 upgini/dataset.py,sha256=Jmteorv9nMn0bASMUixPXkTfBsONDuol0UIyPznOmRw,44111
 upgini/errors.py,sha256=BqpvfhW2jJW5fa5KXj0alhXatGl-WK4xTl309-QNLp8,959
-upgini/features_enricher.py,sha256=q5NKJI1Waogl9j5juKU4rkzEFNAS5xAsP_oggho4514,125454
+upgini/features_enricher.py,sha256=zWIZmROTY3Fi1PsivJhMrGBpznZ-K5mMZApFD6H0AXM,126119
 upgini/fingerprint.js,sha256=wfzunoC87TdquCdABOwcrkoGOoJsX89ICTOb4rsrO50,34162
 upgini/http.py,sha256=ke85Fb1ffD29tjgpnbHF_6gtV3nBJe4Xoxpp9i1GuLA,37176
 upgini/metadata.py,sha256=Oefg-rkA4PsZUHIho_clZcnyZwdtVJ1gXPvEY6oBmpg,5969
@@ -42,8 +42,8 @@ upgini/utils/postal_code_utils.py,sha256=_8CR9tBqsPptQsmMUvnrCAmBaMIQSWH3JfJ4ly3
 upgini/utils/target_utils.py,sha256=cu52icjhDIPpEStHYMXrD2hIl9gzvfnxZr0Ra5osV0k,1616
 upgini/utils/track_info.py,sha256=2IGGyHPXBLhWcLO8-Q-5qir52k_kD6DtdU-sv_Z2hHY,5325
 upgini/utils/warning_counter.py,sha256=vnmdFo5-7GBkU2bK9h_uC0K0Y_wtfcYstxOdeRfacO0,228
-upgini-1.1.152.dist-info/LICENSE,sha256=5RRzgvdJUu3BUDfv4bzVU6FqKgwHlIay63pPCSmSgzw,1514
-upgini-1.1.152.dist-info/METADATA,sha256=4O-RTzAPG1pGfqZJTynOMxQVSEdNnGVoKImTsMypalo,47895
-upgini-1.1.152.dist-info/WHEEL,sha256=G16H4A3IeoQmnOrYV4ueZGKSjhipXx8zc8nu9FGlvMA,92
-upgini-1.1.152.dist-info/top_level.txt,sha256=OFhTGiDIWKl5gFI49qvWq1R9IKflPaE2PekcbDXDtx4,7
-upgini-1.1.152.dist-info/RECORD,,
+upgini-1.1.154a1.dist-info/LICENSE,sha256=5RRzgvdJUu3BUDfv4bzVU6FqKgwHlIay63pPCSmSgzw,1514
+upgini-1.1.154a1.dist-info/METADATA,sha256=fQ2EK-jmAweNVHlfc3eoRKYs3IEzPkDA6iO8mzByczs,47897
+upgini-1.1.154a1.dist-info/WHEEL,sha256=G16H4A3IeoQmnOrYV4ueZGKSjhipXx8zc8nu9FGlvMA,92
+upgini-1.1.154a1.dist-info/top_level.txt,sha256=OFhTGiDIWKl5gFI49qvWq1R9IKflPaE2PekcbDXDtx4,7
+upgini-1.1.154a1.dist-info/RECORD,,

{upgini-1.1.152.dist-info → upgini-1.1.154a1.dist-info}/LICENSE RENAMED Viewed

File without changes

{upgini-1.1.152.dist-info → upgini-1.1.154a1.dist-info}/WHEEL RENAMED Viewed

File without changes

{upgini-1.1.152.dist-info → upgini-1.1.154a1.dist-info}/top_level.txt RENAMED Viewed

File without changes

upgini 1.1.152__py3-none-any.whl → 1.1.154a1__py3-none-any.whl

Potentially problematic release.

upgini 1.1.152__py3-none-any.whl → 1.1.154a1__py3-none-any.whl