PyPI - upgini - Versions diffs - 1.2.98a3922.dev3__py3-none-any.whl → 1.2.99a3922.dev4__py3-none-any.whl - Mend

upgini 1.2.98a3922.dev3__py3-none-any.whl → 1.2.99a3922.dev4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of upgini might be problematic. Click here for more details.

Files changed (11) hide show

upgini/__about__.py +1 -1
upgini/autofe/feature.py +27 -10
upgini/autofe/operator.py +8 -1
upgini/autofe/unary.py +22 -1
upgini/autofe/vector.py +1 -1
upgini/features_enricher.py +1 -1
upgini/metrics.py +6 -6
{upgini-1.2.98a3922.dev3.dist-info → upgini-1.2.99a3922.dev4.dist-info}/METADATA +1 -1
{upgini-1.2.98a3922.dev3.dist-info → upgini-1.2.99a3922.dev4.dist-info}/RECORD +11 -11
{upgini-1.2.98a3922.dev3.dist-info → upgini-1.2.99a3922.dev4.dist-info}/WHEEL +0 -0
{upgini-1.2.98a3922.dev3.dist-info → upgini-1.2.99a3922.dev4.dist-info}/licenses/LICENSE +0 -0

upgini/__about__.py CHANGED Viewed

	@@ -1 +1 @@
1	- __version__ = "1.2.98a3922.dev3"
1	+ __version__ = "1.2.99a3922.dev4"

upgini/autofe/feature.py CHANGED Viewed

@@ -1,5 +1,6 @@
 import hashlib
 import itertools
+import logging
 from typing import Dict, List, Optional, Set, Tuple, Union
 import numpy as np
@@ -18,10 +19,7 @@ class Column:
         self.data = data
         self.calculate_all = calculate_all
-    def get_display_name(self, cache: bool = True, shorten: bool = False, **kwargs) -> str:
-        return self.name
-    def set_op_params(self, params: Dict[str, str]) -> "Column":
+    def set_op_params(self, params: Dict[str, str], **kwargs) -> "Column":
         return self
     def get_op_params(self, **kwargs):
@@ -37,8 +35,21 @@ class Column:
     def get_column_nodes(self) -> List["Column"]:
         return [self]
-    def get_columns(self, **kwargs) -> List[str]:
-        return [self.name]
+    def get_columns(self, unhash=False, **kwargs):
+        name = self.name
+        return [self._unhash(name) if unhash else name]
+    def get_display_name(self, cache: bool = True, shorten: bool = False, **kwargs) -> str:
+        return self.get_columns(**kwargs)[0]
+    def _unhash(self, feature_name: str) -> str:
+        last_component_idx = feature_name.rfind("_")
+        if not feature_name.startswith("f_"):
+            return feature_name  # etalon feature
+        elif last_component_idx == 1:
+            return feature_name[2:]  # fully hashed name, cannot unhash
+        else:
+            return feature_name[2:last_component_idx]
     @property
     def children(self) -> List[Union["Feature", "Column"]]:
@@ -81,7 +92,7 @@ class Feature:
         self.cached_display_name = cached_display_name
         self.alias = alias
-    def set_op_params(self, params: Optional[Dict[str, str]]) -> "Feature":
+    def set_op_params(self, params: Optional[Dict[str, str]], **kwargs) -> "Feature":
         obj_dict = pydantic_dump_method(self.op)().copy()
         obj_dict.update(params or {})
         self.op = pydantic_parse_method(self.op.__class__)(obj_dict)
@@ -89,13 +100,13 @@ class Feature:
         for child in self.children:
             child_params = {
-                k[len(child.get_display_name()) + 1 :]: v
+                k[len(child.get_display_name(**kwargs)) + 1 :]: v
                 for k, v in params.items()
-                if k.startswith(child.get_display_name())
+                if k.startswith(child.get_display_name(**kwargs))
             }
             if not child_params:
                 child_params = params
-            child.set_op_params(child_params)
+            child.set_op_params(child_params, **kwargs)
         return self
     def get_op_params(self, **kwargs) -> Dict[str, str]:
@@ -341,6 +352,12 @@ class Feature:
                 base_features.reverse()
                 return Feature(op, base_features)
+    def set_logger(self, logger: logging.Logger):
+        self.op.set_logger(logger)
+        for child in self.children:
+            child.set_logger(logger)
+        return self
 class FeatureGroup:
     def __init__(

upgini/autofe/operator.py CHANGED Viewed

@@ -1,9 +1,10 @@
 import abc
+import logging
 from typing import Dict, List, Optional, Tuple, Union
 import numpy as np
 import pandas as pd
-from pydantic import BaseModel
+from pydantic import BaseModel, PrivateAttr
 class OperatorRegistry(type(BaseModel)):
@@ -64,6 +65,8 @@ class Operator(BaseModel, metaclass=OperatorRegistry):
     is_distribution_dependent: bool = False
     params: Optional[Dict[str, str]] = None
+    _logger: logging.Logger = PrivateAttr(default=logging.getLogger(__name__))
     def set_params(self, params: Dict[str, str]):
         self.params = params
         return self
@@ -79,6 +82,10 @@ class Operator(BaseModel, metaclass=OperatorRegistry):
     def get_hash_component(self) -> str:
         return self.to_formula()
+    def set_logger(self, logger: logging.Logger):
+        self._logger = logger
+        return self
 class ParametrizedOperator(Operator, abc.ABC):

upgini/autofe/unary.py CHANGED Viewed

@@ -3,7 +3,7 @@ from typing import Dict, List, Optional
 import numpy as np
 import pandas as pd
-from upgini.autofe.operator import PandasOperator, VectorizableMixin
+from upgini.autofe.operator import PandasOperator, ParametrizedOperator, VectorizableMixin
 from upgini.autofe.utils import pydantic_validator
@@ -198,3 +198,24 @@ class Cluster(PandasOperator):
     input_type: Optional[str] = "vector"
     output_type: Optional[str] = "category"
     is_categorical: bool = True
+class OutlierDistance(PandasOperator, ParametrizedOperator):
+    name: str = "outlier_dist"
+    is_unary: bool = True
+    input_type: Optional[str] = "vector"
+    output_type: Optional[str] = "float"
+    class_value: Optional[str] = None
+    def to_formula(self) -> str:
+        return f"outlier_dist_{self.class_value if self.class_value is not None else 'all'}"
+    @classmethod
+    def from_formula(cls, formula: str) -> Optional["OutlierDistance"]:
+        if formula == "outlier_dist":
+            return cls()
+        if formula.startswith("outlier_dist_"):
+            class_value = formula.split("_")[-1]
+            return cls(class_value=None if class_value == "all" else class_value)
+        return None

upgini/autofe/vector.py CHANGED Viewed

@@ -1,4 +1,4 @@
-from typing import Dict, List, Optional
+from typing import List, Optional
 import pandas as pd

upgini/features_enricher.py CHANGED Viewed

@@ -4174,7 +4174,7 @@ if response.status_code == 200:
                 description = {}
-                feature_meta = get_feature_by_name(autofe_feature.get_display_name(shorten=True))
+                feature_meta = get_feature_by_name(autofe_feature.get_display_name(shorten=True, unhash=True))
                 if feature_meta is None:
                     self.logger.warning(f"Feature meta for display index {m.display_index} not found")
                     continue

upgini/metrics.py CHANGED Viewed

@@ -399,14 +399,14 @@ class EstimatorWrapper:
                 self.converted_to_str.append(c)
             elif c in self.cat_features:
                 if x[c].dtype == "bool" or (x[c].dtype == "category" and x[c].cat.categories.dtype == "bool"):
-                    x[c] = x[c].astype(np.int64)
+                    x[c] = x[c].astype(pd.Int64Dtype())
                     self.converted_to_int.append(c)
                 elif x[c].dtype == "category" and is_integer_dtype(x[c].cat.categories):
                     self.logger.info(
                         f"Convert categorical feature {c} with integer categories"
                         " to int64 and remove from cat_features"
                     )
-                    x[c] = x[c].astype(np.int64)
+                    x[c] = x[c].astype(pd.Int64Dtype())
                     self.converted_to_int.append(c)
                     self.cat_features.remove(c)
                 elif is_float_dtype(x[c]) or (x[c].dtype == "category" and is_float_dtype(x[c].cat.categories)):
@@ -419,7 +419,7 @@ class EstimatorWrapper:
             else:
                 if x[c].dtype == "bool" or (x[c].dtype == "category" and x[c].cat.categories.dtype == "bool"):
                     self.logger.info(f"Convert bool feature {c} to int64")
-                    x[c] = x[c].astype(np.int64)
+                    x[c] = x[c].astype(pd.Int64Dtype())
                     self.converted_to_int.append(c)
                 elif not is_valid_numeric_array_data(x[c]) and not is_numeric_dtype(x[c]):
                     try:
@@ -442,7 +442,7 @@ class EstimatorWrapper:
         if self.converted_to_int:
             self.logger.info(f"Convert to int features on calculate metrics: {self.converted_to_int}")
             for c in self.converted_to_int:
-                x[c] = x[c].astype(np.int64)
+                x[c] = x[c].astype(pd.Int64Dtype())
         if self.converted_to_str:
             self.logger.info(f"Convert to str features on calculate metrics: {self.converted_to_str}")
@@ -896,7 +896,7 @@ class LightGBMWrapper(EstimatorWrapper):
                     x[c] = x[c].astype("category")
         for c in x.columns:
-            if x[c].dtype not in ["category", "int64", "float64", "bool"]:
+            if x[c].dtype not in ["category", "int64", "float64", "bool", "Int64"]:
                 self.logger.warning(f"Feature {c} is not numeric and will be dropped")
                 self.dropped_features.append(c)
                 x = x.drop(columns=c, errors="ignore")
@@ -987,7 +987,7 @@ class OtherEstimatorWrapper(EstimatorWrapper):
                     x[c] = x[c].astype("category")
             params["cat_features"] = self.cat_features
         for c in x.columns:
-            if x[c].dtype not in ["category", "int64", "float64", "bool"]:
+            if x[c].dtype not in ["category", "int64", "float64", "bool", "Int64"]:
                 self.logger.warning(f"Feature {c} is not numeric and will be dropped")
                 self.dropped_features.append(c)
                 x = x.drop(columns=c, errors="ignore")

{upgini-1.2.98a3922.dev3.dist-info → upgini-1.2.99a3922.dev4.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.3
 Name: upgini
-Version: 1.2.98a3922.dev3
+Version: 1.2.99a3922.dev4
 Summary: Intelligent data search & enrichment for Machine Learning
 Project-URL: Bug Reports, https://github.com/upgini/upgini/issues
 Project-URL: Homepage, https://upgini.com/

{upgini-1.2.98a3922.dev3.dist-info → upgini-1.2.99a3922.dev4.dist-info}/RECORD RENAMED Viewed

@@ -1,12 +1,12 @@
-upgini/__about__.py,sha256=c20ALjeM25Bh-ipz7uc8Eb_tWD5utgqiELwRRlqcRlw,33
+upgini/__about__.py,sha256=5Lrxh5wP8aiUGT1GPRS8K7nPnEINmj_I5a_XBymupWQ,33
 upgini/__init__.py,sha256=LXSfTNU0HnlOkE69VCxkgIKDhWP-JFo_eBQ71OxTr5Y,261
 upgini/ads.py,sha256=nvuRxRx5MHDMgPr9SiU-fsqRdFaBv8p4_v1oqiysKpc,2714
 upgini/dataset.py,sha256=e6JDYTZ2AwC5aF-dqclKZKkiKrHo2f6cFmMQO2ZZmjM,32724
 upgini/errors.py,sha256=2b_Wbo0OYhLUbrZqdLIx5jBnAsiD1Mcenh-VjR4HCTw,950
-upgini/features_enricher.py,sha256=4rKoV-3jM876Fk0fM4XlnW3fLwXvk1KN2ymcwlAfPm0,219941
+upgini/features_enricher.py,sha256=KSOEzO29nY79RIW0hdbf1qXQGxa3itKZ0PkcwVPPf9U,219954
 upgini/http.py,sha256=DNcoS7qdxG0mOJn6I8r6O5I6XdIJTdzDzW3hkz3NgG4,45443
 upgini/metadata.py,sha256=vsbbHyPCP3Rs8WkeDgQg99uAA_zmsbDStAT-NwDYhO4,12455
-upgini/metrics.py,sha256=UbKEsHB7XDzoyGNqDx846zbh1t65GpqdnnhViccdoKU,45615
+upgini/metrics.py,sha256=gXr2aiw5j9QBWBo1hZp40Is679hef5q8MrT6LJfjsBk,45661
 upgini/search_task.py,sha256=Q5HjBpLIB3OCxAD1zNv5yQ3ZNJx696WCK_-H35_y7Rs,17912
 upgini/spinner.py,sha256=4iMd-eIe_BnkqFEMIliULTbj6rNI2HkN_VJ4qYe0cUc,1118
 upgini/version_validator.py,sha256=DvbaAvuYFoJqYt0fitpsk6Xcv-H1BYDJYHUMxaKSH_Y,1509
@@ -16,12 +16,12 @@ upgini/autofe/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 upgini/autofe/all_operators.py,sha256=rdjF5eaE4bC6Q4eu_el5Z7ekYt8DjOFermz2bePPbUc,333
 upgini/autofe/binary.py,sha256=oOEECc4nRzZN2tYaiqx8F2XHnfWpk1bVvb7ZkZJ0lO8,7709
 upgini/autofe/date.py,sha256=MM1S-6imNSzCDOhbNnmsc_bwSqUWBcS8vWAdHF8j1kY,11134
-upgini/autofe/feature.py,sha256=cu4xXjzVVF13ZV4RxuTrysK2qCfezlRCMOzCKRo1rNs,15558
+upgini/autofe/feature.py,sha256=71IQXztYdG2nygVJ4AZ4mOsx5w8PN239rZguKy_4lnE,16250
 upgini/autofe/groupby.py,sha256=IYmQV9uoCdRcpkeWZj_kI3ObzoNCNx3ff3h8sTL01tk,3603
-upgini/autofe/operator.py,sha256=EOffJw6vKXpEh5yymqb1RFNJPxGxmnHdFRo9dB5SCFo,4969
-upgini/autofe/unary.py,sha256=N76Pehn-hO8FWlSdqJ2Wm-yoU1MSR7m6yb2GWYBcumU,5933
+upgini/autofe/operator.py,sha256=WpMd3C-7FpiNXhVDs3MQy7Benz9B6iq6jvXohnCms9c,5178
+upgini/autofe/unary.py,sha256=FFtvkQaT0cu_zPZ1jCLcsjik-UUh12qQFF3tUW8NqsE,6675
 upgini/autofe/utils.py,sha256=dYrtyAM8Vcc_R8u4dNo54IsGrHKagTHDJTKhGho0bRg,2967
-upgini/autofe/vector.py,sha256=NBvRLXVSQf8AU5WI-rXBlO2lfs-skX_XD0KaxkfBFW8,1283
+upgini/autofe/vector.py,sha256=9T7MEUK0SavXIJy0c9Kvu5qTcMtt3fzvdRDBDxcI0JA,1277
 upgini/autofe/timeseries/__init__.py,sha256=PGwwDAMwvkXl3el12tXVEmZUgDUvlmIPlXtROm6bD18,738
 upgini/autofe/timeseries/base.py,sha256=rWJqRuFAzTZEsUdWG5s1Vhif9zzRRmalASXvarufRxI,3610
 upgini/autofe/timeseries/cross.py,sha256=BTINVwuZSbm_4NKkVm0FGM68SrvZLENZKXN7-UyvhYI,5319
@@ -71,7 +71,7 @@ upgini/utils/target_utils.py,sha256=i3Xt5l9ybB2_nF_ma5cfPuL3OeFTs2dY2xDI0p4Azpg,
 upgini/utils/track_info.py,sha256=G5Lu1xxakg2_TQjKZk4b5SvrHsATTXNVV3NbvWtT8k8,5663
 upgini/utils/ts_utils.py,sha256=26vhC0pN7vLXK6R09EEkMK3Lwb9IVPH7LRdqFIQ3kPs,1383
 upgini/utils/warning_counter.py,sha256=-GRY8EUggEBKODPSuXAkHn9KnEQwAORC0mmz_tim-PM,254
-upgini-1.2.98a3922.dev3.dist-info/METADATA,sha256=N0PxLZz_XaDEyH77cUCwjKE3ocLXAOo6n5Cy_1xYb8w,49538
-upgini-1.2.98a3922.dev3.dist-info/WHEEL,sha256=1yFddiXMmvYK7QYTqtRNtX66WJ0Mz8PYEiEUoOUUxRY,87
-upgini-1.2.98a3922.dev3.dist-info/licenses/LICENSE,sha256=5RRzgvdJUu3BUDfv4bzVU6FqKgwHlIay63pPCSmSgzw,1514
-upgini-1.2.98a3922.dev3.dist-info/RECORD,,
+upgini-1.2.99a3922.dev4.dist-info/METADATA,sha256=wYgu44FVyY6Bfof83_UJ1tWMWxrKTUoY_m1Q0QHDqJ8,49538
+upgini-1.2.99a3922.dev4.dist-info/WHEEL,sha256=1yFddiXMmvYK7QYTqtRNtX66WJ0Mz8PYEiEUoOUUxRY,87
+upgini-1.2.99a3922.dev4.dist-info/licenses/LICENSE,sha256=5RRzgvdJUu3BUDfv4bzVU6FqKgwHlIay63pPCSmSgzw,1514
+upgini-1.2.99a3922.dev4.dist-info/RECORD,,

{upgini-1.2.98a3922.dev3.dist-info → upgini-1.2.99a3922.dev4.dist-info}/WHEEL RENAMED Viewed

File without changes

{upgini-1.2.98a3922.dev3.dist-info → upgini-1.2.99a3922.dev4.dist-info}/licenses/LICENSE RENAMED Viewed

File without changes

upgini 1.2.98a3922.dev3__py3-none-any.whl → 1.2.99a3922.dev4__py3-none-any.whl

Potentially problematic release.

upgini 1.2.98a3922.dev3__py3-none-any.whl → 1.2.99a3922.dev4__py3-none-any.whl