PyPI - upgini - Versions diffs - 1.2.70a3832.dev2__py3-none-any.whl → 1.2.71__py3-none-any.whl - Mend

upgini 1.2.70a3832.dev2__py3-none-any.whl → 1.2.71__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of upgini might be problematic. Click here for more details.

Files changed (23)

upgini/__about__.py +1 -1
upgini/autofe/date.py +15 -21
upgini/autofe/feature.py +5 -1
upgini/autofe/timeseries/base.py +3 -9
upgini/autofe/timeseries/cross.py +22 -12
upgini/autofe/timeseries/roll.py +2 -7
upgini/autofe/timeseries/trend.py +2 -1
upgini/autofe/utils.py +83 -0
upgini/dataset.py +8 -1
upgini/features_enricher.py +340 -275
upgini/metadata.py +4 -0
upgini/metrics.py +67 -60
upgini/resource_bundle/strings.properties +1 -0
upgini/search_task.py +7 -1
upgini/utils/mstats.py +1 -1
upgini/utils/sklearn_ext.py +11 -0
upgini/utils/sort.py +1 -1
upgini/utils/target_utils.py +4 -2
{upgini-1.2.70a3832.dev2.dist-info → upgini-1.2.71.dist-info}/METADATA +3 -4
{upgini-1.2.70a3832.dev2.dist-info → upgini-1.2.71.dist-info}/RECORD +22 -22
upgini/lazy_import.py +0 -35
{upgini-1.2.70a3832.dev2.dist-info → upgini-1.2.71.dist-info}/WHEEL +0 -0
{upgini-1.2.70a3832.dev2.dist-info → upgini-1.2.71.dist-info}/licenses/LICENSE +0 -0

upgini/__about__.py CHANGED Viewed

	@@ -1 +1 @@
1	- __version__ = "1.2.70a3832.dev2"
1	+ __version__ = "1.2.71"

upgini/autofe/date.py CHANGED Viewed

@@ -8,6 +8,7 @@ from pandas.core.arrays.timedeltas import TimedeltaArray
 from pydantic import BaseModel, __version__ as pydantic_version
 from upgini.autofe.operator import PandasOperator, ParametrizedOperator
+from upgini.autofe.utils import pydantic_validator
 def get_pydantic_version():
@@ -209,6 +210,14 @@ class DateListDiffBounded(DateListDiff, ParametrizedOperator):
         return cls(diff_unit=diff_unit, lower_bound=lower_bound, upper_bound=upper_bound, aggregation=aggregation)
+    def get_params(self) -> Dict[str, Optional[str]]:
+        res = super().get_params()
+        if self.lower_bound is not None:
+            res["lower_bound"] = str(self.lower_bound)
+        if self.upper_bound is not None:
+            res["upper_bound"] = str(self.upper_bound)
+        return res
     def _agg(self, x):
         x = x[
             (x >= (self.lower_bound if self.lower_bound is not None else -np.inf))
@@ -269,32 +278,17 @@ class DatePercentile(DatePercentileBase):
             {
                 "zero_month": self.zero_month,
                 "zero_year": self.zero_year,
-                "zero_bounds": self.zero_bounds,
+                "zero_bounds": json.dumps(self.zero_bounds),
                 "step": self.step,
             }
         )
         return res
-    # Check Pydantic version
-    if get_pydantic_version() >= 2:
-        # Use @field_validator for Pydantic 2.x
-        from pydantic import field_validator
-        @field_validator("zero_bounds", mode="before")
-        def parse_zero_bounds(cls, value):
-            if isinstance(value, str):
-                return json.loads(value)
-            return value
-    else:
-        # Use @validator for Pydantic 1.x
-        from pydantic import validator
-        @validator("zero_bounds", pre=True)
-        def parse_zero_bounds(cls, value):
-            if isinstance(value, str):
-                return json.loads(value)
-            return value
+    @pydantic_validator("zero_bounds", mode="before")
+    def parse_zero_bounds(cls, value):
+        if isinstance(value, str):
+            return json.loads(value)
+        return value
     def _get_bounds(self, date_col: pd.Series) -> pd.Series:
         months = date_col.dt.month

upgini/autofe/feature.py CHANGED Viewed

@@ -112,7 +112,11 @@ class Feature:
     def get_hash(self) -> str:
         return hashlib.sha256(
-            "_".join([self.op.get_hash_component()] + [ch.get_display_name() for ch in self.children]).encode("utf-8")
+            "_".join(
+                [self.op.get_hash_component()]
+                + [ch.op.get_hash_component() for ch in self.children if isinstance(ch, Feature)]
+                + [ch.get_display_name() for ch in self.children]
+            ).encode("utf-8")
         ).hexdigest()[:8]
     def set_alias(self, alias: str) -> "Feature":

upgini/autofe/timeseries/base.py CHANGED Viewed

@@ -1,15 +1,9 @@
 import abc
-from typing import Dict, List, Optional
+from typing import Dict, List, Optional, Tuple
 import pandas as pd
 from upgini.autofe.operator import PandasOperator
-# Used in derived classes
-try:
-    from pydantic import field_validator as validator  # V2
-except ImportError:
-    from pydantic import validator  # V1
 class TimeSeriesBase(PandasOperator, abc.ABC):
     is_vector: bool = True
@@ -70,7 +64,7 @@ class TimeSeriesBase(PandasOperator, abc.ABC):
         return base_formula
     @classmethod
-    def _parse_offset_from_formula(cls, formula: str, base_regex: str) -> tuple[Optional[dict], Optional[str]]:
+    def _parse_offset_from_formula(cls, formula: str, base_regex: str) -> Tuple[Optional[Dict], Optional[str]]:
         """
         Parse the offset component from a formula.
@@ -85,7 +79,7 @@ class TimeSeriesBase(PandasOperator, abc.ABC):
         """
         import re
-        offset_regex = f"{base_regex}_offset_(\\d+)([a-zA-Z])"
+        offset_regex = f"{base_regex}_offset_(\\d+)([a-zA-Z])$"
         match = re.match(offset_regex, formula)
         if match:

upgini/autofe/timeseries/cross.py CHANGED Viewed

@@ -1,16 +1,13 @@
+import json
 from typing import Dict, List, Optional
 import numpy as np
 import pandas as pd
-try:
-    from pydantic import field_validator as validator  # V2
-except ImportError:
-    from pydantic import validator  # V1
 from upgini.autofe.all_operators import find_op
 from upgini.autofe.operator import PandasOperator, ParametrizedOperator
 from upgini.autofe.timeseries.base import TimeSeriesBase
+from upgini.autofe.utils import pydantic_validator
 class CrossSeriesInteraction(TimeSeriesBase, ParametrizedOperator):
@@ -20,11 +17,24 @@ class CrossSeriesInteraction(TimeSeriesBase, ParametrizedOperator):
     left_descriptor: List[str] = []
     right_descriptor: List[str] = []
-    @validator("descriptor_indices")
-    @classmethod
+    @pydantic_validator("descriptor_indices", mode="before")
     def validate_descriptor_indices(cls, v):
+        if isinstance(v, str):
+            v = json.loads(v)
         if not v:
-            raise ValueError("descriptor_indices cannot be empty for CrossSeriesInteraction")
+            raise ValueError("descriptor_indices cannot be empty")
+        return v
+    @pydantic_validator("left_descriptor", "right_descriptor", mode="before")
+    def parse_descriptors(cls, v):
+        if isinstance(v, str):
+            return json.loads(v)
+        return v
+    @pydantic_validator("interaction_op", mode="before")
+    def validate_interaction_op(cls, v):
+        if isinstance(v, str):
+            return find_op(v)
         return v
     def __init__(self, **data):
@@ -83,14 +93,14 @@ class CrossSeriesInteraction(TimeSeriesBase, ParametrizedOperator):
         return cls(**params)
-    def get_params(self) -> Dict[str, str | None]:
+    def get_params(self) -> Dict[str, Optional[str]]:
         res = super().get_params()
         res.update(
             {
                 "interaction_op": self._get_interaction_op_name(),
-                "descriptor_indices": self.descriptor_indices,
-                "left_descriptor": self.left_descriptor,
-                "right_descriptor": self.right_descriptor,
+                "descriptor_indices": json.dumps(self.descriptor_indices),
+                "left_descriptor": json.dumps(self.left_descriptor),
+                "right_descriptor": json.dumps(self.right_descriptor),
             }
         )
         return res

upgini/autofe/timeseries/roll.py CHANGED Viewed

@@ -3,6 +3,7 @@ from typing import Dict, Optional
 from upgini.autofe.operator import ParametrizedOperator
 from upgini.autofe.timeseries.base import TimeSeriesBase
+from upgini.autofe.utils import pydantic_validator
 # Roll aggregation functions
 roll_aggregations = {
@@ -12,19 +13,13 @@ roll_aggregations = {
     "iqr": lambda x: x.quantile(0.75) - x.quantile(0.25),
 }
-try:
-    from pydantic import field_validator as validator  # V2
-except ImportError:
-    from pydantic import validator  # V1
 class Roll(TimeSeriesBase, ParametrizedOperator):
     aggregation: str
     window_size: int = 1
     window_unit: str = "D"
-    @validator("window_unit")
-    @classmethod
+    @pydantic_validator("window_unit")
     def validate_window_unit(cls, v: str) -> str:
         try:
             pd.tseries.frequencies.to_offset(v)

upgini/autofe/timeseries/trend.py CHANGED Viewed

@@ -2,10 +2,11 @@ from typing import Dict, Optional, Union
 import numpy as np
 import pandas as pd
+from upgini.autofe.operator import ParametrizedOperator
 from upgini.autofe.timeseries.base import TimeSeriesBase
-class TrendCoefficient(TimeSeriesBase):
+class TrendCoefficient(TimeSeriesBase, ParametrizedOperator):
     name: str = "trend_coef"
     step_size: int = 1
     step_unit: str = "D"

upgini/autofe/utils.py ADDED Viewed

@@ -0,0 +1,83 @@
+"""
+Utility functions for autofe module.
+"""
+import functools
+from typing import Callable
+def get_pydantic_version():
+    """
+    Get the major version of pydantic.
+    Returns:
+        int: Major version number (1 or 2)
+    """
+    try:
+        from pydantic import __version__ as pydantic_version
+        major_version = int(pydantic_version.split(".")[0])
+        return major_version
+    except (ImportError, ValueError):
+        # Default to version 1 if unable to determine
+        return 1
+def pydantic_validator(field_name: str, *fields, mode: str = "before", **kwargs):
+    """
+    A decorator that applies the appropriate Pydantic validator based on the installed version.
+    This decorator handles the differences between Pydantic v1 and v2 validator syntax,
+    making it easier to write code that works with both versions.
+    Args:
+        field_name (str): The name of the field to validate
+        mode (str): The validation mode, either "before" or "after" (for Pydantic v2)
+        **kwargs: Additional arguments to pass to the validator
+    Returns:
+        Callable: A decorator that can be applied to validator methods
+    Example:
+        ```python
+        class MyModel(BaseModel):
+            items: List[int]
+            @pydantic_validator("items")
+            def parse_items(cls, value):
+                if isinstance(value, str):
+                    return [int(x) for x in value.split(",")]
+                return value
+        ```
+    """
+    pydantic_version = get_pydantic_version()
+    if pydantic_version >= 2:
+        # Use field_validator for Pydantic 2.x
+        from pydantic import field_validator
+        def decorator(func: Callable) -> Callable:
+            @field_validator(field_name, *fields, mode=mode, **kwargs)
+            @functools.wraps(func)
+            def wrapper(cls, value, **kw):
+                return func(cls, value)
+            return wrapper
+        return decorator
+    else:
+        # Use validator for Pydantic 1.x
+        from pydantic import validator
+        # Map mode to Pydantic v1 parameters
+        pre = True if mode == "before" else False
+        def decorator(func: Callable) -> Callable:
+            @validator(field_name, *fields, pre=pre, **kwargs)
+            @functools.wraps(func)
+            def wrapper(cls, value, **kw):
+                return func(cls, value)
+            return wrapper
+        return decorator

upgini/dataset.py CHANGED Viewed

@@ -22,6 +22,7 @@ from upgini.metadata import (
     EVAL_SET_INDEX,
     SYSTEM_RECORD_ID,
     TARGET,
+    AutoFEParameters,
     CVType,
     DataType,
     FeaturesFilter,
@@ -558,6 +559,7 @@ class Dataset:  # (pd.DataFrame):
         filter_features: Optional[dict] = None,
         runtime_parameters: Optional[RuntimeParameters] = None,
         metrics_calculation: Optional[bool] = False,
+        auto_fe_parameters: Optional[AutoFEParameters] = None,
     ) -> SearchCustomization:
         # self.logger.info("Constructing search customization")
         search_customization = SearchCustomization(
@@ -585,7 +587,10 @@ class Dataset:  # (pd.DataFrame):
             search_customization.featuresFilter = feature_filter
         search_customization.runtimeParameters.properties["etalon_imbalanced"] = self.imbalanced
+        if auto_fe_parameters is not None:
+            search_customization.runtimeParameters.properties["feature_generation_params.ts.gap_days"] = (
+                auto_fe_parameters.ts_gap_days
+            )
         return search_customization
     def _rename_generate_features(self, runtime_parameters: Optional[RuntimeParameters]) -> Optional[RuntimeParameters]:
@@ -640,6 +645,7 @@ class Dataset:  # (pd.DataFrame):
         max_features: Optional[int] = None,  # deprecated
         filter_features: Optional[dict] = None,  # deprecated
         runtime_parameters: Optional[RuntimeParameters] = None,
+        auto_fe_parameters: Optional[AutoFEParameters] = None,
         force_downsampling: bool = False,
     ) -> SearchTask:
         if self.etalon_def is None:
@@ -658,6 +664,7 @@ class Dataset:  # (pd.DataFrame):
             max_features=max_features,
             filter_features=filter_features,
             runtime_parameters=runtime_parameters,
+            auto_fe_parameters=auto_fe_parameters,
         )
         if self.file_upload_id is not None and self.rest_client.check_uploaded_file_v2(

upgini 1.2.70a3832.dev2__py3-none-any.whl → 1.2.71__py3-none-any.whl

Potentially problematic release.

upgini 1.2.70a3832.dev2__py3-none-any.whl → 1.2.71__py3-none-any.whl