PyPI - upgini - Versions diffs - 1.1.280a3418.post2__py3-none-any.whl → 1.2.31__py3-none-any.whl - Mend

upgini 1.1.280a3418.post2__py3-none-any.whl → 1.2.31__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of upgini might be problematic. Click here for more details.

Files changed (43)

upgini/__about__.py +1 -1
upgini/__init__.py +4 -20
upgini/autofe/all_operands.py +39 -10
upgini/autofe/binary.py +148 -45
upgini/autofe/date.py +197 -26
upgini/autofe/feature.py +102 -19
upgini/autofe/groupby.py +22 -22
upgini/autofe/operand.py +9 -6
upgini/autofe/unary.py +78 -54
upgini/autofe/vector.py +8 -8
upgini/data_source/data_source_publisher.py +128 -5
upgini/dataset.py +50 -386
upgini/features_enricher.py +936 -541
upgini/http.py +27 -16
upgini/lazy_import.py +35 -0
upgini/metadata.py +84 -59
upgini/metrics.py +164 -34
upgini/normalizer/normalize_utils.py +197 -0
upgini/resource_bundle/strings.properties +66 -51
upgini/search_task.py +10 -4
upgini/utils/Roboto-Regular.ttf +0 -0
upgini/utils/base_search_key_detector.py +14 -12
upgini/utils/country_utils.py +16 -0
upgini/utils/custom_loss_utils.py +39 -36
upgini/utils/datetime_utils.py +98 -45
upgini/utils/deduplicate_utils.py +135 -112
upgini/utils/display_utils.py +46 -15
upgini/utils/email_utils.py +54 -16
upgini/utils/feature_info.py +172 -0
upgini/utils/features_validator.py +34 -20
upgini/utils/ip_utils.py +100 -1
upgini/utils/phone_utils.py +343 -0
upgini/utils/postal_code_utils.py +34 -0
upgini/utils/sklearn_ext.py +28 -19
upgini/utils/target_utils.py +113 -57
upgini/utils/warning_counter.py +1 -0
upgini/version_validator.py +8 -4
{upgini-1.1.280a3418.post2.dist-info → upgini-1.2.31.dist-info}/METADATA +31 -16
upgini-1.2.31.dist-info/RECORD +65 -0
upgini/normalizer/phone_normalizer.py +0 -340
upgini-1.1.280a3418.post2.dist-info/RECORD +0 -62
{upgini-1.1.280a3418.post2.dist-info → upgini-1.2.31.dist-info}/WHEEL +0 -0
{upgini-1.1.280a3418.post2.dist-info → upgini-1.2.31.dist-info}/licenses/LICENSE +0 -0

upgini/__about__.py CHANGED Viewed

	@@ -1 +1 @@
1	- __version__ = "1.1.280a3418-2"
1	+ __version__ = "1.2.31"

upgini/__init__.py CHANGED Viewed

@@ -1,21 +1,5 @@
-from typing import List
+from upgini.features_enricher import FeaturesEnricher  # noqa: F401
+from upgini.metadata import SearchKey, CVType, RuntimeParameters, ModelTaskType  # noqa: F401
+import warnings
-from .dataset import Dataset
-from .features_enricher import FeaturesEnricher  # noqa: F401
-from .metadata import (  # noqa: F401
-    FileColumnMeaningType,
-    FileMetrics,
-    ModelTaskType,
-    SearchKey,
-)
-from .search_task import SearchTask
-def search_history() -> List[SearchTask]:
-    # TODO
-    return []
-def datasets_history() -> List[Dataset]:
-    # TODO
-    return []
+warnings.filterwarnings("ignore", category=UserWarning, module="_distutils_hack")

upgini/autofe/all_operands.py CHANGED Viewed

@@ -1,10 +1,32 @@
+from copy import deepcopy
 from typing import Dict
-from upgini.autofe.binary import Add, Divide, Max, Min, Multiply, Sim, Subtract
-from upgini.autofe.date import DateDiff, DateDiffType2, DateListDiff, DateListDiffBounded
-from upgini.autofe.groupby import GroupByThenAgg, GroupByThenRank
+from upgini.autofe.binary import (
+    Add,
+    Combine,
+    CombineThenFreq,
+    Distance,
+    Divide,
+    JaroWinklerSim1,
+    JaroWinklerSim2,
+    LevenshteinSim,
+    Max,
+    Min,
+    Multiply,
+    Sim,
+    Subtract,
+)
+from upgini.autofe.date import (
+    DateDiff,
+    DateDiffType2,
+    DateListDiff,
+    DateListDiffBounded,
+    DatePercentile,
+    DatePercentileMethod2,
+)
+from upgini.autofe.groupby import GroupByThenAgg, GroupByThenFreq, GroupByThenNUnique, GroupByThenRank
 from upgini.autofe.operand import Operand
-from upgini.autofe.unary import Abs, Bin, Floor, Freq, Log, Residual, Sigmoid, Sqrt, Square
+from upgini.autofe.unary import Abs, Embeddings, Floor, Freq, Log, Residual, Norm, Sigmoid, Sqrt, Square
 from upgini.autofe.vector import Mean, Sum
 ALL_OPERANDS: Dict[str, Operand] = {
@@ -32,10 +54,10 @@ ALL_OPERANDS: Dict[str, Operand] = {
         GroupByThenAgg(name="GroupByThenMedian", agg="median"),
         GroupByThenAgg(name="GroupByThenStd", output_type="float", agg="std"),
         GroupByThenRank(),
-        Operand(name="Combine", has_symmetry_importance=True, output_type="object", is_categorical=True),
-        Operand(name="CombineThenFreq", has_symmetry_importance=True, output_type="float"),
-        Operand(name="GroupByThenNUnique", output_type="int", is_vectorizable=True, is_grouping=True),
-        Operand(name="GroupByThenFreq", output_type="float", is_grouping=True),
+        Combine(),
+        CombineThenFreq(),
+        GroupByThenNUnique(),
+        GroupByThenFreq(),
         Sim(),
         DateDiff(),
         DateDiffType2(),
@@ -49,10 +71,17 @@ ALL_OPERANDS: Dict[str, Operand] = {
         DateListDiffBounded(diff_unit="Y", aggregation="count", lower_bound=30, upper_bound=45),
         DateListDiffBounded(diff_unit="Y", aggregation="count", lower_bound=45, upper_bound=60),
         DateListDiffBounded(diff_unit="Y", aggregation="count", lower_bound=60),
-        Bin(),
+        DatePercentile(),
+        DatePercentileMethod2(),
+        Norm(),
+        JaroWinklerSim1(),
+        JaroWinklerSim2(),
+        LevenshteinSim(),
+        Distance(),
+        Embeddings(),
     ]
 }
 def find_op(name):
-    return ALL_OPERANDS.get(name)
+    return deepcopy(ALL_OPERANDS.get(name))

upgini/autofe/binary.py CHANGED Viewed

@@ -1,35 +1,40 @@
+import abc
+from typing import Optional
+import Levenshtein
 import numpy as np
 import pandas as pd
-from numpy import dot
-from numpy.linalg import norm
+from jarowinkler import jarowinkler_similarity
 from upgini.autofe.operand import PandasOperand, VectorizableMixin
 class Min(PandasOperand):
-    name = "min"
-    is_binary = True
-    has_symmetry_importance = True
+    name: str = "min"
+    is_binary: bool = True
+    is_symmetrical: bool = True
+    has_symmetry_importance: bool = True
     def calculate_binary(self, left: pd.Series, right: pd.Series) -> pd.Series:
         return np.minimum(left, right)
 class Max(PandasOperand):
-    name = "max"
-    is_binary = True
-    has_symmetry_importance = True
+    name: str = "max"
+    is_binary: bool = True
+    is_symmetrical: bool = True
+    has_symmetry_importance: bool = True
     def calculate_binary(self, left: pd.Series, right: pd.Series) -> pd.Series:
         return np.maximum(left, right)
 class Add(PandasOperand, VectorizableMixin):
-    name = "+"
-    alias = "add"
-    is_binary = True
-    has_symmetry_importance = True
-    is_vectorizable = True
+    name: str = "+"
+    alias: str = "add"
+    is_binary: bool = True
+    is_symmetrical: bool = True
+    has_symmetry_importance: bool = True
+    is_vectorizable: bool = True
     def calculate_binary(self, left: pd.Series, right: pd.Series) -> pd.Series:
         return left + right
@@ -43,11 +48,12 @@ class Add(PandasOperand, VectorizableMixin):
 class Subtract(PandasOperand, VectorizableMixin):
-    name = "-"
-    alias = "sub"
-    is_binary = True
-    has_symmetry_importance = True
-    is_vectorizable = True
+    name: str = "-"
+    alias: str = "sub"
+    is_binary: bool = True
+    is_symmetrical: bool = True
+    has_symmetry_importance: bool = True
+    is_vectorizable: bool = True
     def calculate_binary(self, left: pd.Series, right: pd.Series) -> pd.Series:
         return left - right
@@ -61,11 +67,12 @@ class Subtract(PandasOperand, VectorizableMixin):
 class Multiply(PandasOperand, VectorizableMixin):
-    name = "*"
-    alias = "mul"
-    is_binary = True
-    has_symmetry_importance = True
-    is_vectorizable = True
+    name: str = "*"
+    alias: str = "mul"
+    is_binary: bool = True
+    is_symmetrical: bool = True
+    has_symmetry_importance: bool = True
+    is_vectorizable: bool = True
     def calculate_binary(self, left: pd.Series, right: pd.Series) -> pd.Series:
         return left * right
@@ -79,12 +86,12 @@ class Multiply(PandasOperand, VectorizableMixin):
 class Divide(PandasOperand, VectorizableMixin):
-    name = "/"
-    alias = "div"
-    is_binary = True
-    has_symmetry_importance = True
-    is_vectorizable = True
-    output_type = "float"
+    name: str = "/"
+    alias: str = "div"
+    is_binary: bool = True
+    has_symmetry_importance: bool = True
+    is_vectorizable: bool = True
+    output_type: Optional[str] = "float"
     def calculate_binary(self, left: pd.Series, right: pd.Series) -> pd.Series:
         return left / right.replace(0, np.nan)
@@ -98,10 +105,10 @@ class Divide(PandasOperand, VectorizableMixin):
 class Combine(PandasOperand):
-    name = "Combine"
-    is_binary = True
-    has_symmetry_importance = True
-    output_type = "object"
+    name: str = "Combine"
+    is_binary: bool = True
+    has_symmetry_importance: bool = True
+    output_type: Optional[str] = "object"
     def calculate_binary(self, left: pd.Series, right: pd.Series) -> pd.Series:
         temp = left.astype(str) + "_" + right.astype(str)
@@ -110,12 +117,13 @@ class Combine(PandasOperand):
 class CombineThenFreq(PandasOperand):
-    name = "CombineThenFreq"
-    is_binary = True
-    has_symmetry_importance = True
-    output_type = "float"
-    is_distribution_dependent = True
-    input_type = "discrete"
+    name: str = "CombineThenFreq"
+    is_binary: bool = True
+    is_symmetrical: bool = True
+    has_symmetry_importance: bool = True
+    output_type: Optional[str] = "float"
+    is_distribution_dependent: bool = True
+    input_type: Optional[str] = "discrete"
     def calculate_binary(self, left: pd.Series, right: pd.Series) -> pd.Series:
         temp = left.astype(str) + "_" + right.astype(str)
@@ -124,11 +132,106 @@ class CombineThenFreq(PandasOperand):
         self._loc(temp, value_counts)
-class Sim(PandasOperand):
-    name = "sim"
-    is_binary = True
-    output_type = "float"
-    has_symmetry_importance = True
+class Distance(PandasOperand):
+    name: str = "dist"
+    is_binary: bool = True
+    output_type: Optional[str] = "float"
+    is_symmetrical: bool = True
+    has_symmetry_importance: bool = True
     def calculate_binary(self, left: pd.Series, right: pd.Series) -> pd.Series:
-        return dot(left, right) / (norm(left) * norm(right))
+        return pd.Series(
+            1 - self.__dot(left, right) / (self.__norm(left) * self.__norm(right)), index=left.index
+        ).astype(np.float64)
+    # row-wise dot product, handling None values
+    def __dot(self, left: pd.Series, right: pd.Series) -> pd.Series:
+        left = left.apply(lambda x: np.array(x))
+        right = right.apply(lambda x: np.array(x))
+        res = (left.dropna() * right.dropna()).apply(np.sum)
+        res = res.reindex(left.index.union(right.index))
+        return res
+    # Calculate the norm of a vector, handling None values
+    def __norm(self, vector: pd.Series) -> pd.Series:
+        vector = vector.fillna(np.nan)
+        return np.sqrt(self.__dot(vector, vector))
+# Left for backward compatibility
+class Sim(Distance):
+    name: str = "sim"
+    is_binary: bool = True
+    output_type: Optional[str] = "float"
+    is_symmetrical: bool = True
+    has_symmetry_importance: bool = True
+    def calculate_binary(self, left: pd.Series, right: pd.Series) -> pd.Series:
+        return 1 - super().calculate_binary(left, right)
+class StringSim(PandasOperand, abc.ABC):
+    def calculate_binary(self, left: pd.Series, right: pd.Series) -> pd.Series:
+        sims = []
+        for i in left.index:
+            left_i = self._prepare_value(left.get(i))
+            right_i = self._prepare_value(right.get(i))
+            if left_i is not None and right_i is not None:
+                sims.append(self._similarity(left_i, right_i))
+            else:
+                sims.append(None)
+        return pd.Series(sims, index=left.index)
+    @abc.abstractmethod
+    def _prepare_value(self, value: Optional[str]) -> Optional[str]:
+        pass
+    @abc.abstractmethod
+    def _similarity(self, left: str, right: str) -> float:
+        pass
+class JaroWinklerSim1(StringSim):
+    name: str = "sim_jw1"
+    is_binary: bool = True
+    input_type: Optional[str] = "string"
+    output_type: Optional[str] = "float"
+    is_symmetrical: bool = True
+    has_symmetry_importance: bool = True
+    def _prepare_value(self, value: Optional[str]) -> Optional[str]:
+        return value
+    def _similarity(self, left: str, right: str) -> float:
+        return jarowinkler_similarity(left, right)
+class JaroWinklerSim2(StringSim):
+    name: str = "sim_jw2"
+    is_binary: bool = True
+    input_type: Optional[str] = "string"
+    output_type: Optional[str] = "float"
+    is_symmetrical: bool = True
+    has_symmetry_importance: bool = True
+    def _prepare_value(self, value: Optional[str]) -> Optional[str]:
+        return value[::-1] if value is not None else None
+    def _similarity(self, left: str, right: str) -> float:
+        return jarowinkler_similarity(left, right)
+class LevenshteinSim(StringSim):
+    name: str = "sim_lv"
+    is_binary: bool = True
+    input_type: Optional[str] = "string"
+    output_type: Optional[str] = "float"
+    is_symmetrical: bool = True
+    has_symmetry_importance: bool = True
+    def _prepare_value(self, value: Optional[str]) -> Optional[str]:
+        return value
+    def _similarity(self, left: str, right: str) -> float:
+        return 1 - Levenshtein.distance(left, right) / max(len(left), len(right))

upgini/autofe/date.py CHANGED Viewed

@@ -1,13 +1,20 @@
-from typing import Any, Optional, Union
+import abc
+import json
+from typing import Any, Dict, List, Optional, Union
 import numpy as np
 import pandas as pd
 from pandas.core.arrays.timedeltas import TimedeltaArray
-from pydantic import BaseModel
+from pydantic import BaseModel, __version__ as pydantic_version
 from upgini.autofe.operand import PandasOperand
+def get_pydantic_version():
+    major_version = int(pydantic_version.split('.')[0])
+    return major_version
 class DateDiffMixin(BaseModel):
     diff_unit: str = "D"
     left_unit: Optional[str] = None
@@ -19,34 +26,76 @@ class DateDiffMixin(BaseModel):
         if isinstance(x, pd.DataFrame):
             return x.apply(lambda y: self._convert_to_date(y, unit), axis=1)
-        return pd.to_datetime(x, unit=unit)
+        return pd.to_datetime(x, unit=unit, errors="coerce")
+    def _convert_diff_to_unit(self, diff: Union[pd.Series, TimedeltaArray]) -> Union[pd.Series, TimedeltaArray]:
+        if self.diff_unit == "D":
+            if isinstance(diff, pd.Series) and diff.dtype == "object":
+                return diff.apply(lambda x: None if isinstance(x, float) and np.isnan(x) else x.days)
+            else:
+                return diff / np.timedelta64(1, self.diff_unit)
+        elif self.diff_unit == "Y":
+            if isinstance(diff, TimedeltaArray):
+                return (diff / 365 / 24 / 60 / 60 / 10**9).astype(int)
+            else:
+                return (diff / 365 / 24 / 60 / 60 / 10**9).dt.nanoseconds
+        else:
+            raise Exception(f"Unsupported difference unit: {self.diff_unit}")
 class DateDiff(PandasOperand, DateDiffMixin):
-    name = "date_diff"
-    is_binary = True
-    has_symmetry_importance = True
+    name: str = "date_diff"
+    alias: Optional[str] = "date_diff_type1"
+    is_binary: bool = True
+    has_symmetry_importance: bool = True
+    replace_negative: bool = False
+    def get_params(self) -> Dict[str, Optional[str]]:
+        res = super().get_params()
+        res.update(
+            {
+                "diff_unit": self.diff_unit,
+                "left_unit": self.left_unit,
+                "right_unit": self.right_unit,
+                "replace_negative": self.replace_negative,
+            }
+        )
+        return res
     def calculate_binary(self, left: pd.Series, right: pd.Series) -> pd.Series:
         left = self._convert_to_date(left, self.left_unit)
         right = self._convert_to_date(right, self.right_unit)
-        return self.__replace_negative((left - right) / np.timedelta64(1, self.diff_unit))
+        diff = self._convert_diff_to_unit(left.dt.date - right.dt.date)
+        return self.__replace_negative(diff)
     def __replace_negative(self, x: Union[pd.DataFrame, pd.Series]):
-        x[x < 0] = None
+        if self.replace_negative:
+            x[x < 0] = None
         return x
 class DateDiffType2(PandasOperand, DateDiffMixin):
-    name = "date_diff_type2"
-    is_binary = True
-    has_symmetry_importance = True
+    name: str = "date_diff_type2"
+    is_binary: bool = True
+    has_symmetry_importance: bool = True
+    def get_params(self) -> Dict[str, Optional[str]]:
+        res = super().get_params()
+        res.update(
+            {
+                "diff_unit": self.diff_unit,
+                "left_unit": self.left_unit,
+                "right_unit": self.right_unit,
+            }
+        )
+        return res
     def calculate_binary(self, left: pd.Series, right: pd.Series) -> pd.Series:
         left = self._convert_to_date(left, self.left_unit)
         right = self._convert_to_date(right, self.right_unit)
         future = right + (left.dt.year - right.dt.year).apply(
-            lambda y: np.datetime64("NaT") if np.isnan(y) else pd.tseries.offsets.DateOffset(years=y)
+            lambda y: pd.tseries.offsets.DateOffset(years=0 if np.isnan(y) else y)
         )
         future = pd.to_datetime(future)
         before = future[future < left]
@@ -57,12 +106,28 @@ class DateDiffType2(PandasOperand, DateDiffMixin):
 _ext_aggregations = {"nunique": (lambda x: len(np.unique(x)), 0), "count": (len, 0)}
+_count_aggregations = ["nunique", "count"]
 class DateListDiff(PandasOperand, DateDiffMixin):
-    is_binary = True
-    has_symmetry_importance = True
+    is_binary: bool = True
+    has_symmetry_importance: bool = True
     aggregation: str
+    replace_negative: bool = False
+    def get_params(self) -> Dict[str, Optional[str]]:
+        res = super().get_params()
+        res.update(
+            {
+                "aggregation": self.aggregation,
+                "diff_unit": self.diff_unit,
+                "left_unit": self.left_unit,
+                "right_unit": self.right_unit,
+                "replace_negative": self.replace_negative,
+            }
+        )
+        return res
     def __init__(self, **data: Any) -> None:
         if "name" not in data:
@@ -71,18 +136,28 @@ class DateListDiff(PandasOperand, DateDiffMixin):
     def calculate_binary(self, left: pd.Series, right: pd.Series) -> pd.Series:
         left = self._convert_to_date(left, self.left_unit)
-        right = right.apply(lambda x: pd.arrays.DatetimeArray(self._convert_to_date(x, self.right_unit)))
+        right_mask = right.apply(lambda x: len(x) > 0)
+        mask = left.notna() & right.notna() & right_mask
+        right_masked = right[mask].apply(lambda x: pd.arrays.DatetimeArray(self._convert_to_date(x, self.right_unit)))
+        if len(right_masked) == 0:
+            diff = []
+        elif len(right_masked) < 2:
+            diff = [left[mask].iloc[0] - right_masked.iloc[0]]
+        else:
+            diff = left[mask] - right_masked.values
-        return pd.Series(left - right.values).apply(lambda x: self._agg(self._diff(x)))
+        res_masked = pd.Series(diff, index=left[mask].index).apply(lambda x: self._agg(self._diff(x)))
+        res = res_masked.reindex(left.index.union(right.index))
+        if self.aggregation in _count_aggregations:
+            res[~right_mask] = 0.0
+        res = res.astype(np.float64)
+        return res
     def _diff(self, x: TimedeltaArray):
-        if self.diff_unit == "Y":
-            x = (x / 365 / 24 / 60 / 60 / 10**9).astype(int)
-        elif self.diff_unit == "M":
-            raise Exception("Unsupported difference unit: Month")
-        else:
-            x = x / np.timedelta64(1, self.diff_unit)
-        return x[x > 0]
+        x = self._convert_diff_to_unit(x)
+        return x[x > 0] if self.replace_negative else x
     def _agg(self, x):
         method = getattr(np, self.aggregation, None)
@@ -96,8 +171,8 @@ class DateListDiff(PandasOperand, DateDiffMixin):
 class DateListDiffBounded(DateListDiff):
-    lower_bound: Optional[int]
-    upper_bound: Optional[int]
+    lower_bound: Optional[int] = None
+    upper_bound: Optional[int] = None
     def __init__(self, **data: Any) -> None:
         if "name" not in data:
@@ -114,5 +189,101 @@ class DateListDiffBounded(DateListDiff):
         super().__init__(**data)
     def _agg(self, x):
-        x = x[(x >= (self.lower_bound or -np.inf)) & (x < (self.upper_bound or np.inf))]
+        x = x[
+            (x >= (self.lower_bound if self.lower_bound is not None else -np.inf))
+            & (x < (self.upper_bound if self.upper_bound is not None else np.inf))
+        ]
         return super()._agg(x)
+class DatePercentileBase(PandasOperand, abc.ABC):
+    is_binary: bool = True
+    output_type: Optional[str] = "float"
+    date_unit: Optional[str] = None
+    def calculate_binary(self, left: pd.Series, right: pd.Series) -> pd.Series:
+        # Assuming that left is a date column, right is a feature column
+        left = pd.to_datetime(left, unit=self.date_unit)
+        bounds = self._get_bounds(left)
+        return right.index.to_series().apply(lambda i: self._perc(right[i], bounds[i]))
+    @abc.abstractmethod
+    def _get_bounds(self, date_col: pd.Series) -> pd.Series:
+        pass
+    def _perc(self, f, bounds):
+        hit = np.where(f >= np.array(bounds))[0]
+        if hit.size > 0:
+            return np.max(hit) + 1
+        else:
+            return np.nan
+    def get_params(self) -> Dict[str, Optional[str]]:
+        res = super().get_params()
+        res.update(
+            {
+                "date_unit": self.date_unit,
+            }
+        )
+        return res
+class DatePercentile(DatePercentileBase):
+    name: str = "date_per"
+    alias: Optional[str] = "date_per_method1"
+    zero_month: Optional[int] = None
+    zero_year: Optional[int] = None
+    zero_bounds: Optional[List[float]] = None
+    step: int = 30
+    def get_params(self) -> Dict[str, Optional[str]]:
+        res = super().get_params()
+        res.update(
+            {
+                "zero_month": self.zero_month,
+                "zero_year": self.zero_year,
+                "zero_bounds": self.zero_bounds,
+                "step": self.step,
+            }
+        )
+        return res
+    # Check Pydantic version
+    if get_pydantic_version() >= 2:
+        # Use @field_validator for Pydantic 2.x
+        from pydantic import field_validator
+        @field_validator('zero_bounds', mode='before')
+        def parse_zero_bounds(cls, value):
+            if isinstance(value, str):
+                return json.loads(value)
+            return value
+    else:
+        # Use @validator for Pydantic 1.x
+        from pydantic import validator
+        @validator('zero_bounds', pre=True)
+        def parse_zero_bounds(cls, value):
+            if isinstance(value, str):
+                return json.loads(value)
+            return value
+    def _get_bounds(self, date_col: pd.Series) -> pd.Series:
+        months = date_col.dt.month
+        years = date_col.dt.year
+        month_diffs = 12 * (years - (self.zero_year or 0)) + (months - (self.zero_month or 0))
+        return month_diffs.apply(
+            lambda d: np.array(self.zero_bounds if self.zero_bounds is not None else []) + d * self.step
+        )
+class DatePercentileMethod2(DatePercentileBase):
+    name: str = "date_per_method2"
+    def _get_bounds(self, date_col: pd.Series) -> pd.Series:
+        pass

upgini 1.1.280a3418.post2__py3-none-any.whl → 1.2.31__py3-none-any.whl

Potentially problematic release.

upgini 1.1.280a3418.post2__py3-none-any.whl → 1.2.31__py3-none-any.whl