PyPI - upgini - Versions diffs - 1.1.316__py3-none-any.whl → 1.1.316a1__py3-none-any.whl - Mend

upgini 1.1.316-py3-none-any.whl → 1.1.316a1-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of upgini might be problematic. Click here for more details.

Files changed (20)

upgini/__about__.py +1 -1
upgini/autofe/binary.py +72 -75
upgini/autofe/date.py +21 -21
upgini/autofe/feature.py +2 -2
upgini/autofe/groupby.py +22 -22
upgini/autofe/operand.py +4 -4
upgini/autofe/unary.py +47 -46
upgini/autofe/vector.py +8 -8
upgini/features_enricher.py +3 -2
upgini/http.py +20 -31
upgini/lazy_import.py +14 -1
upgini/metadata.py +57 -57
upgini/normalizer/normalize_utils.py +1 -2
upgini/utils/datetime_utils.py +5 -5
upgini/utils/phone_utils.py +5 -7
upgini/utils/target_utils.py +4 -1
{upgini-1.1.316.dist-info → upgini-1.1.316a1.dist-info}/METADATA +3 -3
{upgini-1.1.316.dist-info → upgini-1.1.316a1.dist-info}/RECORD +20 -20
{upgini-1.1.316.dist-info → upgini-1.1.316a1.dist-info}/WHEEL +0 -0
{upgini-1.1.316.dist-info → upgini-1.1.316a1.dist-info}/licenses/LICENSE +0 -0

upgini/__about__.py CHANGED Viewed

	@@ -1 +1 @@
1	- __version__ = "1.1.316"
1	+ __version__ = "1.1.316a1"

upgini/autofe/binary.py CHANGED Viewed

@@ -9,32 +9,32 @@ from upgini.autofe.operand import PandasOperand, VectorizableMixin
 class Min(PandasOperand):
-    name = "min"
-    is_binary = True
-    is_symmetrical = True
-    has_symmetry_importance = True
+    name: str = "min"
+    is_binary: bool = True
+    is_symmetrical: bool = True
+    has_symmetry_importance: bool = True
     def calculate_binary(self, left: pd.Series, right: pd.Series) -> pd.Series:
         return np.minimum(left, right)
 class Max(PandasOperand):
-    name = "max"
-    is_binary = True
-    is_symmetrical = True
-    has_symmetry_importance = True
+    name: str = "max"
+    is_binary: bool = True
+    is_symmetrical: bool = True
+    has_symmetry_importance: bool = True
     def calculate_binary(self, left: pd.Series, right: pd.Series) -> pd.Series:
         return np.maximum(left, right)
 class Add(PandasOperand, VectorizableMixin):
-    name = "+"
-    alias = "add"
-    is_binary = True
-    is_symmetrical = True
-    has_symmetry_importance = True
-    is_vectorizable = True
+    name: str = "+"
+    alias: str = "add"
+    is_binary: bool = True
+    is_symmetrical: bool = True
+    has_symmetry_importance: bool = True
+    is_vectorizable: bool = True
     def calculate_binary(self, left: pd.Series, right: pd.Series) -> pd.Series:
         return left + right
@@ -48,12 +48,12 @@ class Add(PandasOperand, VectorizableMixin):
 class Subtract(PandasOperand, VectorizableMixin):
-    name = "-"
-    alias = "sub"
-    is_binary = True
-    is_symmetrical = True
-    has_symmetry_importance = True
-    is_vectorizable = True
+    name: str = "-"
+    alias: str = "sub"
+    is_binary: bool = True
+    is_symmetrical: bool = True
+    has_symmetry_importance: bool = True
+    is_vectorizable: bool = True
     def calculate_binary(self, left: pd.Series, right: pd.Series) -> pd.Series:
         return left - right
@@ -67,12 +67,12 @@ class Subtract(PandasOperand, VectorizableMixin):
 class Multiply(PandasOperand, VectorizableMixin):
-    name = "*"
-    alias = "mul"
-    is_binary = True
-    is_symmetrical = True
-    has_symmetry_importance = True
-    is_vectorizable = True
+    name: str = "*"
+    alias: str = "mul"
+    is_binary: bool = True
+    is_symmetrical: bool = True
+    has_symmetry_importance: bool = True
+    is_vectorizable: bool = True
     def calculate_binary(self, left: pd.Series, right: pd.Series) -> pd.Series:
         return left * right
@@ -86,12 +86,12 @@ class Multiply(PandasOperand, VectorizableMixin):
 class Divide(PandasOperand, VectorizableMixin):
-    name = "/"
-    alias = "div"
-    is_binary = True
-    has_symmetry_importance = True
-    is_vectorizable = True
-    output_type = "float"
+    name: str = "/"
+    alias: str = "div"
+    is_binary: bool = True
+    has_symmetry_importance: bool = True
+    is_vectorizable: bool = True
+    output_type: Optional[str] = "float"
     def calculate_binary(self, left: pd.Series, right: pd.Series) -> pd.Series:
         return left / right.replace(0, np.nan)
@@ -105,10 +105,10 @@ class Divide(PandasOperand, VectorizableMixin):
 class Combine(PandasOperand):
-    name = "Combine"
-    is_binary = True
-    has_symmetry_importance = True
-    output_type = "object"
+    name: str = "Combine"
+    is_binary: bool = True
+    has_symmetry_importance: bool = True
+    output_type: Optional[str] = "object"
     def calculate_binary(self, left: pd.Series, right: pd.Series) -> pd.Series:
         temp = left.astype(str) + "_" + right.astype(str)
@@ -117,13 +117,13 @@ class Combine(PandasOperand):
 class CombineThenFreq(PandasOperand):
-    name = "CombineThenFreq"
-    is_binary = True
-    is_symmetrical = True
-    has_symmetry_importance = True
-    output_type = "float"
-    is_distribution_dependent = True
-    input_type = "discrete"
+    name: str = "CombineThenFreq"
+    is_binary: bool = True
+    is_symmetrical: bool = True
+    has_symmetry_importance: bool = True
+    output_type: Optional[str] = "float"
+    is_distribution_dependent: bool = True
+    input_type: Optional[str] = "discrete"
     def calculate_binary(self, left: pd.Series, right: pd.Series) -> pd.Series:
         temp = left.astype(str) + "_" + right.astype(str)
@@ -133,15 +133,15 @@ class CombineThenFreq(PandasOperand):
 class Distance(PandasOperand):
-    name = "dist"
-    is_binary = True
-    output_type = "float"
-    is_symmetrical = True
-    has_symmetry_importance = True
+    name: str = "dist"
+    is_binary: bool = True
+    output_type: Optional[str] = "float"
+    is_symmetrical: bool = True
+    has_symmetry_importance: bool = True
     def calculate_binary(self, left: pd.Series, right: pd.Series) -> pd.Series:
         return pd.Series(
-            1 - self.__dot(left, right) / (self.__norm(left) * self.__norm(right)), index=left.index
+            1 - self.__dot(left, right) / (self.__dot(left, left) * self.__dot(right, right)), index=left.index
         )
     # row-wise dot product
@@ -152,17 +152,14 @@ class Distance(PandasOperand):
         res = res.reindex(left.index.union(right.index))
         return res
-    def __norm(self, vector: pd.Series) -> pd.Series:
-        return np.sqrt(self.__dot(vector, vector))
 # Left for backward compatibility
 class Sim(Distance):
-    name = "sim"
-    is_binary = True
-    output_type = "float"
-    is_symmetrical = True
-    has_symmetry_importance = True
+    name: str = "sim"
+    is_binary: bool = True
+    output_type: Optional[str] = "float"
+    is_symmetrical: bool = True
+    has_symmetry_importance: bool = True
     def calculate_binary(self, left: pd.Series, right: pd.Series) -> pd.Series:
         return 1 - super().calculate_binary(left, right)
@@ -191,12 +188,12 @@ class StringSim(PandasOperand, abc.ABC):
 class JaroWinklerSim1(StringSim):
-    name = "sim_jw1"
-    is_binary = True
-    input_type = "string"
-    output_type = "float"
-    is_symmetrical = True
-    has_symmetry_importance = True
+    name: str = "sim_jw1"
+    is_binary: bool = True
+    input_type: Optional[str] = "string"
+    output_type: Optional[str] = "float"
+    is_symmetrical: bool = True
+    has_symmetry_importance: bool = True
     def _prepare_value(self, value: Optional[str]) -> Optional[str]:
         return value
@@ -206,12 +203,12 @@ class JaroWinklerSim1(StringSim):
 class JaroWinklerSim2(StringSim):
-    name = "sim_jw2"
-    is_binary = True
-    input_type = "string"
-    output_type = "float"
-    is_symmetrical = True
-    has_symmetry_importance = True
+    name: str = "sim_jw2"
+    is_binary: bool = True
+    input_type: Optional[str] = "string"
+    output_type: Optional[str] = "float"
+    is_symmetrical: bool = True
+    has_symmetry_importance: bool = True
     def _prepare_value(self, value: Optional[str]) -> Optional[str]:
         return value[::-1] if value is not None else None
@@ -221,12 +218,12 @@ class JaroWinklerSim2(StringSim):
 class LevenshteinSim(StringSim):
-    name = "sim_lv"
-    is_binary = True
-    input_type = "string"
-    output_type = "float"
-    is_symmetrical = True
-    has_symmetry_importance = True
+    name: str = "sim_lv"
+    is_binary: bool = True
+    input_type: Optional[str] = "string"
+    output_type: Optional[str] = "float"
+    is_symmetrical: bool = True
+    has_symmetry_importance: bool = True
     def _prepare_value(self, value: Optional[str]) -> Optional[str]:
         return value

upgini/autofe/date.py CHANGED Viewed

@@ -4,7 +4,7 @@ from typing import Any, Dict, List, Optional, Union
 import numpy as np
 import pandas as pd
 from pandas.core.arrays.timedeltas import TimedeltaArray
-from pydantic import BaseModel, validator
+from pydantic import BaseModel, field_validator
 from upgini.autofe.operand import PandasOperand
@@ -38,10 +38,10 @@ class DateDiffMixin(BaseModel):
 class DateDiff(PandasOperand, DateDiffMixin):
-    name = "date_diff"
-    alias = "date_diff_type1"
-    is_binary = True
-    has_symmetry_importance = True
+    name: str = "date_diff"
+    alias: Optional[str] = "date_diff_type1"
+    is_binary: bool = True
+    has_symmetry_importance: bool = True
     replace_negative: bool = False
@@ -70,9 +70,9 @@ class DateDiff(PandasOperand, DateDiffMixin):
 class DateDiffType2(PandasOperand, DateDiffMixin):
-    name = "date_diff_type2"
-    is_binary = True
-    has_symmetry_importance = True
+    name: str = "date_diff_type2"
+    is_binary: bool = True
+    has_symmetry_importance: bool = True
     def get_params(self) -> Dict[str, Optional[str]]:
         res = super().get_params()
@@ -104,8 +104,8 @@ _count_aggregations = ["nunique", "count"]
 class DateListDiff(PandasOperand, DateDiffMixin):
-    is_binary = True
-    has_symmetry_importance = True
+    is_binary: bool = True
+    has_symmetry_importance: bool = True
     aggregation: str
     replace_negative: bool = False
@@ -165,8 +165,8 @@ class DateListDiff(PandasOperand, DateDiffMixin):
 class DateListDiffBounded(DateListDiff):
-    lower_bound: Optional[int]
-    upper_bound: Optional[int]
+    lower_bound: Optional[int] = None
+    upper_bound: Optional[int] = None
     def __init__(self, **data: Any) -> None:
         if "name" not in data:
@@ -191,8 +191,8 @@ class DateListDiffBounded(DateListDiff):
 class DatePercentileBase(PandasOperand, abc.ABC):
-    is_binary = True
-    output_type = "float"
+    is_binary: bool = True
+    output_type: Optional[str] = "float"
     date_unit: Optional[str] = None
@@ -226,12 +226,12 @@ class DatePercentileBase(PandasOperand, abc.ABC):
 class DatePercentile(DatePercentileBase):
-    name = "date_per"
-    alias = "date_per_method1"
+    name: str = "date_per"
+    alias: Optional[str] = "date_per_method1"
-    zero_month: Optional[int]
-    zero_year: Optional[int]
-    zero_bounds: Optional[List[float]]
+    zero_month: Optional[int] = None
+    zero_year: Optional[int] = None
+    zero_bounds: Optional[List[float]] = None
     step: int = 30
     def get_params(self) -> Dict[str, Optional[str]]:
@@ -246,7 +246,7 @@ class DatePercentile(DatePercentileBase):
         )
         return res
-    @validator("zero_bounds", pre=True)
+    @field_validator("zero_bounds", mode="before")
     def validate_bounds(cls, value):
         if value is None or isinstance(value, list):
             return value
@@ -264,7 +264,7 @@ class DatePercentile(DatePercentileBase):
 class DatePercentileMethod2(DatePercentileBase):
-    name = "date_per_method2"
+    name: str = "date_per_method2"
     def _get_bounds(self, date_col: pd.Series) -> pd.Series:
         pass

upgini/autofe/feature.py CHANGED Viewed

@@ -82,9 +82,9 @@ class Feature:
         self.alias = alias
     def set_op_params(self, params: Optional[Dict[str, str]]) -> "Feature":
-        obj_dict = self.op.dict().copy()
+        obj_dict = self.op.model_dump().copy()
         obj_dict.update(params or {})
-        self.op = self.op.__class__.parse_obj(obj_dict)
+        self.op = self.op.__class__.model_validate(obj_dict)
         self.op.set_params(params)
         for child in self.children:

upgini/autofe/groupby.py CHANGED Viewed

@@ -7,9 +7,9 @@ from upgini.autofe.operand import PandasOperand, VectorizableMixin
 class GroupByThenAgg(PandasOperand, VectorizableMixin):
     agg: Optional[str]
-    is_vectorizable = True
-    is_grouping = True
-    is_distribution_dependent = True
+    is_vectorizable: bool = True
+    is_grouping: bool = True
+    is_distribution_dependent: bool = True
     def calculate_binary(self, left: pd.Series, right: pd.Series) -> pd.Series:
         temp = left.groupby(right).agg(self.agg)
@@ -24,17 +24,17 @@ class GroupByThenAgg(PandasOperand, VectorizableMixin):
 class GroupByThenMedian(GroupByThenAgg):
-    name = "GroupByThenMedian"
-    pandas_agg = "median"
-    is_distribution_dependent = True
+    name: str = "GroupByThenMedian"
+    pandas_agg: str = "median"
+    is_distribution_dependent: bool = True
 class GroupByThenRank(PandasOperand, VectorizableMixin):
-    name = "GroupByThenRank"
-    is_vectorizable = True
-    is_grouping = True
-    output_type = "float"
-    is_distribution_dependent = True
+    name: str = "GroupByThenRank"
+    is_vectorizable: bool = True
+    is_grouping: bool = True
+    output_type: Optional[str] = "float"
+    is_distribution_dependent: bool = True
     def calculate_binary(self, left: pd.Series, right: pd.Series) -> pd.Series:
         temp = pd.DataFrame(left[~right.isna()].groupby(right).rank(ascending=True, pct=True)).reset_index()
@@ -49,12 +49,12 @@ class GroupByThenRank(PandasOperand, VectorizableMixin):
 class GroupByThenNUnique(PandasOperand, VectorizableMixin):
-    name = "GroupByThenNUnique"
-    is_vectorizable = True
-    is_grouping = True
-    output_type = "int"
-    is_distribution_dependent = True
-    input_type = "discrete"
+    name: str = "GroupByThenNUnique"
+    is_vectorizable: bool = True
+    is_grouping: bool = True
+    output_type: Optional[str] = "int"
+    is_distribution_dependent: bool = True
+    input_type: Optional[str] = "discrete"
     def calculate_binary(self, left: pd.Series, right: pd.Series) -> pd.Series:
         nunique = left.groupby(right).nunique()
@@ -69,11 +69,11 @@ class GroupByThenNUnique(PandasOperand, VectorizableMixin):
 class GroupByThenFreq(PandasOperand):
-    name = "GroupByThenFreq"
-    is_grouping = True
-    output_type = "float"
-    is_distribution_dependent = True
-    input_type = "discrete"
+    name: str = "GroupByThenFreq"
+    is_grouping: bool = True
+    output_type: Optional[str] = "float"
+    is_distribution_dependent: bool = True
+    input_type: Optional[str] = "discrete"
     def calculate_binary(self, left: pd.Series, right: pd.Series) -> pd.Series:
         def _f(x):

upgini/autofe/operand.py CHANGED Viewed

@@ -8,19 +8,19 @@ from pydantic import BaseModel
 class Operand(BaseModel):
     name: str
-    alias: Optional[str]
+    alias: Optional[str] = None
     is_unary: bool = False
     is_symmetrical: bool = False
     has_symmetry_importance: bool = False
-    input_type: Optional[str]
-    output_type: Optional[str]
+    input_type: Optional[str] = None
+    output_type: Optional[str] = None
     is_categorical: bool = False
     is_vectorizable: bool = False
     is_grouping: bool = False
     is_binary: bool = False
     is_vector: bool = False
     is_distribution_dependent: bool = False
-    params: Optional[Dict[str, str]]
+    params: Optional[Dict[str, str]] = None
     def set_params(self, params: Dict[str, str]):
         self.params = params

upgini/autofe/unary.py CHANGED Viewed

@@ -1,3 +1,4 @@
+from typing import Optional
 import numpy as np
 import pandas as pd
 from sklearn.preprocessing import Normalizer
@@ -6,10 +7,10 @@ from upgini.autofe.operand import PandasOperand, VectorizableMixin
 class Abs(PandasOperand, VectorizableMixin):
-    name = "abs"
-    is_unary = True
-    is_vectorizable = True
-    group_index = 0
+    name: str = "abs"
+    is_unary: bool = True
+    is_vectorizable: bool = True
+    group_index: int = 0
     def calculate_unary(self, data: pd.Series) -> pd.Series:
         return data.abs()
@@ -19,11 +20,11 @@ class Abs(PandasOperand, VectorizableMixin):
 class Log(PandasOperand, VectorizableMixin):
-    name = "log"
-    is_unary = True
-    is_vectorizable = True
-    output_type = "float"
-    group_index = 0
+    name: str = "log"
+    is_unary: bool = True
+    is_vectorizable: bool = True
+    output_type: Optional[str] = "float"
+    group_index: int = 0
     def calculate_unary(self, data: pd.Series) -> pd.Series:
         return self._round_value(np.log(np.abs(data.replace(0, np.nan))), 10)
@@ -33,11 +34,11 @@ class Log(PandasOperand, VectorizableMixin):
 class Sqrt(PandasOperand, VectorizableMixin):
-    name = "sqrt"
-    is_unary = True
-    is_vectorizable = True
-    output_type = "float"
-    group_index = 0
+    name: str = "sqrt"
+    is_unary: bool = True
+    is_vectorizable: bool = True
+    output_type: Optional[str] = "float"
+    group_index: int = 0
     def calculate_unary(self, data: pd.Series) -> pd.Series:
         return self._round_value(np.sqrt(np.abs(data)))
@@ -47,10 +48,10 @@ class Sqrt(PandasOperand, VectorizableMixin):
 class Square(PandasOperand, VectorizableMixin):
-    name = "square"
-    is_unary = True
-    is_vectorizable = True
-    group_index = 0
+    name: str = "square"
+    is_unary: bool = True
+    is_vectorizable: bool = True
+    group_index: int = 0
     def calculate_unary(self, data: pd.Series) -> pd.Series:
         return np.square(data)
@@ -60,11 +61,11 @@ class Square(PandasOperand, VectorizableMixin):
 class Sigmoid(PandasOperand, VectorizableMixin):
-    name = "sigmoid"
-    is_unary = True
-    is_vectorizable = True
-    output_type = "float"
-    group_index = 0
+    name: str = "sigmoid"
+    is_unary: bool = True
+    is_vectorizable: bool = True
+    output_type: Optional[str] = "float"
+    group_index: int = 0
     def calculate_unary(self, data: pd.Series) -> pd.Series:
         return self._round_value(1 / (1 + np.exp(-data)))
@@ -74,12 +75,12 @@ class Sigmoid(PandasOperand, VectorizableMixin):
 class Floor(PandasOperand, VectorizableMixin):
-    name = "floor"
-    is_unary = True
-    is_vectorizable = True
-    output_type = "int"
-    input_type = "continuous"
-    group_index = 0
+    name: str = "floor"
+    is_unary: bool = True
+    is_vectorizable: bool = True
+    output_type: Optional[str] = "int"
+    input_type: Optional[str] = "continuous"
+    group_index: int = 0
     def calculate_unary(self, data: pd.Series) -> pd.Series:
         return np.floor(data)
@@ -89,11 +90,11 @@ class Floor(PandasOperand, VectorizableMixin):
 class Residual(PandasOperand, VectorizableMixin):
-    name = "residual"
-    is_unary = True
-    is_vectorizable = True
-    input_type = "continuous"
-    group_index = 0
+    name: str = "residual"
+    is_unary: bool = True
+    is_vectorizable: bool = True
+    input_type: Optional[str] = "continuous"
+    group_index: int = 0
     def calculate_unary(self, data: pd.Series) -> pd.Series:
         return data - np.floor(data)
@@ -103,11 +104,11 @@ class Residual(PandasOperand, VectorizableMixin):
 class Freq(PandasOperand):
-    name = "freq"
-    is_unary = True
-    output_type = "float"
-    is_distribution_dependent = True
-    input_type = "discrete"
+    name: str = "freq"
+    is_unary: bool = True
+    output_type: Optional[str] = "float"
+    is_distribution_dependent: bool = True
+    input_type: Optional[str] = "discrete"
     def calculate_unary(self, data: pd.Series) -> pd.Series:
         value_counts = data.value_counts(normalize=True)
@@ -115,9 +116,9 @@ class Freq(PandasOperand):
 class Norm(PandasOperand):
-    name = "norm"
-    is_unary = True
-    output_type = "float"
+    name: str = "norm"
+    is_unary: bool = True
+    output_type: Optional[str] = "float"
     def calculate_unary(self, data: pd.Series) -> pd.Series:
         data_dropna = data.dropna()
@@ -131,7 +132,7 @@ class Norm(PandasOperand):
 class Embeddings(PandasOperand):
-    name = "emb"
-    is_unary = True
-    input_type = "string"
-    output_type = "vector"
+    name: str = "emb"
+    is_unary: bool = True
+    input_type: Optional[str] = "string"
+    output_type: Optional[str] = "vector"

upgini/autofe/vector.py CHANGED Viewed

@@ -1,4 +1,4 @@
-from typing import List
+from typing import List, Optional
 import pandas as pd
@@ -6,19 +6,19 @@ from upgini.autofe.operand import PandasOperand, VectorizableMixin
 class Mean(PandasOperand, VectorizableMixin):
-    name = "mean"
-    output_type = "float"
-    is_vector = True
-    group_index = 0
+    name: str = "mean"
+    output_type: Optional[str] = "float"
+    is_vector: bool = True
+    group_index: int = 0
     def calculate_vector(self, data: List[pd.Series]) -> pd.Series:
         return pd.DataFrame(data).T.fillna(0).mean(axis=1)
 class Sum(PandasOperand, VectorizableMixin):
-    name = "sum"
-    is_vector = True
-    group_index = 0
+    name: str = "sum"
+    is_vector: bool = True
+    group_index: int = 0
     def calculate_vector(self, data: List[pd.Series]) -> pd.Series:
         return pd.DataFrame(data).T.fillna(0).sum(axis=1)

upgini/features_enricher.py CHANGED Viewed

@@ -23,7 +23,6 @@ from pandas.api.types import (
     is_datetime64_any_dtype,
     is_numeric_dtype,
     is_object_dtype,
-    is_period_dtype,
     is_string_dtype,
 )
 from scipy.stats import ks_2samp
@@ -1408,7 +1407,9 @@ class FeaturesEnricher(TransformerMixin):
         # TODO maybe there is no more need for these convertions
         # Remove datetime features
         datetime_features = [
-            f for f in fitting_X.columns if is_datetime64_any_dtype(fitting_X[f]) or is_period_dtype(fitting_X[f])
+            f
+            for f in fitting_X.columns
+            if is_datetime64_any_dtype(fitting_X[f]) or isinstance(fitting_X[f].dtype, pd.PeriodDtype)
         ]
         if len(datetime_features) > 0:
             self.logger.warning(self.bundle.get("dataset_date_features").format(datetime_features))

upgini/http.py CHANGED Viewed

@@ -39,17 +39,6 @@ from upgini.metadata import (
 from upgini.resource_bundle import bundle
 from upgini.utils.track_info import get_track_metrics
-# try:
-#     from importlib.metadata import version  # type: ignore
-#     __version__ = version("upgini")
-# except ImportError:
-#     try:
-#         from importlib_metadata import version  # type: ignore
-#         __version__ = version("upgini")
-#     except ImportError:
-#         __version__ = "Upgini wasn't installed"
 UPGINI_URL: str = "UPGINI_URL"
 UPGINI_API_KEY: str = "UPGINI_API_KEY"
@@ -452,18 +441,18 @@ class _RestClient:
                 content = file.read()
                 md5_hash.update(content)
                 digest = md5_hash.hexdigest()
-                metadata_with_md5 = metadata.copy(update={"checksumMD5": digest})
+                metadata_with_md5 = metadata.model_copy(update={"checksumMD5": digest})
             digest_sha256 = hashlib.sha256(
                 pd.util.hash_pandas_object(pd.read_parquet(file_path, engine="fastparquet")).values
             ).hexdigest()
-            metadata_with_md5 = metadata_with_md5.copy(update={"digest": digest_sha256})
+            metadata_with_md5 = metadata_with_md5.model_copy(update={"digest": digest_sha256})
             with open(file_path, "rb") as file:
                 files = {
                     "metadata": (
                         "metadata.json",
-                        metadata_with_md5.json(exclude_none=True).encode(),
+                        metadata_with_md5.model_dump_json(exclude_none=True).encode(),
                         "application/json",
                     ),
                     "tracking": (
@@ -471,13 +460,13 @@ class _RestClient:
                         dumps(track_metrics).encode(),
                         "application/json",
                     ),
-                    "metrics": ("metrics.json", metrics.json(exclude_none=True).encode(), "application/json"),
+                    "metrics": ("metrics.json", metrics.model_dump_json(exclude_none=True).encode(), "application/json"),
                     "file": (metadata_with_md5.name, file, "application/octet-stream"),
                 }
                 if search_customization is not None:
                     files["customization"] = (
                         "customization.json",
-                        search_customization.json(exclude_none=True).encode(),
+                        search_customization.model_dump_json(exclude_none=True).encode(),
                         "application/json",
                     )
                 additional_headers = {self.SEARCH_KEYS_HEADER_NAME: ",".join(self.search_keys_meaning_types(metadata))}
@@ -492,7 +481,7 @@ class _RestClient:
     def check_uploaded_file_v2(self, trace_id: str, file_upload_id: str, metadata: FileMetadata) -> bool:
         api_path = self.CHECK_UPLOADED_FILE_URL_FMT_V2.format(file_upload_id)
         response = self._with_unauth_retry(
-            lambda: self._send_post_req(api_path, trace_id, metadata.json(exclude_none=True))
+            lambda: self._send_post_req(api_path, trace_id, metadata.model_dump_json(exclude_none=True))
         )
         return bool(response)
@@ -506,11 +495,11 @@ class _RestClient:
     ) -> SearchTaskResponse:
         api_path = self.INITIAL_SEARCH_WITHOUT_UPLOAD_URI_FMT_V2.format(file_upload_id)
         files = {
-            "metadata": ("metadata.json", metadata.json(exclude_none=True).encode(), "application/json"),
-            "metrics": ("metrics.json", metrics.json(exclude_none=True).encode(), "application/json"),
+            "metadata": ("metadata.json", metadata.model_dump_json(exclude_none=True).encode(), "application/json"),
+            "metrics": ("metrics.json", metrics.model_dump_json(exclude_none=True).encode(), "application/json"),
         }
         if search_customization is not None:
-            files["customization"] = search_customization.json(exclude_none=True).encode()
+            files["customization"] = search_customization.model_dump_json(exclude_none=True).encode()
         additional_headers = {self.SEARCH_KEYS_HEADER_NAME: ",".join(self.search_keys_meaning_types(metadata))}
         response = self._with_unauth_retry(
             lambda: self._send_post_file_req_v2(
@@ -536,18 +525,18 @@ class _RestClient:
                 content = file.read()
                 md5_hash.update(content)
                 digest = md5_hash.hexdigest()
-                metadata_with_md5 = metadata.copy(update={"checksumMD5": digest})
+                metadata_with_md5 = metadata.model_copy(update={"checksumMD5": digest})
             digest_sha256 = hashlib.sha256(
                 pd.util.hash_pandas_object(pd.read_parquet(file_path, engine="fastparquet")).values
             ).hexdigest()
-            metadata_with_md5 = metadata_with_md5.copy(update={"digest": digest_sha256})
+            metadata_with_md5 = metadata_with_md5.model_copy(update={"digest": digest_sha256})
             with open(file_path, "rb") as file:
                 files = {
                     "metadata": (
                         "metadata.json",
-                        metadata_with_md5.json(exclude_none=True).encode(),
+                        metadata_with_md5.model_dump_json(exclude_none=True).encode(),
                         "application/json",
                     ),
                     "tracking": (
@@ -555,13 +544,13 @@ class _RestClient:
                         dumps(get_track_metrics(self.client_ip, self.client_visitorid)).encode(),
                         "application/json",
                     ),
-                    "metrics": ("metrics.json", metrics.json(exclude_none=True).encode(), "application/json"),
+                    "metrics": ("metrics.json", metrics.model_dump_json(exclude_none=True).encode(), "application/json"),
                     "file": (metadata_with_md5.name, file, "application/octet-stream"),
                 }
                 if search_customization is not None:
                     files["customization"] = (
                         "customization.json",
-                        search_customization.json(exclude_none=True).encode(),
+                        search_customization.model_dump_json(exclude_none=True).encode(),
                         "application/json",
                     )
@@ -585,11 +574,11 @@ class _RestClient:
     ) -> SearchTaskResponse:
         api_path = self.VALIDATION_SEARCH_WITHOUT_UPLOAD_URI_FMT_V2.format(file_upload_id, initial_search_task_id)
         files = {
-            "metadata": ("metadata.json", metadata.json(exclude_none=True).encode(), "application/json"),
-            "metrics": ("metrics.json", metrics.json(exclude_none=True).encode(), "application/json"),
+            "metadata": ("metadata.json", metadata.model_dump_json(exclude_none=True).encode(), "application/json"),
+            "metrics": ("metrics.json", metrics.model_dump_json(exclude_none=True).encode(), "application/json"),
         }
         if search_customization is not None:
-            files["customization"] = search_customization.json(exclude_none=True).encode()
+            files["customization"] = search_customization.model_dump_json(exclude_none=True).encode()
         additional_headers = {self.SEARCH_KEYS_HEADER_NAME: ",".join(self.search_keys_meaning_types(metadata))}
         response = self._with_unauth_retry(
             lambda: self._send_post_file_req_v2(
@@ -651,7 +640,7 @@ class _RestClient:
             with open(file_path, "rb") as file:
                 files = {
                     "file": (metadata.name, file, "application/octet-stream"),
-                    "metadata": ("metadata.json", metadata.json(exclude_none=True).encode(), "application/json"),
+                    "metadata": ("metadata.json", metadata.model_dump_json(exclude_none=True).encode(), "application/json"),
                 }
                 return self._send_post_file_req_v2(api_path, files)
@@ -661,12 +650,12 @@ class _RestClient:
     def get_search_file_metadata(self, search_task_id: str, trace_id: str) -> FileMetadata:
         api_path = self.SEARCH_FILE_METADATA_URI_FMT_V2.format(search_task_id)
         response = self._with_unauth_retry(lambda: self._send_get_req(api_path, trace_id))
-        return FileMetadata.parse_obj(response)
+        return FileMetadata.model_validate(response)
     def get_provider_search_metadata_v3(self, provider_search_task_id: str, trace_id: str) -> ProviderTaskMetadataV2:
         api_path = self.SEARCH_TASK_METADATA_FMT_V3.format(provider_search_task_id)
         response = self._with_unauth_retry(lambda: self._send_get_req(api_path, trace_id))
-        return ProviderTaskMetadataV2.parse_obj(response)
+        return ProviderTaskMetadataV2.model_validate(response)
     def get_current_transform_usage(self, trace_id) -> TransformUsage:
         track_metrics = get_track_metrics(self.client_ip, self.client_visitorid)

upgini/lazy_import.py CHANGED Viewed

@@ -1,4 +1,6 @@
 import importlib
+import importlib.util
+import importlib.machinery
 class LazyImport:
@@ -10,7 +12,18 @@ class LazyImport:
     def _load(self):
         if self._module is None:
-            self._module = importlib.import_module(self.module_name)
+            # Load module and save link to it
+            spec = importlib.util.find_spec(self.module_name)
+            if spec is None:
+                raise ImportError(f"Module {self.module_name} not found")
+            # Create module
+            self._module = importlib.util.module_from_spec(spec)
+            # Execute module
+            spec.loader.exec_module(self._module)
+            # Get class from module
             self._class = getattr(self._module, self.class_name)
     def __call__(self, *args, **kwargs):

upgini/metadata.py CHANGED Viewed

@@ -1,7 +1,7 @@
 from __future__ import annotations
 from enum import Enum
-from typing import Dict, List, Optional, Set, Union
+from typing import Any, Dict, List, Optional, Set, Union
 from pydantic import BaseModel
@@ -172,23 +172,23 @@ class FileMetricsInterval(BaseModel):
     date_cut: float
     count: float
     valid_count: float
-    avg_target: Optional[float]  # not for multiclass
-    avg_score_etalon: Optional[float]
+    avg_target: Optional[float] = None  # not for multiclass
+    avg_score_etalon: Optional[float] = None
 class FileMetrics(BaseModel):
     # etalon metadata
-    task_type: Optional[ModelTaskType]
-    label: Optional[ModelLabelType]
-    count: Optional[int]
-    valid_count: Optional[int]
-    valid_rate: Optional[float]
-    avg_target: Optional[float]
-    metrics_binary_etalon: Optional[BinaryTask]
-    metrics_regression_etalon: Optional[RegressionTask]
-    metrics_multiclass_etalon: Optional[MulticlassTask]
-    cuts: Optional[List[float]]
-    interval: Optional[List[FileMetricsInterval]]
+    task_type: Optional[ModelTaskType] = None
+    label: Optional[ModelLabelType] = None
+    count: Optional[int] = None
+    valid_count: Optional[int] = None
+    valid_rate: Optional[float] = None
+    avg_target: Optional[float] = None
+    metrics_binary_etalon: Optional[BinaryTask] = None
+    metrics_regression_etalon: Optional[RegressionTask] = None
+    metrics_multiclass_etalon: Optional[MulticlassTask] = None
+    cuts: Optional[List[float]] = None
+    interval: Optional[List[FileMetricsInterval]] = None
 class NumericInterval(BaseModel):
@@ -202,25 +202,25 @@ class FileColumnMetadata(BaseModel):
     dataType: DataType
     meaningType: FileColumnMeaningType
     minMaxValues: Optional[NumericInterval] = None
-    originalName: Optional[str]
+    originalName: Optional[str] = None
     # is this column contains keys from multiple key columns like msisdn1, msisdn2
     isUnnest: bool = False
     # list of original etalon key column names like msisdn1, msisdn2
-    unnestKeyNames: Optional[List[str]]
+    unnestKeyNames: Optional[List[str]] = None
 class FileMetadata(BaseModel):
     name: str
-    description: Optional[str]
+    description: Optional[str] = None
     columns: List[FileColumnMetadata]
     searchKeys: List[List[str]]
-    excludeFeaturesSources: Optional[List[str]]
-    hierarchicalGroupKeys: Optional[List[str]]
-    hierarchicalSubgroupKeys: Optional[List[str]]
-    taskType: Optional[ModelTaskType]
-    rowsCount: Optional[int]
-    checksumMD5: Optional[str]
-    digest: Optional[str]
+    excludeFeaturesSources: Optional[List[str]] = None
+    hierarchicalGroupKeys: Optional[List[str]] = None
+    hierarchicalSubgroupKeys: Optional[List[str]] = None
+    taskType: Optional[ModelTaskType] = None
+    rowsCount: Optional[int] = None
+    checksumMD5: Optional[str] = None
+    digest: Optional[str] = None
     def column_by_name(self, name: str) -> Optional[FileColumnMetadata]:
         for c in self.columns:
@@ -244,17 +244,17 @@ class FeaturesMetadataV2(BaseModel):
     source: str
     hit_rate: float
     shap_value: float
-    commercial_schema: Optional[str]
-    data_provider: Optional[str]
-    data_providers: Optional[List[str]]
-    data_provider_link: Optional[str]
-    data_provider_links: Optional[List[str]]
-    data_source: Optional[str]
-    data_sources: Optional[List[str]]
-    data_source_link: Optional[str]
-    data_source_links: Optional[List[str]]
-    doc_link: Optional[str]
-    update_frequency: Optional[str]
+    commercial_schema: Optional[str] = None
+    data_provider: Optional[str] = None
+    data_providers: Optional[List[str]] = None
+    data_provider_link: Optional[str] = None
+    data_provider_links: Optional[List[str]] = None
+    data_source: Optional[str] = None
+    data_sources: Optional[List[str]] = None
+    data_source_link: Optional[str] = None
+    data_source_links: Optional[List[str]] = None
+    doc_link: Optional[str] = None
+    update_frequency: Optional[str] = None
 class HitRateMetrics(BaseModel):
@@ -274,48 +274,48 @@ class ModelEvalSet(BaseModel):
 class BaseColumnMetadata(BaseModel):
     original_name: str
     hashed_name: str
-    ads_definition_id: Optional[str]
+    ads_definition_id: Optional[str] = None
     is_augmented: bool
 class GeneratedFeatureMetadata(BaseModel):
-    alias: Optional[str]
+    alias: Optional[str] = None
     formula: str
     display_index: str
     base_columns: List[BaseColumnMetadata]
-    operator_params: Optional[Dict[str, str]]
+    operator_params: Optional[Dict[str, str]] = None
 class ProviderTaskMetadataV2(BaseModel):
     features: List[FeaturesMetadataV2]
-    hit_rate_metrics: Optional[HitRateMetrics]
-    eval_set_metrics: Optional[List[ModelEvalSet]]
-    zero_hit_rate_search_keys: Optional[List[str]]
-    features_used_for_embeddings: Optional[List[str]]
-    shuffle_kfold: Optional[bool]
-    generated_features: Optional[List[GeneratedFeatureMetadata]]
+    hit_rate_metrics: Optional[HitRateMetrics] = None
+    eval_set_metrics: Optional[List[ModelEvalSet]] = None
+    zero_hit_rate_search_keys: Optional[List[str]] = None
+    features_used_for_embeddings: Optional[List[str]] = None
+    shuffle_kfold: Optional[bool] = None
+    generated_features: Optional[List[GeneratedFeatureMetadata]] = None
 class FeaturesFilter(BaseModel):
-    minImportance: Optional[float]
-    maxPSI: Optional[float]
-    maxCount: Optional[int]
-    selectedFeatures: Optional[List[str]]
+    minImportance: Optional[float] = None
+    maxPSI: Optional[float] = None
+    maxCount: Optional[int] = None
+    selectedFeatures: Optional[List[str]] = None
 class RuntimeParameters(BaseModel):
-    properties: Dict[str, str] = {}
+    properties: Dict[str, Any] = {}
 class SearchCustomization(BaseModel):
-    featuresFilter: Optional[FeaturesFilter]
-    extractFeatures: Optional[bool]
-    accurateModel: Optional[bool]
-    importanceThreshold: Optional[float]
-    maxFeatures: Optional[int]
-    returnScores: Optional[bool]
-    runtimeParameters: Optional[RuntimeParameters]
-    metricsCalculation: Optional[bool]
+    featuresFilter: Optional[FeaturesFilter] = None
+    extractFeatures: Optional[bool] = None
+    accurateModel: Optional[bool] = None
+    importanceThreshold: Optional[float] = None
+    maxFeatures: Optional[int] = None
+    returnScores: Optional[bool] = None
+    runtimeParameters: Optional[RuntimeParameters] = None
+    metricsCalculation: Optional[bool] = None
     def __repr__(self):
         return (

upgini/normalizer/normalize_utils.py CHANGED Viewed

@@ -10,7 +10,6 @@ from pandas.api.types import (
     is_float_dtype,
     is_numeric_dtype,
     is_object_dtype,
-    is_period_dtype,
     is_string_dtype,
 )
@@ -135,7 +134,7 @@ class Normalizer:
         removed_features = []
         for f in features:
-            if is_datetime(df[f]) or is_period_dtype(df[f]):
+            if is_datetime(df[f]) or isinstance(df[f].dtype, pd.PeriodDtype):
                 removed_features.append(f)
                 df.drop(columns=f, inplace=True)

upgini/utils/datetime_utils.py CHANGED Viewed

@@ -6,7 +6,7 @@ from typing import Dict, List, Optional
 import numpy as np
 import pandas as pd
 from dateutil.relativedelta import relativedelta
-from pandas.api.types import is_numeric_dtype, is_period_dtype
+from pandas.api.types import is_numeric_dtype
 from upgini.errors import ValidationError
 from upgini.metadata import EVAL_SET_INDEX, SearchKey
@@ -84,7 +84,7 @@ class DateTimeSearchKeyConverter:
             df[self.date_column] = df[self.date_column].apply(lambda x: x.replace(tzinfo=None))
         elif isinstance(df[self.date_column].values[0], datetime.date):
             df[self.date_column] = pd.to_datetime(df[self.date_column], errors="coerce")
-        elif is_period_dtype(df[self.date_column]):
+        elif isinstance(df[self.date_column].dtype, pd.PeriodDtype):
             df[self.date_column] = df[self.date_column].dt.to_timestamp()
         elif is_numeric_dtype(df[self.date_column]):
             # 315532801 - 2524608001    - seconds
@@ -207,7 +207,7 @@ def is_time_series(df: pd.DataFrame, date_col: str) -> bool:
 def is_blocked_time_series(df: pd.DataFrame, date_col: str, search_keys: List[str]) -> bool:
     df = df.copy()
     seconds = "datetime_seconds"
-    if is_period_dtype(df[date_col]):
+    if isinstance(df[date_col].dtype, pd.PeriodDtype):
         df[date_col] = df[date_col].dt.to_timestamp()
     else:
         df[date_col] = pd.to_datetime(df[date_col])
@@ -275,7 +275,7 @@ def validate_dates_distribution(
             if col in search_keys:
                 continue
             try:
-                if is_period_dtype(X[col]):
+                if isinstance(X[col].dtype, pd.PeriodDtype):
                     pass
                 elif pd.__version__ >= "2.0.0":
                     # Format mixed to avoid massive warnings
@@ -290,7 +290,7 @@ def validate_dates_distribution(
     if maybe_date_col is None:
         return
-    if is_period_dtype(X[maybe_date_col]):
+    if isinstance(X[maybe_date_col].dtype, pd.PeriodDtype):
         dates = X[maybe_date_col].dt.to_timestamp().dt.date
     elif pd.__version__ >= "2.0.0":
         dates = pd.to_datetime(X[maybe_date_col], format="mixed").dt.date

upgini/utils/phone_utils.py CHANGED Viewed

@@ -1,12 +1,8 @@
 from typing import Optional
+import numpy as np
 import pandas as pd
-from pandas.api.types import (
-    is_float_dtype,
-    is_int64_dtype,
-    is_object_dtype,
-    is_string_dtype,
-)
+from pandas.api.types import is_float_dtype, is_object_dtype, is_string_dtype
 from upgini.errors import ValidationError
 from upgini.utils.base_search_key_detector import BaseSearchKeyDetector
@@ -63,7 +59,9 @@ class PhoneSearchKeyConverter:
             convert_func = self.phone_str_to_int_safe
         elif is_float_dtype(df[self.phone_column]):
             convert_func = self.phone_float_to_int_safe
-        elif is_int64_dtype(df[self.phone_column]):
+        elif df[self.phone_column].dtype == np.int64 or isinstance(
+            df[self.phone_column].dtype, pd.Int64Dtype
+        ):
             convert_func = self.phone_int_to_int_safe
         else:
             raise ValidationError(

upgini/utils/target_utils.py CHANGED Viewed

@@ -194,4 +194,7 @@ def calculate_psi(expected: pd.Series, actual: pd.Series) -> float:
     test_distribution = actual.value_counts(bins=bins, normalize=True).sort_index().values
     # Calculate the PSI
-    return np.sum((train_distribution - test_distribution) * np.log(train_distribution / test_distribution))
+    try:
+        return np.sum((train_distribution - test_distribution) * np.log(train_distribution / test_distribution))
+    except Exception:
+        return np.nan

{upgini-1.1.316.dist-info → upgini-1.1.316a1.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.3
 Name: upgini
-Version: 1.1.316
+Version: 1.1.316a1
 Summary: Intelligent data search & enrichment for Machine Learning
 Project-URL: Bug Reports, https://github.com/upgini/upgini/issues
 Project-URL: Homepage, https://upgini.com/
@@ -29,9 +29,9 @@ Requires-Dist: ipywidgets>=8.1.0
 Requires-Dist: jarowinkler>=2.0.0
 Requires-Dist: levenshtein>=0.25.1
 Requires-Dist: lightgbm>=3.3.2
-Requires-Dist: numpy>=1.19.0
+Requires-Dist: numpy<=1.26.4,>=1.19.0
 Requires-Dist: pandas<3.0.0,>=1.1.0
-Requires-Dist: pydantic<2.0.0,>=1.8.2
+Requires-Dist: pydantic>=2.7.0
 Requires-Dist: pyjwt>=2.8.0
 Requires-Dist: python-bidi==0.4.2
 Requires-Dist: python-dateutil>=2.8.0

{upgini-1.1.316.dist-info → upgini-1.1.316a1.dist-info}/RECORD RENAMED Viewed

@@ -1,12 +1,12 @@
-upgini/__about__.py,sha256=DQCLPSfZIiyKQ88S6JJcAEA3dURvJk2NhtYNJeB5Mq8,24
+upgini/__about__.py,sha256=31UCeRaXiz7DRsmJ3BKvypU0ky5w4Itv5qqPPf4BU9I,26
 upgini/__init__.py,sha256=Xs0YFVBu1KUdtZzbStGRPQtLt3YLzJnjx5nIUBlX8BE,415
 upgini/ads.py,sha256=nvuRxRx5MHDMgPr9SiU-fsqRdFaBv8p4_v1oqiysKpc,2714
 upgini/dataset.py,sha256=yAWIygHejxdKXOA4g3QjtCu0VRa9at-4nPPuugCr77U,30857
 upgini/errors.py,sha256=2b_Wbo0OYhLUbrZqdLIx5jBnAsiD1Mcenh-VjR4HCTw,950
-upgini/features_enricher.py,sha256=Gu4gsnMVjcsfWnJlu4Np3jpE9Au1UywhuHQb0Xv5YNg,187982
-upgini/http.py,sha256=a4Epc9YLIJBuYk4t8E_2-QDLBtJFqKO35jn2SnYQZCg,42920
-upgini/lazy_import.py,sha256=EwoM0msNGbSmWBhGbrLDny1DSnOlvTxCjmMKPxYlDms,610
-upgini/metadata.py,sha256=YQ-1HZGyPOksP2iM50ff_pMHXLyzvpChqSfNh8Z0ke4,10833
+upgini/features_enricher.py,sha256=_d8ya5RRoYN0o6mV6gda-bLdOngQ4rb1SA51SlM_TG0,188002
+upgini/http.py,sha256=_A_DGMk8gkygdVFCDp8I6js_re4YX34PB9TpJV8aPqo,42784
+upgini/lazy_import.py,sha256=74gQ8JuA48BGRLxAo7lNHNKY2D2emMxrUxKGdxVGhuY,1012
+upgini/metadata.py,sha256=osmzdNESeh7yP3BZday6N9Q3eaIHfzhhRM1d6NSgcf0,11223
 upgini/metrics.py,sha256=Tu5cN8RlhOSSMWUTXRSkdl8SWBqR1N_2eJpBum9pZxc,30926
 upgini/search_task.py,sha256=LtRJ9bCPjMo1gJ-sUDKERhDwGcWKImrzwVFHjkMSQHQ,17071
 upgini/spinner.py,sha256=4iMd-eIe_BnkqFEMIliULTbj6rNI2HkN_VJ4qYe0cUc,1118
@@ -15,19 +15,19 @@ upgini/ads_management/__init__.py,sha256=qzyisOToVRP-tquAJD1PblZhNtMrOB8FiyF9Jvf
 upgini/ads_management/ads_manager.py,sha256=igVbN2jz80Umb2BUJixmJVj-zx8unoKpecVo-R-nGdw,2648
 upgini/autofe/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 upgini/autofe/all_operands.py,sha256=3LiH9iU-ArGmYpS8FHWH7yCFx40ILfvlSXJlKIa75BQ,2542
-upgini/autofe/binary.py,sha256=2Z5FrfdCtesKEHBuabEBiRvwOAzcRoFKAX1wvGpHL0I,7003
-upgini/autofe/date.py,sha256=AO3P8GtUHD6vPE_1Vrj3nsnXYBxiXe7vun6aLHReZgQ,9064
-upgini/autofe/feature.py,sha256=gwGWY2UcX_0wHAvfEiu1rRU7GFZyzMWZIaPVcf6kD80,14223
-upgini/autofe/groupby.py,sha256=4WjDzQxqpZxB79Ih4ihMMI5GDxaFqiH6ZelfV82ClT4,3091
-upgini/autofe/operand.py,sha256=MKEsl3zxpWzRDpTkE0sNJxTu62U20sWOvEKhPjUWS6s,2915
-upgini/autofe/unary.py,sha256=oIMf-IVy7L7GkzxMmQyExX0tOH9RhWeQh7cGxxMDiPk,3832
-upgini/autofe/vector.py,sha256=dLxfAstJs-gw_OQ1xxoxcM6pVzORlV0HVzdzt7cLXVQ,606
+upgini/autofe/binary.py,sha256=xRBT7RNqQ7pprz6cRpO1KnvZCb7PvU3QXBfaP6Omqi4,7425
+upgini/autofe/date.py,sha256=eLPrO2Cgm74VB4rPtIaeUDuI5vmLiGnygHSvU4aGHWU,9223
+upgini/autofe/feature.py,sha256=CivPkE7YrAtDrgF8WhVPnDAnNDR8gbRQ-8_hXiQE6ew,14234
+upgini/autofe/groupby.py,sha256=r-xl_keZZgm_tpiEoDhjYSkT6NHv7a4cRQR4wJ4uCp8,3263
+upgini/autofe/operand.py,sha256=uk883RaNqgXqtkaRqA1re1d9OFnnpv0JVvelYx09Yw0,2943
+upgini/autofe/unary.py,sha256=RiK-Fz3fgjPlqWWfro6x7qChjEZ8W8RTnl5-MT1kaQA,4218
+upgini/autofe/vector.py,sha256=ehcZUDqV71TfbU8EmKfdYp603gS2dJY_-fpr10ho5sI,663
 upgini/data_source/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 upgini/data_source/data_source_publisher.py,sha256=Vg0biG86YB0OEaoxbK9YYrr4yARm11_h3bTWIBgoScA,22115
 upgini/mdc/__init__.py,sha256=aM08nIWFc2gWdWUa3_IuEnNND0cQPkBGnYpRMnfFN8k,1019
 upgini/mdc/context.py,sha256=3u1B-jXt7tXEvNcV3qmR9SDCseudnY7KYsLclBdwVLk,1405
 upgini/normalizer/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-upgini/normalizer/normalize_utils.py,sha256=8gH1oabPNZrC1kHSRFxGGcO0o6yNDlOJXCLzzExq-3s,7451
+upgini/normalizer/normalize_utils.py,sha256=bHRPWCNrUvt2R9qMX6dZFCJ0i8ENVCQ2Rw3dHH9IJEg,7447
 upgini/resource_bundle/__init__.py,sha256=S5F2G47pnJd2LDpmFsjDqEwiKkP8Hm-hcseDbMka6Ko,8345
 upgini/resource_bundle/exceptions.py,sha256=5fRvx0_vWdE1-7HcSgF0tckB4A9AKyf5RiinZkInTsI,621
 upgini/resource_bundle/strings.properties,sha256=WZAuYPX2Dpn6BHoA3RX8uvMNMr-yJE2fF7Gz0i24x2s,26459
@@ -42,7 +42,7 @@ upgini/utils/blocked_time_series.py,sha256=Uqr3vp4YqNclj2-PzEYqVy763GSXHn86sbpIl
 upgini/utils/country_utils.py,sha256=lY-eXWwFVegdVENFttbvLcgGDjFO17Sex8hd2PyJaRk,6937
 upgini/utils/custom_loss_utils.py,sha256=kieNZYBYZm5ZGBltF1F_jOSF4ea6C29rYuCyiDcqVNY,3857
 upgini/utils/cv_utils.py,sha256=w6FQb9nO8BWDx88EF83NpjPLarK4eR4ia0Wg0kLBJC4,3525
-upgini/utils/datetime_utils.py,sha256=niZcf2YqAwokUFUW474zajlzv9HAMf7nv9v_WPJHpyc,12123
+upgini/utils/datetime_utils.py,sha256=4tsGeehU0KS6wqNsc9gEEWZ9s6T9E0UReUIO3rSuXNU,12174
 upgini/utils/deduplicate_utils.py,sha256=Zvs7zW4QzaERQmJNPrTVf2ZTVBkBLOycFCzyMwtXuV8,8770
 upgini/utils/display_utils.py,sha256=A2ouB5eiZ-Kyt9ykYxkLQwyoRPrdYeJymwNTiajtFXs,10990
 upgini/utils/email_utils.py,sha256=j0Ug1R_0AnCg1Y92zIZ4XMwvKo3G5_pcOlBN1OH_gZs,5191
@@ -50,14 +50,14 @@ upgini/utils/fallback_progress_bar.py,sha256=PDaKb8dYpVZaWMroNcOHsTc3pSjgi9mOm0-
 upgini/utils/features_validator.py,sha256=PgKNt5dyqfErTvjtRNNUS9g7GFqHBtAtnsfA-V5UO1A,3307
 upgini/utils/format.py,sha256=Yv5cvvSs2bOLUzzNu96Pu33VMDNbabio92QepUj41jU,243
 upgini/utils/ip_utils.py,sha256=ZZj_uQFTHhagzt-MRew__ZBOp2DdnkMrachS7PElkSE,5143
-upgini/utils/phone_utils.py,sha256=PTSRfGAWCuLy8R6I8X6clcc1K7bZXIIrZ_alIB8irC8,10368
+upgini/utils/phone_utils.py,sha256=IrbztLuOJBiePqqxllfABWfYlfAjYevPhXKipl95wUI,10432
 upgini/utils/postal_code_utils.py,sha256=C899tJS8qM_ps4I3g-Ve6qzIa22O_UqwNmGFoyy9sO8,1716
 upgini/utils/progress_bar.py,sha256=N-Sfdah2Hg8lXP_fV9EfUTXz_PyRt4lo9fAHoUDOoLc,1550
 upgini/utils/sklearn_ext.py,sha256=13jQS_k7v0aUtudXV6nGUEWjttPQzAW9AFYL5wgEz9k,44511
-upgini/utils/target_utils.py,sha256=Y96_PJ5cC-WsEbeqg20v9uqywDQobLoTb-xoP7S3o4E,7807
+upgini/utils/target_utils.py,sha256=BVtDmrmFMKerSUWaNOIEdzsYHIFiODdpnWbE50QDPDc,7864
 upgini/utils/track_info.py,sha256=G5Lu1xxakg2_TQjKZk4b5SvrHsATTXNVV3NbvWtT8k8,5663
 upgini/utils/warning_counter.py,sha256=dIWBB4dI5XRRJZudvIlqlIYKEiwLLPcXarsZuYRt338,227
-upgini-1.1.316.dist-info/METADATA,sha256=12UKpdX0d9nky8XWhKtyQjDK2MVWtbsEr811NSWrKmE,48222
-upgini-1.1.316.dist-info/WHEEL,sha256=1yFddiXMmvYK7QYTqtRNtX66WJ0Mz8PYEiEUoOUUxRY,87
-upgini-1.1.316.dist-info/licenses/LICENSE,sha256=5RRzgvdJUu3BUDfv4bzVU6FqKgwHlIay63pPCSmSgzw,1514
-upgini-1.1.316.dist-info/RECORD,,
+upgini-1.1.316a1.dist-info/METADATA,sha256=eJXt7Ga1qWst0_EIHCQYMTnAf0EkeO73tt5hbx4K_5g,48226
+upgini-1.1.316a1.dist-info/WHEEL,sha256=1yFddiXMmvYK7QYTqtRNtX66WJ0Mz8PYEiEUoOUUxRY,87
+upgini-1.1.316a1.dist-info/licenses/LICENSE,sha256=5RRzgvdJUu3BUDfv4bzVU6FqKgwHlIay63pPCSmSgzw,1514
+upgini-1.1.316a1.dist-info/RECORD,,

{upgini-1.1.316.dist-info → upgini-1.1.316a1.dist-info}/WHEEL RENAMED Viewed

File without changes

{upgini-1.1.316.dist-info → upgini-1.1.316a1.dist-info}/licenses/LICENSE RENAMED Viewed

File without changes

upgini 1.1.316__py3-none-any.whl → 1.1.316a1__py3-none-any.whl

Potentially problematic release.

upgini 1.1.316__py3-none-any.whl → 1.1.316a1__py3-none-any.whl