PyPI - upgini - Versions diffs - 1.2.32__py3-none-any.whl → 1.2.34a3657.dev1__py3-none-any.whl - Mend

upgini 1.2.32__py3-none-any.whl → 1.2.34a3657.dev1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of upgini might be problematic. Click here for more details.

Files changed (13) hide show

upgini/__about__.py +1 -1
upgini/autofe/all_operands.py +7 -84
upgini/autofe/date.py +39 -23
upgini/autofe/feature.py +7 -7
upgini/autofe/groupby.py +20 -10
upgini/autofe/operand.py +57 -2
upgini/autofe/vector.py +120 -2
upgini/features_enricher.py +47 -6
upgini/metadata.py +1 -0
{upgini-1.2.32.dist-info → upgini-1.2.34a3657.dev1.dist-info}/METADATA +2 -2
{upgini-1.2.32.dist-info → upgini-1.2.34a3657.dev1.dist-info}/RECORD +13 -13
{upgini-1.2.32.dist-info → upgini-1.2.34a3657.dev1.dist-info}/WHEEL +1 -1
{upgini-1.2.32.dist-info → upgini-1.2.34a3657.dev1.dist-info}/licenses/LICENSE +0 -0

upgini/__about__.py CHANGED Viewed

	@@ -1 +1 @@
1	- __version__ = "1.2.32"
1	+ __version__ = "1.2.34a3657.dev1"

upgini/autofe/all_operands.py CHANGED Viewed

@@ -1,87 +1,10 @@
-from copy import deepcopy
-from typing import Dict
-from upgini.autofe.binary import (
-    Add,
-    Combine,
-    CombineThenFreq,
-    Distance,
-    Divide,
-    JaroWinklerSim1,
-    JaroWinklerSim2,
-    LevenshteinSim,
-    Max,
-    Min,
-    Multiply,
-    Sim,
-    Subtract,
-)
-from upgini.autofe.date import (
-    DateDiff,
-    DateDiffType2,
-    DateListDiff,
-    DateListDiffBounded,
-    DatePercentile,
-    DatePercentileMethod2,
-)
-from upgini.autofe.groupby import GroupByThenAgg, GroupByThenFreq, GroupByThenNUnique, GroupByThenRank
-from upgini.autofe.operand import Operand
-from upgini.autofe.unary import Abs, Embeddings, Floor, Freq, Log, Residual, Norm, Sigmoid, Sqrt, Square
-from upgini.autofe.vector import Mean, Sum
-ALL_OPERANDS: Dict[str, Operand] = {
-    op.name: op
-    for op in [
-        Freq(),
-        Mean(),
-        Sum(),
-        Abs(),
-        Log(),
-        Sqrt(),
-        Square(),
-        Sigmoid(),
-        Floor(),
-        Residual(),
-        Min(),
-        Max(),
-        Add(),
-        Subtract(),
-        Multiply(),
-        Divide(),
-        GroupByThenAgg(name="GroupByThenMin", agg="min"),
-        GroupByThenAgg(name="GroupByThenMax", agg="max"),
-        GroupByThenAgg(name="GroupByThenMean", agg="mean"),
-        GroupByThenAgg(name="GroupByThenMedian", agg="median"),
-        GroupByThenAgg(name="GroupByThenStd", output_type="float", agg="std"),
-        GroupByThenRank(),
-        Combine(),
-        CombineThenFreq(),
-        GroupByThenNUnique(),
-        GroupByThenFreq(),
-        Sim(),
-        DateDiff(),
-        DateDiffType2(),
-        DateListDiff(aggregation="min"),
-        DateListDiff(aggregation="max"),
-        DateListDiff(aggregation="mean"),
-        DateListDiff(aggregation="nunique"),
-        DateListDiffBounded(diff_unit="Y", aggregation="count", lower_bound=0, upper_bound=18),
-        DateListDiffBounded(diff_unit="Y", aggregation="count", lower_bound=18, upper_bound=23),
-        DateListDiffBounded(diff_unit="Y", aggregation="count", lower_bound=23, upper_bound=30),
-        DateListDiffBounded(diff_unit="Y", aggregation="count", lower_bound=30, upper_bound=45),
-        DateListDiffBounded(diff_unit="Y", aggregation="count", lower_bound=45, upper_bound=60),
-        DateListDiffBounded(diff_unit="Y", aggregation="count", lower_bound=60),
-        DatePercentile(),
-        DatePercentileMethod2(),
-        Norm(),
-        JaroWinklerSim1(),
-        JaroWinklerSim2(),
-        LevenshteinSim(),
-        Distance(),
-        Embeddings(),
-    ]
-}
+from upgini.autofe.operand import OperandRegistry
+from upgini.autofe.unary import *  # noqa
+from upgini.autofe.binary import *  # noqa
+from upgini.autofe.groupby import *  # noqa
+from upgini.autofe.date import *  # noqa
+from upgini.autofe.vector import *  # noqa
 def find_op(name):
-    return deepcopy(ALL_OPERANDS.get(name))
+    return OperandRegistry.get_operand(name)

upgini/autofe/date.py CHANGED Viewed

@@ -7,11 +7,11 @@ import pandas as pd
 from pandas.core.arrays.timedeltas import TimedeltaArray
 from pydantic import BaseModel, __version__ as pydantic_version
-from upgini.autofe.operand import PandasOperand
+from upgini.autofe.operand import PandasOperand, ParametrizedOperand
 def get_pydantic_version():
-    major_version = int(pydantic_version.split('.')[0])
+    major_version = int(pydantic_version.split(".")[0])
     return major_version
@@ -109,7 +109,7 @@ _ext_aggregations = {"nunique": (lambda x: len(np.unique(x)), 0), "count": (len,
 _count_aggregations = ["nunique", "count"]
-class DateListDiff(PandasOperand, DateDiffMixin):
+class DateListDiff(PandasOperand, DateDiffMixin, ParametrizedOperand):
     is_binary: bool = True
     has_symmetry_importance: bool = True
@@ -129,10 +129,17 @@ class DateListDiff(PandasOperand, DateDiffMixin):
         )
         return res
-    def __init__(self, **data: Any) -> None:
-        if "name" not in data:
-            data["name"] = f"date_diff_{data.get('aggregation')}"
-        super().__init__(**data)
+    def to_formula(self) -> str:
+        return f"date_diff_{self.aggregation}"
+    @classmethod
+    def from_formula(cls, formula: str) -> Optional["DateListDiff"]:
+        if not formula.startswith("date_diff_"):
+            return None
+        aggregation = formula.replace("date_diff_", "")
+        if "_" in aggregation:
+            return None
+        return cls(aggregation=aggregation)
     def calculate_binary(self, left: pd.Series, right: pd.Series) -> pd.Series:
         left = self._convert_to_date(left, self.left_unit)
@@ -170,23 +177,31 @@ class DateListDiff(PandasOperand, DateDiffMixin):
         return method(x) if len(x) > 0 else default
-class DateListDiffBounded(DateListDiff):
+class DateListDiffBounded(DateListDiff, ParametrizedOperand):
     lower_bound: Optional[int] = None
     upper_bound: Optional[int] = None
-    def __init__(self, **data: Any) -> None:
-        if "name" not in data:
-            lower_bound = data.get("lower_bound")
-            upper_bound = data.get("upper_bound")
-            components = [
-                "date_diff",
-                data.get("diff_unit"),
-                str(lower_bound if lower_bound is not None else "minusinf"),
-                str(upper_bound if upper_bound is not None else "plusinf"),
-            ]
-            components.append(data.get("aggregation"))
-            data["name"] = "_".join(components)
-        super().__init__(**data)
+    def to_formula(self) -> str:
+        lower_bound = "minusinf" if self.lower_bound is None else self.lower_bound
+        upper_bound = "plusinf" if self.upper_bound is None else self.upper_bound
+        return f"date_diff_{self.diff_unit}_{lower_bound}_{upper_bound}_{self.aggregation}"
+    @classmethod
+    def from_formula(cls, formula: str) -> Optional["DateListDiffBounded"]:
+        import re
+        pattern = r"^date_diff_([^_]+)_((minusinf|\d+))_((plusinf|\d+))_(\w+)$"
+        match = re.match(pattern, formula)
+        if not match:
+            return None
+        diff_unit = match.group(1)
+        lower_bound = None if match.group(2) == "minusinf" else int(match.group(2))
+        upper_bound = None if match.group(4) == "plusinf" else int(match.group(4))
+        aggregation = match.group(6)
+        return cls(diff_unit=diff_unit, lower_bound=lower_bound, upper_bound=upper_bound, aggregation=aggregation)
     def _agg(self, x):
         x = x[
@@ -257,16 +272,17 @@ class DatePercentile(DatePercentileBase):
         # Use @field_validator for Pydantic 2.x
         from pydantic import field_validator
-        @field_validator('zero_bounds', mode='before')
+        @field_validator("zero_bounds", mode="before")
         def parse_zero_bounds(cls, value):
             if isinstance(value, str):
                 return json.loads(value)
             return value
     else:
         # Use @validator for Pydantic 1.x
         from pydantic import validator
-        @validator('zero_bounds', pre=True)
+        @validator("zero_bounds", pre=True)
         def parse_zero_bounds(cls, value):
             if isinstance(value, str):
                 return json.loads(value)

upgini/autofe/feature.py CHANGED Viewed

@@ -121,7 +121,7 @@ class Feature:
     def get_hash(self) -> str:
         return hashlib.sha256(
-            "_".join([self.op.name] + [ch.get_display_name() for ch in self.children]).encode("utf-8")
+            "_".join([self.op.to_formula()] + [ch.get_display_name() for ch in self.children]).encode("utf-8")
         ).hexdigest()[:8]
     def set_alias(self, alias: str) -> "Feature":
@@ -129,7 +129,7 @@ class Feature:
         return self
     def get_all_operand_names(self) -> Set[str]:
-        return {self.op.name}.union(
+        return {self.op.to_formula()}.union(
             {n for f in self.children if isinstance(f, Feature) for n in f.get_all_operand_names()}
         )
@@ -160,7 +160,7 @@ class Feature:
             child.delete_data()
     def get_op_display_name(self) -> str:
-        return self.op.alias or self.op.name.lower()
+        return (self.op.alias or self.op.to_formula()).lower()
     def get_display_name(self, cache: bool = True, shorten: bool = False, **kwargs) -> str:
         if self.cached_display_name is not None and cache:
@@ -239,9 +239,9 @@ class Feature:
         if self.op.name in ["+", "-", "*", "/"]:
             left = self.children[0].to_formula(**kwargs)
             right = self.children[1].to_formula(**kwargs)
-            return f"({left}{self.op.name}{right})"
+            return f"({left}{self.op.to_formula()}{right})"
         else:
-            result = [self.op.name, "("]
+            result = [self.op.to_formula(), "("]
             for i in range(len(self.children)):
                 string_i = self.children[i].to_formula(**kwargs)
                 result.append(string_i)
@@ -254,9 +254,9 @@ class Feature:
         if self.op.name in ["+", "-", "*", "/"]:
             left = self.children[0].to_pretty_formula()
             right = self.children[1].to_pretty_formula()
-            return f"{left} {self.op.name} {right}"
+            return f"{left} {self.op.to_formula()} {right}"
         else:
-            result = [self.op.name, "("]
+            result = [self.op.to_formula(), "("]
             for i in range(len(self.children)):
                 string_i = self.children[i].to_pretty_formula()
                 result.append(string_i)

upgini/autofe/groupby.py CHANGED Viewed

@@ -2,33 +2,43 @@ from typing import Optional
 import pandas as pd
-from upgini.autofe.operand import PandasOperand, VectorizableMixin
+from upgini.autofe.operand import PandasOperand, ParametrizedOperand, VectorizableMixin
-class GroupByThenAgg(PandasOperand, VectorizableMixin):
+class GroupByThenAgg(
+    PandasOperand,
+    VectorizableMixin,
+    ParametrizedOperand,
+):
     agg: Optional[str]
     is_vectorizable: bool = True
     is_grouping: bool = True
     is_distribution_dependent: bool = True
+    def to_formula(self) -> str:
+        return f"GroupByThen{self.agg}"
+    @classmethod
+    def from_formula(cls, formula: str) -> Optional["GroupByThenAgg"]:
+        if not formula.startswith("GroupByThen"):
+            return None
+        agg = formula[len("GroupByThen") :]
+        if agg.lower() in ["rank", "nunique", "freq"]:  # other implementation
+            return None
+        return cls(agg=agg)
     def calculate_binary(self, left: pd.Series, right: pd.Series) -> pd.Series:
-        temp = left.groupby(right).agg(self.agg)
+        temp = left.groupby(right).agg(self.agg.lower())
         return self._loc(right, temp)
     def calculate_group(self, data: pd.DataFrame, **kwargs) -> pd.DataFrame:
         group_column, value_columns = self.validate_calculation(data.columns, **kwargs)
         d1 = data[value_columns]
         d2 = data[group_column]
-        temp = d1.groupby(d2).agg(self.agg)
+        temp = d1.groupby(d2).agg(self.agg.lower())
         return temp.merge(d2, how="right", on=[group_column])[value_columns]
-class GroupByThenMedian(GroupByThenAgg):
-    name: str = "GroupByThenMedian"
-    pandas_agg: str = "median"
-    is_distribution_dependent: bool = True
 class GroupByThenRank(PandasOperand, VectorizableMixin):
     name: str = "GroupByThenRank"
     is_vectorizable: bool = True

upgini/autofe/operand.py CHANGED Viewed

@@ -6,8 +6,48 @@ import pandas as pd
 from pydantic import BaseModel
-class Operand(BaseModel):
-    name: str
+class OperandRegistry(type(BaseModel)):
+    _registry = {}
+    _parametrized_registry = []
+    def __new__(cls, name, bases, attrs):
+        new_class = super().__new__(cls, name, bases, attrs)
+        # Only register if it's a concrete class that inherits from Operand
+        base_classes = [b for b in bases]
+        base_names = {b.__name__ for b in bases}
+        while base_classes:
+            base = base_classes.pop()
+            base_names.update(b.__name__ for b in base.__bases__)
+            base_classes.extend(base.__bases__)
+        if "Operand" in base_names:
+            # Track parametrized operands separately
+            if "ParametrizedOperand" in base_names:
+                cls._parametrized_registry.append(new_class)
+            else:
+                try:
+                    instance = new_class()
+                    cls._registry[instance.name] = new_class
+                except Exception:
+                    pass
+        return new_class
+    @classmethod
+    def get_operand(cls, name: str) -> Optional["Operand"]:
+        # First try to resolve as a parametrized operand formula
+        for operand_cls in cls._parametrized_registry:
+            resolved = operand_cls.from_formula(name)
+            if resolved is not None:
+                return resolved
+        # Fall back to direct registry lookup
+        non_parametrized = cls._registry.get(name)
+        if non_parametrized is not None:
+            return non_parametrized()
+        return None
+class Operand(BaseModel, metaclass=OperandRegistry):
+    name: Optional[str] = None
     alias: Optional[str] = None
     is_unary: bool = False
     is_symmetrical: bool = False
@@ -31,6 +71,21 @@ class Operand(BaseModel):
         res.update(self.params or {})
         return res
+    def to_formula(self) -> str:
+        return self.name
+class ParametrizedOperand(Operand, abc.ABC):
+    @abc.abstractmethod
+    def to_formula(self) -> str:
+        pass
+    @classmethod
+    @abc.abstractmethod
+    def from_formula(cls, formula: str) -> Optional["Operand"]:
+        pass
 MAIN_COLUMN = "main_column"

upgini/autofe/vector.py CHANGED Viewed

@@ -1,8 +1,10 @@
-from typing import List, Optional
+import abc
+from typing import Dict, List, Optional
 import pandas as pd
+from pydantic import validator
-from upgini.autofe.operand import PandasOperand, VectorizableMixin
+from upgini.autofe.operand import PandasOperand, ParametrizedOperand, VectorizableMixin
 class Mean(PandasOperand, VectorizableMixin):
@@ -22,3 +24,119 @@ class Sum(PandasOperand, VectorizableMixin):
     def calculate_vector(self, data: List[pd.Series]) -> pd.Series:
         return pd.DataFrame(data).T.fillna(0).sum(axis=1)
+class TimeSeriesBase(PandasOperand, abc.ABC):
+    is_vector: bool = True
+    date_unit: Optional[str] = None
+    def get_params(self) -> Dict[str, Optional[str]]:
+        res = super().get_params()
+        res.update(
+            {
+                "date_unit": self.date_unit,
+            }
+        )
+        return res
+    def calculate_vector(self, data: List[pd.Series]) -> pd.Series:
+        # assuming first is date, last is value, rest is group columns
+        date = pd.to_datetime(data[0], unit=self.date_unit, errors="coerce")
+        ts = pd.concat([date] + data[1:], axis=1)
+        ts.drop_duplicates(subset=ts.columns[:-1], keep="first", inplace=True)
+        ts.set_index(date.name, inplace=True)
+        ts = ts[ts.index.notna()].sort_index()
+        ts = ts.groupby([c.name for c in data[1:-1]]) if len(data) > 2 else ts
+        ts = self._aggregate(ts)
+        ts = ts.reindex(data[1:-1] + [date] if len(data) > 2 else date).reset_index()
+        return ts.iloc[:, -1]
+    @abc.abstractmethod
+    def _aggregate(self, ts: pd.DataFrame) -> pd.DataFrame:
+        pass
+_roll_aggregations = {"norm_mean": lambda x: x[-1] / x.mean()}
+class Roll(TimeSeriesBase, ParametrizedOperand):
+    aggregation: str
+    window_size: int = 1
+    window_unit: str = "D"
+    @validator("window_unit")
+    def validate_window_unit(cls, v: str) -> str:
+        try:
+            pd.tseries.frequencies.to_offset(v)
+            return v
+        except ValueError:
+            raise ValueError(
+                f"Invalid window_unit: {v}. Must be a valid pandas frequency string (e.g. 'D', 'H', 'T', etc)"
+            )
+    def to_formula(self) -> str:
+        return f"roll_{self.window_size}{self.window_unit}_{self.aggregation}"
+    @classmethod
+    def from_formula(cls, formula: str) -> Optional["Roll"]:
+        import re
+        pattern = r"^roll_(\d+)([a-zA-Z])_(\w+)$"
+        match = re.match(pattern, formula)
+        if not match:
+            return None
+        window_size = int(match.group(1))
+        window_unit = match.group(2)
+        aggregation = match.group(3)
+        return cls(window_size=window_size, window_unit=window_unit, aggregation=aggregation)
+    def get_params(self) -> Dict[str, Optional[str]]:
+        res = super().get_params()
+        res.update(
+            {
+                "window_size": self.window_size,
+                "window_unit": self.window_unit,
+                "aggregation": self.aggregation,
+            }
+        )
+        return res
+    def _aggregate(self, ts: pd.DataFrame) -> pd.DataFrame:
+        return ts.rolling(f"{self.window_size}{self.window_unit}", min_periods=self.window_size).agg(
+            _roll_aggregations.get(self.aggregation, self.aggregation)
+        )
+class Lag(TimeSeriesBase, ParametrizedOperand):
+    lag_size: int
+    lag_unit: str = "D"
+    def to_formula(self) -> str:
+        return f"lag_{self.lag_size}{self.lag_unit}"
+    @classmethod
+    def from_formula(cls, formula: str) -> Optional["Lag"]:
+        import re
+        pattern = r"^lag_(\d+)([a-zA-Z])$"
+        match = re.match(pattern, formula)
+        if not match:
+            return None
+        lag_size = int(match.group(1))
+        lag_unit = match.group(2)
+        return cls(lag_size=lag_size, lag_unit=lag_unit)
+    def get_params(self) -> Dict[str, Optional[str]]:
+        res = super().get_params()
+        return res
+    def _aggregate(self, ts: pd.DataFrame) -> pd.DataFrame:
+        lag_window = self.lag_size + 1
+        return ts.rolling(f"{lag_window}{self.lag_unit}", min_periods=lag_window).agg(lambda x: x[0])

upgini/features_enricher.py CHANGED Viewed

@@ -111,7 +111,11 @@ try:
 except Exception:
     from upgini.utils.fallback_progress_bar import CustomFallbackProgressBar as ProgressBar
-from upgini.utils.target_utils import calculate_psi, define_task
+from upgini.utils.target_utils import (
+    balance_undersample_forced,
+    calculate_psi,
+    define_task,
+)
 from upgini.utils.warning_counter import WarningCounter
 from upgini.version_validator import validate_version
@@ -967,6 +971,13 @@ class FeaturesEnricher(TransformerMixin):
                     self.__log_warning(self.bundle.get("metrics_no_important_free_features"))
                     return None
+                maybe_phone_column = self._get_phone_column(self.search_keys)
+                text_features = (
+                    [f for f in self.generate_features if f != maybe_phone_column]
+                    if self.generate_features is not None
+                    else None
+                )
                 print(self.bundle.get("metrics_start"))
                 with Spinner():
                     self._check_train_and_eval_target_distribution(y_sorted, fitting_eval_set_dict)
@@ -982,7 +993,7 @@ class FeaturesEnricher(TransformerMixin):
                         fitting_enriched_X,
                         scoring,
                         groups=groups,
-                        text_features=self.generate_features,
+                        text_features=text_features,
                         has_date=has_date,
                     )
                     metric = wrapper.metric_name
@@ -1009,7 +1020,7 @@ class FeaturesEnricher(TransformerMixin):
                             cat_features,
                             add_params=custom_loss_add_params,
                             groups=groups,
-                            text_features=self.generate_features,
+                            text_features=text_features,
                             has_date=has_date,
                         )
                         etalon_cv_result = baseline_estimator.cross_val_predict(
@@ -1044,7 +1055,7 @@ class FeaturesEnricher(TransformerMixin):
                             cat_features,
                             add_params=custom_loss_add_params,
                             groups=groups,
-                            text_features=self.generate_features,
+                            text_features=text_features,
                             has_date=has_date,
                         )
                         enriched_cv_result = enriched_estimator.cross_val_predict(fitting_enriched_X, enriched_y_sorted)
@@ -1827,7 +1838,27 @@ class FeaturesEnricher(TransformerMixin):
             # downsample if need to eval_set threshold
             num_samples = _num_samples(df)
-            if num_samples > Dataset.FIT_SAMPLE_WITH_EVAL_SET_THRESHOLD:
+            phone_column = self._get_phone_column(self.search_keys)
+            force_downsampling = (
+                not self.disable_force_downsampling
+                and self.generate_features is not None
+                and phone_column is not None
+                and self.fit_columns_renaming[phone_column] in self.generate_features
+                and num_samples > Dataset.FORCE_SAMPLE_SIZE
+            )
+            if force_downsampling:
+                self.logger.info(f"Force downsampling from {num_samples} to {Dataset.FORCE_SAMPLE_SIZE}")
+                df = balance_undersample_forced(
+                    df=df,
+                    target_column=TARGET,
+                    task_type=self.model_task_type,
+                    random_state=self.random_state,
+                    sample_size=Dataset.FORCE_SAMPLE_SIZE,
+                    logger=self.logger,
+                    bundle=self.bundle,
+                    warning_callback=self.__log_warning,
+                )
+            elif num_samples > Dataset.FIT_SAMPLE_WITH_EVAL_SET_THRESHOLD:
                 self.logger.info(f"Downsampling from {num_samples} to {Dataset.FIT_SAMPLE_WITH_EVAL_SET_ROWS}")
                 df = df.sample(n=Dataset.FIT_SAMPLE_WITH_EVAL_SET_ROWS, random_state=self.random_state)
@@ -2063,6 +2094,15 @@ class FeaturesEnricher(TransformerMixin):
                 self.__display_support_link(msg)
                 return None, {c: c for c in X.columns}, []
+            features_meta = self._search_task.get_all_features_metadata_v2()
+            online_api_features = [fm.name for fm in features_meta if fm.from_online_api]
+            if len(online_api_features) > 0:
+                self.logger.warning(
+                    f"There are important features for transform, that generated by online API: {online_api_features}"
+                )
+                # TODO
+                raise Exception("There are features selected that are paid. Contact support (sales@upgini.com)")
             if not metrics_calculation:
                 transform_usage = self.rest_client.get_current_transform_usage(trace_id)
                 self.logger.info(f"Current transform usage: {transform_usage}. Transforming {len(X)} rows")
@@ -2708,8 +2748,9 @@ class FeaturesEnricher(TransformerMixin):
             and self.generate_features is not None
             and phone_column is not None
             and self.fit_columns_renaming[phone_column] in self.generate_features
+            and len(df) > Dataset.FORCE_SAMPLE_SIZE
         )
-        if force_downsampling and len(df) > Dataset.FORCE_SAMPLE_SIZE:
+        if force_downsampling:
             runtime_parameters.properties["fast_fit"] = True
         dataset = Dataset(

upgini/metadata.py CHANGED Viewed

@@ -255,6 +255,7 @@ class FeaturesMetadataV2(BaseModel):
     data_source_links: Optional[List[str]] = None
     doc_link: Optional[str] = None
     update_frequency: Optional[str] = None
+    from_online_api: Optional[bool] = None
 class HitRateMetrics(BaseModel):

{upgini-1.2.32.dist-info → upgini-1.2.34a3657.dev1.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.3
 Name: upgini
-Version: 1.2.32
+Version: 1.2.34a3657.dev1
 Summary: Intelligent data search & enrichment for Machine Learning
 Project-URL: Bug Reports, https://github.com/upgini/upgini/issues
 Project-URL: Homepage, https://upgini.com/
@@ -110,7 +110,7 @@ Description-Content-Type: text/markdown
   </tr>
 </table>
-⭐️ [Simple Drag & Drop Search UI](https://upgini.com/upgini-widget):
+⭐️ [Simple Drag & Drop Search UI](https://www.upgini.com/data-search-widget):
 <a href="https://upgini.com/upgini-widget">
 <img width="710" alt="Drag & Drop Search UI" src="https://github.com/upgini/upgini/assets/95645411/36b6460c-51f3-400e-9f04-445b938bf45e">
 </a>

{upgini-1.2.32.dist-info → upgini-1.2.34a3657.dev1.dist-info}/RECORD RENAMED Viewed

@@ -1,12 +1,12 @@
-upgini/__about__.py,sha256=UIgtmuCowO7KL6tTNZJcBfDcGQ4kWH7MjulZqCj9os8,23
+upgini/__about__.py,sha256=JBP_tvOiBuuOyLx7mNqZYU1UEW5bf82plZzE0AvVsfI,33
 upgini/__init__.py,sha256=LXSfTNU0HnlOkE69VCxkgIKDhWP-JFo_eBQ71OxTr5Y,261
 upgini/ads.py,sha256=nvuRxRx5MHDMgPr9SiU-fsqRdFaBv8p4_v1oqiysKpc,2714
 upgini/dataset.py,sha256=KnkqV7Nnx3kxfQ89giDao3bmCm4MFJWqJUrONy85E-k,32030
 upgini/errors.py,sha256=2b_Wbo0OYhLUbrZqdLIx5jBnAsiD1Mcenh-VjR4HCTw,950
-upgini/features_enricher.py,sha256=ZUbq6ZsIYznFSyU7tTmCPNzEyr-yRI0r-FJcz4i_Ads,192899
+upgini/features_enricher.py,sha256=q11aMFPlCJy1m4sOFfGZFfb4vdG3-hdd0wgm2BXgs9A,194748
 upgini/http.py,sha256=plZGTGoi1h2edd8Cnjt4eYB8t4NbBGnZz7DtPTByiNc,42885
 upgini/lazy_import.py,sha256=74gQ8JuA48BGRLxAo7lNHNKY2D2emMxrUxKGdxVGhuY,1012
-upgini/metadata.py,sha256=lUa2xYhBhnCeTqNt6lWc9iP_YuikYGIsDSn8Vwyjv1I,11235
+upgini/metadata.py,sha256=ACzIQQwCHCFHlUqXqKpxd3IQ4bBAaVvy8UaCGTqLGQs,11278
 upgini/metrics.py,sha256=hr7UwLphbZ_FEglLuO2lzr_pFgxOJ4c3WBeg7H-fNqY,35521
 upgini/search_task.py,sha256=qxUxAD-bed-FpZYmTB_4orW7YJsW_O6a1TcgnZIRFr4,17307
 upgini/spinner.py,sha256=4iMd-eIe_BnkqFEMIliULTbj6rNI2HkN_VJ4qYe0cUc,1118
@@ -14,14 +14,14 @@ upgini/version_validator.py,sha256=h1GViOWzULy5vf6M4dpTJuIk-4V38UCrTY1sb9yLa5I,1
 upgini/ads_management/__init__.py,sha256=qzyisOToVRP-tquAJD1PblZhNtMrOB8FiyF9JvfkvgE,50
 upgini/ads_management/ads_manager.py,sha256=igVbN2jz80Umb2BUJixmJVj-zx8unoKpecVo-R-nGdw,2648
 upgini/autofe/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-upgini/autofe/all_operands.py,sha256=cCCB44qvkmuWyiRM5Xykx8tkHPIjQthrWyj67STWN80,2578
+upgini/autofe/all_operands.py,sha256=v0_NozalvvzeojSAA0d7UJ5INS654ZVaLn4S8djK6Ac,329
 upgini/autofe/binary.py,sha256=zMhtHVuGUAFLUqem-XiXqJj-GRXxS88tdz8tFuDfSNM,7659
-upgini/autofe/date.py,sha256=OpFc3Al0xO3qlESn2Uokfxw51ArVqmh3xngWwdrsaqE,9762
-upgini/autofe/feature.py,sha256=eL7wABUhDKZzv3E-RPJNcyGwSfB0UptcfU2RbvsOks4,15082
-upgini/autofe/groupby.py,sha256=r-xl_keZZgm_tpiEoDhjYSkT6NHv7a4cRQR4wJ4uCp8,3263
-upgini/autofe/operand.py,sha256=uk883RaNqgXqtkaRqA1re1d9OFnnpv0JVvelYx09Yw0,2943
+upgini/autofe/date.py,sha256=Sd1Bm_uby9liSgsUkxsFgnCFaHxmj9MLX0ymR9DLQuQ,10401
+upgini/autofe/feature.py,sha256=l8A8E3BH2BmYvqEC81zbcIEfH6KEEhcesJ2BH4fn0-4,15140
+upgini/autofe/groupby.py,sha256=G48_sQZw016eGx3cOy8YQrEIOp95puWqYUpFWd-gdeM,3595
+upgini/autofe/operand.py,sha256=8Ttrfxv_H91dMbS7J55zxluzAJHfGXU_Y2xCh4OHwb8,4774
 upgini/autofe/unary.py,sha256=T3E7F3dA_7o_rkdCFq7JV6nHLzcoHLHQTcxO7y5Opa4,4646
-upgini/autofe/vector.py,sha256=ehcZUDqV71TfbU8EmKfdYp603gS2dJY_-fpr10ho5sI,663
+upgini/autofe/vector.py,sha256=MyNPuqZ5J2vqRSn2UQcKp0ekXWv-d6lImEwqfU3pbCM,4328
 upgini/data_source/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 upgini/data_source/data_source_publisher.py,sha256=X-8aGtVgzGmxyXkMVBoBLIGDMb4lYQaGZbxDnOd4A3Q,22516
 upgini/mdc/__init__.py,sha256=aM08nIWFc2gWdWUa3_IuEnNND0cQPkBGnYpRMnfFN8k,1019
@@ -59,7 +59,7 @@ upgini/utils/sklearn_ext.py,sha256=13jQS_k7v0aUtudXV6nGUEWjttPQzAW9AFYL5wgEz9k,4
 upgini/utils/target_utils.py,sha256=Ed5IXkPjV9AfAZQAwCYksAmKaPGQliplvDYS_yeWdfk,11330
 upgini/utils/track_info.py,sha256=G5Lu1xxakg2_TQjKZk4b5SvrHsATTXNVV3NbvWtT8k8,5663
 upgini/utils/warning_counter.py,sha256=-GRY8EUggEBKODPSuXAkHn9KnEQwAORC0mmz_tim-PM,254
-upgini-1.2.32.dist-info/METADATA,sha256=GTTmeHuetD3Mrl8pR9K3YFzJcPE8Zl8UdWb23vG-R_s,48578
-upgini-1.2.32.dist-info/WHEEL,sha256=1yFddiXMmvYK7QYTqtRNtX66WJ0Mz8PYEiEUoOUUxRY,87
-upgini-1.2.32.dist-info/licenses/LICENSE,sha256=5RRzgvdJUu3BUDfv4bzVU6FqKgwHlIay63pPCSmSgzw,1514
-upgini-1.2.32.dist-info/RECORD,,
+upgini-1.2.34a3657.dev1.dist-info/METADATA,sha256=marFhP2NoGmDk3lYZemMPRXcBRCB6jr_3tgx-I7fhIE,48597
+upgini-1.2.34a3657.dev1.dist-info/WHEEL,sha256=zEMcRr9Kr03x1ozGwg5v9NQBKn3kndp6LSoSlVg-jhU,87
+upgini-1.2.34a3657.dev1.dist-info/licenses/LICENSE,sha256=5RRzgvdJUu3BUDfv4bzVU6FqKgwHlIay63pPCSmSgzw,1514
+upgini-1.2.34a3657.dev1.dist-info/RECORD,,

{upgini-1.2.32.dist-info → upgini-1.2.34a3657.dev1.dist-info}/WHEEL RENAMED Viewed

@@ -1,4 +1,4 @@
 Wheel-Version: 1.0
-Generator: hatchling 1.25.0
+Generator: hatchling 1.24.2
 Root-Is-Purelib: true
 Tag: py3-none-any

{upgini-1.2.32.dist-info → upgini-1.2.34a3657.dev1.dist-info}/licenses/LICENSE RENAMED Viewed

File without changes

upgini 1.2.32__py3-none-any.whl → 1.2.34a3657.dev1__py3-none-any.whl

Potentially problematic release.

upgini 1.2.32__py3-none-any.whl → 1.2.34a3657.dev1__py3-none-any.whl