upgini 1.2.19a1__py3-none-any.whl → 1.2.20a3657.dev1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of upgini might be problematic. Click here for more details.
- upgini/__about__.py +1 -1
- upgini/__init__.py +1 -0
- upgini/autofe/all_operands.py +2 -84
- upgini/autofe/date.py +33 -6
- upgini/autofe/operand.py +47 -1
- upgini/autofe/vector.py +133 -2
- upgini/features_enricher.py +9 -10
- {upgini-1.2.19a1.dist-info → upgini-1.2.20a3657.dev1.dist-info}/METADATA +1 -1
- {upgini-1.2.19a1.dist-info → upgini-1.2.20a3657.dev1.dist-info}/RECORD +11 -11
- {upgini-1.2.19a1.dist-info → upgini-1.2.20a3657.dev1.dist-info}/WHEEL +1 -1
- {upgini-1.2.19a1.dist-info → upgini-1.2.20a3657.dev1.dist-info}/licenses/LICENSE +0 -0
upgini/__about__.py
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
__version__ = "1.2.19a1"
|
|
1
|
+
__version__ = "1.2.20a3657.dev1"
|
upgini/__init__.py
CHANGED
|
@@ -2,6 +2,7 @@ import os
|
|
|
2
2
|
|
|
3
3
|
from upgini.features_enricher import FeaturesEnricher # noqa: F401
|
|
4
4
|
from upgini.metadata import SearchKey, CVType, RuntimeParameters, ModelTaskType # noqa: F401
|
|
5
|
+
|
|
5
6
|
# from .lazy_import import LazyImport
|
|
6
7
|
|
|
7
8
|
os.environ["SETUPTOOLS_USE_DISTUTILS"] = "stdlib"
|
upgini/autofe/all_operands.py
CHANGED
|
@@ -1,87 +1,5 @@
|
|
|
1
|
-
from
|
|
2
|
-
from typing import Dict
|
|
3
|
-
|
|
4
|
-
from upgini.autofe.binary import (
|
|
5
|
-
Add,
|
|
6
|
-
Combine,
|
|
7
|
-
CombineThenFreq,
|
|
8
|
-
Distance,
|
|
9
|
-
Divide,
|
|
10
|
-
JaroWinklerSim1,
|
|
11
|
-
JaroWinklerSim2,
|
|
12
|
-
LevenshteinSim,
|
|
13
|
-
Max,
|
|
14
|
-
Min,
|
|
15
|
-
Multiply,
|
|
16
|
-
Sim,
|
|
17
|
-
Subtract,
|
|
18
|
-
)
|
|
19
|
-
from upgini.autofe.date import (
|
|
20
|
-
DateDiff,
|
|
21
|
-
DateDiffType2,
|
|
22
|
-
DateListDiff,
|
|
23
|
-
DateListDiffBounded,
|
|
24
|
-
DatePercentile,
|
|
25
|
-
DatePercentileMethod2,
|
|
26
|
-
)
|
|
27
|
-
from upgini.autofe.groupby import GroupByThenAgg, GroupByThenFreq, GroupByThenNUnique, GroupByThenRank
|
|
28
|
-
from upgini.autofe.operand import Operand
|
|
29
|
-
from upgini.autofe.unary import Abs, Embeddings, Floor, Freq, Log, Residual, Norm, Sigmoid, Sqrt, Square
|
|
30
|
-
from upgini.autofe.vector import Mean, Sum
|
|
31
|
-
|
|
32
|
-
ALL_OPERANDS: Dict[str, Operand] = {
|
|
33
|
-
op.name: op
|
|
34
|
-
for op in [
|
|
35
|
-
Freq(),
|
|
36
|
-
Mean(),
|
|
37
|
-
Sum(),
|
|
38
|
-
Abs(),
|
|
39
|
-
Log(),
|
|
40
|
-
Sqrt(),
|
|
41
|
-
Square(),
|
|
42
|
-
Sigmoid(),
|
|
43
|
-
Floor(),
|
|
44
|
-
Residual(),
|
|
45
|
-
Min(),
|
|
46
|
-
Max(),
|
|
47
|
-
Add(),
|
|
48
|
-
Subtract(),
|
|
49
|
-
Multiply(),
|
|
50
|
-
Divide(),
|
|
51
|
-
GroupByThenAgg(name="GroupByThenMin", agg="min"),
|
|
52
|
-
GroupByThenAgg(name="GroupByThenMax", agg="max"),
|
|
53
|
-
GroupByThenAgg(name="GroupByThenMean", agg="mean"),
|
|
54
|
-
GroupByThenAgg(name="GroupByThenMedian", agg="median"),
|
|
55
|
-
GroupByThenAgg(name="GroupByThenStd", output_type="float", agg="std"),
|
|
56
|
-
GroupByThenRank(),
|
|
57
|
-
Combine(),
|
|
58
|
-
CombineThenFreq(),
|
|
59
|
-
GroupByThenNUnique(),
|
|
60
|
-
GroupByThenFreq(),
|
|
61
|
-
Sim(),
|
|
62
|
-
DateDiff(),
|
|
63
|
-
DateDiffType2(),
|
|
64
|
-
DateListDiff(aggregation="min"),
|
|
65
|
-
DateListDiff(aggregation="max"),
|
|
66
|
-
DateListDiff(aggregation="mean"),
|
|
67
|
-
DateListDiff(aggregation="nunique"),
|
|
68
|
-
DateListDiffBounded(diff_unit="Y", aggregation="count", lower_bound=0, upper_bound=18),
|
|
69
|
-
DateListDiffBounded(diff_unit="Y", aggregation="count", lower_bound=18, upper_bound=23),
|
|
70
|
-
DateListDiffBounded(diff_unit="Y", aggregation="count", lower_bound=23, upper_bound=30),
|
|
71
|
-
DateListDiffBounded(diff_unit="Y", aggregation="count", lower_bound=30, upper_bound=45),
|
|
72
|
-
DateListDiffBounded(diff_unit="Y", aggregation="count", lower_bound=45, upper_bound=60),
|
|
73
|
-
DateListDiffBounded(diff_unit="Y", aggregation="count", lower_bound=60),
|
|
74
|
-
DatePercentile(),
|
|
75
|
-
DatePercentileMethod2(),
|
|
76
|
-
Norm(),
|
|
77
|
-
JaroWinklerSim1(),
|
|
78
|
-
JaroWinklerSim2(),
|
|
79
|
-
LevenshteinSim(),
|
|
80
|
-
Distance(),
|
|
81
|
-
Embeddings(),
|
|
82
|
-
]
|
|
83
|
-
}
|
|
1
|
+
from upgini.autofe.operand import OperandRegistry
|
|
84
2
|
|
|
85
3
|
|
|
86
4
|
def find_op(name):
|
|
87
|
-
return
|
|
5
|
+
return OperandRegistry.get_operand(name)
|
upgini/autofe/date.py
CHANGED
|
@@ -7,11 +7,11 @@ import pandas as pd
|
|
|
7
7
|
from pandas.core.arrays.timedeltas import TimedeltaArray
|
|
8
8
|
from pydantic import BaseModel, __version__ as pydantic_version
|
|
9
9
|
|
|
10
|
-
from upgini.autofe.operand import PandasOperand
|
|
10
|
+
from upgini.autofe.operand import PandasOperand, ParametrizedOperand
|
|
11
11
|
|
|
12
12
|
|
|
13
13
|
def get_pydantic_version():
|
|
14
|
-
major_version = int(pydantic_version.split(
|
|
14
|
+
major_version = int(pydantic_version.split(".")[0])
|
|
15
15
|
return major_version
|
|
16
16
|
|
|
17
17
|
|
|
@@ -109,7 +109,7 @@ _ext_aggregations = {"nunique": (lambda x: len(np.unique(x)), 0), "count": (len,
|
|
|
109
109
|
_count_aggregations = ["nunique", "count"]
|
|
110
110
|
|
|
111
111
|
|
|
112
|
-
class DateListDiff(PandasOperand, DateDiffMixin):
|
|
112
|
+
class DateListDiff(PandasOperand, DateDiffMixin, ParametrizedOperand):
|
|
113
113
|
is_binary: bool = True
|
|
114
114
|
has_symmetry_importance: bool = True
|
|
115
115
|
|
|
@@ -134,6 +134,15 @@ class DateListDiff(PandasOperand, DateDiffMixin):
|
|
|
134
134
|
data["name"] = f"date_diff_{data.get('aggregation')}"
|
|
135
135
|
super().__init__(**data)
|
|
136
136
|
|
|
137
|
+
@classmethod
|
|
138
|
+
def from_formula(cls, formula: str) -> Optional["DateListDiff"]:
|
|
139
|
+
if not formula.startswith("date_diff_"):
|
|
140
|
+
return None
|
|
141
|
+
aggregation = formula.replace("date_diff_", "")
|
|
142
|
+
if "_" in aggregation:
|
|
143
|
+
return None
|
|
144
|
+
return cls(aggregation=aggregation)
|
|
145
|
+
|
|
137
146
|
def calculate_binary(self, left: pd.Series, right: pd.Series) -> pd.Series:
|
|
138
147
|
left = self._convert_to_date(left, self.left_unit)
|
|
139
148
|
right_mask = right.apply(lambda x: len(x) > 0)
|
|
@@ -170,7 +179,7 @@ class DateListDiff(PandasOperand, DateDiffMixin):
|
|
|
170
179
|
return method(x) if len(x) > 0 else default
|
|
171
180
|
|
|
172
181
|
|
|
173
|
-
class DateListDiffBounded(DateListDiff):
|
|
182
|
+
class DateListDiffBounded(DateListDiff, ParametrizedOperand):
|
|
174
183
|
lower_bound: Optional[int] = None
|
|
175
184
|
upper_bound: Optional[int] = None
|
|
176
185
|
|
|
@@ -188,6 +197,23 @@ class DateListDiffBounded(DateListDiff):
|
|
|
188
197
|
data["name"] = "_".join(components)
|
|
189
198
|
super().__init__(**data)
|
|
190
199
|
|
|
200
|
+
@classmethod
|
|
201
|
+
def from_formula(cls, formula: str) -> Optional["DateListDiffBounded"]:
|
|
202
|
+
import re
|
|
203
|
+
|
|
204
|
+
pattern = r"^date_diff_([^_]+)_((minusinf|\d+))_((plusinf|\d+))_(\w+)$"
|
|
205
|
+
match = re.match(pattern, formula)
|
|
206
|
+
|
|
207
|
+
if not match:
|
|
208
|
+
return None
|
|
209
|
+
|
|
210
|
+
diff_unit = match.group(1)
|
|
211
|
+
lower_bound = None if match.group(2) == "minusinf" else int(match.group(2))
|
|
212
|
+
upper_bound = None if match.group(4) == "plusinf" else int(match.group(4))
|
|
213
|
+
aggregation = match.group(6)
|
|
214
|
+
|
|
215
|
+
return cls(diff_unit=diff_unit, lower_bound=lower_bound, upper_bound=upper_bound, aggregation=aggregation)
|
|
216
|
+
|
|
191
217
|
def _agg(self, x):
|
|
192
218
|
x = x[
|
|
193
219
|
(x >= (self.lower_bound if self.lower_bound is not None else -np.inf))
|
|
@@ -257,16 +283,17 @@ class DatePercentile(DatePercentileBase):
|
|
|
257
283
|
# Use @field_validator for Pydantic 2.x
|
|
258
284
|
from pydantic import field_validator
|
|
259
285
|
|
|
260
|
-
@field_validator(
|
|
286
|
+
@field_validator("zero_bounds", mode="before")
|
|
261
287
|
def parse_zero_bounds(cls, value):
|
|
262
288
|
if isinstance(value, str):
|
|
263
289
|
return json.loads(value)
|
|
264
290
|
return value
|
|
291
|
+
|
|
265
292
|
else:
|
|
266
293
|
# Use @validator for Pydantic 1.x
|
|
267
294
|
from pydantic import validator
|
|
268
295
|
|
|
269
|
-
@validator(
|
|
296
|
+
@validator("zero_bounds", pre=True)
|
|
270
297
|
def parse_zero_bounds(cls, value):
|
|
271
298
|
if isinstance(value, str):
|
|
272
299
|
return json.loads(value)
|
upgini/autofe/operand.py
CHANGED
|
@@ -6,7 +6,47 @@ import pandas as pd
|
|
|
6
6
|
from pydantic import BaseModel
|
|
7
7
|
|
|
8
8
|
|
|
9
|
-
class Operand(BaseModel):
|
|
9
|
+
class OperandRegistry(type(BaseModel)):
|
|
10
|
+
_registry = {}
|
|
11
|
+
_parametrized_registry = []
|
|
12
|
+
|
|
13
|
+
def __new__(cls, name, bases, attrs):
|
|
14
|
+
new_class = super().__new__(cls, name, bases, attrs)
|
|
15
|
+
# Only register if it's a concrete class that inherits from Operand
|
|
16
|
+
base_classes = [b for b in bases]
|
|
17
|
+
base_names = {b.__name__ for b in bases}
|
|
18
|
+
while base_classes:
|
|
19
|
+
base = base_classes.pop()
|
|
20
|
+
base_names.update(b.__name__ for b in base.__bases__)
|
|
21
|
+
base_classes.extend(base.__bases__)
|
|
22
|
+
|
|
23
|
+
if "Operand" in base_names:
|
|
24
|
+
# Track parametrized operands separately
|
|
25
|
+
if "ParametrizedOperand" in base_names:
|
|
26
|
+
cls._parametrized_registry.append(new_class)
|
|
27
|
+
else:
|
|
28
|
+
try:
|
|
29
|
+
instance = new_class()
|
|
30
|
+
cls._registry[instance.name] = new_class
|
|
31
|
+
except Exception:
|
|
32
|
+
pass
|
|
33
|
+
return new_class
|
|
34
|
+
|
|
35
|
+
@classmethod
|
|
36
|
+
def get_operand(cls, name: str) -> Optional["Operand"]:
|
|
37
|
+
# First try to resolve as a parametrized operand formula
|
|
38
|
+
for operand_cls in cls._parametrized_registry:
|
|
39
|
+
resolved = operand_cls.from_formula(name)
|
|
40
|
+
if resolved is not None:
|
|
41
|
+
return resolved
|
|
42
|
+
# Fall back to direct registry lookup
|
|
43
|
+
non_parametrized = cls._registry.get(name)
|
|
44
|
+
if non_parametrized is not None:
|
|
45
|
+
return non_parametrized()
|
|
46
|
+
return None
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
class Operand(BaseModel, metaclass=OperandRegistry):
|
|
10
50
|
name: str
|
|
11
51
|
alias: Optional[str] = None
|
|
12
52
|
is_unary: bool = False
|
|
@@ -32,6 +72,12 @@ class Operand(BaseModel):
|
|
|
32
72
|
return res
|
|
33
73
|
|
|
34
74
|
|
|
75
|
+
class ParametrizedOperand(Operand):
|
|
76
|
+
@classmethod
|
|
77
|
+
def from_formula(cls, formula: str) -> Optional["Operand"]:
|
|
78
|
+
pass
|
|
79
|
+
|
|
80
|
+
|
|
35
81
|
MAIN_COLUMN = "main_column"
|
|
36
82
|
|
|
37
83
|
|
upgini/autofe/vector.py
CHANGED
|
@@ -1,8 +1,10 @@
|
|
|
1
|
-
|
|
1
|
+
import abc
|
|
2
|
+
from typing import Any, Dict, List, Optional
|
|
2
3
|
|
|
3
4
|
import pandas as pd
|
|
5
|
+
from pydantic import validator
|
|
4
6
|
|
|
5
|
-
from upgini.autofe.operand import PandasOperand, VectorizableMixin
|
|
7
|
+
from upgini.autofe.operand import PandasOperand, ParametrizedOperand, VectorizableMixin
|
|
6
8
|
|
|
7
9
|
|
|
8
10
|
class Mean(PandasOperand, VectorizableMixin):
|
|
@@ -22,3 +24,132 @@ class Sum(PandasOperand, VectorizableMixin):
|
|
|
22
24
|
|
|
23
25
|
def calculate_vector(self, data: List[pd.Series]) -> pd.Series:
|
|
24
26
|
return pd.DataFrame(data).T.fillna(0).sum(axis=1)
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
class TimeSeriesBase(PandasOperand, abc.ABC):
|
|
30
|
+
is_vector: bool = True
|
|
31
|
+
date_unit: Optional[str] = None
|
|
32
|
+
|
|
33
|
+
def get_params(self) -> Dict[str, Optional[str]]:
|
|
34
|
+
res = super().get_params()
|
|
35
|
+
res.update(
|
|
36
|
+
{
|
|
37
|
+
"date_unit": self.date_unit,
|
|
38
|
+
}
|
|
39
|
+
)
|
|
40
|
+
return res
|
|
41
|
+
|
|
42
|
+
def calculate_vector(self, data: List[pd.Series]) -> pd.Series:
|
|
43
|
+
# assuming first is date, last is value, rest is group columns
|
|
44
|
+
date = pd.to_datetime(data[0], unit=self.date_unit, errors="coerce")
|
|
45
|
+
ts = pd.concat([date] + data[1:], axis=1)
|
|
46
|
+
ts.drop_duplicates(subset=ts.columns[:-1], keep="first", inplace=True)
|
|
47
|
+
ts.set_index(date.name, inplace=True)
|
|
48
|
+
ts = ts[ts.index.notna()].sort_index()
|
|
49
|
+
ts = ts.groupby([c.name for c in data[1:-1]]) if len(data) > 2 else ts
|
|
50
|
+
ts = self._aggregate(ts)
|
|
51
|
+
ts = ts.reindex(data[1:-1] + [date] if len(data) > 2 else date).reset_index()
|
|
52
|
+
|
|
53
|
+
return ts.iloc[:, -1]
|
|
54
|
+
|
|
55
|
+
@abc.abstractmethod
|
|
56
|
+
def _aggregate(self, ts: pd.DataFrame) -> pd.DataFrame:
|
|
57
|
+
pass
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
_roll_aggregations = {"norm_mean": lambda x: x[-1] / x.mean()}
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
class Roll(TimeSeriesBase, ParametrizedOperand):
|
|
64
|
+
aggregation: str
|
|
65
|
+
window_size: int = 1
|
|
66
|
+
window_unit: str = "D"
|
|
67
|
+
|
|
68
|
+
@validator("window_unit")
|
|
69
|
+
def validate_window_unit(cls, v: str) -> str:
|
|
70
|
+
try:
|
|
71
|
+
pd.tseries.frequencies.to_offset(v)
|
|
72
|
+
return v
|
|
73
|
+
except ValueError:
|
|
74
|
+
raise ValueError(
|
|
75
|
+
f"Invalid window_unit: {v}. Must be a valid pandas frequency string (e.g. 'D', 'H', 'T', etc)"
|
|
76
|
+
)
|
|
77
|
+
|
|
78
|
+
def __init__(self, **data: Any) -> None:
|
|
79
|
+
if "name" not in data:
|
|
80
|
+
components = [
|
|
81
|
+
"roll",
|
|
82
|
+
str(data.get("window_size") or 1) + str(data.get("window_unit") or "D"),
|
|
83
|
+
data.get("aggregation"),
|
|
84
|
+
]
|
|
85
|
+
data["name"] = "_".join(components).lower()
|
|
86
|
+
super().__init__(**data)
|
|
87
|
+
|
|
88
|
+
@classmethod
|
|
89
|
+
def from_formula(cls, formula: str) -> Optional["Roll"]:
|
|
90
|
+
import re
|
|
91
|
+
|
|
92
|
+
pattern = r"^roll_(\d+)([a-zA-Z])_(\w+)$"
|
|
93
|
+
match = re.match(pattern, formula)
|
|
94
|
+
|
|
95
|
+
if not match:
|
|
96
|
+
return None
|
|
97
|
+
|
|
98
|
+
window_size = int(match.group(1))
|
|
99
|
+
window_unit = match.group(2)
|
|
100
|
+
aggregation = match.group(3)
|
|
101
|
+
|
|
102
|
+
return cls(window_size=window_size, window_unit=window_unit, aggregation=aggregation)
|
|
103
|
+
|
|
104
|
+
def get_params(self) -> Dict[str, Optional[str]]:
|
|
105
|
+
res = super().get_params()
|
|
106
|
+
res.update(
|
|
107
|
+
{
|
|
108
|
+
"window_size": self.window_size,
|
|
109
|
+
"window_unit": self.window_unit,
|
|
110
|
+
"aggregation": self.aggregation,
|
|
111
|
+
}
|
|
112
|
+
)
|
|
113
|
+
return res
|
|
114
|
+
|
|
115
|
+
def _aggregate(self, ts: pd.DataFrame) -> pd.DataFrame:
|
|
116
|
+
return ts.rolling(f"{self.window_size}{self.window_unit}", min_periods=self.window_size).agg(
|
|
117
|
+
_roll_aggregations.get(self.aggregation, self.aggregation)
|
|
118
|
+
)
|
|
119
|
+
|
|
120
|
+
|
|
121
|
+
class Lag(TimeSeriesBase, ParametrizedOperand):
|
|
122
|
+
lag_size: int
|
|
123
|
+
lag_unit: str = "D"
|
|
124
|
+
|
|
125
|
+
def __init__(self, **data: Any) -> None:
|
|
126
|
+
if "name" not in data:
|
|
127
|
+
components = [
|
|
128
|
+
"lag",
|
|
129
|
+
str(data.get("lag_size") or 1) + str(data.get("lag_unit") or "D"),
|
|
130
|
+
]
|
|
131
|
+
data["name"] = "_".join(components).lower()
|
|
132
|
+
super().__init__(**data)
|
|
133
|
+
|
|
134
|
+
@classmethod
|
|
135
|
+
def from_formula(cls, formula: str) -> Optional["Lag"]:
|
|
136
|
+
import re
|
|
137
|
+
|
|
138
|
+
pattern = r"^lag_(\d+)([a-zA-Z])$"
|
|
139
|
+
match = re.match(pattern, formula)
|
|
140
|
+
|
|
141
|
+
if not match:
|
|
142
|
+
return None
|
|
143
|
+
|
|
144
|
+
lag_size = int(match.group(1))
|
|
145
|
+
lag_unit = match.group(2)
|
|
146
|
+
|
|
147
|
+
return cls(lag_size=lag_size, lag_unit=lag_unit)
|
|
148
|
+
|
|
149
|
+
def get_params(self) -> Dict[str, Optional[str]]:
|
|
150
|
+
res = super().get_params()
|
|
151
|
+
return res
|
|
152
|
+
|
|
153
|
+
def _aggregate(self, ts: pd.DataFrame) -> pd.DataFrame:
|
|
154
|
+
lag_window = self.lag_size + 1
|
|
155
|
+
return ts.rolling(f"{lag_window}{self.lag_unit}", min_periods=lag_window).agg(lambda x: x[0])
|
upgini/features_enricher.py
CHANGED
|
@@ -2474,8 +2474,16 @@ class FeaturesEnricher(TransformerMixin):
|
|
|
2474
2474
|
|
|
2475
2475
|
df = pd.concat([validated_X, validated_y], axis=1)
|
|
2476
2476
|
|
|
2477
|
+
if validated_eval_set is not None and len(validated_eval_set) > 0:
|
|
2478
|
+
df[EVAL_SET_INDEX] = 0
|
|
2479
|
+
for idx, (eval_X, eval_y) in enumerate(validated_eval_set):
|
|
2480
|
+
eval_df = pd.concat([eval_X, eval_y], axis=1)
|
|
2481
|
+
eval_df[EVAL_SET_INDEX] = idx + 1
|
|
2482
|
+
df = pd.concat([df, eval_df])
|
|
2483
|
+
|
|
2477
2484
|
self.fit_search_keys = self.search_keys.copy()
|
|
2478
|
-
|
|
2485
|
+
df = self.__handle_index_search_keys(df, self.fit_search_keys)
|
|
2486
|
+
self.fit_search_keys = self.__prepare_search_keys(df, self.fit_search_keys, is_demo_dataset)
|
|
2479
2487
|
|
|
2480
2488
|
maybe_date_column = SearchKey.find_key(self.fit_search_keys, [SearchKey.DATE, SearchKey.DATETIME])
|
|
2481
2489
|
has_date = maybe_date_column is not None
|
|
@@ -2487,17 +2495,8 @@ class FeaturesEnricher(TransformerMixin):
|
|
|
2487
2495
|
self.loss, self.model_task_type, self.runtime_parameters, self.logger
|
|
2488
2496
|
)
|
|
2489
2497
|
|
|
2490
|
-
if validated_eval_set is not None and len(validated_eval_set) > 0:
|
|
2491
|
-
df[EVAL_SET_INDEX] = 0
|
|
2492
|
-
for idx, (eval_X, eval_y) in enumerate(validated_eval_set):
|
|
2493
|
-
eval_df = pd.concat([eval_X, eval_y], axis=1)
|
|
2494
|
-
eval_df[EVAL_SET_INDEX] = idx + 1
|
|
2495
|
-
df = pd.concat([df, eval_df])
|
|
2496
|
-
|
|
2497
2498
|
df = self.__correct_target(df)
|
|
2498
2499
|
|
|
2499
|
-
df = self.__handle_index_search_keys(df, self.fit_search_keys)
|
|
2500
|
-
|
|
2501
2500
|
if DEFAULT_INDEX in df.columns:
|
|
2502
2501
|
msg = self.bundle.get("unsupported_index_column")
|
|
2503
2502
|
self.logger.info(msg)
|
|
@@ -1,9 +1,9 @@
|
|
|
1
|
-
upgini/__about__.py,sha256=
|
|
2
|
-
upgini/__init__.py,sha256=
|
|
1
|
+
upgini/__about__.py,sha256=YkZ_uLYHtqgChcjML_VbuHRPzZ0weOtfhilztAaEx10,33
|
|
2
|
+
upgini/__init__.py,sha256=Mb_sTh-IiGiyQLExOF226RsqnpVH8u1ozaCSW3Scdx4,590
|
|
3
3
|
upgini/ads.py,sha256=nvuRxRx5MHDMgPr9SiU-fsqRdFaBv8p4_v1oqiysKpc,2714
|
|
4
4
|
upgini/dataset.py,sha256=iPFiMJtk4HF1ytw9wCQr8H9RfoOKj_TIo8XYZKWgcMc,31331
|
|
5
5
|
upgini/errors.py,sha256=2b_Wbo0OYhLUbrZqdLIx5jBnAsiD1Mcenh-VjR4HCTw,950
|
|
6
|
-
upgini/features_enricher.py,sha256=
|
|
6
|
+
upgini/features_enricher.py,sha256=RAeZFb_0VA23rHjWXo2hKEhoTxp0z9PdFD3KhCSOS_0,192779
|
|
7
7
|
upgini/http.py,sha256=21asexflvavydzCOONJDGQBtQanCElrbnqLXakJ9Cu8,42880
|
|
8
8
|
upgini/lazy_import.py,sha256=74gQ8JuA48BGRLxAo7lNHNKY2D2emMxrUxKGdxVGhuY,1012
|
|
9
9
|
upgini/metadata.py,sha256=osmzdNESeh7yP3BZday6N9Q3eaIHfzhhRM1d6NSgcf0,11223
|
|
@@ -14,14 +14,14 @@ upgini/version_validator.py,sha256=ddSKUK_-eGJB3NgrqOMoWJU-OxQ253WsNLp8aqJkaIM,1
|
|
|
14
14
|
upgini/ads_management/__init__.py,sha256=qzyisOToVRP-tquAJD1PblZhNtMrOB8FiyF9JvfkvgE,50
|
|
15
15
|
upgini/ads_management/ads_manager.py,sha256=igVbN2jz80Umb2BUJixmJVj-zx8unoKpecVo-R-nGdw,2648
|
|
16
16
|
upgini/autofe/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
17
|
-
upgini/autofe/all_operands.py,sha256=
|
|
17
|
+
upgini/autofe/all_operands.py,sha256=z3RSj98mkIXOkkmXHVCV7ese6V6rgD4uXyHge65HMVA,116
|
|
18
18
|
upgini/autofe/binary.py,sha256=zMhtHVuGUAFLUqem-XiXqJj-GRXxS88tdz8tFuDfSNM,7659
|
|
19
|
-
upgini/autofe/date.py,sha256=
|
|
19
|
+
upgini/autofe/date.py,sha256=kC1oQ_LKaqq-JTiqzIbUti-JB3bWizaB5nvXQ_BoD6Y,10780
|
|
20
20
|
upgini/autofe/feature.py,sha256=eL7wABUhDKZzv3E-RPJNcyGwSfB0UptcfU2RbvsOks4,15082
|
|
21
21
|
upgini/autofe/groupby.py,sha256=r-xl_keZZgm_tpiEoDhjYSkT6NHv7a4cRQR4wJ4uCp8,3263
|
|
22
|
-
upgini/autofe/operand.py,sha256=
|
|
22
|
+
upgini/autofe/operand.py,sha256=sEyFD_SdQ5tqJ5yGUZlXSqUnQb6WxOqZ0bMS6oKDjdU,4593
|
|
23
23
|
upgini/autofe/unary.py,sha256=T3E7F3dA_7o_rkdCFq7JV6nHLzcoHLHQTcxO7y5Opa4,4646
|
|
24
|
-
upgini/autofe/vector.py,sha256=
|
|
24
|
+
upgini/autofe/vector.py,sha256=KBoEcRywc1xdgYLCPlkUnKi5w0wCF0j3IYQP5eSmmgY,4807
|
|
25
25
|
upgini/data_source/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
26
26
|
upgini/data_source/data_source_publisher.py,sha256=X-8aGtVgzGmxyXkMVBoBLIGDMb4lYQaGZbxDnOd4A3Q,22516
|
|
27
27
|
upgini/mdc/__init__.py,sha256=aM08nIWFc2gWdWUa3_IuEnNND0cQPkBGnYpRMnfFN8k,1019
|
|
@@ -57,7 +57,7 @@ upgini/utils/sklearn_ext.py,sha256=13jQS_k7v0aUtudXV6nGUEWjttPQzAW9AFYL5wgEz9k,4
|
|
|
57
57
|
upgini/utils/target_utils.py,sha256=qHzZRmICFbLNCrmVqGkaBcjm91L2ERRZMppci36acV4,10085
|
|
58
58
|
upgini/utils/track_info.py,sha256=G5Lu1xxakg2_TQjKZk4b5SvrHsATTXNVV3NbvWtT8k8,5663
|
|
59
59
|
upgini/utils/warning_counter.py,sha256=dIWBB4dI5XRRJZudvIlqlIYKEiwLLPcXarsZuYRt338,227
|
|
60
|
-
upgini-1.2.
|
|
61
|
-
upgini-1.2.
|
|
62
|
-
upgini-1.2.
|
|
63
|
-
upgini-1.2.
|
|
60
|
+
upgini-1.2.20a3657.dev1.dist-info/METADATA,sha256=bgrk-SB81K0mrOkFRfrSl04-TuA2wxZWIbYdQOJePKA,48588
|
|
61
|
+
upgini-1.2.20a3657.dev1.dist-info/WHEEL,sha256=zEMcRr9Kr03x1ozGwg5v9NQBKn3kndp6LSoSlVg-jhU,87
|
|
62
|
+
upgini-1.2.20a3657.dev1.dist-info/licenses/LICENSE,sha256=5RRzgvdJUu3BUDfv4bzVU6FqKgwHlIay63pPCSmSgzw,1514
|
|
63
|
+
upgini-1.2.20a3657.dev1.dist-info/RECORD,,
|
|
File without changes
|