upgini 1.1.309a3511.dev3__py3-none-any.whl → 1.1.310__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of upgini might be problematic. Click here for more details.
- upgini/__about__.py +1 -1
- upgini/autofe/all_operands.py +7 -26
- upgini/autofe/binary.py +2 -95
- upgini/autofe/date.py +3 -16
- upgini/autofe/feature.py +11 -24
- upgini/autofe/unary.py +0 -7
- upgini/data_source/data_source_publisher.py +21 -5
- {upgini-1.1.309a3511.dev3.dist-info → upgini-1.1.310.dist-info}/METADATA +1 -3
- {upgini-1.1.309a3511.dev3.dist-info → upgini-1.1.310.dist-info}/RECORD +11 -11
- {upgini-1.1.309a3511.dev3.dist-info → upgini-1.1.310.dist-info}/WHEEL +1 -1
- {upgini-1.1.309a3511.dev3.dist-info → upgini-1.1.310.dist-info}/licenses/LICENSE +0 -0
upgini/__about__.py
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
__version__ = "1.1.309a3511.dev3"
|
|
1
|
+
__version__ = "1.1.310"
|
upgini/autofe/all_operands.py
CHANGED
|
@@ -1,20 +1,6 @@
|
|
|
1
1
|
from typing import Dict
|
|
2
2
|
|
|
3
|
-
from upgini.autofe.binary import (
|
|
4
|
-
Add,
|
|
5
|
-
Combine,
|
|
6
|
-
CombineThenFreq,
|
|
7
|
-
Distance,
|
|
8
|
-
Divide,
|
|
9
|
-
JaroWinklerSim1,
|
|
10
|
-
JaroWinklerSim2,
|
|
11
|
-
LevenshteinSim,
|
|
12
|
-
Max,
|
|
13
|
-
Min,
|
|
14
|
-
Multiply,
|
|
15
|
-
Sim,
|
|
16
|
-
Subtract,
|
|
17
|
-
)
|
|
3
|
+
from upgini.autofe.binary import Add, Divide, Max, Min, Multiply, Sim, Subtract
|
|
18
4
|
from upgini.autofe.date import (
|
|
19
5
|
DateDiff,
|
|
20
6
|
DateDiffType2,
|
|
@@ -23,9 +9,9 @@ from upgini.autofe.date import (
|
|
|
23
9
|
DatePercentile,
|
|
24
10
|
DatePercentileMethod2,
|
|
25
11
|
)
|
|
26
|
-
from upgini.autofe.groupby import GroupByThenAgg,
|
|
12
|
+
from upgini.autofe.groupby import GroupByThenAgg, GroupByThenRank
|
|
27
13
|
from upgini.autofe.operand import Operand
|
|
28
|
-
from upgini.autofe.unary import Abs,
|
|
14
|
+
from upgini.autofe.unary import Abs, Floor, Freq, Log, Residual, Norm, Sigmoid, Sqrt, Square
|
|
29
15
|
from upgini.autofe.vector import Mean, Sum
|
|
30
16
|
|
|
31
17
|
ALL_OPERANDS: Dict[str, Operand] = {
|
|
@@ -53,10 +39,10 @@ ALL_OPERANDS: Dict[str, Operand] = {
|
|
|
53
39
|
GroupByThenAgg(name="GroupByThenMedian", agg="median"),
|
|
54
40
|
GroupByThenAgg(name="GroupByThenStd", output_type="float", agg="std"),
|
|
55
41
|
GroupByThenRank(),
|
|
56
|
-
Combine
|
|
57
|
-
CombineThenFreq
|
|
58
|
-
GroupByThenNUnique
|
|
59
|
-
GroupByThenFreq
|
|
42
|
+
Operand(name="Combine", has_symmetry_importance=True, output_type="object", is_categorical=True),
|
|
43
|
+
Operand(name="CombineThenFreq", has_symmetry_importance=True, output_type="float"),
|
|
44
|
+
Operand(name="GroupByThenNUnique", output_type="int", is_vectorizable=True, is_grouping=True),
|
|
45
|
+
Operand(name="GroupByThenFreq", output_type="float", is_grouping=True),
|
|
60
46
|
Sim(),
|
|
61
47
|
DateDiff(),
|
|
62
48
|
DateDiffType2(),
|
|
@@ -73,11 +59,6 @@ ALL_OPERANDS: Dict[str, Operand] = {
|
|
|
73
59
|
DatePercentile(),
|
|
74
60
|
DatePercentileMethod2(),
|
|
75
61
|
Norm(),
|
|
76
|
-
JaroWinklerSim1(),
|
|
77
|
-
JaroWinklerSim2(),
|
|
78
|
-
LevenshteinSim(),
|
|
79
|
-
Distance(),
|
|
80
|
-
Embeddings(),
|
|
81
62
|
]
|
|
82
63
|
}
|
|
83
64
|
|
upgini/autofe/binary.py
CHANGED
|
@@ -1,11 +1,7 @@
|
|
|
1
|
-
import abc
|
|
2
|
-
from typing import Optional
|
|
3
|
-
import Levenshtein
|
|
4
1
|
import numpy as np
|
|
5
2
|
import pandas as pd
|
|
6
3
|
from numpy import dot
|
|
7
4
|
from numpy.linalg import norm
|
|
8
|
-
from jarowinkler import jarowinkler_similarity
|
|
9
5
|
|
|
10
6
|
from upgini.autofe.operand import PandasOperand, VectorizableMixin
|
|
11
7
|
|
|
@@ -134,29 +130,7 @@ class CombineThenFreq(PandasOperand):
|
|
|
134
130
|
self._loc(temp, value_counts)
|
|
135
131
|
|
|
136
132
|
|
|
137
|
-
class
|
|
138
|
-
name = "dist"
|
|
139
|
-
is_binary = True
|
|
140
|
-
output_type = "float"
|
|
141
|
-
is_symmetrical = True
|
|
142
|
-
has_symmetry_importance = True
|
|
143
|
-
|
|
144
|
-
def calculate_binary(self, left: pd.Series, right: pd.Series) -> pd.Series:
|
|
145
|
-
return pd.Series(
|
|
146
|
-
1 - self.__dot(left, right) / (self.__dot(left, left) * self.__dot(right, right)), index=left.index
|
|
147
|
-
)
|
|
148
|
-
|
|
149
|
-
# row-wise dot product
|
|
150
|
-
def __dot(self, left: pd.Series, right: pd.Series) -> pd.Series:
|
|
151
|
-
left = left.apply(lambda x: np.array(x))
|
|
152
|
-
right = right.apply(lambda x: np.array(x))
|
|
153
|
-
res = (left.dropna() * right.dropna()).apply(np.sum)
|
|
154
|
-
res = res.reindex(left.index.union(right.index))
|
|
155
|
-
return res
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
# Left for backward compatibility
|
|
159
|
-
class Sim(Distance):
|
|
133
|
+
class Sim(PandasOperand):
|
|
160
134
|
name = "sim"
|
|
161
135
|
is_binary = True
|
|
162
136
|
output_type = "float"
|
|
@@ -164,71 +138,4 @@ class Sim(Distance):
|
|
|
164
138
|
has_symmetry_importance = True
|
|
165
139
|
|
|
166
140
|
def calculate_binary(self, left: pd.Series, right: pd.Series) -> pd.Series:
|
|
167
|
-
return
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
class StringSim(PandasOperand, abc.ABC):
|
|
171
|
-
def calculate_binary(self, left: pd.Series, right: pd.Series) -> pd.Series:
|
|
172
|
-
sims = []
|
|
173
|
-
for i in left.index:
|
|
174
|
-
left_i = self._prepare_value(left.get(i))
|
|
175
|
-
right_i = self._prepare_value(right.get(i))
|
|
176
|
-
if left_i is not None and right_i is not None:
|
|
177
|
-
sims.append(self._similarity(left_i, right_i))
|
|
178
|
-
else:
|
|
179
|
-
sims.append(None)
|
|
180
|
-
|
|
181
|
-
return pd.Series(sims, index=left.index)
|
|
182
|
-
|
|
183
|
-
@abc.abstractmethod
|
|
184
|
-
def _prepare_value(self, value: Optional[str]) -> Optional[str]:
|
|
185
|
-
pass
|
|
186
|
-
|
|
187
|
-
@abc.abstractmethod
|
|
188
|
-
def _similarity(self, left: str, right: str) -> float:
|
|
189
|
-
pass
|
|
190
|
-
|
|
191
|
-
|
|
192
|
-
class JaroWinklerSim1(StringSim):
|
|
193
|
-
name = "sim_jw1"
|
|
194
|
-
is_binary = True
|
|
195
|
-
input_type = "string"
|
|
196
|
-
output_type = "float"
|
|
197
|
-
is_symmetrical = True
|
|
198
|
-
has_symmetry_importance = True
|
|
199
|
-
|
|
200
|
-
def _prepare_value(self, value: Optional[str]) -> Optional[str]:
|
|
201
|
-
return value
|
|
202
|
-
|
|
203
|
-
def _similarity(self, left: str, right: str) -> float:
|
|
204
|
-
return jarowinkler_similarity(left, right)
|
|
205
|
-
|
|
206
|
-
|
|
207
|
-
class JaroWinklerSim2(StringSim):
|
|
208
|
-
name = "sim_jw2"
|
|
209
|
-
is_binary = True
|
|
210
|
-
input_type = "string"
|
|
211
|
-
output_type = "float"
|
|
212
|
-
is_symmetrical = True
|
|
213
|
-
has_symmetry_importance = True
|
|
214
|
-
|
|
215
|
-
def _prepare_value(self, value: Optional[str]) -> Optional[str]:
|
|
216
|
-
return value[::-1] if value is not None else None
|
|
217
|
-
|
|
218
|
-
def _similarity(self, left: str, right: str) -> float:
|
|
219
|
-
return jarowinkler_similarity(left, right)
|
|
220
|
-
|
|
221
|
-
|
|
222
|
-
class LevenshteinSim(StringSim):
|
|
223
|
-
name = "sim_lv"
|
|
224
|
-
is_binary = True
|
|
225
|
-
input_type = "string"
|
|
226
|
-
output_type = "float"
|
|
227
|
-
is_symmetrical = True
|
|
228
|
-
has_symmetry_importance = True
|
|
229
|
-
|
|
230
|
-
def _prepare_value(self, value: Optional[str]) -> Optional[str]:
|
|
231
|
-
return value
|
|
232
|
-
|
|
233
|
-
def _similarity(self, left: str, right: str) -> float:
|
|
234
|
-
return 1 - Levenshtein.distance(left, right) / max(len(left), len(right))
|
|
141
|
+
return dot(left, right) / (norm(left) * norm(right))
|
upgini/autofe/date.py
CHANGED
|
@@ -43,8 +43,6 @@ class DateDiff(PandasOperand, DateDiffMixin):
|
|
|
43
43
|
is_binary = True
|
|
44
44
|
has_symmetry_importance = True
|
|
45
45
|
|
|
46
|
-
replace_negative: bool = False
|
|
47
|
-
|
|
48
46
|
def get_params(self) -> Dict[str, Optional[str]]:
|
|
49
47
|
res = super().get_params()
|
|
50
48
|
res.update(
|
|
@@ -52,7 +50,6 @@ class DateDiff(PandasOperand, DateDiffMixin):
|
|
|
52
50
|
"diff_unit": self.diff_unit,
|
|
53
51
|
"left_unit": self.left_unit,
|
|
54
52
|
"right_unit": self.right_unit,
|
|
55
|
-
"replace_negative": self.replace_negative,
|
|
56
53
|
}
|
|
57
54
|
)
|
|
58
55
|
return res
|
|
@@ -64,8 +61,7 @@ class DateDiff(PandasOperand, DateDiffMixin):
|
|
|
64
61
|
return self.__replace_negative(diff)
|
|
65
62
|
|
|
66
63
|
def __replace_negative(self, x: Union[pd.DataFrame, pd.Series]):
|
|
67
|
-
|
|
68
|
-
x[x < 0] = None
|
|
64
|
+
x[x < 0] = None
|
|
69
65
|
return x
|
|
70
66
|
|
|
71
67
|
|
|
@@ -105,19 +101,13 @@ _ext_aggregations = {"nunique": (lambda x: len(np.unique(x)), 0), "count": (len,
|
|
|
105
101
|
class DateListDiff(PandasOperand, DateDiffMixin):
|
|
106
102
|
is_binary = True
|
|
107
103
|
has_symmetry_importance = True
|
|
108
|
-
|
|
109
104
|
aggregation: str
|
|
110
|
-
replace_negative: bool = False
|
|
111
105
|
|
|
112
106
|
def get_params(self) -> Dict[str, Optional[str]]:
|
|
113
107
|
res = super().get_params()
|
|
114
108
|
res.update(
|
|
115
109
|
{
|
|
116
110
|
"aggregation": self.aggregation,
|
|
117
|
-
"diff_unit": self.diff_unit,
|
|
118
|
-
"left_unit": self.left_unit,
|
|
119
|
-
"right_unit": self.right_unit,
|
|
120
|
-
"replace_negative": self.replace_negative,
|
|
121
111
|
}
|
|
122
112
|
)
|
|
123
113
|
return res
|
|
@@ -135,7 +125,7 @@ class DateListDiff(PandasOperand, DateDiffMixin):
|
|
|
135
125
|
|
|
136
126
|
def _diff(self, x: TimedeltaArray):
|
|
137
127
|
x = self._convert_diff_to_unit(x)
|
|
138
|
-
return x[x > 0]
|
|
128
|
+
return x[x > 0]
|
|
139
129
|
|
|
140
130
|
def _agg(self, x):
|
|
141
131
|
method = getattr(np, self.aggregation, None)
|
|
@@ -167,10 +157,7 @@ class DateListDiffBounded(DateListDiff):
|
|
|
167
157
|
super().__init__(**data)
|
|
168
158
|
|
|
169
159
|
def _agg(self, x):
|
|
170
|
-
x = x[
|
|
171
|
-
(x >= (self.lower_bound if self.lower_bound is not None else -np.inf))
|
|
172
|
-
& (x < (self.upper_bound if self.upper_bound is not None else np.inf))
|
|
173
|
-
]
|
|
160
|
+
x = x[(x >= (self.lower_bound or -np.inf)) & (x < (self.upper_bound or np.inf))]
|
|
174
161
|
return super()._agg(x)
|
|
175
162
|
|
|
176
163
|
|
upgini/autofe/feature.py
CHANGED
|
@@ -138,17 +138,15 @@ class Feature:
|
|
|
138
138
|
if self.cached_display_name is not None and cache:
|
|
139
139
|
return self.cached_display_name
|
|
140
140
|
|
|
141
|
-
should_stack_op = not isinstance(self.children[0], Column) if self.op.is_unary else False
|
|
142
|
-
prev_name = [self.children[0].get_op_display_name()] if should_stack_op else []
|
|
143
|
-
|
|
144
141
|
if self.alias:
|
|
145
142
|
components = ["f_autofe", self.alias]
|
|
146
|
-
elif shorten and
|
|
147
|
-
components = ["f_autofe"
|
|
143
|
+
elif shorten and not self.op.is_unary:
|
|
144
|
+
components = ["f_autofe", self.get_op_display_name()]
|
|
148
145
|
else:
|
|
149
|
-
components = (
|
|
150
|
-
|
|
151
|
-
|
|
146
|
+
components = ["f_" + "_f_".join(self.get_columns(**kwargs))] + [
|
|
147
|
+
"autofe",
|
|
148
|
+
self.get_op_display_name(),
|
|
149
|
+
]
|
|
152
150
|
components.extend([str(self.display_index)] if self.display_index is not None else [])
|
|
153
151
|
display_name = "_".join(components)
|
|
154
152
|
|
|
@@ -239,18 +237,12 @@ class Feature:
|
|
|
239
237
|
|
|
240
238
|
@staticmethod
|
|
241
239
|
def from_formula(string: str) -> Union[Column, "Feature"]:
|
|
240
|
+
if string[-1] != ")":
|
|
241
|
+
return Column(string)
|
|
242
242
|
|
|
243
243
|
def is_trivial_char(c: str) -> bool:
|
|
244
244
|
return c not in "()+-*/,"
|
|
245
245
|
|
|
246
|
-
if string[-1] != ")":
|
|
247
|
-
if all(is_trivial_char(c) for c in string):
|
|
248
|
-
return Column(string)
|
|
249
|
-
else:
|
|
250
|
-
raise ValueError(
|
|
251
|
-
f"Unsupported column name: {string}. Column names should not have characters: ['(', ')', '+', '-', '*', '/', ',']"
|
|
252
|
-
)
|
|
253
|
-
|
|
254
246
|
def find_prev(string: str) -> int:
|
|
255
247
|
if string[-1] != ")":
|
|
256
248
|
return max([(0 if is_trivial_char(c) else i + 1) for i, c in enumerate(string)])
|
|
@@ -272,11 +264,8 @@ class Feature:
|
|
|
272
264
|
return Feature(find_op(string[: p2 - 1]), [Feature.from_formula(string[p2:-1])])
|
|
273
265
|
p1 = find_prev(string[: p2 - 1])
|
|
274
266
|
if string[0] == "(":
|
|
275
|
-
op = find_op(string[p2 - 1])
|
|
276
|
-
if op is None:
|
|
277
|
-
raise ValueError(f"Unsupported operand: {string[p2 - 1]}")
|
|
278
267
|
return Feature(
|
|
279
|
-
|
|
268
|
+
find_op(string[p2 - 1]),
|
|
280
269
|
[Feature.from_formula(string[p1 : p2 - 1]), Feature.from_formula(string[p2:-1])],
|
|
281
270
|
)
|
|
282
271
|
else:
|
|
@@ -287,8 +276,6 @@ class Feature:
|
|
|
287
276
|
[Feature.from_formula(string[p1 : p2 - 1]), Feature.from_formula(string[p2:-1])],
|
|
288
277
|
)
|
|
289
278
|
else:
|
|
290
|
-
if string[p1 - 1] == "(":
|
|
291
|
-
raise ValueError(f"Unsupported operand: {string[: p1 - 1]}")
|
|
292
279
|
base_features = [
|
|
293
280
|
Feature.from_formula(string[p2:-1]),
|
|
294
281
|
Feature.from_formula(string[p1 : p2 - 1]),
|
|
@@ -334,10 +321,10 @@ class FeatureGroup:
|
|
|
334
321
|
lower_order_names = [ch.get_display_name() for ch in lower_order_children]
|
|
335
322
|
if any(isinstance(f, Feature) for f in lower_order_children):
|
|
336
323
|
child_data = pd.concat(
|
|
337
|
-
[data[main_column
|
|
324
|
+
[data[main_column]] + [ch.calculate(data) for ch in lower_order_children],
|
|
338
325
|
axis=1,
|
|
339
326
|
)
|
|
340
|
-
child_data.columns =
|
|
327
|
+
child_data.columns = [main_column] + lower_order_names
|
|
341
328
|
else:
|
|
342
329
|
child_data = data[columns]
|
|
343
330
|
|
upgini/autofe/unary.py
CHANGED
|
@@ -125,10 +125,3 @@ class Norm(PandasOperand):
|
|
|
125
125
|
normalized_data = pd.Series(normalized_data[:, 0], index=data_dropna.index, name=data.name)
|
|
126
126
|
normalized_data = normalized_data.reindex(data.index)
|
|
127
127
|
return normalized_data
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
class Embeddings(PandasOperand):
|
|
131
|
-
name = "emb"
|
|
132
|
-
is_unary = True
|
|
133
|
-
input_type = "string"
|
|
134
|
-
output_type = "vector"
|
|
@@ -3,7 +3,7 @@ import time
|
|
|
3
3
|
import uuid
|
|
4
4
|
from datetime import datetime
|
|
5
5
|
from enum import Enum
|
|
6
|
-
from typing import Dict, List, Optional, Union
|
|
6
|
+
from typing import Dict, List, Literal, Optional, Union
|
|
7
7
|
|
|
8
8
|
from upgini.errors import HttpError, ValidationError
|
|
9
9
|
from upgini.http import LoggerFactory, get_rest_client
|
|
@@ -47,7 +47,9 @@ class DataSourcePublisher:
|
|
|
47
47
|
self,
|
|
48
48
|
data_table_uri: str,
|
|
49
49
|
search_keys: Dict[str, SearchKey],
|
|
50
|
-
update_frequency:
|
|
50
|
+
update_frequency: Union[
|
|
51
|
+
Literal["Daily"], Literal["Weekly"], Literal["Monthly"], Literal["Quarterly"], Literal["Annually"]
|
|
52
|
+
],
|
|
51
53
|
exclude_from_autofe_generation: Optional[List[str]],
|
|
52
54
|
secondary_search_keys: Optional[Dict[str, SearchKey]] = None,
|
|
53
55
|
sort_column: Optional[str] = None,
|
|
@@ -233,11 +235,17 @@ class DataSourcePublisher:
|
|
|
233
235
|
self.logger.exception("Failed to register data table")
|
|
234
236
|
raise
|
|
235
237
|
|
|
236
|
-
def remove(self, data_table_ids: List[str]):
|
|
238
|
+
def remove(self, data_table_ids: List[str] | str):
|
|
237
239
|
trace_id = str(uuid.uuid4())
|
|
238
240
|
with MDC(trace_id=trace_id):
|
|
239
241
|
try:
|
|
240
|
-
if
|
|
242
|
+
if not data_table_ids:
|
|
243
|
+
raise ValidationError("Empty data table ids")
|
|
244
|
+
if isinstance(data_table_ids, str):
|
|
245
|
+
data_table_ids = [data_table_ids]
|
|
246
|
+
if not isinstance(data_table_ids, list):
|
|
247
|
+
raise ValidationError("Invalid format of data_table_ids argument")
|
|
248
|
+
if len(data_table_ids) == 0:
|
|
241
249
|
raise ValidationError("Empty data table ids")
|
|
242
250
|
|
|
243
251
|
for data_table_id in data_table_ids:
|
|
@@ -266,16 +274,20 @@ class DataSourcePublisher:
|
|
|
266
274
|
source_link: Optional[str] = None,
|
|
267
275
|
update_frequency: Optional[str] = None,
|
|
268
276
|
client_emails: Optional[List[str]] = None,
|
|
277
|
+
date_features: Optional[List[str]] = None,
|
|
278
|
+
date_vector_features: Optional[List[str]] = None,
|
|
269
279
|
):
|
|
270
280
|
trace_id = str(uuid.uuid4())
|
|
271
281
|
with MDC(trace_id=trace_id):
|
|
272
282
|
try:
|
|
273
|
-
if data_table_ids is None
|
|
283
|
+
if data_table_ids is None:
|
|
274
284
|
raise ValidationError("Empty data table ids")
|
|
275
285
|
if isinstance(data_table_ids, str):
|
|
276
286
|
data_table_ids = [data_table_ids]
|
|
277
287
|
if not isinstance(data_table_ids, list):
|
|
278
288
|
raise ValidationError("data_table_ids should be string or list of strings")
|
|
289
|
+
if len(data_table_ids) == 0:
|
|
290
|
+
raise ValidationError("Empty data table ids")
|
|
279
291
|
if update_frequency is not None and update_frequency not in self.ACCEPTABLE_UPDATE_FREQUENCIES:
|
|
280
292
|
raise ValidationError(
|
|
281
293
|
f"Invalid update frequency: {update_frequency}. "
|
|
@@ -311,6 +323,10 @@ class DataSourcePublisher:
|
|
|
311
323
|
request["updateFrequency"] = update_frequency
|
|
312
324
|
if client_emails is not None:
|
|
313
325
|
request["clientEmails"] = client_emails
|
|
326
|
+
if date_features is not None:
|
|
327
|
+
request["dateFeatures"] = date_features
|
|
328
|
+
if date_vector_features is not None:
|
|
329
|
+
request["dateVectorFeatures"] = date_vector_features
|
|
314
330
|
self.logger.info(f"Activating data tables with request {request}")
|
|
315
331
|
|
|
316
332
|
self._rest_client.activate_datatables(request, trace_id)
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.3
|
|
2
2
|
Name: upgini
|
|
3
|
-
Version: 1.1.309a3511.dev3
|
|
3
|
+
Version: 1.1.310
|
|
4
4
|
Summary: Intelligent data search & enrichment for Machine Learning
|
|
5
5
|
Project-URL: Bug Reports, https://github.com/upgini/upgini/issues
|
|
6
6
|
Project-URL: Homepage, https://upgini.com/
|
|
@@ -26,8 +26,6 @@ Requires-Python: <3.11,>=3.8
|
|
|
26
26
|
Requires-Dist: catboost>=1.0.3
|
|
27
27
|
Requires-Dist: fastparquet>=0.8.1
|
|
28
28
|
Requires-Dist: ipywidgets>=8.1.0
|
|
29
|
-
Requires-Dist: jarowinkler>=2.0.0
|
|
30
|
-
Requires-Dist: levenshtein>=0.25.1
|
|
31
29
|
Requires-Dist: lightgbm>=3.3.2
|
|
32
30
|
Requires-Dist: numpy>=1.19.0
|
|
33
31
|
Requires-Dist: pandas<3.0.0,>=1.1.0
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
upgini/__about__.py,sha256=
|
|
1
|
+
upgini/__about__.py,sha256=3SqcXx-xK0LOUPXP-TodeMAQbqqvXrJD_bDPJxYUINw,24
|
|
2
2
|
upgini/__init__.py,sha256=Xs0YFVBu1KUdtZzbStGRPQtLt3YLzJnjx5nIUBlX8BE,415
|
|
3
3
|
upgini/ads.py,sha256=nvuRxRx5MHDMgPr9SiU-fsqRdFaBv8p4_v1oqiysKpc,2714
|
|
4
4
|
upgini/dataset.py,sha256=MOzBVsvzlHLxNfPWtMaXC_jIPeW7_gUvbSGeXnsPgNI,46158
|
|
@@ -14,16 +14,16 @@ upgini/version_validator.py,sha256=ddSKUK_-eGJB3NgrqOMoWJU-OxQ253WsNLp8aqJkaIM,1
|
|
|
14
14
|
upgini/ads_management/__init__.py,sha256=qzyisOToVRP-tquAJD1PblZhNtMrOB8FiyF9JvfkvgE,50
|
|
15
15
|
upgini/ads_management/ads_manager.py,sha256=igVbN2jz80Umb2BUJixmJVj-zx8unoKpecVo-R-nGdw,2648
|
|
16
16
|
upgini/autofe/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
17
|
-
upgini/autofe/all_operands.py,sha256=
|
|
18
|
-
upgini/autofe/binary.py,sha256=
|
|
19
|
-
upgini/autofe/date.py,sha256=
|
|
20
|
-
upgini/autofe/feature.py,sha256=
|
|
17
|
+
upgini/autofe/all_operands.py,sha256=XbvgX2IU4aee9rJZ--d5MdmrfKhON_emle5-RU1qlEY,2506
|
|
18
|
+
upgini/autofe/binary.py,sha256=8FXPJxN7fnC5wphO0Dp1tQCa0lFMSDGQGvBMkSIVAcE,4155
|
|
19
|
+
upgini/autofe/date.py,sha256=tshVUTioOLVy8on8b5xjgtVrSSrXz_8fMCLeIkpo808,7941
|
|
20
|
+
upgini/autofe/feature.py,sha256=ayxiF8Ip1ww_pt_BC9Pk127fAHZ_3fuluulS1EYLolk,13423
|
|
21
21
|
upgini/autofe/groupby.py,sha256=4WjDzQxqpZxB79Ih4ihMMI5GDxaFqiH6ZelfV82ClT4,3091
|
|
22
22
|
upgini/autofe/operand.py,sha256=MKEsl3zxpWzRDpTkE0sNJxTu62U20sWOvEKhPjUWS6s,2915
|
|
23
|
-
upgini/autofe/unary.py,sha256=
|
|
23
|
+
upgini/autofe/unary.py,sha256=ZWjLd-CUkNt_PpM8YuWLLipW1v_RdBlsl4JxXIVo9aM,3652
|
|
24
24
|
upgini/autofe/vector.py,sha256=dLxfAstJs-gw_OQ1xxoxcM6pVzORlV0HVzdzt7cLXVQ,606
|
|
25
25
|
upgini/data_source/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
26
|
-
upgini/data_source/data_source_publisher.py,sha256=
|
|
26
|
+
upgini/data_source/data_source_publisher.py,sha256=gfeoZznFj1hDlIsHb2z1F6M7YpuwObtmnwqeH-z_Ne4,21444
|
|
27
27
|
upgini/mdc/__init__.py,sha256=aM08nIWFc2gWdWUa3_IuEnNND0cQPkBGnYpRMnfFN8k,1019
|
|
28
28
|
upgini/mdc/context.py,sha256=3u1B-jXt7tXEvNcV3qmR9SDCseudnY7KYsLclBdwVLk,1405
|
|
29
29
|
upgini/normalizer/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
@@ -57,7 +57,7 @@ upgini/utils/sklearn_ext.py,sha256=13jQS_k7v0aUtudXV6nGUEWjttPQzAW9AFYL5wgEz9k,4
|
|
|
57
57
|
upgini/utils/target_utils.py,sha256=Y96_PJ5cC-WsEbeqg20v9uqywDQobLoTb-xoP7S3o4E,7807
|
|
58
58
|
upgini/utils/track_info.py,sha256=G5Lu1xxakg2_TQjKZk4b5SvrHsATTXNVV3NbvWtT8k8,5663
|
|
59
59
|
upgini/utils/warning_counter.py,sha256=dIWBB4dI5XRRJZudvIlqlIYKEiwLLPcXarsZuYRt338,227
|
|
60
|
-
upgini-1.1.
|
|
61
|
-
upgini-1.1.
|
|
62
|
-
upgini-1.1.
|
|
63
|
-
upgini-1.1.
|
|
60
|
+
upgini-1.1.310.dist-info/METADATA,sha256=xRiSeLpNb60svINdhnaNSROMur5XGaHdrQhbxjTjdWU,48153
|
|
61
|
+
upgini-1.1.310.dist-info/WHEEL,sha256=1yFddiXMmvYK7QYTqtRNtX66WJ0Mz8PYEiEUoOUUxRY,87
|
|
62
|
+
upgini-1.1.310.dist-info/licenses/LICENSE,sha256=5RRzgvdJUu3BUDfv4bzVU6FqKgwHlIay63pPCSmSgzw,1514
|
|
63
|
+
upgini-1.1.310.dist-info/RECORD,,
|
|
File without changes
|