upgini 1.1.316a5__py3-none-any.whl → 1.1.317__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- upgini/__about__.py +1 -1
- upgini/autofe/binary.py +75 -72
- upgini/autofe/date.py +26 -43
- upgini/autofe/groupby.py +22 -22
- upgini/autofe/operand.py +4 -4
- upgini/autofe/unary.py +46 -47
- upgini/autofe/vector.py +8 -8
- upgini/dataset.py +3 -8
- upgini/features_enricher.py +4 -5
- upgini/http.py +15 -15
- upgini/lazy_import.py +1 -14
- upgini/metadata.py +57 -57
- upgini/normalizer/normalize_utils.py +2 -1
- upgini/utils/datetime_utils.py +5 -5
- upgini/utils/phone_utils.py +7 -5
- upgini/utils/postal_code_utils.py +1 -1
- upgini/utils/target_utils.py +1 -4
- {upgini-1.1.316a5.dist-info → upgini-1.1.317.dist-info}/METADATA +3 -3
- {upgini-1.1.316a5.dist-info → upgini-1.1.317.dist-info}/RECORD +21 -21
- {upgini-1.1.316a5.dist-info → upgini-1.1.317.dist-info}/WHEEL +0 -0
- {upgini-1.1.316a5.dist-info → upgini-1.1.317.dist-info}/licenses/LICENSE +0 -0
upgini/__about__.py
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
__version__ = "1.1.316a5"
|
|
1
|
+
__version__ = "1.1.317"
|
upgini/autofe/binary.py
CHANGED
|
@@ -9,32 +9,32 @@ from upgini.autofe.operand import PandasOperand, VectorizableMixin
|
|
|
9
9
|
|
|
10
10
|
|
|
11
11
|
class Min(PandasOperand):
|
|
12
|
-
name
|
|
13
|
-
is_binary
|
|
14
|
-
is_symmetrical
|
|
15
|
-
has_symmetry_importance
|
|
12
|
+
name = "min"
|
|
13
|
+
is_binary = True
|
|
14
|
+
is_symmetrical = True
|
|
15
|
+
has_symmetry_importance = True
|
|
16
16
|
|
|
17
17
|
def calculate_binary(self, left: pd.Series, right: pd.Series) -> pd.Series:
|
|
18
18
|
return np.minimum(left, right)
|
|
19
19
|
|
|
20
20
|
|
|
21
21
|
class Max(PandasOperand):
|
|
22
|
-
name
|
|
23
|
-
is_binary
|
|
24
|
-
is_symmetrical
|
|
25
|
-
has_symmetry_importance
|
|
22
|
+
name = "max"
|
|
23
|
+
is_binary = True
|
|
24
|
+
is_symmetrical = True
|
|
25
|
+
has_symmetry_importance = True
|
|
26
26
|
|
|
27
27
|
def calculate_binary(self, left: pd.Series, right: pd.Series) -> pd.Series:
|
|
28
28
|
return np.maximum(left, right)
|
|
29
29
|
|
|
30
30
|
|
|
31
31
|
class Add(PandasOperand, VectorizableMixin):
|
|
32
|
-
name
|
|
33
|
-
alias
|
|
34
|
-
is_binary
|
|
35
|
-
is_symmetrical
|
|
36
|
-
has_symmetry_importance
|
|
37
|
-
is_vectorizable
|
|
32
|
+
name = "+"
|
|
33
|
+
alias = "add"
|
|
34
|
+
is_binary = True
|
|
35
|
+
is_symmetrical = True
|
|
36
|
+
has_symmetry_importance = True
|
|
37
|
+
is_vectorizable = True
|
|
38
38
|
|
|
39
39
|
def calculate_binary(self, left: pd.Series, right: pd.Series) -> pd.Series:
|
|
40
40
|
return left + right
|
|
@@ -48,12 +48,12 @@ class Add(PandasOperand, VectorizableMixin):
|
|
|
48
48
|
|
|
49
49
|
|
|
50
50
|
class Subtract(PandasOperand, VectorizableMixin):
|
|
51
|
-
name
|
|
52
|
-
alias
|
|
53
|
-
is_binary
|
|
54
|
-
is_symmetrical
|
|
55
|
-
has_symmetry_importance
|
|
56
|
-
is_vectorizable
|
|
51
|
+
name = "-"
|
|
52
|
+
alias = "sub"
|
|
53
|
+
is_binary = True
|
|
54
|
+
is_symmetrical = True
|
|
55
|
+
has_symmetry_importance = True
|
|
56
|
+
is_vectorizable = True
|
|
57
57
|
|
|
58
58
|
def calculate_binary(self, left: pd.Series, right: pd.Series) -> pd.Series:
|
|
59
59
|
return left - right
|
|
@@ -67,12 +67,12 @@ class Subtract(PandasOperand, VectorizableMixin):
|
|
|
67
67
|
|
|
68
68
|
|
|
69
69
|
class Multiply(PandasOperand, VectorizableMixin):
|
|
70
|
-
name
|
|
71
|
-
alias
|
|
72
|
-
is_binary
|
|
73
|
-
is_symmetrical
|
|
74
|
-
has_symmetry_importance
|
|
75
|
-
is_vectorizable
|
|
70
|
+
name = "*"
|
|
71
|
+
alias = "mul"
|
|
72
|
+
is_binary = True
|
|
73
|
+
is_symmetrical = True
|
|
74
|
+
has_symmetry_importance = True
|
|
75
|
+
is_vectorizable = True
|
|
76
76
|
|
|
77
77
|
def calculate_binary(self, left: pd.Series, right: pd.Series) -> pd.Series:
|
|
78
78
|
return left * right
|
|
@@ -86,12 +86,12 @@ class Multiply(PandasOperand, VectorizableMixin):
|
|
|
86
86
|
|
|
87
87
|
|
|
88
88
|
class Divide(PandasOperand, VectorizableMixin):
|
|
89
|
-
name
|
|
90
|
-
alias
|
|
91
|
-
is_binary
|
|
92
|
-
has_symmetry_importance
|
|
93
|
-
is_vectorizable
|
|
94
|
-
output_type
|
|
89
|
+
name = "/"
|
|
90
|
+
alias = "div"
|
|
91
|
+
is_binary = True
|
|
92
|
+
has_symmetry_importance = True
|
|
93
|
+
is_vectorizable = True
|
|
94
|
+
output_type = "float"
|
|
95
95
|
|
|
96
96
|
def calculate_binary(self, left: pd.Series, right: pd.Series) -> pd.Series:
|
|
97
97
|
return left / right.replace(0, np.nan)
|
|
@@ -105,10 +105,10 @@ class Divide(PandasOperand, VectorizableMixin):
|
|
|
105
105
|
|
|
106
106
|
|
|
107
107
|
class Combine(PandasOperand):
|
|
108
|
-
name
|
|
109
|
-
is_binary
|
|
110
|
-
has_symmetry_importance
|
|
111
|
-
output_type
|
|
108
|
+
name = "Combine"
|
|
109
|
+
is_binary = True
|
|
110
|
+
has_symmetry_importance = True
|
|
111
|
+
output_type = "object"
|
|
112
112
|
|
|
113
113
|
def calculate_binary(self, left: pd.Series, right: pd.Series) -> pd.Series:
|
|
114
114
|
temp = left.astype(str) + "_" + right.astype(str)
|
|
@@ -117,13 +117,13 @@ class Combine(PandasOperand):
|
|
|
117
117
|
|
|
118
118
|
|
|
119
119
|
class CombineThenFreq(PandasOperand):
|
|
120
|
-
name
|
|
121
|
-
is_binary
|
|
122
|
-
is_symmetrical
|
|
123
|
-
has_symmetry_importance
|
|
124
|
-
output_type
|
|
125
|
-
is_distribution_dependent
|
|
126
|
-
input_type
|
|
120
|
+
name = "CombineThenFreq"
|
|
121
|
+
is_binary = True
|
|
122
|
+
is_symmetrical = True
|
|
123
|
+
has_symmetry_importance = True
|
|
124
|
+
output_type = "float"
|
|
125
|
+
is_distribution_dependent = True
|
|
126
|
+
input_type = "discrete"
|
|
127
127
|
|
|
128
128
|
def calculate_binary(self, left: pd.Series, right: pd.Series) -> pd.Series:
|
|
129
129
|
temp = left.astype(str) + "_" + right.astype(str)
|
|
@@ -133,15 +133,15 @@ class CombineThenFreq(PandasOperand):
|
|
|
133
133
|
|
|
134
134
|
|
|
135
135
|
class Distance(PandasOperand):
|
|
136
|
-
name
|
|
137
|
-
is_binary
|
|
138
|
-
output_type
|
|
139
|
-
is_symmetrical
|
|
140
|
-
has_symmetry_importance
|
|
136
|
+
name = "dist"
|
|
137
|
+
is_binary = True
|
|
138
|
+
output_type = "float"
|
|
139
|
+
is_symmetrical = True
|
|
140
|
+
has_symmetry_importance = True
|
|
141
141
|
|
|
142
142
|
def calculate_binary(self, left: pd.Series, right: pd.Series) -> pd.Series:
|
|
143
143
|
return pd.Series(
|
|
144
|
-
1 - self.__dot(left, right) / (self.
|
|
144
|
+
1 - self.__dot(left, right) / (self.__norm(left) * self.__norm(right)), index=left.index
|
|
145
145
|
)
|
|
146
146
|
|
|
147
147
|
# row-wise dot product
|
|
@@ -152,14 +152,17 @@ class Distance(PandasOperand):
|
|
|
152
152
|
res = res.reindex(left.index.union(right.index))
|
|
153
153
|
return res
|
|
154
154
|
|
|
155
|
+
def __norm(self, vector: pd.Series) -> pd.Series:
|
|
156
|
+
return np.sqrt(self.__dot(vector, vector))
|
|
157
|
+
|
|
155
158
|
|
|
156
159
|
# Left for backward compatibility
|
|
157
160
|
class Sim(Distance):
|
|
158
|
-
name
|
|
159
|
-
is_binary
|
|
160
|
-
output_type
|
|
161
|
-
is_symmetrical
|
|
162
|
-
has_symmetry_importance
|
|
161
|
+
name = "sim"
|
|
162
|
+
is_binary = True
|
|
163
|
+
output_type = "float"
|
|
164
|
+
is_symmetrical = True
|
|
165
|
+
has_symmetry_importance = True
|
|
163
166
|
|
|
164
167
|
def calculate_binary(self, left: pd.Series, right: pd.Series) -> pd.Series:
|
|
165
168
|
return 1 - super().calculate_binary(left, right)
|
|
@@ -188,12 +191,12 @@ class StringSim(PandasOperand, abc.ABC):
|
|
|
188
191
|
|
|
189
192
|
|
|
190
193
|
class JaroWinklerSim1(StringSim):
|
|
191
|
-
name
|
|
192
|
-
is_binary
|
|
193
|
-
input_type
|
|
194
|
-
output_type
|
|
195
|
-
is_symmetrical
|
|
196
|
-
has_symmetry_importance
|
|
194
|
+
name = "sim_jw1"
|
|
195
|
+
is_binary = True
|
|
196
|
+
input_type = "string"
|
|
197
|
+
output_type = "float"
|
|
198
|
+
is_symmetrical = True
|
|
199
|
+
has_symmetry_importance = True
|
|
197
200
|
|
|
198
201
|
def _prepare_value(self, value: Optional[str]) -> Optional[str]:
|
|
199
202
|
return value
|
|
@@ -203,12 +206,12 @@ class JaroWinklerSim1(StringSim):
|
|
|
203
206
|
|
|
204
207
|
|
|
205
208
|
class JaroWinklerSim2(StringSim):
|
|
206
|
-
name
|
|
207
|
-
is_binary
|
|
208
|
-
input_type
|
|
209
|
-
output_type
|
|
210
|
-
is_symmetrical
|
|
211
|
-
has_symmetry_importance
|
|
209
|
+
name = "sim_jw2"
|
|
210
|
+
is_binary = True
|
|
211
|
+
input_type = "string"
|
|
212
|
+
output_type = "float"
|
|
213
|
+
is_symmetrical = True
|
|
214
|
+
has_symmetry_importance = True
|
|
212
215
|
|
|
213
216
|
def _prepare_value(self, value: Optional[str]) -> Optional[str]:
|
|
214
217
|
return value[::-1] if value is not None else None
|
|
@@ -218,12 +221,12 @@ class JaroWinklerSim2(StringSim):
|
|
|
218
221
|
|
|
219
222
|
|
|
220
223
|
class LevenshteinSim(StringSim):
|
|
221
|
-
name
|
|
222
|
-
is_binary
|
|
223
|
-
input_type
|
|
224
|
-
output_type
|
|
225
|
-
is_symmetrical
|
|
226
|
-
has_symmetry_importance
|
|
224
|
+
name = "sim_lv"
|
|
225
|
+
is_binary = True
|
|
226
|
+
input_type = "string"
|
|
227
|
+
output_type = "float"
|
|
228
|
+
is_symmetrical = True
|
|
229
|
+
has_symmetry_importance = True
|
|
227
230
|
|
|
228
231
|
def _prepare_value(self, value: Optional[str]) -> Optional[str]:
|
|
229
232
|
return value
|
upgini/autofe/date.py
CHANGED
|
@@ -1,19 +1,15 @@
|
|
|
1
1
|
import abc
|
|
2
|
+
import json
|
|
2
3
|
from typing import Any, Dict, List, Optional, Union
|
|
3
4
|
|
|
4
5
|
import numpy as np
|
|
5
6
|
import pandas as pd
|
|
6
7
|
from pandas.core.arrays.timedeltas import TimedeltaArray
|
|
7
|
-
from pydantic import BaseModel,
|
|
8
|
+
from pydantic import BaseModel, validator
|
|
8
9
|
|
|
9
10
|
from upgini.autofe.operand import PandasOperand
|
|
10
11
|
|
|
11
12
|
|
|
12
|
-
def get_pydantic_version():
|
|
13
|
-
major_version = int(pydantic_version.split('.')[0])
|
|
14
|
-
return major_version
|
|
15
|
-
|
|
16
|
-
|
|
17
13
|
class DateDiffMixin(BaseModel):
|
|
18
14
|
diff_unit: str = "D"
|
|
19
15
|
left_unit: Optional[str] = None
|
|
@@ -43,10 +39,10 @@ class DateDiffMixin(BaseModel):
|
|
|
43
39
|
|
|
44
40
|
|
|
45
41
|
class DateDiff(PandasOperand, DateDiffMixin):
|
|
46
|
-
name
|
|
47
|
-
alias
|
|
48
|
-
is_binary
|
|
49
|
-
has_symmetry_importance
|
|
42
|
+
name = "date_diff"
|
|
43
|
+
alias = "date_diff_type1"
|
|
44
|
+
is_binary = True
|
|
45
|
+
has_symmetry_importance = True
|
|
50
46
|
|
|
51
47
|
replace_negative: bool = False
|
|
52
48
|
|
|
@@ -75,9 +71,9 @@ class DateDiff(PandasOperand, DateDiffMixin):
|
|
|
75
71
|
|
|
76
72
|
|
|
77
73
|
class DateDiffType2(PandasOperand, DateDiffMixin):
|
|
78
|
-
name
|
|
79
|
-
is_binary
|
|
80
|
-
has_symmetry_importance
|
|
74
|
+
name = "date_diff_type2"
|
|
75
|
+
is_binary = True
|
|
76
|
+
has_symmetry_importance = True
|
|
81
77
|
|
|
82
78
|
def get_params(self) -> Dict[str, Optional[str]]:
|
|
83
79
|
res = super().get_params()
|
|
@@ -109,8 +105,8 @@ _count_aggregations = ["nunique", "count"]
|
|
|
109
105
|
|
|
110
106
|
|
|
111
107
|
class DateListDiff(PandasOperand, DateDiffMixin):
|
|
112
|
-
is_binary
|
|
113
|
-
has_symmetry_importance
|
|
108
|
+
is_binary = True
|
|
109
|
+
has_symmetry_importance = True
|
|
114
110
|
|
|
115
111
|
aggregation: str
|
|
116
112
|
replace_negative: bool = False
|
|
@@ -170,8 +166,8 @@ class DateListDiff(PandasOperand, DateDiffMixin):
|
|
|
170
166
|
|
|
171
167
|
|
|
172
168
|
class DateListDiffBounded(DateListDiff):
|
|
173
|
-
lower_bound: Optional[int]
|
|
174
|
-
upper_bound: Optional[int]
|
|
169
|
+
lower_bound: Optional[int]
|
|
170
|
+
upper_bound: Optional[int]
|
|
175
171
|
|
|
176
172
|
def __init__(self, **data: Any) -> None:
|
|
177
173
|
if "name" not in data:
|
|
@@ -196,8 +192,8 @@ class DateListDiffBounded(DateListDiff):
|
|
|
196
192
|
|
|
197
193
|
|
|
198
194
|
class DatePercentileBase(PandasOperand, abc.ABC):
|
|
199
|
-
is_binary
|
|
200
|
-
output_type
|
|
195
|
+
is_binary = True
|
|
196
|
+
output_type = "float"
|
|
201
197
|
|
|
202
198
|
date_unit: Optional[str] = None
|
|
203
199
|
|
|
@@ -231,12 +227,12 @@ class DatePercentileBase(PandasOperand, abc.ABC):
|
|
|
231
227
|
|
|
232
228
|
|
|
233
229
|
class DatePercentile(DatePercentileBase):
|
|
234
|
-
name
|
|
235
|
-
alias
|
|
230
|
+
name = "date_per"
|
|
231
|
+
alias = "date_per_method1"
|
|
236
232
|
|
|
237
|
-
zero_month: Optional[int]
|
|
238
|
-
zero_year: Optional[int]
|
|
239
|
-
zero_bounds: Optional[List[float]]
|
|
233
|
+
zero_month: Optional[int]
|
|
234
|
+
zero_year: Optional[int]
|
|
235
|
+
zero_bounds: Optional[List[float]]
|
|
240
236
|
step: int = 30
|
|
241
237
|
|
|
242
238
|
def get_params(self) -> Dict[str, Optional[str]]:
|
|
@@ -251,25 +247,12 @@ class DatePercentile(DatePercentileBase):
|
|
|
251
247
|
)
|
|
252
248
|
return res
|
|
253
249
|
|
|
254
|
-
|
|
255
|
-
|
|
256
|
-
|
|
257
|
-
from pydantic import field_validator
|
|
258
|
-
|
|
259
|
-
@field_validator('zero_bounds', mode='before')
|
|
260
|
-
def parse_zero_bounds(cls, value):
|
|
261
|
-
if isinstance(value, str):
|
|
262
|
-
return value[1:-1].split(", ")
|
|
263
|
-
return value
|
|
264
|
-
else:
|
|
265
|
-
# Use @validator for Pydantic 1.x
|
|
266
|
-
from pydantic import validator
|
|
267
|
-
|
|
268
|
-
@validator('zero_bounds', pre=True)
|
|
269
|
-
def parse_zero_bounds(cls, value):
|
|
270
|
-
if isinstance(value, str):
|
|
271
|
-
return value[1:-1].split(", ")
|
|
250
|
+
@validator("zero_bounds", pre=True)
|
|
251
|
+
def validate_bounds(cls, value):
|
|
252
|
+
if value is None or isinstance(value, list):
|
|
272
253
|
return value
|
|
254
|
+
elif isinstance(value, str):
|
|
255
|
+
return json.loads(value)
|
|
273
256
|
|
|
274
257
|
def _get_bounds(self, date_col: pd.Series) -> pd.Series:
|
|
275
258
|
months = date_col.dt.month
|
|
@@ -282,7 +265,7 @@ class DatePercentile(DatePercentileBase):
|
|
|
282
265
|
|
|
283
266
|
|
|
284
267
|
class DatePercentileMethod2(DatePercentileBase):
|
|
285
|
-
name
|
|
268
|
+
name = "date_per_method2"
|
|
286
269
|
|
|
287
270
|
def _get_bounds(self, date_col: pd.Series) -> pd.Series:
|
|
288
271
|
pass
|
upgini/autofe/groupby.py
CHANGED
|
@@ -7,9 +7,9 @@ from upgini.autofe.operand import PandasOperand, VectorizableMixin
|
|
|
7
7
|
|
|
8
8
|
class GroupByThenAgg(PandasOperand, VectorizableMixin):
|
|
9
9
|
agg: Optional[str]
|
|
10
|
-
is_vectorizable
|
|
11
|
-
is_grouping
|
|
12
|
-
is_distribution_dependent
|
|
10
|
+
is_vectorizable = True
|
|
11
|
+
is_grouping = True
|
|
12
|
+
is_distribution_dependent = True
|
|
13
13
|
|
|
14
14
|
def calculate_binary(self, left: pd.Series, right: pd.Series) -> pd.Series:
|
|
15
15
|
temp = left.groupby(right).agg(self.agg)
|
|
@@ -24,17 +24,17 @@ class GroupByThenAgg(PandasOperand, VectorizableMixin):
|
|
|
24
24
|
|
|
25
25
|
|
|
26
26
|
class GroupByThenMedian(GroupByThenAgg):
|
|
27
|
-
name
|
|
28
|
-
pandas_agg
|
|
29
|
-
is_distribution_dependent
|
|
27
|
+
name = "GroupByThenMedian"
|
|
28
|
+
pandas_agg = "median"
|
|
29
|
+
is_distribution_dependent = True
|
|
30
30
|
|
|
31
31
|
|
|
32
32
|
class GroupByThenRank(PandasOperand, VectorizableMixin):
|
|
33
|
-
name
|
|
34
|
-
is_vectorizable
|
|
35
|
-
is_grouping
|
|
36
|
-
output_type
|
|
37
|
-
is_distribution_dependent
|
|
33
|
+
name = "GroupByThenRank"
|
|
34
|
+
is_vectorizable = True
|
|
35
|
+
is_grouping = True
|
|
36
|
+
output_type = "float"
|
|
37
|
+
is_distribution_dependent = True
|
|
38
38
|
|
|
39
39
|
def calculate_binary(self, left: pd.Series, right: pd.Series) -> pd.Series:
|
|
40
40
|
temp = pd.DataFrame(left[~right.isna()].groupby(right).rank(ascending=True, pct=True)).reset_index()
|
|
@@ -49,12 +49,12 @@ class GroupByThenRank(PandasOperand, VectorizableMixin):
|
|
|
49
49
|
|
|
50
50
|
|
|
51
51
|
class GroupByThenNUnique(PandasOperand, VectorizableMixin):
|
|
52
|
-
name
|
|
53
|
-
is_vectorizable
|
|
54
|
-
is_grouping
|
|
55
|
-
output_type
|
|
56
|
-
is_distribution_dependent
|
|
57
|
-
input_type
|
|
52
|
+
name = "GroupByThenNUnique"
|
|
53
|
+
is_vectorizable = True
|
|
54
|
+
is_grouping = True
|
|
55
|
+
output_type = "int"
|
|
56
|
+
is_distribution_dependent = True
|
|
57
|
+
input_type = "discrete"
|
|
58
58
|
|
|
59
59
|
def calculate_binary(self, left: pd.Series, right: pd.Series) -> pd.Series:
|
|
60
60
|
nunique = left.groupby(right).nunique()
|
|
@@ -69,11 +69,11 @@ class GroupByThenNUnique(PandasOperand, VectorizableMixin):
|
|
|
69
69
|
|
|
70
70
|
|
|
71
71
|
class GroupByThenFreq(PandasOperand):
|
|
72
|
-
name
|
|
73
|
-
is_grouping
|
|
74
|
-
output_type
|
|
75
|
-
is_distribution_dependent
|
|
76
|
-
input_type
|
|
72
|
+
name = "GroupByThenFreq"
|
|
73
|
+
is_grouping = True
|
|
74
|
+
output_type = "float"
|
|
75
|
+
is_distribution_dependent = True
|
|
76
|
+
input_type = "discrete"
|
|
77
77
|
|
|
78
78
|
def calculate_binary(self, left: pd.Series, right: pd.Series) -> pd.Series:
|
|
79
79
|
def _f(x):
|
upgini/autofe/operand.py
CHANGED
|
@@ -8,19 +8,19 @@ from pydantic import BaseModel
|
|
|
8
8
|
|
|
9
9
|
class Operand(BaseModel):
|
|
10
10
|
name: str
|
|
11
|
-
alias: Optional[str]
|
|
11
|
+
alias: Optional[str]
|
|
12
12
|
is_unary: bool = False
|
|
13
13
|
is_symmetrical: bool = False
|
|
14
14
|
has_symmetry_importance: bool = False
|
|
15
|
-
input_type: Optional[str]
|
|
16
|
-
output_type: Optional[str]
|
|
15
|
+
input_type: Optional[str]
|
|
16
|
+
output_type: Optional[str]
|
|
17
17
|
is_categorical: bool = False
|
|
18
18
|
is_vectorizable: bool = False
|
|
19
19
|
is_grouping: bool = False
|
|
20
20
|
is_binary: bool = False
|
|
21
21
|
is_vector: bool = False
|
|
22
22
|
is_distribution_dependent: bool = False
|
|
23
|
-
params: Optional[Dict[str, str]]
|
|
23
|
+
params: Optional[Dict[str, str]]
|
|
24
24
|
|
|
25
25
|
def set_params(self, params: Dict[str, str]):
|
|
26
26
|
self.params = params
|
upgini/autofe/unary.py
CHANGED
|
@@ -1,4 +1,3 @@
|
|
|
1
|
-
from typing import Optional
|
|
2
1
|
import numpy as np
|
|
3
2
|
import pandas as pd
|
|
4
3
|
from sklearn.preprocessing import Normalizer
|
|
@@ -7,10 +6,10 @@ from upgini.autofe.operand import PandasOperand, VectorizableMixin
|
|
|
7
6
|
|
|
8
7
|
|
|
9
8
|
class Abs(PandasOperand, VectorizableMixin):
|
|
10
|
-
name
|
|
11
|
-
is_unary
|
|
12
|
-
is_vectorizable
|
|
13
|
-
group_index
|
|
9
|
+
name = "abs"
|
|
10
|
+
is_unary = True
|
|
11
|
+
is_vectorizable = True
|
|
12
|
+
group_index = 0
|
|
14
13
|
|
|
15
14
|
def calculate_unary(self, data: pd.Series) -> pd.Series:
|
|
16
15
|
return data.abs()
|
|
@@ -20,11 +19,11 @@ class Abs(PandasOperand, VectorizableMixin):
|
|
|
20
19
|
|
|
21
20
|
|
|
22
21
|
class Log(PandasOperand, VectorizableMixin):
|
|
23
|
-
name
|
|
24
|
-
is_unary
|
|
25
|
-
is_vectorizable
|
|
26
|
-
output_type
|
|
27
|
-
group_index
|
|
22
|
+
name = "log"
|
|
23
|
+
is_unary = True
|
|
24
|
+
is_vectorizable = True
|
|
25
|
+
output_type = "float"
|
|
26
|
+
group_index = 0
|
|
28
27
|
|
|
29
28
|
def calculate_unary(self, data: pd.Series) -> pd.Series:
|
|
30
29
|
return self._round_value(np.log(np.abs(data.replace(0, np.nan))), 10)
|
|
@@ -34,11 +33,11 @@ class Log(PandasOperand, VectorizableMixin):
|
|
|
34
33
|
|
|
35
34
|
|
|
36
35
|
class Sqrt(PandasOperand, VectorizableMixin):
|
|
37
|
-
name
|
|
38
|
-
is_unary
|
|
39
|
-
is_vectorizable
|
|
40
|
-
output_type
|
|
41
|
-
group_index
|
|
36
|
+
name = "sqrt"
|
|
37
|
+
is_unary = True
|
|
38
|
+
is_vectorizable = True
|
|
39
|
+
output_type = "float"
|
|
40
|
+
group_index = 0
|
|
42
41
|
|
|
43
42
|
def calculate_unary(self, data: pd.Series) -> pd.Series:
|
|
44
43
|
return self._round_value(np.sqrt(np.abs(data)))
|
|
@@ -48,10 +47,10 @@ class Sqrt(PandasOperand, VectorizableMixin):
|
|
|
48
47
|
|
|
49
48
|
|
|
50
49
|
class Square(PandasOperand, VectorizableMixin):
|
|
51
|
-
name
|
|
52
|
-
is_unary
|
|
53
|
-
is_vectorizable
|
|
54
|
-
group_index
|
|
50
|
+
name = "square"
|
|
51
|
+
is_unary = True
|
|
52
|
+
is_vectorizable = True
|
|
53
|
+
group_index = 0
|
|
55
54
|
|
|
56
55
|
def calculate_unary(self, data: pd.Series) -> pd.Series:
|
|
57
56
|
return np.square(data)
|
|
@@ -61,11 +60,11 @@ class Square(PandasOperand, VectorizableMixin):
|
|
|
61
60
|
|
|
62
61
|
|
|
63
62
|
class Sigmoid(PandasOperand, VectorizableMixin):
|
|
64
|
-
name
|
|
65
|
-
is_unary
|
|
66
|
-
is_vectorizable
|
|
67
|
-
output_type
|
|
68
|
-
group_index
|
|
63
|
+
name = "sigmoid"
|
|
64
|
+
is_unary = True
|
|
65
|
+
is_vectorizable = True
|
|
66
|
+
output_type = "float"
|
|
67
|
+
group_index = 0
|
|
69
68
|
|
|
70
69
|
def calculate_unary(self, data: pd.Series) -> pd.Series:
|
|
71
70
|
return self._round_value(1 / (1 + np.exp(-data)))
|
|
@@ -75,12 +74,12 @@ class Sigmoid(PandasOperand, VectorizableMixin):
|
|
|
75
74
|
|
|
76
75
|
|
|
77
76
|
class Floor(PandasOperand, VectorizableMixin):
|
|
78
|
-
name
|
|
79
|
-
is_unary
|
|
80
|
-
is_vectorizable
|
|
81
|
-
output_type
|
|
82
|
-
input_type
|
|
83
|
-
group_index
|
|
77
|
+
name = "floor"
|
|
78
|
+
is_unary = True
|
|
79
|
+
is_vectorizable = True
|
|
80
|
+
output_type = "int"
|
|
81
|
+
input_type = "continuous"
|
|
82
|
+
group_index = 0
|
|
84
83
|
|
|
85
84
|
def calculate_unary(self, data: pd.Series) -> pd.Series:
|
|
86
85
|
return np.floor(data)
|
|
@@ -90,11 +89,11 @@ class Floor(PandasOperand, VectorizableMixin):
|
|
|
90
89
|
|
|
91
90
|
|
|
92
91
|
class Residual(PandasOperand, VectorizableMixin):
|
|
93
|
-
name
|
|
94
|
-
is_unary
|
|
95
|
-
is_vectorizable
|
|
96
|
-
input_type
|
|
97
|
-
group_index
|
|
92
|
+
name = "residual"
|
|
93
|
+
is_unary = True
|
|
94
|
+
is_vectorizable = True
|
|
95
|
+
input_type = "continuous"
|
|
96
|
+
group_index = 0
|
|
98
97
|
|
|
99
98
|
def calculate_unary(self, data: pd.Series) -> pd.Series:
|
|
100
99
|
return data - np.floor(data)
|
|
@@ -104,11 +103,11 @@ class Residual(PandasOperand, VectorizableMixin):
|
|
|
104
103
|
|
|
105
104
|
|
|
106
105
|
class Freq(PandasOperand):
|
|
107
|
-
name
|
|
108
|
-
is_unary
|
|
109
|
-
output_type
|
|
110
|
-
is_distribution_dependent
|
|
111
|
-
input_type
|
|
106
|
+
name = "freq"
|
|
107
|
+
is_unary = True
|
|
108
|
+
output_type = "float"
|
|
109
|
+
is_distribution_dependent = True
|
|
110
|
+
input_type = "discrete"
|
|
112
111
|
|
|
113
112
|
def calculate_unary(self, data: pd.Series) -> pd.Series:
|
|
114
113
|
value_counts = data.value_counts(normalize=True)
|
|
@@ -116,9 +115,9 @@ class Freq(PandasOperand):
|
|
|
116
115
|
|
|
117
116
|
|
|
118
117
|
class Norm(PandasOperand):
|
|
119
|
-
name
|
|
120
|
-
is_unary
|
|
121
|
-
output_type
|
|
118
|
+
name = "norm"
|
|
119
|
+
is_unary = True
|
|
120
|
+
output_type = "float"
|
|
122
121
|
|
|
123
122
|
def calculate_unary(self, data: pd.Series) -> pd.Series:
|
|
124
123
|
data_dropna = data.dropna()
|
|
@@ -132,7 +131,7 @@ class Norm(PandasOperand):
|
|
|
132
131
|
|
|
133
132
|
|
|
134
133
|
class Embeddings(PandasOperand):
|
|
135
|
-
name
|
|
136
|
-
is_unary
|
|
137
|
-
input_type
|
|
138
|
-
output_type
|
|
134
|
+
name = "emb"
|
|
135
|
+
is_unary = True
|
|
136
|
+
input_type = "string"
|
|
137
|
+
output_type = "vector"
|
upgini/autofe/vector.py
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
from typing import List
|
|
1
|
+
from typing import List
|
|
2
2
|
|
|
3
3
|
import pandas as pd
|
|
4
4
|
|
|
@@ -6,19 +6,19 @@ from upgini.autofe.operand import PandasOperand, VectorizableMixin
|
|
|
6
6
|
|
|
7
7
|
|
|
8
8
|
class Mean(PandasOperand, VectorizableMixin):
|
|
9
|
-
name
|
|
10
|
-
output_type
|
|
11
|
-
is_vector
|
|
12
|
-
group_index
|
|
9
|
+
name = "mean"
|
|
10
|
+
output_type = "float"
|
|
11
|
+
is_vector = True
|
|
12
|
+
group_index = 0
|
|
13
13
|
|
|
14
14
|
def calculate_vector(self, data: List[pd.Series]) -> pd.Series:
|
|
15
15
|
return pd.DataFrame(data).T.fillna(0).mean(axis=1)
|
|
16
16
|
|
|
17
17
|
|
|
18
18
|
class Sum(PandasOperand, VectorizableMixin):
|
|
19
|
-
name
|
|
20
|
-
is_vector
|
|
21
|
-
group_index
|
|
19
|
+
name = "sum"
|
|
20
|
+
is_vector = True
|
|
21
|
+
group_index = 0
|
|
22
22
|
|
|
23
23
|
def calculate_vector(self, data: List[pd.Series]) -> pd.Series:
|
|
24
24
|
return pd.DataFrame(data).T.fillna(0).sum(axis=1)
|
upgini/dataset.py
CHANGED
|
@@ -18,7 +18,6 @@ from pandas.api.types import (
|
|
|
18
18
|
from upgini.errors import ValidationError
|
|
19
19
|
from upgini.http import ProgressStage, SearchProgress, _RestClient
|
|
20
20
|
from upgini.metadata import (
|
|
21
|
-
ENTITY_SYSTEM_RECORD_ID,
|
|
22
21
|
EVAL_SET_INDEX,
|
|
23
22
|
SYSTEM_RECORD_ID,
|
|
24
23
|
TARGET,
|
|
@@ -158,11 +157,7 @@ class Dataset: # (pd.DataFrame):
|
|
|
158
157
|
raise ValidationError(self.bundle.get("dataset_too_few_rows").format(self.MIN_ROWS_COUNT))
|
|
159
158
|
|
|
160
159
|
def __validate_max_row_count(self):
|
|
161
|
-
if
|
|
162
|
-
rows_count = self.data[ENTITY_SYSTEM_RECORD_ID].nunique()
|
|
163
|
-
else:
|
|
164
|
-
rows_count = len(self.data)
|
|
165
|
-
if rows_count > self.MAX_ROWS:
|
|
160
|
+
if len(self.data) > self.MAX_ROWS:
|
|
166
161
|
raise ValidationError(self.bundle.get("dataset_too_many_rows_registered").format(self.MAX_ROWS))
|
|
167
162
|
|
|
168
163
|
def __target_value(self) -> pd.Series:
|
|
@@ -204,14 +199,14 @@ class Dataset: # (pd.DataFrame):
|
|
|
204
199
|
elif self.task_type == ModelTaskType.REGRESSION:
|
|
205
200
|
if not is_float_dtype(target):
|
|
206
201
|
try:
|
|
207
|
-
self.data[target_column] = self.data[target_column].astype("
|
|
202
|
+
self.data[target_column] = self.data[target_column].astype("float")
|
|
208
203
|
except ValueError:
|
|
209
204
|
self.logger.exception("Failed to cast target to float for regression task type")
|
|
210
205
|
raise ValidationError(self.bundle.get("dataset_invalid_regression_target").format(target.dtype))
|
|
211
206
|
elif self.task_type == ModelTaskType.TIMESERIES:
|
|
212
207
|
if not is_float_dtype(target):
|
|
213
208
|
try:
|
|
214
|
-
self.data[target_column] = self.data[target_column].astype("
|
|
209
|
+
self.data[target_column] = self.data[target_column].astype("float")
|
|
215
210
|
except ValueError:
|
|
216
211
|
self.logger.exception("Failed to cast target to float for timeseries task type")
|
|
217
212
|
raise ValidationError(self.bundle.get("dataset_invalid_timeseries_target").format(target.dtype))
|
upgini/features_enricher.py
CHANGED
|
@@ -23,6 +23,7 @@ from pandas.api.types import (
|
|
|
23
23
|
is_datetime64_any_dtype,
|
|
24
24
|
is_numeric_dtype,
|
|
25
25
|
is_object_dtype,
|
|
26
|
+
is_period_dtype,
|
|
26
27
|
is_string_dtype,
|
|
27
28
|
)
|
|
28
29
|
from scipy.stats import ks_2samp
|
|
@@ -1407,9 +1408,7 @@ class FeaturesEnricher(TransformerMixin):
|
|
|
1407
1408
|
# TODO maybe there is no more need for these convertions
|
|
1408
1409
|
# Remove datetime features
|
|
1409
1410
|
datetime_features = [
|
|
1410
|
-
f
|
|
1411
|
-
for f in fitting_X.columns
|
|
1412
|
-
if is_datetime64_any_dtype(fitting_X[f]) or isinstance(fitting_X[f].dtype, pd.PeriodDtype)
|
|
1411
|
+
f for f in fitting_X.columns if is_datetime64_any_dtype(fitting_X[f]) or is_period_dtype(fitting_X[f])
|
|
1413
1412
|
]
|
|
1414
1413
|
if len(datetime_features) > 0:
|
|
1415
1414
|
self.logger.warning(self.bundle.get("dataset_date_features").format(datetime_features))
|
|
@@ -2042,7 +2041,7 @@ class FeaturesEnricher(TransformerMixin):
|
|
|
2042
2041
|
|
|
2043
2042
|
df[ENTITY_SYSTEM_RECORD_ID] = pd.util.hash_pandas_object(
|
|
2044
2043
|
df[columns_for_system_record_id], index=False
|
|
2045
|
-
).astype("
|
|
2044
|
+
).astype("Float64")
|
|
2046
2045
|
|
|
2047
2046
|
# Explode multiple search keys
|
|
2048
2047
|
df, unnest_search_keys = self._explode_multiple_search_keys(df, search_keys, columns_renaming)
|
|
@@ -2108,7 +2107,7 @@ class FeaturesEnricher(TransformerMixin):
|
|
|
2108
2107
|
# search keys might be changed after explode
|
|
2109
2108
|
columns_for_system_record_id = sorted(list(search_keys.keys()) + features_for_transform)
|
|
2110
2109
|
df[SYSTEM_RECORD_ID] = pd.util.hash_pandas_object(df[columns_for_system_record_id], index=False).astype(
|
|
2111
|
-
"
|
|
2110
|
+
"Float64"
|
|
2112
2111
|
)
|
|
2113
2112
|
meaning_types[SYSTEM_RECORD_ID] = FileColumnMeaningType.SYSTEM_RECORD_ID
|
|
2114
2113
|
meaning_types[ENTITY_SYSTEM_RECORD_ID] = FileColumnMeaningType.ENTITY_SYSTEM_RECORD_ID
|
upgini/http.py
CHANGED
|
@@ -39,6 +39,18 @@ from upgini.metadata import (
|
|
|
39
39
|
from upgini.resource_bundle import bundle
|
|
40
40
|
from upgini.utils.track_info import get_track_metrics
|
|
41
41
|
|
|
42
|
+
# try:
|
|
43
|
+
# from importlib.metadata import version # type: ignore
|
|
44
|
+
|
|
45
|
+
# __version__ = version("upgini")
|
|
46
|
+
# except ImportError:
|
|
47
|
+
# try:
|
|
48
|
+
# from importlib_metadata import version # type: ignore
|
|
49
|
+
|
|
50
|
+
# __version__ = version("upgini")
|
|
51
|
+
# except ImportError:
|
|
52
|
+
# __version__ = "Upgini wasn't installed"
|
|
53
|
+
|
|
42
54
|
UPGINI_URL: str = "UPGINI_URL"
|
|
43
55
|
UPGINI_API_KEY: str = "UPGINI_API_KEY"
|
|
44
56
|
DEMO_API_KEY: str = "Aa4BPwGFbn1zNEXIkZ-NbhsRk0ricN6puKuga1-O5lM"
|
|
@@ -459,11 +471,7 @@ class _RestClient:
|
|
|
459
471
|
dumps(track_metrics).encode(),
|
|
460
472
|
"application/json",
|
|
461
473
|
),
|
|
462
|
-
"metrics": (
|
|
463
|
-
"metrics.json",
|
|
464
|
-
metrics.json(exclude_none=True).encode(),
|
|
465
|
-
"application/json",
|
|
466
|
-
),
|
|
474
|
+
"metrics": ("metrics.json", metrics.json(exclude_none=True).encode(), "application/json"),
|
|
467
475
|
"file": (metadata_with_md5.name, file, "application/octet-stream"),
|
|
468
476
|
}
|
|
469
477
|
if search_customization is not None:
|
|
@@ -547,11 +555,7 @@ class _RestClient:
|
|
|
547
555
|
dumps(get_track_metrics(self.client_ip, self.client_visitorid)).encode(),
|
|
548
556
|
"application/json",
|
|
549
557
|
),
|
|
550
|
-
"metrics": (
|
|
551
|
-
"metrics.json",
|
|
552
|
-
metrics.json(exclude_none=True).encode(),
|
|
553
|
-
"application/json",
|
|
554
|
-
),
|
|
558
|
+
"metrics": ("metrics.json", metrics.json(exclude_none=True).encode(), "application/json"),
|
|
555
559
|
"file": (metadata_with_md5.name, file, "application/octet-stream"),
|
|
556
560
|
}
|
|
557
561
|
if search_customization is not None:
|
|
@@ -647,11 +651,7 @@ class _RestClient:
|
|
|
647
651
|
with open(file_path, "rb") as file:
|
|
648
652
|
files = {
|
|
649
653
|
"file": (metadata.name, file, "application/octet-stream"),
|
|
650
|
-
"metadata": (
|
|
651
|
-
"metadata.json",
|
|
652
|
-
metadata.json(exclude_none=True).encode(),
|
|
653
|
-
"application/json",
|
|
654
|
-
),
|
|
654
|
+
"metadata": ("metadata.json", metadata.json(exclude_none=True).encode(), "application/json"),
|
|
655
655
|
}
|
|
656
656
|
|
|
657
657
|
return self._send_post_file_req_v2(api_path, files)
|
upgini/lazy_import.py
CHANGED
|
@@ -1,6 +1,4 @@
|
|
|
1
1
|
import importlib
|
|
2
|
-
import importlib.util
|
|
3
|
-
import importlib.machinery
|
|
4
2
|
|
|
5
3
|
|
|
6
4
|
class LazyImport:
|
|
@@ -12,18 +10,7 @@ class LazyImport:
|
|
|
12
10
|
|
|
13
11
|
def _load(self):
|
|
14
12
|
if self._module is None:
|
|
15
|
-
|
|
16
|
-
spec = importlib.util.find_spec(self.module_name)
|
|
17
|
-
if spec is None:
|
|
18
|
-
raise ImportError(f"Module {self.module_name} not found")
|
|
19
|
-
|
|
20
|
-
# Create module
|
|
21
|
-
self._module = importlib.util.module_from_spec(spec)
|
|
22
|
-
|
|
23
|
-
# Execute module
|
|
24
|
-
spec.loader.exec_module(self._module)
|
|
25
|
-
|
|
26
|
-
# Get class from module
|
|
13
|
+
self._module = importlib.import_module(self.module_name)
|
|
27
14
|
self._class = getattr(self._module, self.class_name)
|
|
28
15
|
|
|
29
16
|
def __call__(self, *args, **kwargs):
|
upgini/metadata.py
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
3
|
from enum import Enum
|
|
4
|
-
from typing import
|
|
4
|
+
from typing import Dict, List, Optional, Set, Union
|
|
5
5
|
|
|
6
6
|
from pydantic import BaseModel
|
|
7
7
|
|
|
@@ -172,23 +172,23 @@ class FileMetricsInterval(BaseModel):
|
|
|
172
172
|
date_cut: float
|
|
173
173
|
count: float
|
|
174
174
|
valid_count: float
|
|
175
|
-
avg_target: Optional[float]
|
|
176
|
-
avg_score_etalon: Optional[float]
|
|
175
|
+
avg_target: Optional[float] # not for multiclass
|
|
176
|
+
avg_score_etalon: Optional[float]
|
|
177
177
|
|
|
178
178
|
|
|
179
179
|
class FileMetrics(BaseModel):
|
|
180
180
|
# etalon metadata
|
|
181
|
-
task_type: Optional[ModelTaskType]
|
|
182
|
-
label: Optional[ModelLabelType]
|
|
183
|
-
count: Optional[int]
|
|
184
|
-
valid_count: Optional[int]
|
|
185
|
-
valid_rate: Optional[float]
|
|
186
|
-
avg_target: Optional[float]
|
|
187
|
-
metrics_binary_etalon: Optional[BinaryTask]
|
|
188
|
-
metrics_regression_etalon: Optional[RegressionTask]
|
|
189
|
-
metrics_multiclass_etalon: Optional[MulticlassTask]
|
|
190
|
-
cuts: Optional[List[float]]
|
|
191
|
-
interval: Optional[List[FileMetricsInterval]]
|
|
181
|
+
task_type: Optional[ModelTaskType]
|
|
182
|
+
label: Optional[ModelLabelType]
|
|
183
|
+
count: Optional[int]
|
|
184
|
+
valid_count: Optional[int]
|
|
185
|
+
valid_rate: Optional[float]
|
|
186
|
+
avg_target: Optional[float]
|
|
187
|
+
metrics_binary_etalon: Optional[BinaryTask]
|
|
188
|
+
metrics_regression_etalon: Optional[RegressionTask]
|
|
189
|
+
metrics_multiclass_etalon: Optional[MulticlassTask]
|
|
190
|
+
cuts: Optional[List[float]]
|
|
191
|
+
interval: Optional[List[FileMetricsInterval]]
|
|
192
192
|
|
|
193
193
|
|
|
194
194
|
class NumericInterval(BaseModel):
|
|
@@ -202,25 +202,25 @@ class FileColumnMetadata(BaseModel):
|
|
|
202
202
|
dataType: DataType
|
|
203
203
|
meaningType: FileColumnMeaningType
|
|
204
204
|
minMaxValues: Optional[NumericInterval] = None
|
|
205
|
-
originalName: Optional[str]
|
|
205
|
+
originalName: Optional[str]
|
|
206
206
|
# is this column contains keys from multiple key columns like msisdn1, msisdn2
|
|
207
207
|
isUnnest: bool = False
|
|
208
208
|
# list of original etalon key column names like msisdn1, msisdn2
|
|
209
|
-
unnestKeyNames: Optional[List[str]]
|
|
209
|
+
unnestKeyNames: Optional[List[str]]
|
|
210
210
|
|
|
211
211
|
|
|
212
212
|
class FileMetadata(BaseModel):
|
|
213
213
|
name: str
|
|
214
|
-
description: Optional[str]
|
|
214
|
+
description: Optional[str]
|
|
215
215
|
columns: List[FileColumnMetadata]
|
|
216
216
|
searchKeys: List[List[str]]
|
|
217
|
-
excludeFeaturesSources: Optional[List[str]]
|
|
218
|
-
hierarchicalGroupKeys: Optional[List[str]]
|
|
219
|
-
hierarchicalSubgroupKeys: Optional[List[str]]
|
|
220
|
-
taskType: Optional[ModelTaskType]
|
|
221
|
-
rowsCount: Optional[int]
|
|
222
|
-
checksumMD5: Optional[str]
|
|
223
|
-
digest: Optional[str]
|
|
217
|
+
excludeFeaturesSources: Optional[List[str]]
|
|
218
|
+
hierarchicalGroupKeys: Optional[List[str]]
|
|
219
|
+
hierarchicalSubgroupKeys: Optional[List[str]]
|
|
220
|
+
taskType: Optional[ModelTaskType]
|
|
221
|
+
rowsCount: Optional[int]
|
|
222
|
+
checksumMD5: Optional[str]
|
|
223
|
+
digest: Optional[str]
|
|
224
224
|
|
|
225
225
|
def column_by_name(self, name: str) -> Optional[FileColumnMetadata]:
|
|
226
226
|
for c in self.columns:
|
|
@@ -244,17 +244,17 @@ class FeaturesMetadataV2(BaseModel):
|
|
|
244
244
|
source: str
|
|
245
245
|
hit_rate: float
|
|
246
246
|
shap_value: float
|
|
247
|
-
commercial_schema: Optional[str]
|
|
248
|
-
data_provider: Optional[str]
|
|
249
|
-
data_providers: Optional[List[str]]
|
|
250
|
-
data_provider_link: Optional[str]
|
|
251
|
-
data_provider_links: Optional[List[str]]
|
|
252
|
-
data_source: Optional[str]
|
|
253
|
-
data_sources: Optional[List[str]]
|
|
254
|
-
data_source_link: Optional[str]
|
|
255
|
-
data_source_links: Optional[List[str]]
|
|
256
|
-
doc_link: Optional[str]
|
|
257
|
-
update_frequency: Optional[str]
|
|
247
|
+
commercial_schema: Optional[str]
|
|
248
|
+
data_provider: Optional[str]
|
|
249
|
+
data_providers: Optional[List[str]]
|
|
250
|
+
data_provider_link: Optional[str]
|
|
251
|
+
data_provider_links: Optional[List[str]]
|
|
252
|
+
data_source: Optional[str]
|
|
253
|
+
data_sources: Optional[List[str]]
|
|
254
|
+
data_source_link: Optional[str]
|
|
255
|
+
data_source_links: Optional[List[str]]
|
|
256
|
+
doc_link: Optional[str]
|
|
257
|
+
update_frequency: Optional[str]
|
|
258
258
|
|
|
259
259
|
|
|
260
260
|
class HitRateMetrics(BaseModel):
|
|
@@ -274,48 +274,48 @@ class ModelEvalSet(BaseModel):
|
|
|
274
274
|
class BaseColumnMetadata(BaseModel):
|
|
275
275
|
original_name: str
|
|
276
276
|
hashed_name: str
|
|
277
|
-
ads_definition_id: Optional[str]
|
|
277
|
+
ads_definition_id: Optional[str]
|
|
278
278
|
is_augmented: bool
|
|
279
279
|
|
|
280
280
|
|
|
281
281
|
class GeneratedFeatureMetadata(BaseModel):
|
|
282
|
-
alias: Optional[str]
|
|
282
|
+
alias: Optional[str]
|
|
283
283
|
formula: str
|
|
284
284
|
display_index: str
|
|
285
285
|
base_columns: List[BaseColumnMetadata]
|
|
286
|
-
operator_params: Optional[Dict[str, str]]
|
|
286
|
+
operator_params: Optional[Dict[str, str]]
|
|
287
287
|
|
|
288
288
|
|
|
289
289
|
class ProviderTaskMetadataV2(BaseModel):
|
|
290
290
|
features: List[FeaturesMetadataV2]
|
|
291
|
-
hit_rate_metrics: Optional[HitRateMetrics]
|
|
292
|
-
eval_set_metrics: Optional[List[ModelEvalSet]]
|
|
293
|
-
zero_hit_rate_search_keys: Optional[List[str]]
|
|
294
|
-
features_used_for_embeddings: Optional[List[str]]
|
|
295
|
-
shuffle_kfold: Optional[bool]
|
|
296
|
-
generated_features: Optional[List[GeneratedFeatureMetadata]]
|
|
291
|
+
hit_rate_metrics: Optional[HitRateMetrics]
|
|
292
|
+
eval_set_metrics: Optional[List[ModelEvalSet]]
|
|
293
|
+
zero_hit_rate_search_keys: Optional[List[str]]
|
|
294
|
+
features_used_for_embeddings: Optional[List[str]]
|
|
295
|
+
shuffle_kfold: Optional[bool]
|
|
296
|
+
generated_features: Optional[List[GeneratedFeatureMetadata]]
|
|
297
297
|
|
|
298
298
|
|
|
299
299
|
class FeaturesFilter(BaseModel):
|
|
300
|
-
minImportance: Optional[float]
|
|
301
|
-
maxPSI: Optional[float]
|
|
302
|
-
maxCount: Optional[int]
|
|
303
|
-
selectedFeatures: Optional[List[str]]
|
|
300
|
+
minImportance: Optional[float]
|
|
301
|
+
maxPSI: Optional[float]
|
|
302
|
+
maxCount: Optional[int]
|
|
303
|
+
selectedFeatures: Optional[List[str]]
|
|
304
304
|
|
|
305
305
|
|
|
306
306
|
class RuntimeParameters(BaseModel):
|
|
307
|
-
properties: Dict[str,
|
|
307
|
+
properties: Dict[str, str] = {}
|
|
308
308
|
|
|
309
309
|
|
|
310
310
|
class SearchCustomization(BaseModel):
|
|
311
|
-
featuresFilter: Optional[FeaturesFilter]
|
|
312
|
-
extractFeatures: Optional[bool]
|
|
313
|
-
accurateModel: Optional[bool]
|
|
314
|
-
importanceThreshold: Optional[float]
|
|
315
|
-
maxFeatures: Optional[int]
|
|
316
|
-
returnScores: Optional[bool]
|
|
317
|
-
runtimeParameters: Optional[RuntimeParameters]
|
|
318
|
-
metricsCalculation: Optional[bool]
|
|
311
|
+
featuresFilter: Optional[FeaturesFilter]
|
|
312
|
+
extractFeatures: Optional[bool]
|
|
313
|
+
accurateModel: Optional[bool]
|
|
314
|
+
importanceThreshold: Optional[float]
|
|
315
|
+
maxFeatures: Optional[int]
|
|
316
|
+
returnScores: Optional[bool]
|
|
317
|
+
runtimeParameters: Optional[RuntimeParameters]
|
|
318
|
+
metricsCalculation: Optional[bool]
|
|
319
319
|
|
|
320
320
|
def __repr__(self):
|
|
321
321
|
return (
|
|
@@ -10,6 +10,7 @@ from pandas.api.types import (
|
|
|
10
10
|
is_float_dtype,
|
|
11
11
|
is_numeric_dtype,
|
|
12
12
|
is_object_dtype,
|
|
13
|
+
is_period_dtype,
|
|
13
14
|
is_string_dtype,
|
|
14
15
|
)
|
|
15
16
|
|
|
@@ -134,7 +135,7 @@ class Normalizer:
|
|
|
134
135
|
|
|
135
136
|
removed_features = []
|
|
136
137
|
for f in features:
|
|
137
|
-
if is_datetime(df[f]) or
|
|
138
|
+
if is_datetime(df[f]) or is_period_dtype(df[f]):
|
|
138
139
|
removed_features.append(f)
|
|
139
140
|
df.drop(columns=f, inplace=True)
|
|
140
141
|
|
upgini/utils/datetime_utils.py
CHANGED
|
@@ -6,7 +6,7 @@ from typing import Dict, List, Optional
|
|
|
6
6
|
import numpy as np
|
|
7
7
|
import pandas as pd
|
|
8
8
|
from dateutil.relativedelta import relativedelta
|
|
9
|
-
from pandas.api.types import is_numeric_dtype
|
|
9
|
+
from pandas.api.types import is_numeric_dtype, is_period_dtype
|
|
10
10
|
|
|
11
11
|
from upgini.errors import ValidationError
|
|
12
12
|
from upgini.metadata import EVAL_SET_INDEX, SearchKey
|
|
@@ -84,7 +84,7 @@ class DateTimeSearchKeyConverter:
|
|
|
84
84
|
df[self.date_column] = df[self.date_column].apply(lambda x: x.replace(tzinfo=None))
|
|
85
85
|
elif isinstance(df[self.date_column].values[0], datetime.date):
|
|
86
86
|
df[self.date_column] = pd.to_datetime(df[self.date_column], errors="coerce")
|
|
87
|
-
elif
|
|
87
|
+
elif is_period_dtype(df[self.date_column]):
|
|
88
88
|
df[self.date_column] = df[self.date_column].dt.to_timestamp()
|
|
89
89
|
elif is_numeric_dtype(df[self.date_column]):
|
|
90
90
|
# 315532801 - 2524608001 - seconds
|
|
@@ -207,7 +207,7 @@ def is_time_series(df: pd.DataFrame, date_col: str) -> bool:
|
|
|
207
207
|
def is_blocked_time_series(df: pd.DataFrame, date_col: str, search_keys: List[str]) -> bool:
|
|
208
208
|
df = df.copy()
|
|
209
209
|
seconds = "datetime_seconds"
|
|
210
|
-
if
|
|
210
|
+
if is_period_dtype(df[date_col]):
|
|
211
211
|
df[date_col] = df[date_col].dt.to_timestamp()
|
|
212
212
|
else:
|
|
213
213
|
df[date_col] = pd.to_datetime(df[date_col])
|
|
@@ -275,7 +275,7 @@ def validate_dates_distribution(
|
|
|
275
275
|
if col in search_keys:
|
|
276
276
|
continue
|
|
277
277
|
try:
|
|
278
|
-
if
|
|
278
|
+
if is_period_dtype(X[col]):
|
|
279
279
|
pass
|
|
280
280
|
elif pd.__version__ >= "2.0.0":
|
|
281
281
|
# Format mixed to avoid massive warnings
|
|
@@ -290,7 +290,7 @@ def validate_dates_distribution(
|
|
|
290
290
|
if maybe_date_col is None:
|
|
291
291
|
return
|
|
292
292
|
|
|
293
|
-
if
|
|
293
|
+
if is_period_dtype(X[maybe_date_col]):
|
|
294
294
|
dates = X[maybe_date_col].dt.to_timestamp().dt.date
|
|
295
295
|
elif pd.__version__ >= "2.0.0":
|
|
296
296
|
dates = pd.to_datetime(X[maybe_date_col], format="mixed").dt.date
|
upgini/utils/phone_utils.py
CHANGED
|
@@ -1,8 +1,12 @@
|
|
|
1
1
|
from typing import Optional
|
|
2
2
|
|
|
3
|
-
import numpy as np
|
|
4
3
|
import pandas as pd
|
|
5
|
-
from pandas.api.types import
|
|
4
|
+
from pandas.api.types import (
|
|
5
|
+
is_float_dtype,
|
|
6
|
+
is_int64_dtype,
|
|
7
|
+
is_object_dtype,
|
|
8
|
+
is_string_dtype,
|
|
9
|
+
)
|
|
6
10
|
|
|
7
11
|
from upgini.errors import ValidationError
|
|
8
12
|
from upgini.utils.base_search_key_detector import BaseSearchKeyDetector
|
|
@@ -59,9 +63,7 @@ class PhoneSearchKeyConverter:
|
|
|
59
63
|
convert_func = self.phone_str_to_int_safe
|
|
60
64
|
elif is_float_dtype(df[self.phone_column]):
|
|
61
65
|
convert_func = self.phone_float_to_int_safe
|
|
62
|
-
elif df[self.phone_column]
|
|
63
|
-
df[self.phone_column].dtype, pd.Int64Dtype
|
|
64
|
-
):
|
|
66
|
+
elif is_int64_dtype(df[self.phone_column]):
|
|
65
67
|
convert_func = self.phone_int_to_int_safe
|
|
66
68
|
else:
|
|
67
69
|
raise ValidationError(
|
|
@@ -25,7 +25,7 @@ class PostalCodeSearchKeyConverter:
|
|
|
25
25
|
if is_string_dtype(df[self.postal_code_column]) or is_object_dtype(df[self.postal_code_column]):
|
|
26
26
|
try:
|
|
27
27
|
df[self.postal_code_column] = (
|
|
28
|
-
df[self.postal_code_column].astype("string").astype("
|
|
28
|
+
df[self.postal_code_column].astype("string").astype("Float64").astype("Int64").astype("string")
|
|
29
29
|
)
|
|
30
30
|
except Exception:
|
|
31
31
|
pass
|
upgini/utils/target_utils.py
CHANGED
|
@@ -194,7 +194,4 @@ def calculate_psi(expected: pd.Series, actual: pd.Series) -> float:
|
|
|
194
194
|
test_distribution = actual.value_counts(bins=bins, normalize=True).sort_index().values
|
|
195
195
|
|
|
196
196
|
# Calculate the PSI
|
|
197
|
-
|
|
198
|
-
return np.sum((train_distribution - test_distribution) * np.log(train_distribution / test_distribution))
|
|
199
|
-
except Exception:
|
|
200
|
-
return np.nan
|
|
197
|
+
return np.sum((train_distribution - test_distribution) * np.log(train_distribution / test_distribution))
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.3
|
|
2
2
|
Name: upgini
|
|
3
|
-
Version: 1.1.
|
|
3
|
+
Version: 1.1.317
|
|
4
4
|
Summary: Intelligent data search & enrichment for Machine Learning
|
|
5
5
|
Project-URL: Bug Reports, https://github.com/upgini/upgini/issues
|
|
6
6
|
Project-URL: Homepage, https://upgini.com/
|
|
@@ -29,9 +29,9 @@ Requires-Dist: ipywidgets>=8.1.0
|
|
|
29
29
|
Requires-Dist: jarowinkler>=2.0.0
|
|
30
30
|
Requires-Dist: levenshtein>=0.25.1
|
|
31
31
|
Requires-Dist: lightgbm>=3.3.2
|
|
32
|
-
Requires-Dist: numpy
|
|
32
|
+
Requires-Dist: numpy>=1.19.0
|
|
33
33
|
Requires-Dist: pandas<3.0.0,>=1.1.0
|
|
34
|
-
Requires-Dist: pydantic<
|
|
34
|
+
Requires-Dist: pydantic<2.0.0,>=1.8.2
|
|
35
35
|
Requires-Dist: pyjwt>=2.8.0
|
|
36
36
|
Requires-Dist: python-bidi==0.4.2
|
|
37
37
|
Requires-Dist: python-dateutil>=2.8.0
|
|
@@ -1,12 +1,12 @@
|
|
|
1
|
-
upgini/__about__.py,sha256=
|
|
1
|
+
upgini/__about__.py,sha256=7A4Mpkf8cSUSzwIJzMaQ6hlkjN2sldlyOHl5dtLNJkE,24
|
|
2
2
|
upgini/__init__.py,sha256=Xs0YFVBu1KUdtZzbStGRPQtLt3YLzJnjx5nIUBlX8BE,415
|
|
3
3
|
upgini/ads.py,sha256=nvuRxRx5MHDMgPr9SiU-fsqRdFaBv8p4_v1oqiysKpc,2714
|
|
4
|
-
upgini/dataset.py,sha256=
|
|
4
|
+
upgini/dataset.py,sha256=yAWIygHejxdKXOA4g3QjtCu0VRa9at-4nPPuugCr77U,30857
|
|
5
5
|
upgini/errors.py,sha256=2b_Wbo0OYhLUbrZqdLIx5jBnAsiD1Mcenh-VjR4HCTw,950
|
|
6
|
-
upgini/features_enricher.py,sha256=
|
|
7
|
-
upgini/http.py,sha256=
|
|
8
|
-
upgini/lazy_import.py,sha256=
|
|
9
|
-
upgini/metadata.py,sha256=
|
|
6
|
+
upgini/features_enricher.py,sha256=Gu4gsnMVjcsfWnJlu4Np3jpE9Au1UywhuHQb0Xv5YNg,187982
|
|
7
|
+
upgini/http.py,sha256=a4Epc9YLIJBuYk4t8E_2-QDLBtJFqKO35jn2SnYQZCg,42920
|
|
8
|
+
upgini/lazy_import.py,sha256=EwoM0msNGbSmWBhGbrLDny1DSnOlvTxCjmMKPxYlDms,610
|
|
9
|
+
upgini/metadata.py,sha256=YQ-1HZGyPOksP2iM50ff_pMHXLyzvpChqSfNh8Z0ke4,10833
|
|
10
10
|
upgini/metrics.py,sha256=Tu5cN8RlhOSSMWUTXRSkdl8SWBqR1N_2eJpBum9pZxc,30926
|
|
11
11
|
upgini/search_task.py,sha256=LtRJ9bCPjMo1gJ-sUDKERhDwGcWKImrzwVFHjkMSQHQ,17071
|
|
12
12
|
upgini/spinner.py,sha256=4iMd-eIe_BnkqFEMIliULTbj6rNI2HkN_VJ4qYe0cUc,1118
|
|
@@ -15,19 +15,19 @@ upgini/ads_management/__init__.py,sha256=qzyisOToVRP-tquAJD1PblZhNtMrOB8FiyF9Jvf
|
|
|
15
15
|
upgini/ads_management/ads_manager.py,sha256=igVbN2jz80Umb2BUJixmJVj-zx8unoKpecVo-R-nGdw,2648
|
|
16
16
|
upgini/autofe/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
17
17
|
upgini/autofe/all_operands.py,sha256=3LiH9iU-ArGmYpS8FHWH7yCFx40ILfvlSXJlKIa75BQ,2542
|
|
18
|
-
upgini/autofe/binary.py,sha256=
|
|
19
|
-
upgini/autofe/date.py,sha256=
|
|
18
|
+
upgini/autofe/binary.py,sha256=2Z5FrfdCtesKEHBuabEBiRvwOAzcRoFKAX1wvGpHL0I,7003
|
|
19
|
+
upgini/autofe/date.py,sha256=ijB9RCh5wBwl03Nl8zDYA50gpL4sqmAkYVYzVPm1bn0,9070
|
|
20
20
|
upgini/autofe/feature.py,sha256=gwGWY2UcX_0wHAvfEiu1rRU7GFZyzMWZIaPVcf6kD80,14223
|
|
21
|
-
upgini/autofe/groupby.py,sha256=
|
|
22
|
-
upgini/autofe/operand.py,sha256=
|
|
23
|
-
upgini/autofe/unary.py,sha256=
|
|
24
|
-
upgini/autofe/vector.py,sha256=
|
|
21
|
+
upgini/autofe/groupby.py,sha256=4WjDzQxqpZxB79Ih4ihMMI5GDxaFqiH6ZelfV82ClT4,3091
|
|
22
|
+
upgini/autofe/operand.py,sha256=MKEsl3zxpWzRDpTkE0sNJxTu62U20sWOvEKhPjUWS6s,2915
|
|
23
|
+
upgini/autofe/unary.py,sha256=oIMf-IVy7L7GkzxMmQyExX0tOH9RhWeQh7cGxxMDiPk,3832
|
|
24
|
+
upgini/autofe/vector.py,sha256=dLxfAstJs-gw_OQ1xxoxcM6pVzORlV0HVzdzt7cLXVQ,606
|
|
25
25
|
upgini/data_source/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
26
26
|
upgini/data_source/data_source_publisher.py,sha256=Vg0biG86YB0OEaoxbK9YYrr4yARm11_h3bTWIBgoScA,22115
|
|
27
27
|
upgini/mdc/__init__.py,sha256=aM08nIWFc2gWdWUa3_IuEnNND0cQPkBGnYpRMnfFN8k,1019
|
|
28
28
|
upgini/mdc/context.py,sha256=3u1B-jXt7tXEvNcV3qmR9SDCseudnY7KYsLclBdwVLk,1405
|
|
29
29
|
upgini/normalizer/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
30
|
-
upgini/normalizer/normalize_utils.py,sha256=
|
|
30
|
+
upgini/normalizer/normalize_utils.py,sha256=8gH1oabPNZrC1kHSRFxGGcO0o6yNDlOJXCLzzExq-3s,7451
|
|
31
31
|
upgini/resource_bundle/__init__.py,sha256=S5F2G47pnJd2LDpmFsjDqEwiKkP8Hm-hcseDbMka6Ko,8345
|
|
32
32
|
upgini/resource_bundle/exceptions.py,sha256=5fRvx0_vWdE1-7HcSgF0tckB4A9AKyf5RiinZkInTsI,621
|
|
33
33
|
upgini/resource_bundle/strings.properties,sha256=WZAuYPX2Dpn6BHoA3RX8uvMNMr-yJE2fF7Gz0i24x2s,26459
|
|
@@ -42,7 +42,7 @@ upgini/utils/blocked_time_series.py,sha256=Uqr3vp4YqNclj2-PzEYqVy763GSXHn86sbpIl
|
|
|
42
42
|
upgini/utils/country_utils.py,sha256=lY-eXWwFVegdVENFttbvLcgGDjFO17Sex8hd2PyJaRk,6937
|
|
43
43
|
upgini/utils/custom_loss_utils.py,sha256=kieNZYBYZm5ZGBltF1F_jOSF4ea6C29rYuCyiDcqVNY,3857
|
|
44
44
|
upgini/utils/cv_utils.py,sha256=w6FQb9nO8BWDx88EF83NpjPLarK4eR4ia0Wg0kLBJC4,3525
|
|
45
|
-
upgini/utils/datetime_utils.py,sha256=
|
|
45
|
+
upgini/utils/datetime_utils.py,sha256=niZcf2YqAwokUFUW474zajlzv9HAMf7nv9v_WPJHpyc,12123
|
|
46
46
|
upgini/utils/deduplicate_utils.py,sha256=Zvs7zW4QzaERQmJNPrTVf2ZTVBkBLOycFCzyMwtXuV8,8770
|
|
47
47
|
upgini/utils/display_utils.py,sha256=A2ouB5eiZ-Kyt9ykYxkLQwyoRPrdYeJymwNTiajtFXs,10990
|
|
48
48
|
upgini/utils/email_utils.py,sha256=j0Ug1R_0AnCg1Y92zIZ4XMwvKo3G5_pcOlBN1OH_gZs,5191
|
|
@@ -50,14 +50,14 @@ upgini/utils/fallback_progress_bar.py,sha256=PDaKb8dYpVZaWMroNcOHsTc3pSjgi9mOm0-
|
|
|
50
50
|
upgini/utils/features_validator.py,sha256=PgKNt5dyqfErTvjtRNNUS9g7GFqHBtAtnsfA-V5UO1A,3307
|
|
51
51
|
upgini/utils/format.py,sha256=Yv5cvvSs2bOLUzzNu96Pu33VMDNbabio92QepUj41jU,243
|
|
52
52
|
upgini/utils/ip_utils.py,sha256=ZZj_uQFTHhagzt-MRew__ZBOp2DdnkMrachS7PElkSE,5143
|
|
53
|
-
upgini/utils/phone_utils.py,sha256=
|
|
54
|
-
upgini/utils/postal_code_utils.py,sha256=
|
|
53
|
+
upgini/utils/phone_utils.py,sha256=PTSRfGAWCuLy8R6I8X6clcc1K7bZXIIrZ_alIB8irC8,10368
|
|
54
|
+
upgini/utils/postal_code_utils.py,sha256=C899tJS8qM_ps4I3g-Ve6qzIa22O_UqwNmGFoyy9sO8,1716
|
|
55
55
|
upgini/utils/progress_bar.py,sha256=N-Sfdah2Hg8lXP_fV9EfUTXz_PyRt4lo9fAHoUDOoLc,1550
|
|
56
56
|
upgini/utils/sklearn_ext.py,sha256=13jQS_k7v0aUtudXV6nGUEWjttPQzAW9AFYL5wgEz9k,44511
|
|
57
|
-
upgini/utils/target_utils.py,sha256=
|
|
57
|
+
upgini/utils/target_utils.py,sha256=Y96_PJ5cC-WsEbeqg20v9uqywDQobLoTb-xoP7S3o4E,7807
|
|
58
58
|
upgini/utils/track_info.py,sha256=G5Lu1xxakg2_TQjKZk4b5SvrHsATTXNVV3NbvWtT8k8,5663
|
|
59
59
|
upgini/utils/warning_counter.py,sha256=dIWBB4dI5XRRJZudvIlqlIYKEiwLLPcXarsZuYRt338,227
|
|
60
|
-
upgini-1.1.
|
|
61
|
-
upgini-1.1.
|
|
62
|
-
upgini-1.1.
|
|
63
|
-
upgini-1.1.
|
|
60
|
+
upgini-1.1.317.dist-info/METADATA,sha256=MAx5zlya3JBerLBEmC9me552zgexw4gy4Cfc2VuNzSg,48222
|
|
61
|
+
upgini-1.1.317.dist-info/WHEEL,sha256=1yFddiXMmvYK7QYTqtRNtX66WJ0Mz8PYEiEUoOUUxRY,87
|
|
62
|
+
upgini-1.1.317.dist-info/licenses/LICENSE,sha256=5RRzgvdJUu3BUDfv4bzVU6FqKgwHlIay63pPCSmSgzw,1514
|
|
63
|
+
upgini-1.1.317.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|