upgini 1.1.312__py3-none-any.whl → 1.1.312a1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of upgini might be problematic. Click here for more details.
- upgini/__about__.py +1 -1
- upgini/autofe/all_operands.py +7 -26
- upgini/autofe/binary.py +4 -95
- upgini/autofe/date.py +3 -16
- upgini/autofe/feature.py +11 -25
- upgini/autofe/unary.py +0 -7
- upgini/dataset.py +30 -385
- upgini/features_enricher.py +276 -120
- upgini/metadata.py +16 -1
- upgini/normalizer/normalize_utils.py +203 -0
- upgini/utils/country_utils.py +16 -0
- upgini/utils/datetime_utils.py +34 -15
- upgini/utils/email_utils.py +19 -5
- upgini/utils/ip_utils.py +100 -1
- upgini/utils/phone_utils.py +345 -0
- upgini/utils/postal_code_utils.py +34 -0
- {upgini-1.1.312.dist-info → upgini-1.1.312a1.dist-info}/METADATA +1 -3
- {upgini-1.1.312.dist-info → upgini-1.1.312a1.dist-info}/RECORD +20 -20
- {upgini-1.1.312.dist-info → upgini-1.1.312a1.dist-info}/WHEEL +1 -1
- upgini/normalizer/phone_normalizer.py +0 -340
- {upgini-1.1.312.dist-info → upgini-1.1.312a1.dist-info}/licenses/LICENSE +0 -0
upgini/utils/phone_utils.py
CHANGED
|
@@ -1,5 +1,14 @@
|
|
|
1
|
+
from typing import Optional
|
|
2
|
+
|
|
1
3
|
import pandas as pd
|
|
4
|
+
from pandas.api.types import (
|
|
5
|
+
is_float_dtype,
|
|
6
|
+
is_int64_dtype,
|
|
7
|
+
is_object_dtype,
|
|
8
|
+
is_string_dtype,
|
|
9
|
+
)
|
|
2
10
|
|
|
11
|
+
from upgini.errors import ValidationError
|
|
3
12
|
from upgini.utils.base_search_key_detector import BaseSearchKeyDetector
|
|
4
13
|
|
|
5
14
|
|
|
@@ -9,3 +18,339 @@ class PhoneSearchKeyDetector(BaseSearchKeyDetector):
|
|
|
9
18
|
|
|
10
19
|
def _is_search_key_by_values(self, column: pd.Series) -> bool:
|
|
11
20
|
return False
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
class PhoneSearchKeyConverter:
    """Normalize a DataFrame phone column to nullable ``Int64`` phone numbers.

    A phone number is kept as the integer formed by its digits.  When a country
    column is configured, numbers whose digit count equals the national length
    registered for the row's country in ``COUNTRIES_PREFIXES`` get that
    country's prefix prepended.  Values outside the 8..15 digit range accepted
    by ``validate_length`` become missing.
    """

    def __init__(self, phone_column: str, country_column: Optional[str] = None):
        """Remember which columns to work on.

        Args:
            phone_column: name of the column holding phone numbers.
            country_column: optional name of the column holding the country keys
                looked up in ``COUNTRIES_PREFIXES``.
        """
        self.phone_column = phone_column
        self.country_column = country_column

    def convert(self, df: pd.DataFrame) -> pd.DataFrame:
        """Convert the phone column to ``Int64`` and add country prefixes.

        The phone column of ``df`` is overwritten in place; every other column
        is left untouched (including its dtype).

        Args:
            df: frame containing the phone column and, if configured, the
                country column.

        Returns:
            The same ``df`` with the phone column replaced.

        Raises:
            ValidationError: if the phone column has an unsupported dtype.
        """
        if self.country_column is None:
            return self.phone_to_int(df)

        normalized = []
        for number, country in zip(self._parse_phones(df), df[self.country_column]):
            # Negative values can never be phone numbers; skip prefixing so that
            # they simply fail the length validation below.
            if number is not None and number >= 0:
                prefixed = self._prefixed(number, country)
                if prefixed is not None:
                    number = prefixed
            # Validate only AFTER prefixing: a short national number (e.g. the
            # 7-digit entries of the table) is valid once its prefix is added.
            normalized.append(self._validated(number))

        df[self.phone_column] = pd.array(normalized, dtype="Int64")
        return df

    def add_prefix(self, row):
        """Prepend the country prefix to the phone of a single row.

        The row is returned unchanged when the phone is missing, the country is
        not in ``COUNTRIES_PREFIXES`` or the phone's digit count differs from the
        national length registered for that country.

        Args:
            row: mapping-like row (e.g. a ``pd.Series``) holding the phone and
                country columns.

        Returns:
            The same ``row`` object, possibly with an updated phone.
        """
        phone = row[self.phone_column]
        if pd.isna(phone):
            return row
        prefixed = self._prefixed(phone, row[self.country_column])
        if prefixed is not None:
            row[self.phone_column] = prefixed
        return row

    def phone_to_int(self, df: pd.DataFrame) -> pd.DataFrame:
        """
        Convention: phone number is always presented as int number.
        phone_number = Country code + National Destination Code + Subscriber Number.
        Examples:
        41793834315 for Switzerland
        46767040672 for Sweden
        861065529988 for China
        18143008198 for the USA
        Inplace conversion of phone to int.

        Method will remove all non numeric chars from string and convert it to int.
        None will be set for phone numbers that couldn't be converted to int

        Raises:
            ValidationError: if the phone column is neither string/object, float
                nor integer typed.
        """
        validated = [self._validated(number) for number in self._parse_phones(df)]
        df[self.phone_column] = pd.array(validated, dtype="Int64")
        return df

    def _parse_phones(self, df: pd.DataFrame) -> list:
        """Parse the phone column into a list of ints/None without length validation."""
        # Local import: the module-level imports only provide is_int64_dtype,
        # which rejects other integer widths.
        from pandas.api.types import is_integer_dtype

        column = df[self.phone_column]
        if is_string_dtype(column) or is_object_dtype(column):
            parser = self._digits_to_int
        elif is_float_dtype(column) or is_integer_dtype(column):
            parser = self._number_to_int
        else:
            raise ValidationError(
                f"phone_column_name {self.phone_column} doesn't have supported dtype. "
                f"Dataset dtypes: {df.dtypes}. "
                f"Contact developer and request to implement conversion of {self.phone_column} to int"
            )
        # A plain list keeps exact Python ints; Series.apply could round-trip
        # not-yet-validated values through float64.
        return [parser(value) for value in column]

    def _prefixed(self, phone, country) -> Optional[int]:
        """Return ``phone`` with the prefix of ``country`` prepended, or None if no prefix applies."""
        entry = self.COUNTRIES_PREFIXES.get(country)
        if entry is None:
            return None
        country_prefix, number_of_digits = entry
        digits = str(phone)
        if len(digits) != number_of_digits:
            return None
        return int(country_prefix + digits)

    @staticmethod
    def _number_to_int(value) -> Optional[int]:
        """Truncate a numeric value to int; None for NaN, infinities and missing values."""
        try:
            return int(value)
        except (TypeError, ValueError, OverflowError):
            return None

    @staticmethod
    def _digits_to_int(value) -> Optional[int]:
        """Build an int from the digits of ``str(value)``; None when that is impossible."""
        # Deliberately best-effort: object columns may hold arbitrary values.
        try:
            text = str(value)
            if text.endswith(".0"):
                # Drop the float artefact so that "123.0" does not become 1230.
                text = text[:-2]
            return int("".join(filter(str.isdigit, text)))
        except Exception:
            return None

    @staticmethod
    def _validated(number: Optional[int]) -> Optional[int]:
        """Apply ``validate_length`` to a parsed number, passing None through."""
        if number is None:
            return None
        return PhoneSearchKeyConverter.validate_length(number)

    @staticmethod
    def phone_float_to_int_safe(value: float) -> Optional[int]:
        """Convert a float phone to a validated int; None if it cannot be converted."""
        return PhoneSearchKeyConverter._validated(PhoneSearchKeyConverter._number_to_int(value))

    @staticmethod
    def phone_int_to_int_safe(value: int) -> Optional[int]:
        """Convert an integer phone to a validated int; None if it cannot be converted."""
        return PhoneSearchKeyConverter._validated(PhoneSearchKeyConverter._number_to_int(value))

    @staticmethod
    def phone_str_to_int_safe(value: str) -> Optional[int]:
        """Extract the digits of a string phone as a validated int; None on failure."""
        return PhoneSearchKeyConverter._validated(PhoneSearchKeyConverter._digits_to_int(value))

    @staticmethod
    def validate_length(value: int) -> Optional[int]:
        """Return ``value`` if it has between 8 and 15 digits, otherwise None."""
        if 10000000 <= value <= 999999999999999:
            return value
        return None

    # Country key -> (prefix to prepend, digit count of a number lacking that prefix).
    COUNTRIES_PREFIXES = {
        "US": ("1", 10),
        "CA": ("1", 10),
        "AI": ("1", 10),
        "AG": ("1", 10),
        "AS": ("1", 10),
        "BB": ("1", 10),
        "BS": ("1", 10),
        "VG": ("1", 10),
        "VI": ("1", 10),
        "KY": ("1", 10),
        "BM": ("1", 10),
        "GD": ("1", 10),
        "TC": ("1", 10),
        "MS": ("1", 10),
        "MP": ("1", 10),
        "GU": ("1", 10),
        "SX": ("1", 10),
        "LC": ("1", 10),
        "DM": ("1", 10),
        "VC": ("1", 10),
        "PR": ("1", 10),
        "TT": ("1", 10),
        "KN": ("1", 10),
        "JM": ("1", 10),
        "EG": ("20", 9),
        "SS": ("211", 9),
        "MA": ("212", 9),
        "EH": ("212", 4),
        "DZ": ("213", 8),
        "TN": ("216", 8),
        "LY": ("218", 9),
        "GM": ("220", 6),
        "SN": ("221", 9),
        "MR": ("222", 7),
        "ML": ("223", 8),
        "GN": ("224", 9),
        "CI": ("225", 7),
        "BF": ("226", 8),
        "NE": ("227", 8),
        "TG": ("228", 8),
        "BJ": ("229", 8),
        "MU": ("230", 7),
        "LR": ("231", 9),
        "SL": ("232", 8),
        "GH": ("233", 9),
        "NG": ("234", 9),
        "TD": ("235", 8),
        "CF": ("236", 7),
        "CM": ("237", 9),
        "CV": ("238", 7),
        "ST": ("239", 7),
        "GQ": ("240", 9),
        "GA": ("241", 8),
        "CG": ("242", 7),
        "CD": ("243", 9),
        "AO": ("244", 9),
        "GW": ("245", 6),
        "IO": ("246", 7),
        "AC": ("247", 5),
        "SC": ("248", 7),
        "SD": ("249", 9),
        "RW": ("250", 9),
        "ET": ("251", 9),
        "SO": ("252", 9),
        "DJ": ("253", 8),
        "KE": ("254", 9),
        "TZ": ("255", 9),
        "UG": ("256", 9),
        "BI": ("257", 8),
        "MZ": ("258", 8),
        "ZM": ("260", 9),
        "MG": ("261", 9),
        "RE": ("262", 9),
        "YT": ("262", 9),
        "TF": ("262", 9),
        "ZW": ("263", 9),
        "NA": ("264", 9),
        "MW": ("265", 7),
        "LS": ("266", 8),
        "BW": ("267", 7),
        "SZ": ("268", 8),
        "KM": ("269", 7),
        "ZA": ("27", 10),
        "SH": ("290", 5),
        "TA": ("290", 5),
        "ER": ("291", 7),
        "AT": ("43", 10),
        "AW": ("297", 7),
        "FO": ("298", 6),
        "GL": ("299", 6),
        "GR": ("30", 10),
        "BE": ("32", 8),
        "FR": ("33", 9),
        "ES": ("34", 9),
        "GI": ("350", 8),
        "PE": ("51", 8),
        "MX": ("52", 10),
        "CU": ("53", 8),
        "AR": ("54", 10),
        "BR": ("55", 10),
        "CL": ("56", 9),
        "CO": ("57", 8),
        "VE": ("58", 10),
        "PT": ("351", 9),
        "LU": ("352", 8),
        "IE": ("353", 8),
        "IS": ("354", 7),
        "AL": ("355", 8),
        "MT": ("356", 8),
        "CY": ("357", 8),
        "FI": ("358", 9),
        "BG": ("359", 8),
        "HU": ("36", 8),
        "LT": ("370", 8),
        "LV": ("371", 8),
        "EE": ("372", 7),
        "MD": ("373", 8),
        "AM": ("374", 8),
        "BY": ("375", 9),
        "AD": ("376", 6),
        "MC": ("377", 8),
        "SM": ("378", 9),
        "VA": ("3906698", 5),
        "UA": ("380", 9),
        "RS": ("381", 9),
        "ME": ("382", 8),
        "HR": ("385", 8),
        "SI": ("386", 8),
        "BA": ("387", 8),
        "MK": ("389", 8),
        "MY": ("60", 9),
        "AU": ("61", 9),
        "CX": ("61", 9),
        "CC": ("61", 9),
        "ID": ("62", 9),
        "PH": ("632", 7),
        "NZ": ("64", 8),
        "PN": ("64", 8),
        "SG": ("65", 8),
        "TH": ("66", 8),
        "IT": ("39", 10),
        "RO": ("40", 9),
        "CH": ("41", 9),
        "CZ": ("420", 9),
        "SK": ("421", 9),
        "GB": ("44", 10),
        "LI": ("423", 7),
        "GG": ("44", 10),
        "IM": ("44", 10),
        "JE": ("44", 10),
        "DK": ("45", 8),
        "SE": ("46", 8),
        "BD": ("880", 8),
        "TW": ("886", 9),
        "JP": ("81", 9),
        "KR": ("82", 9),
        "VN": ("84", 10),
        "KP": ("850", 8),
        "HK": ("852", 8),
        "MO": ("853", 8),
        "KH": ("855", 8),
        "LA": ("856", 8),
        "NO": ("47", 8),
        "SJ": ("47", 8),
        "BV": ("47", 8),
        "PL": ("48", 9),
        "DE": ("49", 10),
        "TR": ("90", 10),
        "IN": ("91", 10),
        "PK": ("92", 9),
        "AF": ("93", 9),
        "LK": ("94", 9),
        "MM": ("95", 7),
        "IR": ("98", 10),
        "MV": ("960", 7),
        "LB": ("961", 7),
        "JO": ("962", 9),
        "SY": ("963", 10),
        "IQ": ("964", 10),
        "KW": ("965", 7),
        "SA": ("966", 9),
        "YE": ("967", 7),
        "OM": ("968", 8),
        "PS": ("970", 8),
        "AE": ("971", 8),
        "IL": ("972", 9),
        "BH": ("973", 8),
        "QA": ("974", 8),
        "BT": ("975", 7),
        "MN": ("976", 8),
        "NP": ("977", 8),
        "TJ": ("992", 9),
        "TM": ("993", 8),
        "AZ": ("994", 9),
        "GE": ("995", 9),
        "KG": ("996", 9),
        "UZ": ("998", 9),
        "FK": ("500", 5),
        "BZ": ("501", 7),
        "GT": ("502", 8),
        "SV": ("503", 8),
        "HN": ("504", 8),
        "NI": ("505", 8),
        "CR": ("506", 8),
        "PA": ("507", 7),
        "PM": ("508", 6),
        "HT": ("509", 8),
        "GS": ("500", 5),
        "MF": ("590", 9),
        "BL": ("590", 9),
        "GP": ("590", 9),
        "BO": ("591", 9),
        "GY": ("592", 9),
        "EC": ("593", 9),
        "GF": ("594", 9),
        "PY": ("595", 9),
        "MQ": ("596", 9),
        "SR": ("597", 9),
        "UY": ("598", 9),
        "CW": ("599", 9),
        "BQ": ("599", 9),
        "RU": ("7", 10),
        "KZ": ("7", 10),
        "TL": ("670", 7),
        "NF": ("672", 7),
        "HM": ("672", 7),
        "BN": ("673", 7),
        "NR": ("674", 7),
        "PG": ("675", 7),
        "TO": ("676", 7),
        "SB": ("677", 7),
        "VU": ("678", 7),
        "FJ": ("679", 7),
        "PW": ("680", 7),
        "WF": ("681", 7),
        "CK": ("682", 5),
        "NU": ("683", 7),
        "WS": ("685", 7),
        "KI": ("686", 7),
        "NC": ("687", 7),
        "TV": ("688", 7),
        "PF": ("689", 7),
        "TK": ("690", 7),
        "FM": ("691", 7),
        "MH": ("692", 7),
    }
|
|
@@ -1,4 +1,9 @@
|
|
|
1
1
|
import pandas as pd
|
|
2
|
+
from pandas.api.types import (
|
|
3
|
+
is_float_dtype,
|
|
4
|
+
is_object_dtype,
|
|
5
|
+
is_string_dtype,
|
|
6
|
+
)
|
|
2
7
|
|
|
3
8
|
from upgini.utils.base_search_key_detector import BaseSearchKeyDetector
|
|
4
9
|
|
|
@@ -9,3 +14,32 @@ class PostalCodeSearchKeyDetector(BaseSearchKeyDetector):
|
|
|
9
14
|
|
|
10
15
|
def _is_search_key_by_values(self, column: pd.Series) -> bool:
|
|
11
16
|
return False
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
class PostalCodeSearchKeyConverter:
    """Normalize a postal-code column to upper-case alphanumeric strings.

    After conversion the column has the pandas ``string`` dtype; every value is
    upper-cased, stripped of non-alphanumeric characters and of leading zeros.
    """

    def __init__(self, postal_code_column: str):
        """Remember the name of the column holding postal codes."""
        self.postal_code_column = postal_code_column

    def convert(self, df: pd.DataFrame) -> pd.DataFrame:
        """Normalize the postal-code column of ``df`` in place.

        Args:
            df: frame containing the postal-code column.

        Returns:
            The same ``df`` with the postal-code column replaced by its
            normalized ``string`` version.
        """
        codes = df[self.postal_code_column]
        if is_string_dtype(codes) or is_object_dtype(codes):
            # Best-effort numeric round-trip ("12345.0" -> "12345").  It fails as
            # a whole when any value is not numeric; that is expected for
            # alphanumeric codes, so the column is then kept as it is.
            try:
                codes = codes.astype("string").astype("Float64").astype("Int64").astype("string")
            except Exception:
                pass
        elif is_float_dtype(codes):
            codes = codes.astype("Int64").astype("string")

        df[self.postal_code_column] = (
            codes.astype("string")
            .str.upper()
            # Per-value fallback for columns where the round-trip above failed:
            # without it "12345.0" would lose its dot below and become "123450".
            .str.replace(r"^\s*(\d+)\.0+\s*$", r"\1", regex=True)
            .str.replace(r"[^0-9A-Z]", "", regex=True)  # remove non alphanumeric characters
            .str.replace(r"^0+\B", "", regex=True)  # remove leading zeros
        )
        # NOTE(review): a column that ends up entirely empty is not reported
        # here; the caller has to validate the result if that matters.
        return df
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.3
|
|
2
2
|
Name: upgini
|
|
3
|
-
Version: 1.1.312
|
|
3
|
+
Version: 1.1.312a1
|
|
4
4
|
Summary: Intelligent data search & enrichment for Machine Learning
|
|
5
5
|
Project-URL: Bug Reports, https://github.com/upgini/upgini/issues
|
|
6
6
|
Project-URL: Homepage, https://upgini.com/
|
|
@@ -26,8 +26,6 @@ Requires-Python: <3.11,>=3.8
|
|
|
26
26
|
Requires-Dist: catboost>=1.0.3
|
|
27
27
|
Requires-Dist: fastparquet>=0.8.1
|
|
28
28
|
Requires-Dist: ipywidgets>=8.1.0
|
|
29
|
-
Requires-Dist: jarowinkler>=2.0.0
|
|
30
|
-
Requires-Dist: levenshtein>=0.25.1
|
|
31
29
|
Requires-Dist: lightgbm>=3.3.2
|
|
32
30
|
Requires-Dist: numpy>=1.19.0
|
|
33
31
|
Requires-Dist: pandas<3.0.0,>=1.1.0
|
|
@@ -1,12 +1,12 @@
|
|
|
1
|
-
upgini/__about__.py,sha256=
|
|
1
|
+
upgini/__about__.py,sha256=bgCPs5_xce8_o9bI-etowHUS0QExeDzykUqEC39xHMs,26
|
|
2
2
|
upgini/__init__.py,sha256=Xs0YFVBu1KUdtZzbStGRPQtLt3YLzJnjx5nIUBlX8BE,415
|
|
3
3
|
upgini/ads.py,sha256=nvuRxRx5MHDMgPr9SiU-fsqRdFaBv8p4_v1oqiysKpc,2714
|
|
4
|
-
upgini/dataset.py,sha256=
|
|
4
|
+
upgini/dataset.py,sha256=TZkG1PBSnbvUt6j12GdUwdhXWE2mxbFks2wJdNm7ioM,30758
|
|
5
5
|
upgini/errors.py,sha256=2b_Wbo0OYhLUbrZqdLIx5jBnAsiD1Mcenh-VjR4HCTw,950
|
|
6
|
-
upgini/features_enricher.py,sha256=
|
|
6
|
+
upgini/features_enricher.py,sha256=RYApDsW7z_6yIBgK26rFHgU187tOWD75s75nbRRHg58,187789
|
|
7
7
|
upgini/http.py,sha256=a4Epc9YLIJBuYk4t8E_2-QDLBtJFqKO35jn2SnYQZCg,42920
|
|
8
8
|
upgini/lazy_import.py,sha256=EwoM0msNGbSmWBhGbrLDny1DSnOlvTxCjmMKPxYlDms,610
|
|
9
|
-
upgini/metadata.py,sha256=
|
|
9
|
+
upgini/metadata.py,sha256=YQ-1HZGyPOksP2iM50ff_pMHXLyzvpChqSfNh8Z0ke4,10833
|
|
10
10
|
upgini/metrics.py,sha256=Tu5cN8RlhOSSMWUTXRSkdl8SWBqR1N_2eJpBum9pZxc,30926
|
|
11
11
|
upgini/search_task.py,sha256=LtRJ9bCPjMo1gJ-sUDKERhDwGcWKImrzwVFHjkMSQHQ,17071
|
|
12
12
|
upgini/spinner.py,sha256=4iMd-eIe_BnkqFEMIliULTbj6rNI2HkN_VJ4qYe0cUc,1118
|
|
@@ -14,20 +14,20 @@ upgini/version_validator.py,sha256=ddSKUK_-eGJB3NgrqOMoWJU-OxQ253WsNLp8aqJkaIM,1
|
|
|
14
14
|
upgini/ads_management/__init__.py,sha256=qzyisOToVRP-tquAJD1PblZhNtMrOB8FiyF9JvfkvgE,50
|
|
15
15
|
upgini/ads_management/ads_manager.py,sha256=igVbN2jz80Umb2BUJixmJVj-zx8unoKpecVo-R-nGdw,2648
|
|
16
16
|
upgini/autofe/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
17
|
-
upgini/autofe/all_operands.py,sha256=
|
|
18
|
-
upgini/autofe/binary.py,sha256=
|
|
19
|
-
upgini/autofe/date.py,sha256=
|
|
20
|
-
upgini/autofe/feature.py,sha256=
|
|
17
|
+
upgini/autofe/all_operands.py,sha256=XbvgX2IU4aee9rJZ--d5MdmrfKhON_emle5-RU1qlEY,2506
|
|
18
|
+
upgini/autofe/binary.py,sha256=8FXPJxN7fnC5wphO0Dp1tQCa0lFMSDGQGvBMkSIVAcE,4155
|
|
19
|
+
upgini/autofe/date.py,sha256=tshVUTioOLVy8on8b5xjgtVrSSrXz_8fMCLeIkpo808,7941
|
|
20
|
+
upgini/autofe/feature.py,sha256=ayxiF8Ip1ww_pt_BC9Pk127fAHZ_3fuluulS1EYLolk,13423
|
|
21
21
|
upgini/autofe/groupby.py,sha256=4WjDzQxqpZxB79Ih4ihMMI5GDxaFqiH6ZelfV82ClT4,3091
|
|
22
22
|
upgini/autofe/operand.py,sha256=MKEsl3zxpWzRDpTkE0sNJxTu62U20sWOvEKhPjUWS6s,2915
|
|
23
|
-
upgini/autofe/unary.py,sha256=
|
|
23
|
+
upgini/autofe/unary.py,sha256=ZWjLd-CUkNt_PpM8YuWLLipW1v_RdBlsl4JxXIVo9aM,3652
|
|
24
24
|
upgini/autofe/vector.py,sha256=dLxfAstJs-gw_OQ1xxoxcM6pVzORlV0HVzdzt7cLXVQ,606
|
|
25
25
|
upgini/data_source/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
26
26
|
upgini/data_source/data_source_publisher.py,sha256=kTewGmdoxTVkZEqDdbhWbmIKIvb7W0w7ml3WOo-qc2g,21450
|
|
27
27
|
upgini/mdc/__init__.py,sha256=aM08nIWFc2gWdWUa3_IuEnNND0cQPkBGnYpRMnfFN8k,1019
|
|
28
28
|
upgini/mdc/context.py,sha256=3u1B-jXt7tXEvNcV3qmR9SDCseudnY7KYsLclBdwVLk,1405
|
|
29
29
|
upgini/normalizer/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
30
|
-
upgini/normalizer/
|
|
30
|
+
upgini/normalizer/normalize_utils.py,sha256=8gH1oabPNZrC1kHSRFxGGcO0o6yNDlOJXCLzzExq-3s,7451
|
|
31
31
|
upgini/resource_bundle/__init__.py,sha256=S5F2G47pnJd2LDpmFsjDqEwiKkP8Hm-hcseDbMka6Ko,8345
|
|
32
32
|
upgini/resource_bundle/exceptions.py,sha256=5fRvx0_vWdE1-7HcSgF0tckB4A9AKyf5RiinZkInTsI,621
|
|
33
33
|
upgini/resource_bundle/strings.properties,sha256=WZAuYPX2Dpn6BHoA3RX8uvMNMr-yJE2fF7Gz0i24x2s,26459
|
|
@@ -39,25 +39,25 @@ upgini/sampler/utils.py,sha256=PYOk3kKSnFlyxcpdtDNLBEEhTB4lO_iP7pQHqeUcmAc,20211
|
|
|
39
39
|
upgini/utils/__init__.py,sha256=O_KgzKiJjW3g4NoqZ7lAxUpoHcBi_gze6r3ndEjCH74,842
|
|
40
40
|
upgini/utils/base_search_key_detector.py,sha256=Inc6iGG-VXQdejWFfbekIkZk2ahC4k7CdGqzOkie6Bs,1021
|
|
41
41
|
upgini/utils/blocked_time_series.py,sha256=Uqr3vp4YqNclj2-PzEYqVy763GSXHn86sbpIl1UOB4s,3382
|
|
42
|
-
upgini/utils/country_utils.py,sha256=
|
|
42
|
+
upgini/utils/country_utils.py,sha256=lY-eXWwFVegdVENFttbvLcgGDjFO17Sex8hd2PyJaRk,6937
|
|
43
43
|
upgini/utils/custom_loss_utils.py,sha256=kieNZYBYZm5ZGBltF1F_jOSF4ea6C29rYuCyiDcqVNY,3857
|
|
44
44
|
upgini/utils/cv_utils.py,sha256=w6FQb9nO8BWDx88EF83NpjPLarK4eR4ia0Wg0kLBJC4,3525
|
|
45
|
-
upgini/utils/datetime_utils.py,sha256=
|
|
45
|
+
upgini/utils/datetime_utils.py,sha256=O-IQbWtWJs6xTAr3m9FMRHyT-fL_28vCMrrt4eqfpa0,12025
|
|
46
46
|
upgini/utils/deduplicate_utils.py,sha256=Zvs7zW4QzaERQmJNPrTVf2ZTVBkBLOycFCzyMwtXuV8,8770
|
|
47
47
|
upgini/utils/display_utils.py,sha256=A2ouB5eiZ-Kyt9ykYxkLQwyoRPrdYeJymwNTiajtFXs,10990
|
|
48
|
-
upgini/utils/email_utils.py,sha256=
|
|
48
|
+
upgini/utils/email_utils.py,sha256=Ge4oRlpYpBA5ZLXWfXrMHx-DJWsVLglzWvpQOU6iZBw,4715
|
|
49
49
|
upgini/utils/fallback_progress_bar.py,sha256=PDaKb8dYpVZaWMroNcOHsTc3pSjgi9mOm0--cOFTwJ0,1074
|
|
50
50
|
upgini/utils/features_validator.py,sha256=PgKNt5dyqfErTvjtRNNUS9g7GFqHBtAtnsfA-V5UO1A,3307
|
|
51
51
|
upgini/utils/format.py,sha256=Yv5cvvSs2bOLUzzNu96Pu33VMDNbabio92QepUj41jU,243
|
|
52
|
-
upgini/utils/ip_utils.py,sha256=
|
|
53
|
-
upgini/utils/phone_utils.py,sha256=
|
|
54
|
-
upgini/utils/postal_code_utils.py,sha256=
|
|
52
|
+
upgini/utils/ip_utils.py,sha256=ZZj_uQFTHhagzt-MRew__ZBOp2DdnkMrachS7PElkSE,5143
|
|
53
|
+
upgini/utils/phone_utils.py,sha256=c8oNajhT7Z1hXpiRAEH828vX7SoALBJKUun_M5qu9vg,10363
|
|
54
|
+
upgini/utils/postal_code_utils.py,sha256=C899tJS8qM_ps4I3g-Ve6qzIa22O_UqwNmGFoyy9sO8,1716
|
|
55
55
|
upgini/utils/progress_bar.py,sha256=N-Sfdah2Hg8lXP_fV9EfUTXz_PyRt4lo9fAHoUDOoLc,1550
|
|
56
56
|
upgini/utils/sklearn_ext.py,sha256=13jQS_k7v0aUtudXV6nGUEWjttPQzAW9AFYL5wgEz9k,44511
|
|
57
57
|
upgini/utils/target_utils.py,sha256=Y96_PJ5cC-WsEbeqg20v9uqywDQobLoTb-xoP7S3o4E,7807
|
|
58
58
|
upgini/utils/track_info.py,sha256=G5Lu1xxakg2_TQjKZk4b5SvrHsATTXNVV3NbvWtT8k8,5663
|
|
59
59
|
upgini/utils/warning_counter.py,sha256=dIWBB4dI5XRRJZudvIlqlIYKEiwLLPcXarsZuYRt338,227
|
|
60
|
-
upgini-1.1.
|
|
61
|
-
upgini-1.1.
|
|
62
|
-
upgini-1.1.
|
|
63
|
-
upgini-1.1.
|
|
60
|
+
upgini-1.1.312a1.dist-info/METADATA,sha256=263n_P2QL9zSfoNG0s4OTIUYgK3j3kwEsbCBacnpOm0,48155
|
|
61
|
+
upgini-1.1.312a1.dist-info/WHEEL,sha256=1yFddiXMmvYK7QYTqtRNtX66WJ0Mz8PYEiEUoOUUxRY,87
|
|
62
|
+
upgini-1.1.312a1.dist-info/licenses/LICENSE,sha256=5RRzgvdJUu3BUDfv4bzVU6FqKgwHlIay63pPCSmSgzw,1514
|
|
63
|
+
upgini-1.1.312a1.dist-info/RECORD,,
|