upgini 1.2.6__py3-none-any.whl → 1.2.6a1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of upgini might be problematic. Click here for more details.
- upgini/__about__.py +1 -1
- upgini/autofe/all_operands.py +1 -2
- upgini/autofe/feature.py +14 -44
- upgini/data_source/data_source_publisher.py +1 -0
- upgini/features_enricher.py +1 -0
- upgini/utils/ip_utils.py +16 -16
- {upgini-1.2.6.dist-info → upgini-1.2.6a1.dist-info}/METADATA +1 -1
- {upgini-1.2.6.dist-info → upgini-1.2.6a1.dist-info}/RECORD +10 -10
- {upgini-1.2.6.dist-info → upgini-1.2.6a1.dist-info}/WHEEL +1 -1
- {upgini-1.2.6.dist-info → upgini-1.2.6a1.dist-info}/licenses/LICENSE +0 -0
upgini/__about__.py
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
__version__ = "1.2.6"
|
|
1
|
+
__version__ = "1.2.6a1"
|
upgini/autofe/all_operands.py
CHANGED
upgini/autofe/feature.py
CHANGED
|
@@ -22,9 +22,6 @@ class Column:
|
|
|
22
22
|
def set_op_params(self, params: Dict[str, str]) -> "Column":
|
|
23
23
|
return self
|
|
24
24
|
|
|
25
|
-
def get_op_params(self, **kwargs):
|
|
26
|
-
return dict()
|
|
27
|
-
|
|
28
25
|
def rename_columns(self, mapping: Dict[str, str]) -> "Column":
|
|
29
26
|
self.name = self._unhash(mapping.get(self.name) or self.name)
|
|
30
27
|
return self
|
|
@@ -47,10 +44,6 @@ class Column:
|
|
|
47
44
|
def get_columns(self, **kwargs) -> List[str]:
|
|
48
45
|
return [self.name]
|
|
49
46
|
|
|
50
|
-
@property
|
|
51
|
-
def children(self) -> List[Union["Feature", "Column"]]:
|
|
52
|
-
return []
|
|
53
|
-
|
|
54
47
|
def infer_type(self, data: pd.DataFrame) -> DtypeObj:
|
|
55
48
|
return data[self.name].dtype
|
|
56
49
|
|
|
@@ -95,30 +88,9 @@ class Feature:
|
|
|
95
88
|
self.op.set_params(params)
|
|
96
89
|
|
|
97
90
|
for child in self.children:
|
|
98
|
-
|
|
99
|
-
k[len(child.get_display_name()) + 1 :]: v
|
|
100
|
-
for k, v in params.items()
|
|
101
|
-
if k.startswith(child.get_display_name())
|
|
102
|
-
}
|
|
103
|
-
if not child_params:
|
|
104
|
-
child_params = params
|
|
105
|
-
child.set_op_params(child_params)
|
|
91
|
+
child.set_op_params(params)
|
|
106
92
|
return self
|
|
107
93
|
|
|
108
|
-
def get_op_params(self, **kwargs) -> Dict[str, str]:
|
|
109
|
-
return {
|
|
110
|
-
k: str(v)
|
|
111
|
-
for k, v in dict(
|
|
112
|
-
(
|
|
113
|
-
(f"{child.get_display_name(**kwargs)}_{k}", v)
|
|
114
|
-
for child in self.children
|
|
115
|
-
for k, v in child.get_op_params(**kwargs).items()
|
|
116
|
-
),
|
|
117
|
-
**(self.op.get_params() or {}),
|
|
118
|
-
).items()
|
|
119
|
-
if v is not None
|
|
120
|
-
}
|
|
121
|
-
|
|
122
94
|
def get_hash(self) -> str:
|
|
123
95
|
return hashlib.sha256(
|
|
124
96
|
"_".join([self.op.name] + [ch.get_display_name() for ch in self.children]).encode("utf-8")
|
|
@@ -354,26 +326,24 @@ class FeatureGroup:
|
|
|
354
326
|
return names
|
|
355
327
|
|
|
356
328
|
def calculate(self, data: pd.DataFrame, is_root=False) -> pd.DataFrame:
|
|
329
|
+
main_column = None if self.main_column_node is None else self.main_column_node.get_columns()[0]
|
|
357
330
|
if isinstance(self.op, PandasOperand):
|
|
358
|
-
|
|
359
|
-
lower_order_children = [
|
|
360
|
-
if self.main_column_node is not None:
|
|
361
|
-
lower_order_children.append(self.main_column_node)
|
|
362
|
-
lower_order_children.extend(
|
|
331
|
+
columns = self.get_columns()
|
|
332
|
+
lower_order_children = [
|
|
363
333
|
ch for f in self.children for ch in f.children if ch.get_display_name() != main_column
|
|
364
|
-
|
|
334
|
+
]
|
|
365
335
|
lower_order_names = [ch.get_display_name() for ch in lower_order_children]
|
|
366
|
-
|
|
367
|
-
|
|
368
|
-
|
|
369
|
-
|
|
370
|
-
|
|
336
|
+
if any(isinstance(f, Feature) for f in lower_order_children):
|
|
337
|
+
child_data = pd.concat(
|
|
338
|
+
[data[main_column or []]] + [ch.calculate(data) for ch in lower_order_children],
|
|
339
|
+
axis=1,
|
|
340
|
+
)
|
|
341
|
+
child_data.columns = ([main_column] if main_column is not None else []) + lower_order_names
|
|
342
|
+
else:
|
|
343
|
+
child_data = data[columns]
|
|
371
344
|
|
|
372
345
|
new_data = self.op.calculate_group(child_data, main_column=main_column)
|
|
373
|
-
new_data.rename(
|
|
374
|
-
columns=dict(zip((n for n in lower_order_names if n != main_column), self.get_display_names())),
|
|
375
|
-
inplace=True,
|
|
376
|
-
)
|
|
346
|
+
new_data.rename(columns=dict(zip(lower_order_names, self.get_display_names())), inplace=True)
|
|
377
347
|
else:
|
|
378
348
|
raise NotImplementedError(f"Unrecognized operator {self.op.name}.")
|
|
379
349
|
|
|
@@ -378,6 +378,7 @@ class DataSourcePublisher:
|
|
|
378
378
|
search_keys = [k.value.value for k in search_keys] if search_keys else None
|
|
379
379
|
request = {"bqTableId": bq_table_id, "searchKeys": search_keys}
|
|
380
380
|
task_id = self._rest_client.upload_online(request, trace_id)
|
|
381
|
+
print(f"Start polling management task_id={task_id} with trace_id={trace_id}")
|
|
381
382
|
with Spinner():
|
|
382
383
|
status_response = self._rest_client.poll_ads_management_task_status(task_id, trace_id)
|
|
383
384
|
while status_response["status"] not in self.FINAL_STATUSES:
|
upgini/features_enricher.py
CHANGED
upgini/utils/ip_utils.py
CHANGED
|
@@ -57,17 +57,17 @@ class IpSearchKeyConverter:
|
|
|
57
57
|
except ValueError:
|
|
58
58
|
pass
|
|
59
59
|
|
|
60
|
-
@staticmethod
|
|
61
|
-
def _is_ipv4(ip: Optional[_BaseAddress]):
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
@staticmethod
|
|
67
|
-
def _to_ipv4(ip: Optional[_BaseAddress]) -> Optional[IPv4Address]:
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
60
|
+
# @staticmethod
|
|
61
|
+
# def _is_ipv4(ip: Optional[_BaseAddress]):
|
|
62
|
+
# return ip is not None and (
|
|
63
|
+
# isinstance(ip, IPv4Address) or (isinstance(ip, IPv6Address) and ip.ipv4_mapped is not None)
|
|
64
|
+
# )
|
|
65
|
+
|
|
66
|
+
# @staticmethod
|
|
67
|
+
# def _to_ipv4(ip: Optional[_BaseAddress]) -> Optional[IPv4Address]:
|
|
68
|
+
# if isinstance(ip, IPv4Address):
|
|
69
|
+
# return ip
|
|
70
|
+
# return None
|
|
71
71
|
|
|
72
72
|
@staticmethod
|
|
73
73
|
def _to_ipv6(ip: Optional[_BaseAddress]) -> Optional[IPv6Address]:
|
|
@@ -87,10 +87,10 @@ class IpSearchKeyConverter:
|
|
|
87
87
|
raise ValidationError(self.bundle.get("invalid_ip").format(self.ip_column))
|
|
88
88
|
|
|
89
89
|
# legacy support
|
|
90
|
-
ipv4 = self.ip_column + "_v4"
|
|
91
|
-
df[ipv4] = df[self.ip_column].apply(self._to_ipv4).apply(self._ip_to_int).astype("Int64")
|
|
92
|
-
self.search_keys[ipv4] = SearchKey.IP
|
|
93
|
-
self.columns_renaming[ipv4] = original_ip
|
|
90
|
+
# ipv4 = self.ip_column + "_v4"
|
|
91
|
+
# df[ipv4] = df[self.ip_column].apply(self._to_ipv4).apply(self._ip_to_int).astype("Int64")
|
|
92
|
+
# self.search_keys[ipv4] = SearchKey.IP
|
|
93
|
+
# self.columns_renaming[ipv4] = original_ip
|
|
94
94
|
|
|
95
95
|
ipv6 = self.ip_column + "_v6"
|
|
96
96
|
df[ipv6] = (
|
|
@@ -104,7 +104,7 @@ class IpSearchKeyConverter:
|
|
|
104
104
|
del self.search_keys[self.ip_column]
|
|
105
105
|
del self.columns_renaming[self.ip_column]
|
|
106
106
|
self.search_keys[ipv6] = SearchKey.IPV6_ADDRESS
|
|
107
|
-
self.columns_renaming[ipv6] = original_ip # could be
|
|
107
|
+
self.columns_renaming[ipv6] = original_ip # could be upgini_ip_unnest...
|
|
108
108
|
|
|
109
109
|
return df
|
|
110
110
|
|
|
@@ -1,9 +1,9 @@
|
|
|
1
|
-
upgini/__about__.py,sha256=
|
|
1
|
+
upgini/__about__.py,sha256=iKaF643F-CtlunSR-fjGeg8d8_wX6p_P6BtFkRK6sEA,24
|
|
2
2
|
upgini/__init__.py,sha256=M64LwQTBa-5Jz24Zm2h8rWwlKQQ1J8nP7gGgIciS0WU,589
|
|
3
3
|
upgini/ads.py,sha256=nvuRxRx5MHDMgPr9SiU-fsqRdFaBv8p4_v1oqiysKpc,2714
|
|
4
4
|
upgini/dataset.py,sha256=olZ-OHSfBNoBSCo7R5t7uCLukI2nO7afpx_A-HCiJLk,31067
|
|
5
5
|
upgini/errors.py,sha256=2b_Wbo0OYhLUbrZqdLIx5jBnAsiD1Mcenh-VjR4HCTw,950
|
|
6
|
-
upgini/features_enricher.py,sha256=
|
|
6
|
+
upgini/features_enricher.py,sha256=NHlf_ib70UiaDTzWvA30Gz2HANzpf61Ql5EDFZqQzk0,188120
|
|
7
7
|
upgini/http.py,sha256=21asexflvavydzCOONJDGQBtQanCElrbnqLXakJ9Cu8,42880
|
|
8
8
|
upgini/lazy_import.py,sha256=74gQ8JuA48BGRLxAo7lNHNKY2D2emMxrUxKGdxVGhuY,1012
|
|
9
9
|
upgini/metadata.py,sha256=osmzdNESeh7yP3BZday6N9Q3eaIHfzhhRM1d6NSgcf0,11223
|
|
@@ -14,16 +14,16 @@ upgini/version_validator.py,sha256=ddSKUK_-eGJB3NgrqOMoWJU-OxQ253WsNLp8aqJkaIM,1
|
|
|
14
14
|
upgini/ads_management/__init__.py,sha256=qzyisOToVRP-tquAJD1PblZhNtMrOB8FiyF9JvfkvgE,50
|
|
15
15
|
upgini/ads_management/ads_manager.py,sha256=igVbN2jz80Umb2BUJixmJVj-zx8unoKpecVo-R-nGdw,2648
|
|
16
16
|
upgini/autofe/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
17
|
-
upgini/autofe/all_operands.py,sha256=
|
|
17
|
+
upgini/autofe/all_operands.py,sha256=3LiH9iU-ArGmYpS8FHWH7yCFx40ILfvlSXJlKIa75BQ,2542
|
|
18
18
|
upgini/autofe/binary.py,sha256=TRjEdxsfyPY5E8ksYfdKMmU6GtvALfGFPNVIG7DBhzM,7520
|
|
19
19
|
upgini/autofe/date.py,sha256=OpFc3Al0xO3qlESn2Uokfxw51ArVqmh3xngWwdrsaqE,9762
|
|
20
|
-
upgini/autofe/feature.py,sha256=
|
|
20
|
+
upgini/autofe/feature.py,sha256=gwGWY2UcX_0wHAvfEiu1rRU7GFZyzMWZIaPVcf6kD80,14223
|
|
21
21
|
upgini/autofe/groupby.py,sha256=r-xl_keZZgm_tpiEoDhjYSkT6NHv7a4cRQR4wJ4uCp8,3263
|
|
22
22
|
upgini/autofe/operand.py,sha256=uk883RaNqgXqtkaRqA1re1d9OFnnpv0JVvelYx09Yw0,2943
|
|
23
23
|
upgini/autofe/unary.py,sha256=T3E7F3dA_7o_rkdCFq7JV6nHLzcoHLHQTcxO7y5Opa4,4646
|
|
24
24
|
upgini/autofe/vector.py,sha256=ehcZUDqV71TfbU8EmKfdYp603gS2dJY_-fpr10ho5sI,663
|
|
25
25
|
upgini/data_source/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
26
|
-
upgini/data_source/data_source_publisher.py,sha256=
|
|
26
|
+
upgini/data_source/data_source_publisher.py,sha256=6ZiT36flJHioh1J3F9lv6vSyqjRnQ_enZ6Mmb3GI2T8,22209
|
|
27
27
|
upgini/mdc/__init__.py,sha256=aM08nIWFc2gWdWUa3_IuEnNND0cQPkBGnYpRMnfFN8k,1019
|
|
28
28
|
upgini/mdc/context.py,sha256=3u1B-jXt7tXEvNcV3qmR9SDCseudnY7KYsLclBdwVLk,1405
|
|
29
29
|
upgini/normalizer/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
@@ -49,7 +49,7 @@ upgini/utils/email_utils.py,sha256=j0Ug1R_0AnCg1Y92zIZ4XMwvKo3G5_pcOlBN1OH_gZs,5
|
|
|
49
49
|
upgini/utils/fallback_progress_bar.py,sha256=PDaKb8dYpVZaWMroNcOHsTc3pSjgi9mOm0--cOFTwJ0,1074
|
|
50
50
|
upgini/utils/features_validator.py,sha256=PgKNt5dyqfErTvjtRNNUS9g7GFqHBtAtnsfA-V5UO1A,3307
|
|
51
51
|
upgini/utils/format.py,sha256=Yv5cvvSs2bOLUzzNu96Pu33VMDNbabio92QepUj41jU,243
|
|
52
|
-
upgini/utils/ip_utils.py,sha256=
|
|
52
|
+
upgini/utils/ip_utils.py,sha256=fB9jhabRr8mtZzNNscXIHQ5-QJqIZkAw3FO06eQ9jO8,5176
|
|
53
53
|
upgini/utils/phone_utils.py,sha256=IrbztLuOJBiePqqxllfABWfYlfAjYevPhXKipl95wUI,10432
|
|
54
54
|
upgini/utils/postal_code_utils.py,sha256=5M0sUqH2DAr33kARWCTXR-ACyzWbjDq_-0mmEml6ZcU,1716
|
|
55
55
|
upgini/utils/progress_bar.py,sha256=N-Sfdah2Hg8lXP_fV9EfUTXz_PyRt4lo9fAHoUDOoLc,1550
|
|
@@ -57,7 +57,7 @@ upgini/utils/sklearn_ext.py,sha256=13jQS_k7v0aUtudXV6nGUEWjttPQzAW9AFYL5wgEz9k,4
|
|
|
57
57
|
upgini/utils/target_utils.py,sha256=BVtDmrmFMKerSUWaNOIEdzsYHIFiODdpnWbE50QDPDc,7864
|
|
58
58
|
upgini/utils/track_info.py,sha256=G5Lu1xxakg2_TQjKZk4b5SvrHsATTXNVV3NbvWtT8k8,5663
|
|
59
59
|
upgini/utils/warning_counter.py,sha256=dIWBB4dI5XRRJZudvIlqlIYKEiwLLPcXarsZuYRt338,227
|
|
60
|
-
upgini-1.2.
|
|
61
|
-
upgini-1.2.
|
|
62
|
-
upgini-1.2.
|
|
63
|
-
upgini-1.2.
|
|
60
|
+
upgini-1.2.6a1.dist-info/METADATA,sha256=h6NV4SfVmBdj8hXvweTrWrOCid84grVTAGPVFukvqiI,48609
|
|
61
|
+
upgini-1.2.6a1.dist-info/WHEEL,sha256=1yFddiXMmvYK7QYTqtRNtX66WJ0Mz8PYEiEUoOUUxRY,87
|
|
62
|
+
upgini-1.2.6a1.dist-info/licenses/LICENSE,sha256=5RRzgvdJUu3BUDfv4bzVU6FqKgwHlIay63pPCSmSgzw,1514
|
|
63
|
+
upgini-1.2.6a1.dist-info/RECORD,,
|
|
File without changes
|