upgini 1.2.6__py3-none-any.whl → 1.2.6a1__py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of upgini might be problematic. Click here for more details.

upgini/__about__.py CHANGED
@@ -1 +1 @@
1
- __version__ = "1.2.6"
1
+ __version__ = "1.2.6a1"
@@ -1,4 +1,3 @@
1
- from copy import deepcopy
2
1
  from typing import Dict
3
2
 
4
3
  from upgini.autofe.binary import (
@@ -84,4 +83,4 @@ ALL_OPERANDS: Dict[str, Operand] = {
84
83
 
85
84
 
86
85
  def find_op(name):
87
- return deepcopy(ALL_OPERANDS.get(name))
86
+ return ALL_OPERANDS.get(name)
upgini/autofe/feature.py CHANGED
@@ -22,9 +22,6 @@ class Column:
22
22
  def set_op_params(self, params: Dict[str, str]) -> "Column":
23
23
  return self
24
24
 
25
- def get_op_params(self, **kwargs):
26
- return dict()
27
-
28
25
  def rename_columns(self, mapping: Dict[str, str]) -> "Column":
29
26
  self.name = self._unhash(mapping.get(self.name) or self.name)
30
27
  return self
@@ -47,10 +44,6 @@ class Column:
47
44
  def get_columns(self, **kwargs) -> List[str]:
48
45
  return [self.name]
49
46
 
50
- @property
51
- def children(self) -> List[Union["Feature", "Column"]]:
52
- return []
53
-
54
47
  def infer_type(self, data: pd.DataFrame) -> DtypeObj:
55
48
  return data[self.name].dtype
56
49
 
@@ -95,30 +88,9 @@ class Feature:
95
88
  self.op.set_params(params)
96
89
 
97
90
  for child in self.children:
98
- child_params = {
99
- k[len(child.get_display_name()) + 1 :]: v
100
- for k, v in params.items()
101
- if k.startswith(child.get_display_name())
102
- }
103
- if not child_params:
104
- child_params = params
105
- child.set_op_params(child_params)
91
+ child.set_op_params(params)
106
92
  return self
107
93
 
108
- def get_op_params(self, **kwargs) -> Dict[str, str]:
109
- return {
110
- k: str(v)
111
- for k, v in dict(
112
- (
113
- (f"{child.get_display_name(**kwargs)}_{k}", v)
114
- for child in self.children
115
- for k, v in child.get_op_params(**kwargs).items()
116
- ),
117
- **(self.op.get_params() or {}),
118
- ).items()
119
- if v is not None
120
- }
121
-
122
94
  def get_hash(self) -> str:
123
95
  return hashlib.sha256(
124
96
  "_".join([self.op.name] + [ch.get_display_name() for ch in self.children]).encode("utf-8")
@@ -354,26 +326,24 @@ class FeatureGroup:
354
326
  return names
355
327
 
356
328
  def calculate(self, data: pd.DataFrame, is_root=False) -> pd.DataFrame:
329
+ main_column = None if self.main_column_node is None else self.main_column_node.get_columns()[0]
357
330
  if isinstance(self.op, PandasOperand):
358
- main_column = None if self.main_column_node is None else self.main_column_node.get_display_name()
359
- lower_order_children = []
360
- if self.main_column_node is not None:
361
- lower_order_children.append(self.main_column_node)
362
- lower_order_children.extend(
331
+ columns = self.get_columns()
332
+ lower_order_children = [
363
333
  ch for f in self.children for ch in f.children if ch.get_display_name() != main_column
364
- )
334
+ ]
365
335
  lower_order_names = [ch.get_display_name() for ch in lower_order_children]
366
- child_data = pd.concat(
367
- [ch.calculate(data) for ch in lower_order_children],
368
- axis=1,
369
- )
370
- child_data.columns = lower_order_names
336
+ if any(isinstance(f, Feature) for f in lower_order_children):
337
+ child_data = pd.concat(
338
+ [data[main_column or []]] + [ch.calculate(data) for ch in lower_order_children],
339
+ axis=1,
340
+ )
341
+ child_data.columns = ([main_column] if main_column is not None else []) + lower_order_names
342
+ else:
343
+ child_data = data[columns]
371
344
 
372
345
  new_data = self.op.calculate_group(child_data, main_column=main_column)
373
- new_data.rename(
374
- columns=dict(zip((n for n in lower_order_names if n != main_column), self.get_display_names())),
375
- inplace=True,
376
- )
346
+ new_data.rename(columns=dict(zip(lower_order_names, self.get_display_names())), inplace=True)
377
347
  else:
378
348
  raise NotImplementedError(f"Unrecognized operator {self.op.name}.")
379
349
 
@@ -378,6 +378,7 @@ class DataSourcePublisher:
378
378
  search_keys = [k.value.value for k in search_keys] if search_keys else None
379
379
  request = {"bqTableId": bq_table_id, "searchKeys": search_keys}
380
380
  task_id = self._rest_client.upload_online(request, trace_id)
381
+ print(f"Start polling management task_id={task_id} with trace_id={trace_id}")
381
382
  with Spinner():
382
383
  status_response = self._rest_client.poll_ads_management_task_status(task_id, trace_id)
383
384
  while status_response["status"] not in self.FINAL_STATUSES:
@@ -3235,6 +3235,7 @@ class FeaturesEnricher(TransformerMixin):
3235
3235
  ]
3236
3236
  )
3237
3237
 
3238
+ # TODO some columns not exists
3238
3239
  all_other_columns = sorted_other_keys + other_columns
3239
3240
 
3240
3241
  search_keys_hash = "search_keys_hash"
upgini/utils/ip_utils.py CHANGED
@@ -57,17 +57,17 @@ class IpSearchKeyConverter:
57
57
  except ValueError:
58
58
  pass
59
59
 
60
- @staticmethod
61
- def _is_ipv4(ip: Optional[_BaseAddress]):
62
- return ip is not None and (
63
- isinstance(ip, IPv4Address) or (isinstance(ip, IPv6Address) and ip.ipv4_mapped is not None)
64
- )
65
-
66
- @staticmethod
67
- def _to_ipv4(ip: Optional[_BaseAddress]) -> Optional[IPv4Address]:
68
- if isinstance(ip, IPv4Address):
69
- return ip
70
- return None
60
+ # @staticmethod
61
+ # def _is_ipv4(ip: Optional[_BaseAddress]):
62
+ # return ip is not None and (
63
+ # isinstance(ip, IPv4Address) or (isinstance(ip, IPv6Address) and ip.ipv4_mapped is not None)
64
+ # )
65
+
66
+ # @staticmethod
67
+ # def _to_ipv4(ip: Optional[_BaseAddress]) -> Optional[IPv4Address]:
68
+ # if isinstance(ip, IPv4Address):
69
+ # return ip
70
+ # return None
71
71
 
72
72
  @staticmethod
73
73
  def _to_ipv6(ip: Optional[_BaseAddress]) -> Optional[IPv6Address]:
@@ -87,10 +87,10 @@ class IpSearchKeyConverter:
87
87
  raise ValidationError(self.bundle.get("invalid_ip").format(self.ip_column))
88
88
 
89
89
  # legacy support
90
- ipv4 = self.ip_column + "_v4"
91
- df[ipv4] = df[self.ip_column].apply(self._to_ipv4).apply(self._ip_to_int).astype("Int64")
92
- self.search_keys[ipv4] = SearchKey.IP
93
- self.columns_renaming[ipv4] = original_ip
90
+ # ipv4 = self.ip_column + "_v4"
91
+ # df[ipv4] = df[self.ip_column].apply(self._to_ipv4).apply(self._ip_to_int).astype("Int64")
92
+ # self.search_keys[ipv4] = SearchKey.IP
93
+ # self.columns_renaming[ipv4] = original_ip
94
94
 
95
95
  ipv6 = self.ip_column + "_v6"
96
96
  df[ipv6] = (
@@ -104,7 +104,7 @@ class IpSearchKeyConverter:
104
104
  del self.search_keys[self.ip_column]
105
105
  del self.columns_renaming[self.ip_column]
106
106
  self.search_keys[ipv6] = SearchKey.IPV6_ADDRESS
107
- self.columns_renaming[ipv6] = original_ip # could be __unnest_ip...
107
+ self.columns_renaming[ipv6] = original_ip # could be upgini_ip_unnest...
108
108
 
109
109
  return df
110
110
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: upgini
3
- Version: 1.2.6
3
+ Version: 1.2.6a1
4
4
  Summary: Intelligent data search & enrichment for Machine Learning
5
5
  Project-URL: Bug Reports, https://github.com/upgini/upgini/issues
6
6
  Project-URL: Homepage, https://upgini.com/
@@ -1,9 +1,9 @@
1
- upgini/__about__.py,sha256=vMQK58X8_YZGKzRm0ThvPAKFtpfyejGmUnDrY9RQ13w,22
1
+ upgini/__about__.py,sha256=iKaF643F-CtlunSR-fjGeg8d8_wX6p_P6BtFkRK6sEA,24
2
2
  upgini/__init__.py,sha256=M64LwQTBa-5Jz24Zm2h8rWwlKQQ1J8nP7gGgIciS0WU,589
3
3
  upgini/ads.py,sha256=nvuRxRx5MHDMgPr9SiU-fsqRdFaBv8p4_v1oqiysKpc,2714
4
4
  upgini/dataset.py,sha256=olZ-OHSfBNoBSCo7R5t7uCLukI2nO7afpx_A-HCiJLk,31067
5
5
  upgini/errors.py,sha256=2b_Wbo0OYhLUbrZqdLIx5jBnAsiD1Mcenh-VjR4HCTw,950
6
- upgini/features_enricher.py,sha256=twH4qdl91iHZF_AraLk0aIbRDw61S_DYtCWCZ34Yjjg,188077
6
+ upgini/features_enricher.py,sha256=NHlf_ib70UiaDTzWvA30Gz2HANzpf61Ql5EDFZqQzk0,188120
7
7
  upgini/http.py,sha256=21asexflvavydzCOONJDGQBtQanCElrbnqLXakJ9Cu8,42880
8
8
  upgini/lazy_import.py,sha256=74gQ8JuA48BGRLxAo7lNHNKY2D2emMxrUxKGdxVGhuY,1012
9
9
  upgini/metadata.py,sha256=osmzdNESeh7yP3BZday6N9Q3eaIHfzhhRM1d6NSgcf0,11223
@@ -14,16 +14,16 @@ upgini/version_validator.py,sha256=ddSKUK_-eGJB3NgrqOMoWJU-OxQ253WsNLp8aqJkaIM,1
14
14
  upgini/ads_management/__init__.py,sha256=qzyisOToVRP-tquAJD1PblZhNtMrOB8FiyF9JvfkvgE,50
15
15
  upgini/ads_management/ads_manager.py,sha256=igVbN2jz80Umb2BUJixmJVj-zx8unoKpecVo-R-nGdw,2648
16
16
  upgini/autofe/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
17
- upgini/autofe/all_operands.py,sha256=cCCB44qvkmuWyiRM5Xykx8tkHPIjQthrWyj67STWN80,2578
17
+ upgini/autofe/all_operands.py,sha256=3LiH9iU-ArGmYpS8FHWH7yCFx40ILfvlSXJlKIa75BQ,2542
18
18
  upgini/autofe/binary.py,sha256=TRjEdxsfyPY5E8ksYfdKMmU6GtvALfGFPNVIG7DBhzM,7520
19
19
  upgini/autofe/date.py,sha256=OpFc3Al0xO3qlESn2Uokfxw51ArVqmh3xngWwdrsaqE,9762
20
- upgini/autofe/feature.py,sha256=eL7wABUhDKZzv3E-RPJNcyGwSfB0UptcfU2RbvsOks4,15082
20
+ upgini/autofe/feature.py,sha256=gwGWY2UcX_0wHAvfEiu1rRU7GFZyzMWZIaPVcf6kD80,14223
21
21
  upgini/autofe/groupby.py,sha256=r-xl_keZZgm_tpiEoDhjYSkT6NHv7a4cRQR4wJ4uCp8,3263
22
22
  upgini/autofe/operand.py,sha256=uk883RaNqgXqtkaRqA1re1d9OFnnpv0JVvelYx09Yw0,2943
23
23
  upgini/autofe/unary.py,sha256=T3E7F3dA_7o_rkdCFq7JV6nHLzcoHLHQTcxO7y5Opa4,4646
24
24
  upgini/autofe/vector.py,sha256=ehcZUDqV71TfbU8EmKfdYp603gS2dJY_-fpr10ho5sI,663
25
25
  upgini/data_source/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
26
- upgini/data_source/data_source_publisher.py,sha256=Vg0biG86YB0OEaoxbK9YYrr4yARm11_h3bTWIBgoScA,22115
26
+ upgini/data_source/data_source_publisher.py,sha256=6ZiT36flJHioh1J3F9lv6vSyqjRnQ_enZ6Mmb3GI2T8,22209
27
27
  upgini/mdc/__init__.py,sha256=aM08nIWFc2gWdWUa3_IuEnNND0cQPkBGnYpRMnfFN8k,1019
28
28
  upgini/mdc/context.py,sha256=3u1B-jXt7tXEvNcV3qmR9SDCseudnY7KYsLclBdwVLk,1405
29
29
  upgini/normalizer/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -49,7 +49,7 @@ upgini/utils/email_utils.py,sha256=j0Ug1R_0AnCg1Y92zIZ4XMwvKo3G5_pcOlBN1OH_gZs,5
49
49
  upgini/utils/fallback_progress_bar.py,sha256=PDaKb8dYpVZaWMroNcOHsTc3pSjgi9mOm0--cOFTwJ0,1074
50
50
  upgini/utils/features_validator.py,sha256=PgKNt5dyqfErTvjtRNNUS9g7GFqHBtAtnsfA-V5UO1A,3307
51
51
  upgini/utils/format.py,sha256=Yv5cvvSs2bOLUzzNu96Pu33VMDNbabio92QepUj41jU,243
52
- upgini/utils/ip_utils.py,sha256=ZZj_uQFTHhagzt-MRew__ZBOp2DdnkMrachS7PElkSE,5143
52
+ upgini/utils/ip_utils.py,sha256=fB9jhabRr8mtZzNNscXIHQ5-QJqIZkAw3FO06eQ9jO8,5176
53
53
  upgini/utils/phone_utils.py,sha256=IrbztLuOJBiePqqxllfABWfYlfAjYevPhXKipl95wUI,10432
54
54
  upgini/utils/postal_code_utils.py,sha256=5M0sUqH2DAr33kARWCTXR-ACyzWbjDq_-0mmEml6ZcU,1716
55
55
  upgini/utils/progress_bar.py,sha256=N-Sfdah2Hg8lXP_fV9EfUTXz_PyRt4lo9fAHoUDOoLc,1550
@@ -57,7 +57,7 @@ upgini/utils/sklearn_ext.py,sha256=13jQS_k7v0aUtudXV6nGUEWjttPQzAW9AFYL5wgEz9k,4
57
57
  upgini/utils/target_utils.py,sha256=BVtDmrmFMKerSUWaNOIEdzsYHIFiODdpnWbE50QDPDc,7864
58
58
  upgini/utils/track_info.py,sha256=G5Lu1xxakg2_TQjKZk4b5SvrHsATTXNVV3NbvWtT8k8,5663
59
59
  upgini/utils/warning_counter.py,sha256=dIWBB4dI5XRRJZudvIlqlIYKEiwLLPcXarsZuYRt338,227
60
- upgini-1.2.6.dist-info/METADATA,sha256=NxVadJI-fglLuYE_HcGEWEqxxW8m-uF2T44BGWagFEU,48607
61
- upgini-1.2.6.dist-info/WHEEL,sha256=zEMcRr9Kr03x1ozGwg5v9NQBKn3kndp6LSoSlVg-jhU,87
62
- upgini-1.2.6.dist-info/licenses/LICENSE,sha256=5RRzgvdJUu3BUDfv4bzVU6FqKgwHlIay63pPCSmSgzw,1514
63
- upgini-1.2.6.dist-info/RECORD,,
60
+ upgini-1.2.6a1.dist-info/METADATA,sha256=h6NV4SfVmBdj8hXvweTrWrOCid84grVTAGPVFukvqiI,48609
61
+ upgini-1.2.6a1.dist-info/WHEEL,sha256=1yFddiXMmvYK7QYTqtRNtX66WJ0Mz8PYEiEUoOUUxRY,87
62
+ upgini-1.2.6a1.dist-info/licenses/LICENSE,sha256=5RRzgvdJUu3BUDfv4bzVU6FqKgwHlIay63pPCSmSgzw,1514
63
+ upgini-1.2.6a1.dist-info/RECORD,,
@@ -1,4 +1,4 @@
1
1
  Wheel-Version: 1.0
2
- Generator: hatchling 1.24.2
2
+ Generator: hatchling 1.25.0
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any