upgini 1.2.4__tar.gz → 1.2.6__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (65) hide show
  1. {upgini-1.2.4 → upgini-1.2.6}/PKG-INFO +1 -1
  2. upgini-1.2.6/src/upgini/__about__.py +1 -0
  3. {upgini-1.2.4 → upgini-1.2.6}/src/upgini/autofe/all_operands.py +2 -1
  4. {upgini-1.2.4 → upgini-1.2.6}/src/upgini/autofe/feature.py +44 -14
  5. {upgini-1.2.4 → upgini-1.2.6}/src/upgini/autofe/unary.py +4 -2
  6. upgini-1.2.4/src/upgini/__about__.py +0 -1
  7. {upgini-1.2.4 → upgini-1.2.6}/.gitignore +0 -0
  8. {upgini-1.2.4 → upgini-1.2.6}/LICENSE +0 -0
  9. {upgini-1.2.4 → upgini-1.2.6}/README.md +0 -0
  10. {upgini-1.2.4 → upgini-1.2.6}/pyproject.toml +0 -0
  11. {upgini-1.2.4 → upgini-1.2.6}/src/upgini/__init__.py +0 -0
  12. {upgini-1.2.4 → upgini-1.2.6}/src/upgini/ads.py +0 -0
  13. {upgini-1.2.4 → upgini-1.2.6}/src/upgini/ads_management/__init__.py +0 -0
  14. {upgini-1.2.4 → upgini-1.2.6}/src/upgini/ads_management/ads_manager.py +0 -0
  15. {upgini-1.2.4 → upgini-1.2.6}/src/upgini/autofe/__init__.py +0 -0
  16. {upgini-1.2.4 → upgini-1.2.6}/src/upgini/autofe/binary.py +0 -0
  17. {upgini-1.2.4 → upgini-1.2.6}/src/upgini/autofe/date.py +0 -0
  18. {upgini-1.2.4 → upgini-1.2.6}/src/upgini/autofe/groupby.py +0 -0
  19. {upgini-1.2.4 → upgini-1.2.6}/src/upgini/autofe/operand.py +0 -0
  20. {upgini-1.2.4 → upgini-1.2.6}/src/upgini/autofe/vector.py +0 -0
  21. {upgini-1.2.4 → upgini-1.2.6}/src/upgini/data_source/__init__.py +0 -0
  22. {upgini-1.2.4 → upgini-1.2.6}/src/upgini/data_source/data_source_publisher.py +0 -0
  23. {upgini-1.2.4 → upgini-1.2.6}/src/upgini/dataset.py +0 -0
  24. {upgini-1.2.4 → upgini-1.2.6}/src/upgini/errors.py +0 -0
  25. {upgini-1.2.4 → upgini-1.2.6}/src/upgini/features_enricher.py +0 -0
  26. {upgini-1.2.4 → upgini-1.2.6}/src/upgini/http.py +0 -0
  27. {upgini-1.2.4 → upgini-1.2.6}/src/upgini/lazy_import.py +0 -0
  28. {upgini-1.2.4 → upgini-1.2.6}/src/upgini/mdc/__init__.py +0 -0
  29. {upgini-1.2.4 → upgini-1.2.6}/src/upgini/mdc/context.py +0 -0
  30. {upgini-1.2.4 → upgini-1.2.6}/src/upgini/metadata.py +0 -0
  31. {upgini-1.2.4 → upgini-1.2.6}/src/upgini/metrics.py +0 -0
  32. {upgini-1.2.4 → upgini-1.2.6}/src/upgini/normalizer/__init__.py +0 -0
  33. {upgini-1.2.4 → upgini-1.2.6}/src/upgini/normalizer/normalize_utils.py +0 -0
  34. {upgini-1.2.4 → upgini-1.2.6}/src/upgini/resource_bundle/__init__.py +0 -0
  35. {upgini-1.2.4 → upgini-1.2.6}/src/upgini/resource_bundle/exceptions.py +0 -0
  36. {upgini-1.2.4 → upgini-1.2.6}/src/upgini/resource_bundle/strings.properties +0 -0
  37. {upgini-1.2.4 → upgini-1.2.6}/src/upgini/resource_bundle/strings_widget.properties +0 -0
  38. {upgini-1.2.4 → upgini-1.2.6}/src/upgini/sampler/__init__.py +0 -0
  39. {upgini-1.2.4 → upgini-1.2.6}/src/upgini/sampler/base.py +0 -0
  40. {upgini-1.2.4 → upgini-1.2.6}/src/upgini/sampler/random_under_sampler.py +0 -0
  41. {upgini-1.2.4 → upgini-1.2.6}/src/upgini/sampler/utils.py +0 -0
  42. {upgini-1.2.4 → upgini-1.2.6}/src/upgini/search_task.py +0 -0
  43. {upgini-1.2.4 → upgini-1.2.6}/src/upgini/spinner.py +0 -0
  44. {upgini-1.2.4 → upgini-1.2.6}/src/upgini/utils/__init__.py +0 -0
  45. {upgini-1.2.4 → upgini-1.2.6}/src/upgini/utils/base_search_key_detector.py +0 -0
  46. {upgini-1.2.4 → upgini-1.2.6}/src/upgini/utils/blocked_time_series.py +0 -0
  47. {upgini-1.2.4 → upgini-1.2.6}/src/upgini/utils/country_utils.py +0 -0
  48. {upgini-1.2.4 → upgini-1.2.6}/src/upgini/utils/custom_loss_utils.py +0 -0
  49. {upgini-1.2.4 → upgini-1.2.6}/src/upgini/utils/cv_utils.py +0 -0
  50. {upgini-1.2.4 → upgini-1.2.6}/src/upgini/utils/datetime_utils.py +0 -0
  51. {upgini-1.2.4 → upgini-1.2.6}/src/upgini/utils/deduplicate_utils.py +0 -0
  52. {upgini-1.2.4 → upgini-1.2.6}/src/upgini/utils/display_utils.py +0 -0
  53. {upgini-1.2.4 → upgini-1.2.6}/src/upgini/utils/email_utils.py +0 -0
  54. {upgini-1.2.4 → upgini-1.2.6}/src/upgini/utils/fallback_progress_bar.py +0 -0
  55. {upgini-1.2.4 → upgini-1.2.6}/src/upgini/utils/features_validator.py +0 -0
  56. {upgini-1.2.4 → upgini-1.2.6}/src/upgini/utils/format.py +0 -0
  57. {upgini-1.2.4 → upgini-1.2.6}/src/upgini/utils/ip_utils.py +0 -0
  58. {upgini-1.2.4 → upgini-1.2.6}/src/upgini/utils/phone_utils.py +0 -0
  59. {upgini-1.2.4 → upgini-1.2.6}/src/upgini/utils/postal_code_utils.py +0 -0
  60. {upgini-1.2.4 → upgini-1.2.6}/src/upgini/utils/progress_bar.py +0 -0
  61. {upgini-1.2.4 → upgini-1.2.6}/src/upgini/utils/sklearn_ext.py +0 -0
  62. {upgini-1.2.4 → upgini-1.2.6}/src/upgini/utils/target_utils.py +0 -0
  63. {upgini-1.2.4 → upgini-1.2.6}/src/upgini/utils/track_info.py +0 -0
  64. {upgini-1.2.4 → upgini-1.2.6}/src/upgini/utils/warning_counter.py +0 -0
  65. {upgini-1.2.4 → upgini-1.2.6}/src/upgini/version_validator.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: upgini
3
- Version: 1.2.4
3
+ Version: 1.2.6
4
4
  Summary: Intelligent data search & enrichment for Machine Learning
5
5
  Project-URL: Bug Reports, https://github.com/upgini/upgini/issues
6
6
  Project-URL: Homepage, https://upgini.com/
@@ -0,0 +1 @@
1
+ __version__ = "1.2.6"
@@ -1,3 +1,4 @@
1
+ from copy import deepcopy
1
2
  from typing import Dict
2
3
 
3
4
  from upgini.autofe.binary import (
@@ -83,4 +84,4 @@ ALL_OPERANDS: Dict[str, Operand] = {
83
84
 
84
85
 
85
86
  def find_op(name):
86
- return ALL_OPERANDS.get(name)
87
+ return deepcopy(ALL_OPERANDS.get(name))
@@ -22,6 +22,9 @@ class Column:
22
22
  def set_op_params(self, params: Dict[str, str]) -> "Column":
23
23
  return self
24
24
 
25
+ def get_op_params(self, **kwargs):
26
+ return dict()
27
+
25
28
  def rename_columns(self, mapping: Dict[str, str]) -> "Column":
26
29
  self.name = self._unhash(mapping.get(self.name) or self.name)
27
30
  return self
@@ -44,6 +47,10 @@ class Column:
44
47
  def get_columns(self, **kwargs) -> List[str]:
45
48
  return [self.name]
46
49
 
50
+ @property
51
+ def children(self) -> List[Union["Feature", "Column"]]:
52
+ return []
53
+
47
54
  def infer_type(self, data: pd.DataFrame) -> DtypeObj:
48
55
  return data[self.name].dtype
49
56
 
@@ -88,9 +95,30 @@ class Feature:
88
95
  self.op.set_params(params)
89
96
 
90
97
  for child in self.children:
91
- child.set_op_params(params)
98
+ child_params = {
99
+ k[len(child.get_display_name()) + 1 :]: v
100
+ for k, v in params.items()
101
+ if k.startswith(child.get_display_name())
102
+ }
103
+ if not child_params:
104
+ child_params = params
105
+ child.set_op_params(child_params)
92
106
  return self
93
107
 
108
+ def get_op_params(self, **kwargs) -> Dict[str, str]:
109
+ return {
110
+ k: str(v)
111
+ for k, v in dict(
112
+ (
113
+ (f"{child.get_display_name(**kwargs)}_{k}", v)
114
+ for child in self.children
115
+ for k, v in child.get_op_params(**kwargs).items()
116
+ ),
117
+ **(self.op.get_params() or {}),
118
+ ).items()
119
+ if v is not None
120
+ }
121
+
94
122
  def get_hash(self) -> str:
95
123
  return hashlib.sha256(
96
124
  "_".join([self.op.name] + [ch.get_display_name() for ch in self.children]).encode("utf-8")
@@ -326,24 +354,26 @@ class FeatureGroup:
326
354
  return names
327
355
 
328
356
  def calculate(self, data: pd.DataFrame, is_root=False) -> pd.DataFrame:
329
- main_column = None if self.main_column_node is None else self.main_column_node.get_columns()[0]
330
357
  if isinstance(self.op, PandasOperand):
331
- columns = self.get_columns()
332
- lower_order_children = [
358
+ main_column = None if self.main_column_node is None else self.main_column_node.get_display_name()
359
+ lower_order_children = []
360
+ if self.main_column_node is not None:
361
+ lower_order_children.append(self.main_column_node)
362
+ lower_order_children.extend(
333
363
  ch for f in self.children for ch in f.children if ch.get_display_name() != main_column
334
- ]
364
+ )
335
365
  lower_order_names = [ch.get_display_name() for ch in lower_order_children]
336
- if any(isinstance(f, Feature) for f in lower_order_children):
337
- child_data = pd.concat(
338
- [data[main_column or []]] + [ch.calculate(data) for ch in lower_order_children],
339
- axis=1,
340
- )
341
- child_data.columns = ([main_column] if main_column is not None else []) + lower_order_names
342
- else:
343
- child_data = data[columns]
366
+ child_data = pd.concat(
367
+ [ch.calculate(data) for ch in lower_order_children],
368
+ axis=1,
369
+ )
370
+ child_data.columns = lower_order_names
344
371
 
345
372
  new_data = self.op.calculate_group(child_data, main_column=main_column)
346
- new_data.rename(columns=dict(zip(lower_order_names, self.get_display_names())), inplace=True)
373
+ new_data.rename(
374
+ columns=dict(zip((n for n in lower_order_names if n != main_column), self.get_display_names())),
375
+ inplace=True,
376
+ )
347
377
  else:
348
378
  raise NotImplementedError(f"Unrecognized operator {self.op.name}.")
349
379
 
@@ -12,10 +12,12 @@ class Abs(PandasOperand, VectorizableMixin):
12
12
  group_index: int = 0
13
13
 
14
14
  def calculate_unary(self, data: pd.Series) -> pd.Series:
15
- return data.abs()
15
+ return data.astype(np.float64).abs()
16
+ # return data.abs()
16
17
 
17
18
  def calculate_group(self, data: pd.DataFrame, **kwargs) -> pd.DataFrame:
18
- return data.abs()
19
+ return data.astype(np.float64).abs()
20
+ # return data.abs()
19
21
 
20
22
 
21
23
  class Log(PandasOperand, VectorizableMixin):
@@ -1 +0,0 @@
1
- __version__ = "1.2.4"
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes