upgini 1.2.56a3675.dev1__tar.gz → 1.2.56a3818.dev1__tar.gz

This diff shows the changes between two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.

Potentially problematic release.


This version of upgini might be problematic. Click here for more details.

Files changed (67)
  1. {upgini-1.2.56a3675.dev1 → upgini-1.2.56a3818.dev1}/PKG-INFO +1 -1
  2. upgini-1.2.56a3818.dev1/src/upgini/__about__.py +1 -0
  3. {upgini-1.2.56a3675.dev1 → upgini-1.2.56a3818.dev1}/src/upgini/autofe/date.py +0 -8
  4. {upgini-1.2.56a3675.dev1 → upgini-1.2.56a3818.dev1}/src/upgini/autofe/feature.py +1 -10
  5. {upgini-1.2.56a3675.dev1 → upgini-1.2.56a3818.dev1}/src/upgini/autofe/vector.py +18 -2
  6. {upgini-1.2.56a3675.dev1 → upgini-1.2.56a3818.dev1}/src/upgini/features_enricher.py +1 -1
  7. upgini-1.2.56a3675.dev1/src/upgini/__about__.py +0 -1
  8. {upgini-1.2.56a3675.dev1 → upgini-1.2.56a3818.dev1}/.gitignore +0 -0
  9. {upgini-1.2.56a3675.dev1 → upgini-1.2.56a3818.dev1}/LICENSE +0 -0
  10. {upgini-1.2.56a3675.dev1 → upgini-1.2.56a3818.dev1}/README.md +0 -0
  11. {upgini-1.2.56a3675.dev1 → upgini-1.2.56a3818.dev1}/pyproject.toml +0 -0
  12. {upgini-1.2.56a3675.dev1 → upgini-1.2.56a3818.dev1}/src/upgini/__init__.py +0 -0
  13. {upgini-1.2.56a3675.dev1 → upgini-1.2.56a3818.dev1}/src/upgini/ads.py +0 -0
  14. {upgini-1.2.56a3675.dev1 → upgini-1.2.56a3818.dev1}/src/upgini/ads_management/__init__.py +0 -0
  15. {upgini-1.2.56a3675.dev1 → upgini-1.2.56a3818.dev1}/src/upgini/ads_management/ads_manager.py +0 -0
  16. {upgini-1.2.56a3675.dev1 → upgini-1.2.56a3818.dev1}/src/upgini/autofe/__init__.py +0 -0
  17. {upgini-1.2.56a3675.dev1 → upgini-1.2.56a3818.dev1}/src/upgini/autofe/all_operands.py +0 -0
  18. {upgini-1.2.56a3675.dev1 → upgini-1.2.56a3818.dev1}/src/upgini/autofe/binary.py +0 -0
  19. {upgini-1.2.56a3675.dev1 → upgini-1.2.56a3818.dev1}/src/upgini/autofe/groupby.py +0 -0
  20. {upgini-1.2.56a3675.dev1 → upgini-1.2.56a3818.dev1}/src/upgini/autofe/operand.py +0 -0
  21. {upgini-1.2.56a3675.dev1 → upgini-1.2.56a3818.dev1}/src/upgini/autofe/unary.py +0 -0
  22. {upgini-1.2.56a3675.dev1 → upgini-1.2.56a3818.dev1}/src/upgini/data_source/__init__.py +0 -0
  23. {upgini-1.2.56a3675.dev1 → upgini-1.2.56a3818.dev1}/src/upgini/data_source/data_source_publisher.py +0 -0
  24. {upgini-1.2.56a3675.dev1 → upgini-1.2.56a3818.dev1}/src/upgini/dataset.py +0 -0
  25. {upgini-1.2.56a3675.dev1 → upgini-1.2.56a3818.dev1}/src/upgini/errors.py +0 -0
  26. {upgini-1.2.56a3675.dev1 → upgini-1.2.56a3818.dev1}/src/upgini/http.py +0 -0
  27. {upgini-1.2.56a3675.dev1 → upgini-1.2.56a3818.dev1}/src/upgini/lazy_import.py +0 -0
  28. {upgini-1.2.56a3675.dev1 → upgini-1.2.56a3818.dev1}/src/upgini/mdc/__init__.py +0 -0
  29. {upgini-1.2.56a3675.dev1 → upgini-1.2.56a3818.dev1}/src/upgini/mdc/context.py +0 -0
  30. {upgini-1.2.56a3675.dev1 → upgini-1.2.56a3818.dev1}/src/upgini/metadata.py +0 -0
  31. {upgini-1.2.56a3675.dev1 → upgini-1.2.56a3818.dev1}/src/upgini/metrics.py +0 -0
  32. {upgini-1.2.56a3675.dev1 → upgini-1.2.56a3818.dev1}/src/upgini/normalizer/__init__.py +0 -0
  33. {upgini-1.2.56a3675.dev1 → upgini-1.2.56a3818.dev1}/src/upgini/normalizer/normalize_utils.py +0 -0
  34. {upgini-1.2.56a3675.dev1 → upgini-1.2.56a3818.dev1}/src/upgini/resource_bundle/__init__.py +0 -0
  35. {upgini-1.2.56a3675.dev1 → upgini-1.2.56a3818.dev1}/src/upgini/resource_bundle/exceptions.py +0 -0
  36. {upgini-1.2.56a3675.dev1 → upgini-1.2.56a3818.dev1}/src/upgini/resource_bundle/strings.properties +0 -0
  37. {upgini-1.2.56a3675.dev1 → upgini-1.2.56a3818.dev1}/src/upgini/resource_bundle/strings_widget.properties +0 -0
  38. {upgini-1.2.56a3675.dev1 → upgini-1.2.56a3818.dev1}/src/upgini/sampler/__init__.py +0 -0
  39. {upgini-1.2.56a3675.dev1 → upgini-1.2.56a3818.dev1}/src/upgini/sampler/base.py +0 -0
  40. {upgini-1.2.56a3675.dev1 → upgini-1.2.56a3818.dev1}/src/upgini/sampler/random_under_sampler.py +0 -0
  41. {upgini-1.2.56a3675.dev1 → upgini-1.2.56a3818.dev1}/src/upgini/sampler/utils.py +0 -0
  42. {upgini-1.2.56a3675.dev1 → upgini-1.2.56a3818.dev1}/src/upgini/search_task.py +0 -0
  43. {upgini-1.2.56a3675.dev1 → upgini-1.2.56a3818.dev1}/src/upgini/spinner.py +0 -0
  44. {upgini-1.2.56a3675.dev1 → upgini-1.2.56a3818.dev1}/src/upgini/utils/Roboto-Regular.ttf +0 -0
  45. {upgini-1.2.56a3675.dev1 → upgini-1.2.56a3818.dev1}/src/upgini/utils/__init__.py +0 -0
  46. {upgini-1.2.56a3675.dev1 → upgini-1.2.56a3818.dev1}/src/upgini/utils/base_search_key_detector.py +0 -0
  47. {upgini-1.2.56a3675.dev1 → upgini-1.2.56a3818.dev1}/src/upgini/utils/blocked_time_series.py +0 -0
  48. {upgini-1.2.56a3675.dev1 → upgini-1.2.56a3818.dev1}/src/upgini/utils/country_utils.py +0 -0
  49. {upgini-1.2.56a3675.dev1 → upgini-1.2.56a3818.dev1}/src/upgini/utils/custom_loss_utils.py +0 -0
  50. {upgini-1.2.56a3675.dev1 → upgini-1.2.56a3818.dev1}/src/upgini/utils/cv_utils.py +0 -0
  51. {upgini-1.2.56a3675.dev1 → upgini-1.2.56a3818.dev1}/src/upgini/utils/datetime_utils.py +0 -0
  52. {upgini-1.2.56a3675.dev1 → upgini-1.2.56a3818.dev1}/src/upgini/utils/deduplicate_utils.py +0 -0
  53. {upgini-1.2.56a3675.dev1 → upgini-1.2.56a3818.dev1}/src/upgini/utils/display_utils.py +0 -0
  54. {upgini-1.2.56a3675.dev1 → upgini-1.2.56a3818.dev1}/src/upgini/utils/email_utils.py +0 -0
  55. {upgini-1.2.56a3675.dev1 → upgini-1.2.56a3818.dev1}/src/upgini/utils/fallback_progress_bar.py +0 -0
  56. {upgini-1.2.56a3675.dev1 → upgini-1.2.56a3818.dev1}/src/upgini/utils/feature_info.py +0 -0
  57. {upgini-1.2.56a3675.dev1 → upgini-1.2.56a3818.dev1}/src/upgini/utils/features_validator.py +0 -0
  58. {upgini-1.2.56a3675.dev1 → upgini-1.2.56a3818.dev1}/src/upgini/utils/format.py +0 -0
  59. {upgini-1.2.56a3675.dev1 → upgini-1.2.56a3818.dev1}/src/upgini/utils/ip_utils.py +0 -0
  60. {upgini-1.2.56a3675.dev1 → upgini-1.2.56a3818.dev1}/src/upgini/utils/phone_utils.py +0 -0
  61. {upgini-1.2.56a3675.dev1 → upgini-1.2.56a3818.dev1}/src/upgini/utils/postal_code_utils.py +0 -0
  62. {upgini-1.2.56a3675.dev1 → upgini-1.2.56a3818.dev1}/src/upgini/utils/progress_bar.py +0 -0
  63. {upgini-1.2.56a3675.dev1 → upgini-1.2.56a3818.dev1}/src/upgini/utils/sklearn_ext.py +0 -0
  64. {upgini-1.2.56a3675.dev1 → upgini-1.2.56a3818.dev1}/src/upgini/utils/target_utils.py +0 -0
  65. {upgini-1.2.56a3675.dev1 → upgini-1.2.56a3818.dev1}/src/upgini/utils/track_info.py +0 -0
  66. {upgini-1.2.56a3675.dev1 → upgini-1.2.56a3818.dev1}/src/upgini/utils/warning_counter.py +0 -0
  67. {upgini-1.2.56a3675.dev1 → upgini-1.2.56a3818.dev1}/src/upgini/version_validator.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: upgini
3
- Version: 1.2.56a3675.dev1
3
+ Version: 1.2.56a3818.dev1
4
4
  Summary: Intelligent data search & enrichment for Machine Learning
5
5
  Project-URL: Bug Reports, https://github.com/upgini/upgini/issues
6
6
  Project-URL: Homepage, https://upgini.com/
@@ -0,0 +1 @@
1
+ __version__ = "1.2.56a3818.dev1"
@@ -64,9 +64,6 @@ class DateDiff(PandasOperand, DateDiffMixin):
64
64
  return res
65
65
 
66
66
  def calculate_binary(self, left: pd.Series, right: pd.Series) -> pd.Series:
67
- if left.isna().all() or right.isna().all():
68
- return pd.Series([None] * len(left))
69
-
70
67
  left = self._convert_to_date(left, self.left_unit)
71
68
  right = self._convert_to_date(right, self.right_unit)
72
69
  diff = self._convert_diff_to_unit(left.dt.date - right.dt.date)
@@ -145,9 +142,6 @@ class DateListDiff(PandasOperand, DateDiffMixin, ParametrizedOperand):
145
142
  return cls(aggregation=aggregation)
146
143
 
147
144
  def calculate_binary(self, left: pd.Series, right: pd.Series) -> pd.Series:
148
- if left.isna().all() or right.isna().all():
149
- return pd.Series([None] * len(left), dtype=np.float64)
150
-
151
145
  left = self._convert_to_date(left, self.left_unit)
152
146
  right_mask = right.apply(lambda x: len(x) > 0)
153
147
  mask = left.notna() & right.notna() & right_mask
@@ -236,8 +230,6 @@ class DatePercentileBase(PandasOperand, abc.ABC):
236
230
  pass
237
231
 
238
232
  def _perc(self, f, bounds):
239
- if f is None or np.isnan(f):
240
- return np.nan
241
233
  hit = np.where(f >= np.array(bounds))[0]
242
234
  if hit.size > 0:
243
235
  return np.max(hit) + 1
@@ -26,18 +26,9 @@ class Column:
26
26
  return dict()
27
27
 
28
28
  def rename_columns(self, mapping: Dict[str, str]) -> "Column":
29
- self.name = mapping.get(self.name) or self.name
29
+ self.name = self._unhash(mapping.get(self.name) or self.name)
30
30
  return self
31
31
 
32
- def _unhash(self, feature_name: str) -> str:
33
- last_component_idx = feature_name.rfind("_")
34
- if not feature_name.startswith("f_"):
35
- return feature_name # etalon feature
36
- elif last_component_idx == 1:
37
- return feature_name[2:] # fully hashed name, cannot unhash
38
- else:
39
- return feature_name[2:last_component_idx]
40
-
41
32
  def delete_data(self):
42
33
  self.data = None
43
34
 
@@ -2,7 +2,11 @@ import abc
2
2
  from typing import Dict, List, Optional
3
3
 
4
4
  import pandas as pd
5
- from pydantic import validator
5
+
6
+ try:
7
+ from pydantic import field_validator as validator # V2
8
+ except ImportError:
9
+ from pydantic import validator # V1
6
10
 
7
11
  from upgini.autofe.operand import PandasOperand, ParametrizedOperand, VectorizableMixin
8
12
 
@@ -29,12 +33,16 @@ class Sum(PandasOperand, VectorizableMixin):
29
33
  class TimeSeriesBase(PandasOperand, abc.ABC):
30
34
  is_vector: bool = True
31
35
  date_unit: Optional[str] = None
36
+ offset_size: int = 0
37
+ offset_unit: str = "D"
32
38
 
33
39
  def get_params(self) -> Dict[str, Optional[str]]:
34
40
  res = super().get_params()
35
41
  res.update(
36
42
  {
37
43
  "date_unit": self.date_unit,
44
+ "offset_size": self.offset_size,
45
+ "offset_unit": self.offset_unit,
38
46
  }
39
47
  )
40
48
  return res
@@ -46,6 +54,13 @@ class TimeSeriesBase(PandasOperand, abc.ABC):
46
54
  ts.drop_duplicates(subset=ts.columns[:-1], keep="first", inplace=True)
47
55
  ts.set_index(date.name, inplace=True)
48
56
  ts = ts[ts.index.notna()].sort_index()
57
+ if self.offset_size > 0:
58
+ ts = ts.iloc[:, :-1].merge(
59
+ ts.iloc[:, -1].shift(freq=f"{self.offset_size}{self.offset_unit}"),
60
+ left_index=True,
61
+ right_index=True,
62
+ how="left",
63
+ )
49
64
  ts = ts.groupby([c.name for c in data[1:-1]]) if len(data) > 2 else ts
50
65
  ts = self._aggregate(ts)
51
66
  ts = ts.reindex(data[1:-1] + [date] if len(data) > 2 else date).reset_index()
@@ -67,6 +82,7 @@ class Roll(TimeSeriesBase, ParametrizedOperand):
67
82
  window_unit: str = "D"
68
83
 
69
84
  @validator("window_unit")
85
+ @classmethod
70
86
  def validate_window_unit(cls, v: str) -> str:
71
87
  try:
72
88
  pd.tseries.frequencies.to_offset(v)
@@ -107,7 +123,7 @@ class Roll(TimeSeriesBase, ParametrizedOperand):
107
123
  return res
108
124
 
109
125
  def _aggregate(self, ts: pd.DataFrame) -> pd.DataFrame:
110
- return ts.rolling(f"{self.window_size}{self.window_unit}", min_periods=self.window_size).agg(
126
+ return ts.rolling(f"{self.window_size}{self.window_unit}", min_periods=1).agg(
111
127
  _roll_aggregations.get(self.aggregation, self.aggregation)
112
128
  )
113
129
 
@@ -3733,7 +3733,7 @@ if response.status_code == 200:
3733
3733
  features_info_without_links = []
3734
3734
  internal_features_info = []
3735
3735
 
3736
- original_shaps = {fm.name: fm.shap_value for fm in features_meta}
3736
+ original_shaps = {original_names_dict.get(fm.name, fm.name): fm.shap_value for fm in features_meta}
3737
3737
 
3738
3738
  if updated_shaps is not None:
3739
3739
  for fm in features_meta:
@@ -1 +0,0 @@
1
- __version__ = "1.2.56a3675.dev1"