upgini 1.2.71a3810.dev3__tar.gz → 1.2.71a3810.dev4__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of upgini might be problematic. Click here for more details.

Files changed (79)
  1. {upgini-1.2.71a3810.dev3 → upgini-1.2.71a3810.dev4}/PKG-INFO +1 -1
  2. upgini-1.2.71a3810.dev4/src/upgini/__about__.py +1 -0
  3. {upgini-1.2.71a3810.dev3 → upgini-1.2.71a3810.dev4}/src/upgini/autofe/date.py +20 -4
  4. {upgini-1.2.71a3810.dev3 → upgini-1.2.71a3810.dev4}/src/upgini/autofe/unary.py +2 -2
  5. {upgini-1.2.71a3810.dev3 → upgini-1.2.71a3810.dev4}/src/upgini/autofe/vector.py +9 -0
  6. upgini-1.2.71a3810.dev3/src/upgini/__about__.py +0 -1
  7. {upgini-1.2.71a3810.dev3 → upgini-1.2.71a3810.dev4}/.gitignore +0 -0
  8. {upgini-1.2.71a3810.dev3 → upgini-1.2.71a3810.dev4}/LICENSE +0 -0
  9. {upgini-1.2.71a3810.dev3 → upgini-1.2.71a3810.dev4}/README.md +0 -0
  10. {upgini-1.2.71a3810.dev3 → upgini-1.2.71a3810.dev4}/pyproject.toml +0 -0
  11. {upgini-1.2.71a3810.dev3 → upgini-1.2.71a3810.dev4}/src/upgini/__init__.py +0 -0
  12. {upgini-1.2.71a3810.dev3 → upgini-1.2.71a3810.dev4}/src/upgini/ads.py +0 -0
  13. {upgini-1.2.71a3810.dev3 → upgini-1.2.71a3810.dev4}/src/upgini/ads_management/__init__.py +0 -0
  14. {upgini-1.2.71a3810.dev3 → upgini-1.2.71a3810.dev4}/src/upgini/ads_management/ads_manager.py +0 -0
  15. {upgini-1.2.71a3810.dev3 → upgini-1.2.71a3810.dev4}/src/upgini/autofe/__init__.py +0 -0
  16. {upgini-1.2.71a3810.dev3 → upgini-1.2.71a3810.dev4}/src/upgini/autofe/all_operators.py +0 -0
  17. {upgini-1.2.71a3810.dev3 → upgini-1.2.71a3810.dev4}/src/upgini/autofe/binary.py +0 -0
  18. {upgini-1.2.71a3810.dev3 → upgini-1.2.71a3810.dev4}/src/upgini/autofe/feature.py +0 -0
  19. {upgini-1.2.71a3810.dev3 → upgini-1.2.71a3810.dev4}/src/upgini/autofe/groupby.py +0 -0
  20. {upgini-1.2.71a3810.dev3 → upgini-1.2.71a3810.dev4}/src/upgini/autofe/operator.py +0 -0
  21. {upgini-1.2.71a3810.dev3 → upgini-1.2.71a3810.dev4}/src/upgini/autofe/timeseries/__init__.py +0 -0
  22. {upgini-1.2.71a3810.dev3 → upgini-1.2.71a3810.dev4}/src/upgini/autofe/timeseries/base.py +0 -0
  23. {upgini-1.2.71a3810.dev3 → upgini-1.2.71a3810.dev4}/src/upgini/autofe/timeseries/cross.py +0 -0
  24. {upgini-1.2.71a3810.dev3 → upgini-1.2.71a3810.dev4}/src/upgini/autofe/timeseries/delta.py +0 -0
  25. {upgini-1.2.71a3810.dev3 → upgini-1.2.71a3810.dev4}/src/upgini/autofe/timeseries/lag.py +0 -0
  26. {upgini-1.2.71a3810.dev3 → upgini-1.2.71a3810.dev4}/src/upgini/autofe/timeseries/roll.py +0 -0
  27. {upgini-1.2.71a3810.dev3 → upgini-1.2.71a3810.dev4}/src/upgini/autofe/timeseries/trend.py +0 -0
  28. {upgini-1.2.71a3810.dev3 → upgini-1.2.71a3810.dev4}/src/upgini/autofe/timeseries/volatility.py +0 -0
  29. {upgini-1.2.71a3810.dev3 → upgini-1.2.71a3810.dev4}/src/upgini/autofe/utils.py +0 -0
  30. {upgini-1.2.71a3810.dev3 → upgini-1.2.71a3810.dev4}/src/upgini/data_source/__init__.py +0 -0
  31. {upgini-1.2.71a3810.dev3 → upgini-1.2.71a3810.dev4}/src/upgini/data_source/data_source_publisher.py +0 -0
  32. {upgini-1.2.71a3810.dev3 → upgini-1.2.71a3810.dev4}/src/upgini/dataset.py +0 -0
  33. {upgini-1.2.71a3810.dev3 → upgini-1.2.71a3810.dev4}/src/upgini/errors.py +0 -0
  34. {upgini-1.2.71a3810.dev3 → upgini-1.2.71a3810.dev4}/src/upgini/features_enricher.py +0 -0
  35. {upgini-1.2.71a3810.dev3 → upgini-1.2.71a3810.dev4}/src/upgini/http.py +0 -0
  36. {upgini-1.2.71a3810.dev3 → upgini-1.2.71a3810.dev4}/src/upgini/lazy_import.py +0 -0
  37. {upgini-1.2.71a3810.dev3 → upgini-1.2.71a3810.dev4}/src/upgini/mdc/__init__.py +0 -0
  38. {upgini-1.2.71a3810.dev3 → upgini-1.2.71a3810.dev4}/src/upgini/mdc/context.py +0 -0
  39. {upgini-1.2.71a3810.dev3 → upgini-1.2.71a3810.dev4}/src/upgini/metadata.py +0 -0
  40. {upgini-1.2.71a3810.dev3 → upgini-1.2.71a3810.dev4}/src/upgini/metrics.py +0 -0
  41. {upgini-1.2.71a3810.dev3 → upgini-1.2.71a3810.dev4}/src/upgini/normalizer/__init__.py +0 -0
  42. {upgini-1.2.71a3810.dev3 → upgini-1.2.71a3810.dev4}/src/upgini/normalizer/normalize_utils.py +0 -0
  43. {upgini-1.2.71a3810.dev3 → upgini-1.2.71a3810.dev4}/src/upgini/resource_bundle/__init__.py +0 -0
  44. {upgini-1.2.71a3810.dev3 → upgini-1.2.71a3810.dev4}/src/upgini/resource_bundle/exceptions.py +0 -0
  45. {upgini-1.2.71a3810.dev3 → upgini-1.2.71a3810.dev4}/src/upgini/resource_bundle/strings.properties +0 -0
  46. {upgini-1.2.71a3810.dev3 → upgini-1.2.71a3810.dev4}/src/upgini/resource_bundle/strings_widget.properties +0 -0
  47. {upgini-1.2.71a3810.dev3 → upgini-1.2.71a3810.dev4}/src/upgini/sampler/__init__.py +0 -0
  48. {upgini-1.2.71a3810.dev3 → upgini-1.2.71a3810.dev4}/src/upgini/sampler/base.py +0 -0
  49. {upgini-1.2.71a3810.dev3 → upgini-1.2.71a3810.dev4}/src/upgini/sampler/random_under_sampler.py +0 -0
  50. {upgini-1.2.71a3810.dev3 → upgini-1.2.71a3810.dev4}/src/upgini/sampler/utils.py +0 -0
  51. {upgini-1.2.71a3810.dev3 → upgini-1.2.71a3810.dev4}/src/upgini/search_task.py +0 -0
  52. {upgini-1.2.71a3810.dev3 → upgini-1.2.71a3810.dev4}/src/upgini/spinner.py +0 -0
  53. {upgini-1.2.71a3810.dev3 → upgini-1.2.71a3810.dev4}/src/upgini/utils/Roboto-Regular.ttf +0 -0
  54. {upgini-1.2.71a3810.dev3 → upgini-1.2.71a3810.dev4}/src/upgini/utils/__init__.py +0 -0
  55. {upgini-1.2.71a3810.dev3 → upgini-1.2.71a3810.dev4}/src/upgini/utils/base_search_key_detector.py +0 -0
  56. {upgini-1.2.71a3810.dev3 → upgini-1.2.71a3810.dev4}/src/upgini/utils/blocked_time_series.py +0 -0
  57. {upgini-1.2.71a3810.dev3 → upgini-1.2.71a3810.dev4}/src/upgini/utils/country_utils.py +0 -0
  58. {upgini-1.2.71a3810.dev3 → upgini-1.2.71a3810.dev4}/src/upgini/utils/custom_loss_utils.py +0 -0
  59. {upgini-1.2.71a3810.dev3 → upgini-1.2.71a3810.dev4}/src/upgini/utils/cv_utils.py +0 -0
  60. {upgini-1.2.71a3810.dev3 → upgini-1.2.71a3810.dev4}/src/upgini/utils/datetime_utils.py +0 -0
  61. {upgini-1.2.71a3810.dev3 → upgini-1.2.71a3810.dev4}/src/upgini/utils/deduplicate_utils.py +0 -0
  62. {upgini-1.2.71a3810.dev3 → upgini-1.2.71a3810.dev4}/src/upgini/utils/display_utils.py +0 -0
  63. {upgini-1.2.71a3810.dev3 → upgini-1.2.71a3810.dev4}/src/upgini/utils/email_utils.py +0 -0
  64. {upgini-1.2.71a3810.dev3 → upgini-1.2.71a3810.dev4}/src/upgini/utils/fallback_progress_bar.py +0 -0
  65. {upgini-1.2.71a3810.dev3 → upgini-1.2.71a3810.dev4}/src/upgini/utils/feature_info.py +0 -0
  66. {upgini-1.2.71a3810.dev3 → upgini-1.2.71a3810.dev4}/src/upgini/utils/features_validator.py +0 -0
  67. {upgini-1.2.71a3810.dev3 → upgini-1.2.71a3810.dev4}/src/upgini/utils/format.py +0 -0
  68. {upgini-1.2.71a3810.dev3 → upgini-1.2.71a3810.dev4}/src/upgini/utils/ip_utils.py +0 -0
  69. {upgini-1.2.71a3810.dev3 → upgini-1.2.71a3810.dev4}/src/upgini/utils/mstats.py +0 -0
  70. {upgini-1.2.71a3810.dev3 → upgini-1.2.71a3810.dev4}/src/upgini/utils/phone_utils.py +0 -0
  71. {upgini-1.2.71a3810.dev3 → upgini-1.2.71a3810.dev4}/src/upgini/utils/postal_code_utils.py +0 -0
  72. {upgini-1.2.71a3810.dev3 → upgini-1.2.71a3810.dev4}/src/upgini/utils/progress_bar.py +0 -0
  73. {upgini-1.2.71a3810.dev3 → upgini-1.2.71a3810.dev4}/src/upgini/utils/sklearn_ext.py +0 -0
  74. {upgini-1.2.71a3810.dev3 → upgini-1.2.71a3810.dev4}/src/upgini/utils/sort.py +0 -0
  75. {upgini-1.2.71a3810.dev3 → upgini-1.2.71a3810.dev4}/src/upgini/utils/target_utils.py +0 -0
  76. {upgini-1.2.71a3810.dev3 → upgini-1.2.71a3810.dev4}/src/upgini/utils/track_info.py +0 -0
  77. {upgini-1.2.71a3810.dev3 → upgini-1.2.71a3810.dev4}/src/upgini/utils/ts_utils.py +0 -0
  78. {upgini-1.2.71a3810.dev3 → upgini-1.2.71a3810.dev4}/src/upgini/utils/warning_counter.py +0 -0
  79. {upgini-1.2.71a3810.dev3 → upgini-1.2.71a3810.dev4}/src/upgini/version_validator.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: upgini
3
- Version: 1.2.71a3810.dev3
3
+ Version: 1.2.71a3810.dev4
4
4
  Summary: Intelligent data search & enrichment for Machine Learning
5
5
  Project-URL: Bug Reports, https://github.com/upgini/upgini/issues
6
6
  Project-URL: Homepage, https://upgini.com/
@@ -0,0 +1 @@
1
+ __version__ = "1.2.71a3810.dev4"
@@ -187,16 +187,21 @@ class DateListDiff(PandasOperator, DateDiffMixin, ParametrizedOperator):
187
187
  class DateListDiffBounded(DateListDiff, ParametrizedOperator):
188
188
  lower_bound: Optional[int] = None
189
189
  upper_bound: Optional[int] = None
190
+ normalize: Optional[bool] = None
190
191
 
191
192
  def to_formula(self) -> str:
192
193
  lower_bound = "minusinf" if self.lower_bound is None else self.lower_bound
193
194
  upper_bound = "plusinf" if self.upper_bound is None else self.upper_bound
194
- return f"date_diff_{self.diff_unit}_{lower_bound}_{upper_bound}_{self.aggregation}"
195
+ norm = "_norm" if self.normalize else ""
196
+ return f"date_diff_{self.diff_unit}_{lower_bound}_{upper_bound}_{self.aggregation}{norm}"
195
197
 
196
198
  @classmethod
197
199
  def from_formula(cls, formula: str) -> Optional["DateListDiffBounded"]:
198
200
  import re
199
201
 
202
+ normalize = formula.endswith("_norm")
203
+ formula = formula.replace("_norm", "")
204
+
200
205
  pattern = r"^date_diff_([^_]+)_((minusinf|\d+))_((plusinf|\d+))_(\w+)$"
201
206
  match = re.match(pattern, formula)
202
207
 
@@ -207,8 +212,13 @@ class DateListDiffBounded(DateListDiff, ParametrizedOperator):
207
212
  lower_bound = None if match.group(2) == "minusinf" else int(match.group(2))
208
213
  upper_bound = None if match.group(4) == "plusinf" else int(match.group(4))
209
214
  aggregation = match.group(6)
210
-
211
- return cls(diff_unit=diff_unit, lower_bound=lower_bound, upper_bound=upper_bound, aggregation=aggregation)
215
+ return cls(
216
+ diff_unit=diff_unit,
217
+ lower_bound=lower_bound,
218
+ upper_bound=upper_bound,
219
+ aggregation=aggregation,
220
+ normalize=normalize,
221
+ )
212
222
 
213
223
  def get_params(self) -> Dict[str, Optional[str]]:
214
224
  res = super().get_params()
@@ -216,14 +226,20 @@ class DateListDiffBounded(DateListDiff, ParametrizedOperator):
216
226
  res["lower_bound"] = str(self.lower_bound)
217
227
  if self.upper_bound is not None:
218
228
  res["upper_bound"] = str(self.upper_bound)
229
+ if self.normalize is not None:
230
+ res["normalize"] = str(self.normalize)
219
231
  return res
220
232
 
221
233
  def _agg(self, x):
234
+ orig_len = len(x)
222
235
  x = x[
223
236
  (x >= (self.lower_bound if self.lower_bound is not None else -np.inf))
224
237
  & (x < (self.upper_bound if self.upper_bound is not None else np.inf))
225
238
  ]
226
- return super()._agg(x)
239
+ agg_res = super()._agg(x)
240
+ if self.normalize and orig_len > 0:
241
+ return agg_res / orig_len
242
+ return agg_res
227
243
 
228
244
 
229
245
  class DatePercentileBase(PandasOperator, abc.ABC):
@@ -160,12 +160,12 @@ class Embeddings(PandasOperator):
160
160
  class Bin(PandasOperator):
161
161
  name: str = "bin"
162
162
  is_unary: bool = True
163
- output_type: Optional[str] = "string"
163
+ output_type: Optional[str] = "category"
164
164
  bin_bounds: List[int] = []
165
165
  is_categorical: bool = True
166
166
 
167
167
  def calculate_unary(self, data: pd.Series) -> pd.Series:
168
- return data.apply(self._bin, bounds=self.bin_bounds).fillna(-1).astype(int).astype(str)
168
+ return data.apply(self._bin, bounds=self.bin_bounds).fillna(-1).astype(int).astype("category")
169
169
 
170
170
  def _bin(self, f, bounds):
171
171
  if f is None or np.isnan(f):
@@ -22,3 +22,12 @@ class Sum(PandasOperator, VectorizableMixin):
22
22
 
23
23
  def calculate_vector(self, data: List[pd.Series]) -> pd.Series:
24
24
  return pd.DataFrame(data).T.fillna(0).sum(axis=1)
25
+
26
+
27
+ class Vectorize(PandasOperator, VectorizableMixin):
28
+ name: str = "vectorize"
29
+ is_vector: bool = True
30
+ group_index: int = 0
31
+
32
+ def calculate_vector(self, data: List[pd.Series]) -> pd.Series:
33
+ return pd.DataFrame(data).T.apply(lambda x: x.to_list(), axis=1)
@@ -1 +0,0 @@
1
- __version__ = "1.2.71a3810.dev3"