upgini 1.1.290__tar.gz → 1.1.290a3232.post1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of upgini might be problematic. Click here for more details.

Files changed (64)
  1. {upgini-1.1.290 → upgini-1.1.290a3232.post1}/PKG-INFO +1 -1
  2. upgini-1.1.290a3232.post1/src/upgini/__about__.py +1 -0
  3. {upgini-1.1.290 → upgini-1.1.290a3232.post1}/src/upgini/autofe/all_operands.py +2 -1
  4. {upgini-1.1.290 → upgini-1.1.290a3232.post1}/src/upgini/autofe/unary.py +13 -0
  5. {upgini-1.1.290 → upgini-1.1.290a3232.post1}/src/upgini/utils/datetime_utils.py +8 -15
  6. upgini-1.1.290/src/upgini/__about__.py +0 -1
  7. {upgini-1.1.290 → upgini-1.1.290a3232.post1}/.gitignore +0 -0
  8. {upgini-1.1.290 → upgini-1.1.290a3232.post1}/LICENSE +0 -0
  9. {upgini-1.1.290 → upgini-1.1.290a3232.post1}/README.md +0 -0
  10. {upgini-1.1.290 → upgini-1.1.290a3232.post1}/pyproject.toml +0 -0
  11. {upgini-1.1.290 → upgini-1.1.290a3232.post1}/src/upgini/__init__.py +0 -0
  12. {upgini-1.1.290 → upgini-1.1.290a3232.post1}/src/upgini/ads.py +0 -0
  13. {upgini-1.1.290 → upgini-1.1.290a3232.post1}/src/upgini/ads_management/__init__.py +0 -0
  14. {upgini-1.1.290 → upgini-1.1.290a3232.post1}/src/upgini/ads_management/ads_manager.py +0 -0
  15. {upgini-1.1.290 → upgini-1.1.290a3232.post1}/src/upgini/autofe/__init__.py +0 -0
  16. {upgini-1.1.290 → upgini-1.1.290a3232.post1}/src/upgini/autofe/binary.py +0 -0
  17. {upgini-1.1.290 → upgini-1.1.290a3232.post1}/src/upgini/autofe/date.py +0 -0
  18. {upgini-1.1.290 → upgini-1.1.290a3232.post1}/src/upgini/autofe/feature.py +0 -0
  19. {upgini-1.1.290 → upgini-1.1.290a3232.post1}/src/upgini/autofe/groupby.py +0 -0
  20. {upgini-1.1.290 → upgini-1.1.290a3232.post1}/src/upgini/autofe/operand.py +0 -0
  21. {upgini-1.1.290 → upgini-1.1.290a3232.post1}/src/upgini/autofe/vector.py +0 -0
  22. {upgini-1.1.290 → upgini-1.1.290a3232.post1}/src/upgini/data_source/__init__.py +0 -0
  23. {upgini-1.1.290 → upgini-1.1.290a3232.post1}/src/upgini/data_source/data_source_publisher.py +0 -0
  24. {upgini-1.1.290 → upgini-1.1.290a3232.post1}/src/upgini/dataset.py +0 -0
  25. {upgini-1.1.290 → upgini-1.1.290a3232.post1}/src/upgini/errors.py +0 -0
  26. {upgini-1.1.290 → upgini-1.1.290a3232.post1}/src/upgini/features_enricher.py +0 -0
  27. {upgini-1.1.290 → upgini-1.1.290a3232.post1}/src/upgini/http.py +0 -0
  28. {upgini-1.1.290 → upgini-1.1.290a3232.post1}/src/upgini/mdc/__init__.py +0 -0
  29. {upgini-1.1.290 → upgini-1.1.290a3232.post1}/src/upgini/mdc/context.py +0 -0
  30. {upgini-1.1.290 → upgini-1.1.290a3232.post1}/src/upgini/metadata.py +0 -0
  31. {upgini-1.1.290 → upgini-1.1.290a3232.post1}/src/upgini/metrics.py +0 -0
  32. {upgini-1.1.290 → upgini-1.1.290a3232.post1}/src/upgini/normalizer/__init__.py +0 -0
  33. {upgini-1.1.290 → upgini-1.1.290a3232.post1}/src/upgini/normalizer/phone_normalizer.py +0 -0
  34. {upgini-1.1.290 → upgini-1.1.290a3232.post1}/src/upgini/resource_bundle/__init__.py +0 -0
  35. {upgini-1.1.290 → upgini-1.1.290a3232.post1}/src/upgini/resource_bundle/exceptions.py +0 -0
  36. {upgini-1.1.290 → upgini-1.1.290a3232.post1}/src/upgini/resource_bundle/strings.properties +0 -0
  37. {upgini-1.1.290 → upgini-1.1.290a3232.post1}/src/upgini/resource_bundle/strings_widget.properties +0 -0
  38. {upgini-1.1.290 → upgini-1.1.290a3232.post1}/src/upgini/sampler/__init__.py +0 -0
  39. {upgini-1.1.290 → upgini-1.1.290a3232.post1}/src/upgini/sampler/base.py +0 -0
  40. {upgini-1.1.290 → upgini-1.1.290a3232.post1}/src/upgini/sampler/random_under_sampler.py +0 -0
  41. {upgini-1.1.290 → upgini-1.1.290a3232.post1}/src/upgini/sampler/utils.py +0 -0
  42. {upgini-1.1.290 → upgini-1.1.290a3232.post1}/src/upgini/search_task.py +0 -0
  43. {upgini-1.1.290 → upgini-1.1.290a3232.post1}/src/upgini/spinner.py +0 -0
  44. {upgini-1.1.290 → upgini-1.1.290a3232.post1}/src/upgini/utils/__init__.py +0 -0
  45. {upgini-1.1.290 → upgini-1.1.290a3232.post1}/src/upgini/utils/base_search_key_detector.py +0 -0
  46. {upgini-1.1.290 → upgini-1.1.290a3232.post1}/src/upgini/utils/blocked_time_series.py +0 -0
  47. {upgini-1.1.290 → upgini-1.1.290a3232.post1}/src/upgini/utils/country_utils.py +0 -0
  48. {upgini-1.1.290 → upgini-1.1.290a3232.post1}/src/upgini/utils/custom_loss_utils.py +0 -0
  49. {upgini-1.1.290 → upgini-1.1.290a3232.post1}/src/upgini/utils/cv_utils.py +0 -0
  50. {upgini-1.1.290 → upgini-1.1.290a3232.post1}/src/upgini/utils/deduplicate_utils.py +0 -0
  51. {upgini-1.1.290 → upgini-1.1.290a3232.post1}/src/upgini/utils/display_utils.py +0 -0
  52. {upgini-1.1.290 → upgini-1.1.290a3232.post1}/src/upgini/utils/email_utils.py +0 -0
  53. {upgini-1.1.290 → upgini-1.1.290a3232.post1}/src/upgini/utils/fallback_progress_bar.py +0 -0
  54. {upgini-1.1.290 → upgini-1.1.290a3232.post1}/src/upgini/utils/features_validator.py +0 -0
  55. {upgini-1.1.290 → upgini-1.1.290a3232.post1}/src/upgini/utils/format.py +0 -0
  56. {upgini-1.1.290 → upgini-1.1.290a3232.post1}/src/upgini/utils/ip_utils.py +0 -0
  57. {upgini-1.1.290 → upgini-1.1.290a3232.post1}/src/upgini/utils/phone_utils.py +0 -0
  58. {upgini-1.1.290 → upgini-1.1.290a3232.post1}/src/upgini/utils/postal_code_utils.py +0 -0
  59. {upgini-1.1.290 → upgini-1.1.290a3232.post1}/src/upgini/utils/progress_bar.py +0 -0
  60. {upgini-1.1.290 → upgini-1.1.290a3232.post1}/src/upgini/utils/sklearn_ext.py +0 -0
  61. {upgini-1.1.290 → upgini-1.1.290a3232.post1}/src/upgini/utils/target_utils.py +0 -0
  62. {upgini-1.1.290 → upgini-1.1.290a3232.post1}/src/upgini/utils/track_info.py +0 -0
  63. {upgini-1.1.290 → upgini-1.1.290a3232.post1}/src/upgini/utils/warning_counter.py +0 -0
  64. {upgini-1.1.290 → upgini-1.1.290a3232.post1}/src/upgini/version_validator.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: upgini
3
- Version: 1.1.290
3
+ Version: 1.1.290a3232.post1
4
4
  Summary: Intelligent data search & enrichment for Machine Learning
5
5
  Project-URL: Bug Reports, https://github.com/upgini/upgini/issues
6
6
  Project-URL: Homepage, https://upgini.com/
@@ -0,0 +1 @@
1
+ __version__ = "1.1.290a3232-1"
@@ -4,7 +4,7 @@ from upgini.autofe.binary import Add, Divide, Max, Min, Multiply, Sim, Subtract
4
4
  from upgini.autofe.date import DateDiff, DateDiffType2, DateListDiff, DateListDiffBounded, DatePercentile
5
5
  from upgini.autofe.groupby import GroupByThenAgg, GroupByThenRank
6
6
  from upgini.autofe.operand import Operand
7
- from upgini.autofe.unary import Abs, Floor, Freq, Log, Residual, Sigmoid, Sqrt, Square
7
+ from upgini.autofe.unary import Abs, Floor, Freq, Log, Residual, Scale, Sigmoid, Sqrt, Square
8
8
  from upgini.autofe.vector import Mean, Sum
9
9
 
10
10
  ALL_OPERANDS: Dict[str, Operand] = {
@@ -50,6 +50,7 @@ ALL_OPERANDS: Dict[str, Operand] = {
50
50
  DateListDiffBounded(diff_unit="Y", aggregation="count", lower_bound=45, upper_bound=60),
51
51
  DateListDiffBounded(diff_unit="Y", aggregation="count", lower_bound=60),
52
52
  DatePercentile(),
53
+ Scale(),
53
54
  ]
54
55
  }
55
56
 
@@ -1,5 +1,6 @@
1
1
  import numpy as np
2
2
  import pandas as pd
3
+ from sklearn.preprocessing import robust_scale
3
4
 
4
5
  from upgini.autofe.operand import PandasOperand, VectorizableMixin
5
6
 
@@ -111,3 +112,15 @@ class Freq(PandasOperand):
111
112
  def calculate_unary(self, data: pd.Series) -> pd.Series:
112
113
  value_counts = data.value_counts(normalize=True)
113
114
  return self._loc(data, value_counts)
115
+
116
+
117
+ class Scale(PandasOperand, VectorizableMixin):
118
+ name = "scale"
119
+ is_unary = True
120
+ output_type = "float"
121
+
122
+ def calculate_unary(self, data: pd.Series) -> pd.Series:
123
+ return pd.Series(robust_scale(data), index=data.index, name=data.name)
124
+
125
+ def calculate_group(self, data: pd.DataFrame, **kwargs) -> pd.DataFrame:
126
+ return pd.DataFrame(robust_scale(data), index=data.index, columns=data.columns)
@@ -82,19 +82,19 @@ class DateTimeSearchKeyConverter:
82
82
  elif isinstance(df[self.date_column].values[0], datetime.date):
83
83
  df[self.date_column] = pd.to_datetime(df[self.date_column], errors="coerce")
84
84
  elif is_period_dtype(df[self.date_column]):
85
- df[self.date_column] = df[self.date_column].dt.to_timestamp()
85
+ df[self.date_column] = pd.to_datetime(df[self.date_column].astype("string"))
86
86
  elif is_numeric_dtype(df[self.date_column]):
87
87
  # 315532801 - 2524608001 - seconds
88
88
  # 315532801000 - 2524608001000 - milliseconds
89
89
  # 315532801000000 - 2524608001000000 - microseconds
90
90
  # 315532801000000000 - 2524608001000000000 - nanoseconds
91
- if df[self.date_column].apply(lambda x: 10 ** 16 < x).all():
91
+ if df[self.date_column].apply(lambda x: 10**16 < x).all():
92
92
  df[self.date_column] = pd.to_datetime(df[self.date_column], unit="ns")
93
- elif df[self.date_column].apply(lambda x: 10 ** 14 < x < 10 ** 16).all():
93
+ elif df[self.date_column].apply(lambda x: 10**14 < x < 10**16).all():
94
94
  df[self.date_column] = pd.to_datetime(df[self.date_column], unit="us")
95
- elif df[self.date_column].apply(lambda x: 10 ** 11 < x < 10 ** 14).all():
95
+ elif df[self.date_column].apply(lambda x: 10**11 < x < 10**14).all():
96
96
  df[self.date_column] = pd.to_datetime(df[self.date_column], unit="ms")
97
- elif df[self.date_column].apply(lambda x: 0 < x < 10 ** 11).all():
97
+ elif df[self.date_column].apply(lambda x: 0 < x < 10 * 11).all():
98
98
  df[self.date_column] = pd.to_datetime(df[self.date_column], unit="s")
99
99
  else:
100
100
  msg = self.bundle.get("unsupported_date_type").format(self.date_column)
@@ -185,10 +185,7 @@ def is_time_series(df: pd.DataFrame, date_col: str) -> bool:
185
185
  def is_blocked_time_series(df: pd.DataFrame, date_col: str, search_keys: List[str]) -> bool:
186
186
  df = df.copy()
187
187
  seconds = "datetime_seconds"
188
- if is_period_dtype(df[date_col]):
189
- df[date_col] = df[date_col].dt.to_timestamp()
190
- else:
191
- df[date_col] = pd.to_datetime(df[date_col])
188
+ df[date_col] = pd.to_datetime(df[date_col])
192
189
  df[date_col] = df[date_col].dt.tz_localize(None)
193
190
  df[seconds] = (df[date_col] - df[date_col].dt.floor("D")).dt.seconds
194
191
 
@@ -251,9 +248,7 @@ def validate_dates_distribution(
251
248
  if col in search_keys:
252
249
  continue
253
250
  try:
254
- if is_period_dtype(X[col]):
255
- pass
256
- elif pd.__version__ >= "2.0.0":
251
+ if pd.__version__ >= "2.0.0":
257
252
  # Format mixed to avoid massive warnings
258
253
  pd.to_datetime(X[col], format="mixed")
259
254
  else:
@@ -266,9 +261,7 @@ def validate_dates_distribution(
266
261
  if maybe_date_col is None:
267
262
  return
268
263
 
269
- if is_period_dtype(X[maybe_date_col]):
270
- dates = X[maybe_date_col].dt.to_timestamp().dt.date
271
- elif pd.__version__ >= "2.0.0":
264
+ if pd.__version__ >= "2.0.0":
272
265
  dates = pd.to_datetime(X[maybe_date_col], format="mixed").dt.date
273
266
  else:
274
267
  dates = pd.to_datetime(X[maybe_date_col]).dt.date
@@ -1 +0,0 @@
1
- __version__ = "1.1.290"
File without changes
File without changes
File without changes