upgini 1.1.288a0__tar.gz → 1.1.290__tar.gz

This diff shows the changes between two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.

Potentially problematic release.


This version of upgini might be problematic. Click here for more details.

Files changed (64)
  1. {upgini-1.1.288a0 → upgini-1.1.290}/PKG-INFO +1 -1
  2. upgini-1.1.290/src/upgini/__about__.py +1 -0
  3. {upgini-1.1.288a0 → upgini-1.1.290}/src/upgini/autofe/date.py +17 -7
  4. {upgini-1.1.288a0 → upgini-1.1.290}/src/upgini/utils/datetime_utils.py +15 -8
  5. upgini-1.1.288a0/src/upgini/__about__.py +0 -1
  6. {upgini-1.1.288a0 → upgini-1.1.290}/.gitignore +0 -0
  7. {upgini-1.1.288a0 → upgini-1.1.290}/LICENSE +0 -0
  8. {upgini-1.1.288a0 → upgini-1.1.290}/README.md +0 -0
  9. {upgini-1.1.288a0 → upgini-1.1.290}/pyproject.toml +0 -0
  10. {upgini-1.1.288a0 → upgini-1.1.290}/src/upgini/__init__.py +0 -0
  11. {upgini-1.1.288a0 → upgini-1.1.290}/src/upgini/ads.py +0 -0
  12. {upgini-1.1.288a0 → upgini-1.1.290}/src/upgini/ads_management/__init__.py +0 -0
  13. {upgini-1.1.288a0 → upgini-1.1.290}/src/upgini/ads_management/ads_manager.py +0 -0
  14. {upgini-1.1.288a0 → upgini-1.1.290}/src/upgini/autofe/__init__.py +0 -0
  15. {upgini-1.1.288a0 → upgini-1.1.290}/src/upgini/autofe/all_operands.py +0 -0
  16. {upgini-1.1.288a0 → upgini-1.1.290}/src/upgini/autofe/binary.py +0 -0
  17. {upgini-1.1.288a0 → upgini-1.1.290}/src/upgini/autofe/feature.py +0 -0
  18. {upgini-1.1.288a0 → upgini-1.1.290}/src/upgini/autofe/groupby.py +0 -0
  19. {upgini-1.1.288a0 → upgini-1.1.290}/src/upgini/autofe/operand.py +0 -0
  20. {upgini-1.1.288a0 → upgini-1.1.290}/src/upgini/autofe/unary.py +0 -0
  21. {upgini-1.1.288a0 → upgini-1.1.290}/src/upgini/autofe/vector.py +0 -0
  22. {upgini-1.1.288a0 → upgini-1.1.290}/src/upgini/data_source/__init__.py +0 -0
  23. {upgini-1.1.288a0 → upgini-1.1.290}/src/upgini/data_source/data_source_publisher.py +0 -0
  24. {upgini-1.1.288a0 → upgini-1.1.290}/src/upgini/dataset.py +0 -0
  25. {upgini-1.1.288a0 → upgini-1.1.290}/src/upgini/errors.py +0 -0
  26. {upgini-1.1.288a0 → upgini-1.1.290}/src/upgini/features_enricher.py +0 -0
  27. {upgini-1.1.288a0 → upgini-1.1.290}/src/upgini/http.py +0 -0
  28. {upgini-1.1.288a0 → upgini-1.1.290}/src/upgini/mdc/__init__.py +0 -0
  29. {upgini-1.1.288a0 → upgini-1.1.290}/src/upgini/mdc/context.py +0 -0
  30. {upgini-1.1.288a0 → upgini-1.1.290}/src/upgini/metadata.py +0 -0
  31. {upgini-1.1.288a0 → upgini-1.1.290}/src/upgini/metrics.py +0 -0
  32. {upgini-1.1.288a0 → upgini-1.1.290}/src/upgini/normalizer/__init__.py +0 -0
  33. {upgini-1.1.288a0 → upgini-1.1.290}/src/upgini/normalizer/phone_normalizer.py +0 -0
  34. {upgini-1.1.288a0 → upgini-1.1.290}/src/upgini/resource_bundle/__init__.py +0 -0
  35. {upgini-1.1.288a0 → upgini-1.1.290}/src/upgini/resource_bundle/exceptions.py +0 -0
  36. {upgini-1.1.288a0 → upgini-1.1.290}/src/upgini/resource_bundle/strings.properties +0 -0
  37. {upgini-1.1.288a0 → upgini-1.1.290}/src/upgini/resource_bundle/strings_widget.properties +0 -0
  38. {upgini-1.1.288a0 → upgini-1.1.290}/src/upgini/sampler/__init__.py +0 -0
  39. {upgini-1.1.288a0 → upgini-1.1.290}/src/upgini/sampler/base.py +0 -0
  40. {upgini-1.1.288a0 → upgini-1.1.290}/src/upgini/sampler/random_under_sampler.py +0 -0
  41. {upgini-1.1.288a0 → upgini-1.1.290}/src/upgini/sampler/utils.py +0 -0
  42. {upgini-1.1.288a0 → upgini-1.1.290}/src/upgini/search_task.py +0 -0
  43. {upgini-1.1.288a0 → upgini-1.1.290}/src/upgini/spinner.py +0 -0
  44. {upgini-1.1.288a0 → upgini-1.1.290}/src/upgini/utils/__init__.py +0 -0
  45. {upgini-1.1.288a0 → upgini-1.1.290}/src/upgini/utils/base_search_key_detector.py +0 -0
  46. {upgini-1.1.288a0 → upgini-1.1.290}/src/upgini/utils/blocked_time_series.py +0 -0
  47. {upgini-1.1.288a0 → upgini-1.1.290}/src/upgini/utils/country_utils.py +0 -0
  48. {upgini-1.1.288a0 → upgini-1.1.290}/src/upgini/utils/custom_loss_utils.py +0 -0
  49. {upgini-1.1.288a0 → upgini-1.1.290}/src/upgini/utils/cv_utils.py +0 -0
  50. {upgini-1.1.288a0 → upgini-1.1.290}/src/upgini/utils/deduplicate_utils.py +0 -0
  51. {upgini-1.1.288a0 → upgini-1.1.290}/src/upgini/utils/display_utils.py +0 -0
  52. {upgini-1.1.288a0 → upgini-1.1.290}/src/upgini/utils/email_utils.py +0 -0
  53. {upgini-1.1.288a0 → upgini-1.1.290}/src/upgini/utils/fallback_progress_bar.py +0 -0
  54. {upgini-1.1.288a0 → upgini-1.1.290}/src/upgini/utils/features_validator.py +0 -0
  55. {upgini-1.1.288a0 → upgini-1.1.290}/src/upgini/utils/format.py +0 -0
  56. {upgini-1.1.288a0 → upgini-1.1.290}/src/upgini/utils/ip_utils.py +0 -0
  57. {upgini-1.1.288a0 → upgini-1.1.290}/src/upgini/utils/phone_utils.py +0 -0
  58. {upgini-1.1.288a0 → upgini-1.1.290}/src/upgini/utils/postal_code_utils.py +0 -0
  59. {upgini-1.1.288a0 → upgini-1.1.290}/src/upgini/utils/progress_bar.py +0 -0
  60. {upgini-1.1.288a0 → upgini-1.1.290}/src/upgini/utils/sklearn_ext.py +0 -0
  61. {upgini-1.1.288a0 → upgini-1.1.290}/src/upgini/utils/target_utils.py +0 -0
  62. {upgini-1.1.288a0 → upgini-1.1.290}/src/upgini/utils/track_info.py +0 -0
  63. {upgini-1.1.288a0 → upgini-1.1.290}/src/upgini/utils/warning_counter.py +0 -0
  64. {upgini-1.1.288a0 → upgini-1.1.290}/src/upgini/version_validator.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: upgini
3
- Version: 1.1.288a0
3
+ Version: 1.1.290
4
4
  Summary: Intelligent data search & enrichment for Machine Learning
5
5
  Project-URL: Bug Reports, https://github.com/upgini/upgini/issues
6
6
  Project-URL: Homepage, https://upgini.com/
@@ -0,0 +1 @@
1
+ __version__ = "1.1.290"
@@ -21,6 +21,20 @@ class DateDiffMixin(BaseModel):
21
21
 
22
22
  return pd.to_datetime(x, unit=unit)
23
23
 
24
+ def _convert_diff_to_unit(self, diff: Union[pd.Series, TimedeltaArray]) -> Union[pd.Series, TimedeltaArray]:
25
+ if self.diff_unit == "D":
26
+ if isinstance(diff, pd.Series) and diff.dtype == "object":
27
+ return diff.apply(lambda x: None if isinstance(x, float) and np.isnan(x) else x.days)
28
+ else:
29
+ return diff / np.timedelta64(1, self.diff_unit)
30
+ elif self.diff_unit == "Y":
31
+ if isinstance(diff, TimedeltaArray):
32
+ return (diff / 365 / 24 / 60 / 60 / 10**9).astype(int)
33
+ else:
34
+ return (diff / 365 / 24 / 60 / 60 / 10**9).dt.nanoseconds
35
+ else:
36
+ raise Exception(f"Unsupported difference unit: {self.diff_unit}")
37
+
24
38
 
25
39
  class DateDiff(PandasOperand, DateDiffMixin):
26
40
  name = "date_diff"
@@ -41,7 +55,8 @@ class DateDiff(PandasOperand, DateDiffMixin):
41
55
  def calculate_binary(self, left: pd.Series, right: pd.Series) -> pd.Series:
42
56
  left = self._convert_to_date(left, self.left_unit)
43
57
  right = self._convert_to_date(right, self.right_unit)
44
- return self.__replace_negative((left - right) / np.timedelta64(1, self.diff_unit))
58
+ diff = self._convert_diff_to_unit(left.dt.date - right.dt.date)
59
+ return self.__replace_negative(diff)
45
60
 
46
61
  def __replace_negative(self, x: Union[pd.DataFrame, pd.Series]):
47
62
  x[x < 0] = None
@@ -107,12 +122,7 @@ class DateListDiff(PandasOperand, DateDiffMixin):
107
122
  return pd.Series(left - right.values).apply(lambda x: self._agg(self._diff(x)))
108
123
 
109
124
  def _diff(self, x: TimedeltaArray):
110
- if self.diff_unit == "Y":
111
- x = (x / 365 / 24 / 60 / 60 / 10**9).astype(int)
112
- elif self.diff_unit == "M":
113
- raise Exception("Unsupported difference unit: Month")
114
- else:
115
- x = x / np.timedelta64(1, self.diff_unit)
125
+ x = self._convert_diff_to_unit(x)
116
126
  return x[x > 0]
117
127
 
118
128
  def _agg(self, x):
@@ -82,19 +82,19 @@ class DateTimeSearchKeyConverter:
82
82
  elif isinstance(df[self.date_column].values[0], datetime.date):
83
83
  df[self.date_column] = pd.to_datetime(df[self.date_column], errors="coerce")
84
84
  elif is_period_dtype(df[self.date_column]):
85
- df[self.date_column] = pd.to_datetime(df[self.date_column].astype("string"))
85
+ df[self.date_column] = df[self.date_column].dt.to_timestamp()
86
86
  elif is_numeric_dtype(df[self.date_column]):
87
87
  # 315532801 - 2524608001 - seconds
88
88
  # 315532801000 - 2524608001000 - milliseconds
89
89
  # 315532801000000 - 2524608001000000 - microseconds
90
90
  # 315532801000000000 - 2524608001000000000 - nanoseconds
91
- if df[self.date_column].apply(lambda x: 10**16 < x).all():
91
+ if df[self.date_column].apply(lambda x: 10 ** 16 < x).all():
92
92
  df[self.date_column] = pd.to_datetime(df[self.date_column], unit="ns")
93
- elif df[self.date_column].apply(lambda x: 10**14 < x < 10**16).all():
93
+ elif df[self.date_column].apply(lambda x: 10 ** 14 < x < 10 ** 16).all():
94
94
  df[self.date_column] = pd.to_datetime(df[self.date_column], unit="us")
95
- elif df[self.date_column].apply(lambda x: 10**11 < x < 10**14).all():
95
+ elif df[self.date_column].apply(lambda x: 10 ** 11 < x < 10 ** 14).all():
96
96
  df[self.date_column] = pd.to_datetime(df[self.date_column], unit="ms")
97
- elif df[self.date_column].apply(lambda x: 0 < x < 10 * 11).all():
97
+ elif df[self.date_column].apply(lambda x: 0 < x < 10 ** 11).all():
98
98
  df[self.date_column] = pd.to_datetime(df[self.date_column], unit="s")
99
99
  else:
100
100
  msg = self.bundle.get("unsupported_date_type").format(self.date_column)
@@ -185,7 +185,10 @@ def is_time_series(df: pd.DataFrame, date_col: str) -> bool:
185
185
  def is_blocked_time_series(df: pd.DataFrame, date_col: str, search_keys: List[str]) -> bool:
186
186
  df = df.copy()
187
187
  seconds = "datetime_seconds"
188
- df[date_col] = pd.to_datetime(df[date_col])
188
+ if is_period_dtype(df[date_col]):
189
+ df[date_col] = df[date_col].dt.to_timestamp()
190
+ else:
191
+ df[date_col] = pd.to_datetime(df[date_col])
189
192
  df[date_col] = df[date_col].dt.tz_localize(None)
190
193
  df[seconds] = (df[date_col] - df[date_col].dt.floor("D")).dt.seconds
191
194
 
@@ -248,7 +251,9 @@ def validate_dates_distribution(
248
251
  if col in search_keys:
249
252
  continue
250
253
  try:
251
- if pd.__version__ >= "2.0.0":
254
+ if is_period_dtype(X[col]):
255
+ pass
256
+ elif pd.__version__ >= "2.0.0":
252
257
  # Format mixed to avoid massive warnings
253
258
  pd.to_datetime(X[col], format="mixed")
254
259
  else:
@@ -261,7 +266,9 @@ def validate_dates_distribution(
261
266
  if maybe_date_col is None:
262
267
  return
263
268
 
264
- if pd.__version__ >= "2.0.0":
269
+ if is_period_dtype(X[maybe_date_col]):
270
+ dates = X[maybe_date_col].dt.to_timestamp().dt.date
271
+ elif pd.__version__ >= "2.0.0":
265
272
  dates = pd.to_datetime(X[maybe_date_col], format="mixed").dt.date
266
273
  else:
267
274
  dates = pd.to_datetime(X[maybe_date_col]).dt.date
@@ -1 +0,0 @@
1
- __version__ = "1.1.288a0"
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes