upgini 1.2.133a1__tar.gz → 1.2.134__tar.gz

This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of upgini might be problematic. Click here for more details.

Files changed (82)
  1. {upgini-1.2.133a1 → upgini-1.2.134}/.gitignore +2 -1
  2. {upgini-1.2.133a1 → upgini-1.2.134}/PKG-INFO +2 -2
  3. upgini-1.2.134/src/upgini/__about__.py +1 -0
  4. {upgini-1.2.133a1 → upgini-1.2.134}/src/upgini/features_enricher.py +2 -2
  5. {upgini-1.2.133a1 → upgini-1.2.134}/src/upgini/utils/datetime_utils.py +21 -18
  6. upgini-1.2.133a1/src/upgini/__about__.py +0 -1
  7. {upgini-1.2.133a1 → upgini-1.2.134}/LICENSE +0 -0
  8. {upgini-1.2.133a1 → upgini-1.2.134}/README.md +0 -0
  9. {upgini-1.2.133a1 → upgini-1.2.134}/pyproject.toml +0 -0
  10. {upgini-1.2.133a1 → upgini-1.2.134}/src/upgini/__init__.py +0 -0
  11. {upgini-1.2.133a1 → upgini-1.2.134}/src/upgini/ads.py +0 -0
  12. {upgini-1.2.133a1 → upgini-1.2.134}/src/upgini/ads_management/__init__.py +0 -0
  13. {upgini-1.2.133a1 → upgini-1.2.134}/src/upgini/ads_management/ads_manager.py +0 -0
  14. {upgini-1.2.133a1 → upgini-1.2.134}/src/upgini/autofe/__init__.py +0 -0
  15. {upgini-1.2.133a1 → upgini-1.2.134}/src/upgini/autofe/all_operators.py +0 -0
  16. {upgini-1.2.133a1 → upgini-1.2.134}/src/upgini/autofe/binary.py +0 -0
  17. {upgini-1.2.133a1 → upgini-1.2.134}/src/upgini/autofe/date.py +0 -0
  18. {upgini-1.2.133a1 → upgini-1.2.134}/src/upgini/autofe/feature.py +0 -0
  19. {upgini-1.2.133a1 → upgini-1.2.134}/src/upgini/autofe/groupby.py +0 -0
  20. {upgini-1.2.133a1 → upgini-1.2.134}/src/upgini/autofe/operator.py +0 -0
  21. {upgini-1.2.133a1 → upgini-1.2.134}/src/upgini/autofe/timeseries/__init__.py +0 -0
  22. {upgini-1.2.133a1 → upgini-1.2.134}/src/upgini/autofe/timeseries/base.py +0 -0
  23. {upgini-1.2.133a1 → upgini-1.2.134}/src/upgini/autofe/timeseries/cross.py +0 -0
  24. {upgini-1.2.133a1 → upgini-1.2.134}/src/upgini/autofe/timeseries/delta.py +0 -0
  25. {upgini-1.2.133a1 → upgini-1.2.134}/src/upgini/autofe/timeseries/lag.py +0 -0
  26. {upgini-1.2.133a1 → upgini-1.2.134}/src/upgini/autofe/timeseries/roll.py +0 -0
  27. {upgini-1.2.133a1 → upgini-1.2.134}/src/upgini/autofe/timeseries/trend.py +0 -0
  28. {upgini-1.2.133a1 → upgini-1.2.134}/src/upgini/autofe/timeseries/volatility.py +0 -0
  29. {upgini-1.2.133a1 → upgini-1.2.134}/src/upgini/autofe/unary.py +0 -0
  30. {upgini-1.2.133a1 → upgini-1.2.134}/src/upgini/autofe/utils.py +0 -0
  31. {upgini-1.2.133a1 → upgini-1.2.134}/src/upgini/autofe/vector.py +0 -0
  32. {upgini-1.2.133a1 → upgini-1.2.134}/src/upgini/data_source/__init__.py +0 -0
  33. {upgini-1.2.133a1 → upgini-1.2.134}/src/upgini/data_source/data_source_publisher.py +0 -0
  34. {upgini-1.2.133a1 → upgini-1.2.134}/src/upgini/dataset.py +0 -0
  35. {upgini-1.2.133a1 → upgini-1.2.134}/src/upgini/errors.py +0 -0
  36. {upgini-1.2.133a1 → upgini-1.2.134}/src/upgini/http.py +0 -0
  37. {upgini-1.2.133a1 → upgini-1.2.134}/src/upgini/mdc/__init__.py +0 -0
  38. {upgini-1.2.133a1 → upgini-1.2.134}/src/upgini/mdc/context.py +0 -0
  39. {upgini-1.2.133a1 → upgini-1.2.134}/src/upgini/metadata.py +0 -0
  40. {upgini-1.2.133a1 → upgini-1.2.134}/src/upgini/metrics.py +0 -0
  41. {upgini-1.2.133a1 → upgini-1.2.134}/src/upgini/normalizer/__init__.py +0 -0
  42. {upgini-1.2.133a1 → upgini-1.2.134}/src/upgini/normalizer/normalize_utils.py +0 -0
  43. {upgini-1.2.133a1 → upgini-1.2.134}/src/upgini/resource_bundle/__init__.py +0 -0
  44. {upgini-1.2.133a1 → upgini-1.2.134}/src/upgini/resource_bundle/exceptions.py +0 -0
  45. {upgini-1.2.133a1 → upgini-1.2.134}/src/upgini/resource_bundle/strings.properties +0 -0
  46. {upgini-1.2.133a1 → upgini-1.2.134}/src/upgini/resource_bundle/strings_widget.properties +0 -0
  47. {upgini-1.2.133a1 → upgini-1.2.134}/src/upgini/sampler/__init__.py +0 -0
  48. {upgini-1.2.133a1 → upgini-1.2.134}/src/upgini/sampler/base.py +0 -0
  49. {upgini-1.2.133a1 → upgini-1.2.134}/src/upgini/sampler/random_under_sampler.py +0 -0
  50. {upgini-1.2.133a1 → upgini-1.2.134}/src/upgini/sampler/utils.py +0 -0
  51. {upgini-1.2.133a1 → upgini-1.2.134}/src/upgini/search_task.py +0 -0
  52. {upgini-1.2.133a1 → upgini-1.2.134}/src/upgini/spinner.py +0 -0
  53. {upgini-1.2.133a1 → upgini-1.2.134}/src/upgini/utils/Roboto-Regular.ttf +0 -0
  54. {upgini-1.2.133a1 → upgini-1.2.134}/src/upgini/utils/__init__.py +0 -0
  55. {upgini-1.2.133a1 → upgini-1.2.134}/src/upgini/utils/base_search_key_detector.py +0 -0
  56. {upgini-1.2.133a1 → upgini-1.2.134}/src/upgini/utils/blocked_time_series.py +0 -0
  57. {upgini-1.2.133a1 → upgini-1.2.134}/src/upgini/utils/config.py +0 -0
  58. {upgini-1.2.133a1 → upgini-1.2.134}/src/upgini/utils/country_utils.py +0 -0
  59. {upgini-1.2.133a1 → upgini-1.2.134}/src/upgini/utils/custom_loss_utils.py +0 -0
  60. {upgini-1.2.133a1 → upgini-1.2.134}/src/upgini/utils/cv_utils.py +0 -0
  61. {upgini-1.2.133a1 → upgini-1.2.134}/src/upgini/utils/deduplicate_utils.py +0 -0
  62. {upgini-1.2.133a1 → upgini-1.2.134}/src/upgini/utils/display_utils.py +0 -0
  63. {upgini-1.2.133a1 → upgini-1.2.134}/src/upgini/utils/email_utils.py +0 -0
  64. {upgini-1.2.133a1 → upgini-1.2.134}/src/upgini/utils/fallback_progress_bar.py +0 -0
  65. {upgini-1.2.133a1 → upgini-1.2.134}/src/upgini/utils/feature_info.py +0 -0
  66. {upgini-1.2.133a1 → upgini-1.2.134}/src/upgini/utils/features_validator.py +0 -0
  67. {upgini-1.2.133a1 → upgini-1.2.134}/src/upgini/utils/format.py +0 -0
  68. {upgini-1.2.133a1 → upgini-1.2.134}/src/upgini/utils/hash_utils.py +0 -0
  69. {upgini-1.2.133a1 → upgini-1.2.134}/src/upgini/utils/ip_utils.py +0 -0
  70. {upgini-1.2.133a1 → upgini-1.2.134}/src/upgini/utils/mstats.py +0 -0
  71. {upgini-1.2.133a1 → upgini-1.2.134}/src/upgini/utils/phone_utils.py +0 -0
  72. {upgini-1.2.133a1 → upgini-1.2.134}/src/upgini/utils/postal_code_utils.py +0 -0
  73. {upgini-1.2.133a1 → upgini-1.2.134}/src/upgini/utils/progress_bar.py +0 -0
  74. {upgini-1.2.133a1 → upgini-1.2.134}/src/upgini/utils/psi.py +0 -0
  75. {upgini-1.2.133a1 → upgini-1.2.134}/src/upgini/utils/sample_utils.py +0 -0
  76. {upgini-1.2.133a1 → upgini-1.2.134}/src/upgini/utils/sklearn_ext.py +0 -0
  77. {upgini-1.2.133a1 → upgini-1.2.134}/src/upgini/utils/sort.py +0 -0
  78. {upgini-1.2.133a1 → upgini-1.2.134}/src/upgini/utils/target_utils.py +0 -0
  79. {upgini-1.2.133a1 → upgini-1.2.134}/src/upgini/utils/track_info.py +0 -0
  80. {upgini-1.2.133a1 → upgini-1.2.134}/src/upgini/utils/ts_utils.py +0 -0
  81. {upgini-1.2.133a1 → upgini-1.2.134}/src/upgini/utils/warning_counter.py +0 -0
  82. {upgini-1.2.133a1 → upgini-1.2.134}/src/upgini/version_validator.py +0 -0
@@ -155,4 +155,5 @@ fingerprint.js
155
155
  envVars.txt
156
156
  .ruff_cache
157
157
  .jupyter
158
- *.excalidraw
158
+ *.excalidraw
159
+ .testmondata*
@@ -1,6 +1,6 @@
1
- Metadata-Version: 2.3
1
+ Metadata-Version: 2.4
2
2
  Name: upgini
3
- Version: 1.2.133a1
3
+ Version: 1.2.134
4
4
  Summary: Intelligent data search & enrichment for Machine Learning
5
5
  Project-URL: Bug Reports, https://github.com/upgini/upgini/issues
6
6
  Project-URL: Homepage, https://upgini.com/
@@ -0,0 +1 @@
1
+ __version__ = "1.2.134"
@@ -2938,7 +2938,7 @@ if response.status_code == 200:
2938
2938
  new_columns_on_transform = [c for c in validated_Xy.columns if c not in fit_input_columns]
2939
2939
 
2940
2940
  selected_generated_features = [
2941
- c for c in generated_features if not self.fit_select_features or c in self.feature_names_
2941
+ c for c in generated_features if c in self.feature_names_
2942
2942
  ]
2943
2943
  if keep_input is True:
2944
2944
  selected_input_columns = [
@@ -3245,7 +3245,7 @@ if response.status_code == 200:
3245
3245
  if fintech_warnings:
3246
3246
  for fintech_warning in fintech_warnings:
3247
3247
  self.__log_warning(fintech_warning)
3248
- df, full_duplicates_warning = clean_full_duplicates(df, self.logger, bundle=self.bundle)
3248
+ df, full_duplicates_warning = clean_full_duplicates(df, logger=self.logger, bundle=self.bundle)
3249
3249
  if full_duplicates_warning:
3250
3250
  if len(df) == 0:
3251
3251
  raise ValidationError(full_duplicates_warning)
@@ -84,30 +84,31 @@ class DateTimeConverter:
84
84
  return parsed is not None and not parsed.isna().all()
85
85
 
86
86
  def parse_datetime(self, df: pd.DataFrame, raise_errors=True) -> pd.Series | None:
87
- df = df.copy()
88
87
  if len(df) == 0 or df[self.date_column].isna().all():
89
88
  return None
90
89
 
90
+ date_col = df[self.date_column].copy()
91
+
91
92
  try:
92
- if df[self.date_column].apply(lambda x: isinstance(x, datetime.datetime)).all():
93
- parsed_datetime = df[self.date_column].apply(lambda x: x.replace(tzinfo=None))
94
- elif isinstance(df[self.date_column].dropna().values[0], datetime.date):
95
- parsed_datetime = pd.to_datetime(df[self.date_column], errors="coerce")
96
- elif isinstance(df[self.date_column].dtype, pd.PeriodDtype):
97
- parsed_datetime = df[self.date_column].dt.to_timestamp()
98
- elif is_numeric_dtype(df[self.date_column]):
93
+ if date_col.apply(lambda x: isinstance(x, datetime.datetime)).all():
94
+ parsed_datetime = date_col.apply(lambda x: x.replace(tzinfo=None))
95
+ elif isinstance(date_col.dropna().values[0], datetime.date):
96
+ parsed_datetime = pd.to_datetime(date_col, errors="coerce")
97
+ elif isinstance(date_col.dtype, pd.PeriodDtype):
98
+ parsed_datetime = date_col.dt.to_timestamp()
99
+ elif is_numeric_dtype(date_col):
99
100
  # 315532801 - 2524608001 - seconds
100
101
  # 315532801000 - 2524608001000 - milliseconds
101
102
  # 315532801000000 - 2524608001000000 - microseconds
102
103
  # 315532801000000000 - 2524608001000000000 - nanoseconds
103
- if df[self.date_column].apply(lambda x: 10**16 < x).all():
104
- parsed_datetime = pd.to_datetime(df[self.date_column], unit="ns")
105
- elif df[self.date_column].apply(lambda x: 10**14 < x < 10**16).all():
106
- parsed_datetime = pd.to_datetime(df[self.date_column], unit="us")
107
- elif df[self.date_column].apply(lambda x: 10**11 < x < 10**14).all():
108
- parsed_datetime = pd.to_datetime(df[self.date_column], unit="ms")
109
- elif df[self.date_column].apply(lambda x: 10**8 < x < 10**11).all():
110
- parsed_datetime = pd.to_datetime(df[self.date_column], unit="s")
104
+ if date_col.apply(lambda x: 10**16 < x).all():
105
+ parsed_datetime = pd.to_datetime(date_col, unit="ns")
106
+ elif date_col.apply(lambda x: 10**14 < x < 10**16).all():
107
+ parsed_datetime = pd.to_datetime(date_col, unit="us")
108
+ elif date_col.apply(lambda x: 10**11 < x < 10**14).all():
109
+ parsed_datetime = pd.to_datetime(date_col, unit="ms")
110
+ elif date_col.apply(lambda x: 10**8 < x < 10**11).all():
111
+ parsed_datetime = pd.to_datetime(date_col, unit="s")
111
112
  else:
112
113
  msg = self.bundle.get("unsupported_date_type").format(self.date_column)
113
114
  if raise_errors:
@@ -115,8 +116,10 @@ class DateTimeConverter:
115
116
  else:
116
117
  return None
117
118
  else:
118
- df[self.date_column] = df[self.date_column].astype("string").apply(self.clean_date)
119
- parsed_datetime = self.parse_string_date(df, raise_errors)
119
+ date_col = date_col.astype("string") # .apply(self.clean_date)
120
+ parsed_datetime = self.parse_string_date(date_col.to_frame(self.date_column), raise_errors)
121
+ if parsed_datetime.isna().all():
122
+ raise ValidationError(self.bundle.get("invalid_date_format").format(self.date_column))
120
123
  parsed_datetime = parsed_datetime.dt.tz_localize(None)
121
124
  return parsed_datetime
122
125
  except Exception as e:
@@ -1 +0,0 @@
1
- __version__ = "1.2.133a1"
File without changes
File without changes
File without changes
File without changes
File without changes