upgini 1.2.29a1__tar.gz → 1.2.29a3__tar.gz

This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of upgini might be problematic. Click here for more details.

Files changed (66)
  1. {upgini-1.2.29a1 → upgini-1.2.29a3}/PKG-INFO +1 -1
  2. upgini-1.2.29a3/src/upgini/__about__.py +1 -0
  3. {upgini-1.2.29a1 → upgini-1.2.29a3}/src/upgini/utils/datetime_utils.py +42 -36
  4. upgini-1.2.29a1/src/upgini/__about__.py +0 -1
  5. {upgini-1.2.29a1 → upgini-1.2.29a3}/.gitignore +0 -0
  6. {upgini-1.2.29a1 → upgini-1.2.29a3}/LICENSE +0 -0
  7. {upgini-1.2.29a1 → upgini-1.2.29a3}/README.md +0 -0
  8. {upgini-1.2.29a1 → upgini-1.2.29a3}/pyproject.toml +0 -0
  9. {upgini-1.2.29a1 → upgini-1.2.29a3}/src/upgini/__init__.py +0 -0
  10. {upgini-1.2.29a1 → upgini-1.2.29a3}/src/upgini/ads.py +0 -0
  11. {upgini-1.2.29a1 → upgini-1.2.29a3}/src/upgini/ads_management/__init__.py +0 -0
  12. {upgini-1.2.29a1 → upgini-1.2.29a3}/src/upgini/ads_management/ads_manager.py +0 -0
  13. {upgini-1.2.29a1 → upgini-1.2.29a3}/src/upgini/autofe/__init__.py +0 -0
  14. {upgini-1.2.29a1 → upgini-1.2.29a3}/src/upgini/autofe/all_operands.py +0 -0
  15. {upgini-1.2.29a1 → upgini-1.2.29a3}/src/upgini/autofe/binary.py +0 -0
  16. {upgini-1.2.29a1 → upgini-1.2.29a3}/src/upgini/autofe/date.py +0 -0
  17. {upgini-1.2.29a1 → upgini-1.2.29a3}/src/upgini/autofe/feature.py +0 -0
  18. {upgini-1.2.29a1 → upgini-1.2.29a3}/src/upgini/autofe/groupby.py +0 -0
  19. {upgini-1.2.29a1 → upgini-1.2.29a3}/src/upgini/autofe/operand.py +0 -0
  20. {upgini-1.2.29a1 → upgini-1.2.29a3}/src/upgini/autofe/unary.py +0 -0
  21. {upgini-1.2.29a1 → upgini-1.2.29a3}/src/upgini/autofe/vector.py +0 -0
  22. {upgini-1.2.29a1 → upgini-1.2.29a3}/src/upgini/data_source/__init__.py +0 -0
  23. {upgini-1.2.29a1 → upgini-1.2.29a3}/src/upgini/data_source/data_source_publisher.py +0 -0
  24. {upgini-1.2.29a1 → upgini-1.2.29a3}/src/upgini/dataset.py +0 -0
  25. {upgini-1.2.29a1 → upgini-1.2.29a3}/src/upgini/errors.py +0 -0
  26. {upgini-1.2.29a1 → upgini-1.2.29a3}/src/upgini/features_enricher.py +0 -0
  27. {upgini-1.2.29a1 → upgini-1.2.29a3}/src/upgini/http.py +0 -0
  28. {upgini-1.2.29a1 → upgini-1.2.29a3}/src/upgini/lazy_import.py +0 -0
  29. {upgini-1.2.29a1 → upgini-1.2.29a3}/src/upgini/mdc/__init__.py +0 -0
  30. {upgini-1.2.29a1 → upgini-1.2.29a3}/src/upgini/mdc/context.py +0 -0
  31. {upgini-1.2.29a1 → upgini-1.2.29a3}/src/upgini/metadata.py +0 -0
  32. {upgini-1.2.29a1 → upgini-1.2.29a3}/src/upgini/metrics.py +0 -0
  33. {upgini-1.2.29a1 → upgini-1.2.29a3}/src/upgini/normalizer/__init__.py +0 -0
  34. {upgini-1.2.29a1 → upgini-1.2.29a3}/src/upgini/normalizer/normalize_utils.py +0 -0
  35. {upgini-1.2.29a1 → upgini-1.2.29a3}/src/upgini/resource_bundle/__init__.py +0 -0
  36. {upgini-1.2.29a1 → upgini-1.2.29a3}/src/upgini/resource_bundle/exceptions.py +0 -0
  37. {upgini-1.2.29a1 → upgini-1.2.29a3}/src/upgini/resource_bundle/strings.properties +0 -0
  38. {upgini-1.2.29a1 → upgini-1.2.29a3}/src/upgini/resource_bundle/strings_widget.properties +0 -0
  39. {upgini-1.2.29a1 → upgini-1.2.29a3}/src/upgini/sampler/__init__.py +0 -0
  40. {upgini-1.2.29a1 → upgini-1.2.29a3}/src/upgini/sampler/base.py +0 -0
  41. {upgini-1.2.29a1 → upgini-1.2.29a3}/src/upgini/sampler/random_under_sampler.py +0 -0
  42. {upgini-1.2.29a1 → upgini-1.2.29a3}/src/upgini/sampler/utils.py +0 -0
  43. {upgini-1.2.29a1 → upgini-1.2.29a3}/src/upgini/search_task.py +0 -0
  44. {upgini-1.2.29a1 → upgini-1.2.29a3}/src/upgini/spinner.py +0 -0
  45. {upgini-1.2.29a1 → upgini-1.2.29a3}/src/upgini/utils/Roboto-Regular.ttf +0 -0
  46. {upgini-1.2.29a1 → upgini-1.2.29a3}/src/upgini/utils/__init__.py +0 -0
  47. {upgini-1.2.29a1 → upgini-1.2.29a3}/src/upgini/utils/base_search_key_detector.py +0 -0
  48. {upgini-1.2.29a1 → upgini-1.2.29a3}/src/upgini/utils/blocked_time_series.py +0 -0
  49. {upgini-1.2.29a1 → upgini-1.2.29a3}/src/upgini/utils/country_utils.py +0 -0
  50. {upgini-1.2.29a1 → upgini-1.2.29a3}/src/upgini/utils/custom_loss_utils.py +0 -0
  51. {upgini-1.2.29a1 → upgini-1.2.29a3}/src/upgini/utils/cv_utils.py +0 -0
  52. {upgini-1.2.29a1 → upgini-1.2.29a3}/src/upgini/utils/deduplicate_utils.py +0 -0
  53. {upgini-1.2.29a1 → upgini-1.2.29a3}/src/upgini/utils/display_utils.py +0 -0
  54. {upgini-1.2.29a1 → upgini-1.2.29a3}/src/upgini/utils/email_utils.py +0 -0
  55. {upgini-1.2.29a1 → upgini-1.2.29a3}/src/upgini/utils/fallback_progress_bar.py +0 -0
  56. {upgini-1.2.29a1 → upgini-1.2.29a3}/src/upgini/utils/features_validator.py +0 -0
  57. {upgini-1.2.29a1 → upgini-1.2.29a3}/src/upgini/utils/format.py +0 -0
  58. {upgini-1.2.29a1 → upgini-1.2.29a3}/src/upgini/utils/ip_utils.py +0 -0
  59. {upgini-1.2.29a1 → upgini-1.2.29a3}/src/upgini/utils/phone_utils.py +0 -0
  60. {upgini-1.2.29a1 → upgini-1.2.29a3}/src/upgini/utils/postal_code_utils.py +0 -0
  61. {upgini-1.2.29a1 → upgini-1.2.29a3}/src/upgini/utils/progress_bar.py +0 -0
  62. {upgini-1.2.29a1 → upgini-1.2.29a3}/src/upgini/utils/sklearn_ext.py +0 -0
  63. {upgini-1.2.29a1 → upgini-1.2.29a3}/src/upgini/utils/target_utils.py +0 -0
  64. {upgini-1.2.29a1 → upgini-1.2.29a3}/src/upgini/utils/track_info.py +0 -0
  65. {upgini-1.2.29a1 → upgini-1.2.29a3}/src/upgini/utils/warning_counter.py +0 -0
  66. {upgini-1.2.29a1 → upgini-1.2.29a3}/src/upgini/version_validator.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: upgini
3
- Version: 1.2.29a1
3
+ Version: 1.2.29a3
4
4
  Summary: Intelligent data search & enrichment for Machine Learning
5
5
  Project-URL: Bug Reports, https://github.com/upgini/upgini/issues
6
6
  Project-URL: Homepage, https://upgini.com/
@@ -0,0 +1 @@
1
+ __version__ = "1.2.29a3"
@@ -109,54 +109,60 @@ class DateTimeSearchKeyConverter:
109
109
 
110
110
  df = self.clean_old_dates(df)
111
111
 
112
+ # Define function to apply sine and cosine transformations
113
+ def add_cyclical_features(df, column, period):
114
+ period_suffix = f"_{period}" if column != 'day_in_quarter' else ""
115
+ sin_feature = f"datetime_{column}_sin{period_suffix}"
116
+ cos_feature = f"datetime_{column}_cos{period_suffix}"
117
+ df[sin_feature] = np.sin(2 * np.pi * df[column] / period)
118
+ df[cos_feature] = np.cos(2 * np.pi * df[column] / period)
119
+ self.generated_features.append(sin_feature)
120
+ self.generated_features.append(cos_feature)
121
+
122
+ df["quarter"] = df[self.date_column].dt.quarter
123
+
124
+ # Calculate the start date of the quarter for each timestamp
125
+ df["quarter_start"] = df[self.date_column].dt.to_period("Q").dt.start_time
126
+
127
+ # Calculate the day in the quarter
128
+ df["day_in_quarter"] = (df[self.date_column] - df["quarter_start"]).dt.days + 1
129
+
130
+ # Vectorized calculation of days_in_quarter
131
+ quarter = df["quarter"]
132
+ start = df["quarter_start"]
133
+ year = start.dt.year
134
+ month = start.dt.month
135
+
136
+ quarter_end_year = np.where(quarter == 4, year + 1, year)
137
+ quarter_end_month = np.where(quarter == 4, 1, month + 3)
138
+
139
+ end = pd.to_datetime({"year": quarter_end_year, "month": quarter_end_month, "day": 1})
140
+
141
+ df["days_in_quarter"] = (end - start).dt.days
142
+
143
+ add_cyclical_features(df, "day_in_quarter", df["days_in_quarter"]) # Days in the quarter
144
+
145
+ df.drop(columns=["quarter", "quarter_start", "day_in_quarter", "days_in_quarter"], inplace=True)
146
+
112
147
  df[seconds] = (df[self.date_column] - df[self.date_column].dt.floor("D")).dt.seconds
113
148
 
114
149
  seconds_without_na = df[seconds].dropna()
115
150
  if (seconds_without_na != 0).any() and seconds_without_na.nunique() > 1:
116
151
  self.logger.info("Time found in date search key. Add extra features based on time")
117
152
 
118
- # Extract time components
153
+ # Extract basic components
119
154
  df["second"] = df[self.date_column].dt.second
120
155
  df["minute"] = df[self.date_column].dt.minute
121
156
  df["hour"] = df[self.date_column].dt.hour
122
- df["day"] = df[self.date_column].dt.day
123
- df["month"] = df[self.date_column].dt.month
124
-
125
- # Get the actual number of days in each month
126
- df["days_in_month"] = df[self.date_column].dt.days_in_month
127
-
128
- # Define function to apply sine and cosine transformations
129
- def add_cyclical_features(df, column, period):
130
- sin_feature = f"datetime_{column}_sin_{period}"
131
- cos_feature = f"datetime_{column}_cos_{period}"
132
- df[sin_feature] = np.sin(2 * np.pi * df[column] / period)
133
- df[cos_feature] = np.cos(2 * np.pi * df[column] / period)
134
- self.generated_features.append(sin_feature)
135
- self.generated_features.append(cos_feature)
136
-
137
- # Apply transformations using vectorized operations
157
+
158
+ # Apply cyclical transformations
138
159
  add_cyclical_features(df, "second", 60) # Seconds in a minute
139
160
  add_cyclical_features(df, "minute", 60) # Minutes in an hour
161
+ add_cyclical_features(df, "minute", 30) # Minutes in half an hour
140
162
  add_cyclical_features(df, "hour", 24) # Hours in a day
141
- add_cyclical_features(df, "day", df["days_in_month"]) # Days in the specific month
142
- add_cyclical_features(df, "month", 12) # Months in a year
143
-
144
- # Extract quarter information
145
- df["quarter"] = df[self.date_column].dt.quarter
146
-
147
- # Apply transformations for quarters
148
- add_cyclical_features(df, "quarter", 4) # Quarters in a year
149
- # seconds_in_day = 60 * 60 * 24
150
- # orders = [1, 2, 24, 48]
151
- # for order in orders:
152
- # sin_feature = f"datetime_time_sin_{order}"
153
- # cos_feature = f"datetime_time_cos_{order}"
154
- # df[sin_feature] = np.round(np.sin(2 * np.pi * order * df[seconds] / seconds_in_day), 10)
155
- # df[cos_feature] = np.round(np.cos(2 * np.pi * order * df[seconds] / seconds_in_day), 10)
156
- # self.generated_features.append(sin_feature)
157
- # self.generated_features.append(cos_feature)
158
-
159
- df.drop(columns=["second", "minute", "hour", "day", "month", "days_in_month", "quarter"])
163
+
164
+ # Drop intermediate columns if not needed
165
+ df.drop(columns=["second", "minute", "hour"], inplace=True)
160
166
 
161
167
  df.drop(columns=seconds, inplace=True)
162
168
 
@@ -1 +0,0 @@
1
- __version__ = "1.2.29a1"
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes