upgini 1.2.28__tar.gz → 1.2.29a1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of upgini might be problematic. Click here for more details.

Files changed (66)
  1. {upgini-1.2.28 → upgini-1.2.29a1}/PKG-INFO +1 -1
  2. upgini-1.2.29a1/src/upgini/__about__.py +1 -0
  3. {upgini-1.2.28 → upgini-1.2.29a1}/src/upgini/resource_bundle/strings.properties +1 -1
  4. {upgini-1.2.28 → upgini-1.2.29a1}/src/upgini/utils/datetime_utils.py +41 -7
  5. upgini-1.2.28/src/upgini/__about__.py +0 -1
  6. {upgini-1.2.28 → upgini-1.2.29a1}/.gitignore +0 -0
  7. {upgini-1.2.28 → upgini-1.2.29a1}/LICENSE +0 -0
  8. {upgini-1.2.28 → upgini-1.2.29a1}/README.md +0 -0
  9. {upgini-1.2.28 → upgini-1.2.29a1}/pyproject.toml +0 -0
  10. {upgini-1.2.28 → upgini-1.2.29a1}/src/upgini/__init__.py +0 -0
  11. {upgini-1.2.28 → upgini-1.2.29a1}/src/upgini/ads.py +0 -0
  12. {upgini-1.2.28 → upgini-1.2.29a1}/src/upgini/ads_management/__init__.py +0 -0
  13. {upgini-1.2.28 → upgini-1.2.29a1}/src/upgini/ads_management/ads_manager.py +0 -0
  14. {upgini-1.2.28 → upgini-1.2.29a1}/src/upgini/autofe/__init__.py +0 -0
  15. {upgini-1.2.28 → upgini-1.2.29a1}/src/upgini/autofe/all_operands.py +0 -0
  16. {upgini-1.2.28 → upgini-1.2.29a1}/src/upgini/autofe/binary.py +0 -0
  17. {upgini-1.2.28 → upgini-1.2.29a1}/src/upgini/autofe/date.py +0 -0
  18. {upgini-1.2.28 → upgini-1.2.29a1}/src/upgini/autofe/feature.py +0 -0
  19. {upgini-1.2.28 → upgini-1.2.29a1}/src/upgini/autofe/groupby.py +0 -0
  20. {upgini-1.2.28 → upgini-1.2.29a1}/src/upgini/autofe/operand.py +0 -0
  21. {upgini-1.2.28 → upgini-1.2.29a1}/src/upgini/autofe/unary.py +0 -0
  22. {upgini-1.2.28 → upgini-1.2.29a1}/src/upgini/autofe/vector.py +0 -0
  23. {upgini-1.2.28 → upgini-1.2.29a1}/src/upgini/data_source/__init__.py +0 -0
  24. {upgini-1.2.28 → upgini-1.2.29a1}/src/upgini/data_source/data_source_publisher.py +0 -0
  25. {upgini-1.2.28 → upgini-1.2.29a1}/src/upgini/dataset.py +0 -0
  26. {upgini-1.2.28 → upgini-1.2.29a1}/src/upgini/errors.py +0 -0
  27. {upgini-1.2.28 → upgini-1.2.29a1}/src/upgini/features_enricher.py +0 -0
  28. {upgini-1.2.28 → upgini-1.2.29a1}/src/upgini/http.py +0 -0
  29. {upgini-1.2.28 → upgini-1.2.29a1}/src/upgini/lazy_import.py +0 -0
  30. {upgini-1.2.28 → upgini-1.2.29a1}/src/upgini/mdc/__init__.py +0 -0
  31. {upgini-1.2.28 → upgini-1.2.29a1}/src/upgini/mdc/context.py +0 -0
  32. {upgini-1.2.28 → upgini-1.2.29a1}/src/upgini/metadata.py +0 -0
  33. {upgini-1.2.28 → upgini-1.2.29a1}/src/upgini/metrics.py +0 -0
  34. {upgini-1.2.28 → upgini-1.2.29a1}/src/upgini/normalizer/__init__.py +0 -0
  35. {upgini-1.2.28 → upgini-1.2.29a1}/src/upgini/normalizer/normalize_utils.py +0 -0
  36. {upgini-1.2.28 → upgini-1.2.29a1}/src/upgini/resource_bundle/__init__.py +0 -0
  37. {upgini-1.2.28 → upgini-1.2.29a1}/src/upgini/resource_bundle/exceptions.py +0 -0
  38. {upgini-1.2.28 → upgini-1.2.29a1}/src/upgini/resource_bundle/strings_widget.properties +0 -0
  39. {upgini-1.2.28 → upgini-1.2.29a1}/src/upgini/sampler/__init__.py +0 -0
  40. {upgini-1.2.28 → upgini-1.2.29a1}/src/upgini/sampler/base.py +0 -0
  41. {upgini-1.2.28 → upgini-1.2.29a1}/src/upgini/sampler/random_under_sampler.py +0 -0
  42. {upgini-1.2.28 → upgini-1.2.29a1}/src/upgini/sampler/utils.py +0 -0
  43. {upgini-1.2.28 → upgini-1.2.29a1}/src/upgini/search_task.py +0 -0
  44. {upgini-1.2.28 → upgini-1.2.29a1}/src/upgini/spinner.py +0 -0
  45. {upgini-1.2.28 → upgini-1.2.29a1}/src/upgini/utils/Roboto-Regular.ttf +0 -0
  46. {upgini-1.2.28 → upgini-1.2.29a1}/src/upgini/utils/__init__.py +0 -0
  47. {upgini-1.2.28 → upgini-1.2.29a1}/src/upgini/utils/base_search_key_detector.py +0 -0
  48. {upgini-1.2.28 → upgini-1.2.29a1}/src/upgini/utils/blocked_time_series.py +0 -0
  49. {upgini-1.2.28 → upgini-1.2.29a1}/src/upgini/utils/country_utils.py +0 -0
  50. {upgini-1.2.28 → upgini-1.2.29a1}/src/upgini/utils/custom_loss_utils.py +0 -0
  51. {upgini-1.2.28 → upgini-1.2.29a1}/src/upgini/utils/cv_utils.py +0 -0
  52. {upgini-1.2.28 → upgini-1.2.29a1}/src/upgini/utils/deduplicate_utils.py +0 -0
  53. {upgini-1.2.28 → upgini-1.2.29a1}/src/upgini/utils/display_utils.py +0 -0
  54. {upgini-1.2.28 → upgini-1.2.29a1}/src/upgini/utils/email_utils.py +0 -0
  55. {upgini-1.2.28 → upgini-1.2.29a1}/src/upgini/utils/fallback_progress_bar.py +0 -0
  56. {upgini-1.2.28 → upgini-1.2.29a1}/src/upgini/utils/features_validator.py +0 -0
  57. {upgini-1.2.28 → upgini-1.2.29a1}/src/upgini/utils/format.py +0 -0
  58. {upgini-1.2.28 → upgini-1.2.29a1}/src/upgini/utils/ip_utils.py +0 -0
  59. {upgini-1.2.28 → upgini-1.2.29a1}/src/upgini/utils/phone_utils.py +0 -0
  60. {upgini-1.2.28 → upgini-1.2.29a1}/src/upgini/utils/postal_code_utils.py +0 -0
  61. {upgini-1.2.28 → upgini-1.2.29a1}/src/upgini/utils/progress_bar.py +0 -0
  62. {upgini-1.2.28 → upgini-1.2.29a1}/src/upgini/utils/sklearn_ext.py +0 -0
  63. {upgini-1.2.28 → upgini-1.2.29a1}/src/upgini/utils/target_utils.py +0 -0
  64. {upgini-1.2.28 → upgini-1.2.29a1}/src/upgini/utils/track_info.py +0 -0
  65. {upgini-1.2.28 → upgini-1.2.29a1}/src/upgini/utils/warning_counter.py +0 -0
  66. {upgini-1.2.28 → upgini-1.2.29a1}/src/upgini/version_validator.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: upgini
3
- Version: 1.2.28
3
+ Version: 1.2.29a1
4
4
  Summary: Intelligent data search & enrichment for Machine Learning
5
5
  Project-URL: Bug Reports, https://github.com/upgini/upgini/issues
6
6
  Project-URL: Homepage, https://upgini.com/
@@ -0,0 +1 @@
1
+ __version__ = "1.2.29a1"
@@ -82,7 +82,7 @@ unregistered_only_personal_keys=Only personal search keys used. Api_key from pro
82
82
  search_key_not_found=Column `{}` from search_keys was not found in X dataframe: {}
83
83
  numeric_search_key_not_found=Index {} in search_keys is out of bounds for {} columns of X dataframe
84
84
  unsupported_search_key_type=Unsupported type of key in search_keys: {}
85
- unsupported_type_of_search_key=Unsupported type of search key: {}. It should be a member of SearcKey
85
+ unsupported_type_of_search_key=Unsupported type of search key: {}. It should be a member of SearchKey
86
86
  search_key_country_and_country_code=SearchKey.COUNTRY and country_code parameter were passed simultaniously. Parameter country_code will be ignored
87
87
  empty_search_key=Search key {} is empty. Please fill values or remove this search key
88
88
  single_constant_search_key=Constant value detected for the {} search key in the X dataframe: {}.\nThat search key will add constant features for different y values.\nPlease add extra search keys with non constant values, like the COUNTRY, POSTAL_CODE, DATE, PHONE NUMBER, EMAIL/HEM or IPv4
@@ -114,16 +114,50 @@ class DateTimeSearchKeyConverter:
114
114
  seconds_without_na = df[seconds].dropna()
115
115
  if (seconds_without_na != 0).any() and seconds_without_na.nunique() > 1:
116
116
  self.logger.info("Time found in date search key. Add extra features based on time")
117
- seconds_in_day = 60 * 60 * 24
118
- orders = [1, 2, 24, 48]
119
- for order in orders:
120
- sin_feature = f"datetime_time_sin_{order}"
121
- cos_feature = f"datetime_time_cos_{order}"
122
- df[sin_feature] = np.round(np.sin(2 * np.pi * order * df[seconds] / seconds_in_day), 10)
123
- df[cos_feature] = np.round(np.cos(2 * np.pi * order * df[seconds] / seconds_in_day), 10)
117
+
118
+ # Extract time components
119
+ df["second"] = df[self.date_column].dt.second
120
+ df["minute"] = df[self.date_column].dt.minute
121
+ df["hour"] = df[self.date_column].dt.hour
122
+ df["day"] = df[self.date_column].dt.day
123
+ df["month"] = df[self.date_column].dt.month
124
+
125
+ # Get the actual number of days in each month
126
+ df["days_in_month"] = df[self.date_column].dt.days_in_month
127
+
128
+ # Define function to apply sine and cosine transformations
129
+ def add_cyclical_features(df, column, period):
130
+ sin_feature = f"datetime_{column}_sin_{period}"
131
+ cos_feature = f"datetime_{column}_cos_{period}"
132
+ df[sin_feature] = np.sin(2 * np.pi * df[column] / period)
133
+ df[cos_feature] = np.cos(2 * np.pi * df[column] / period)
124
134
  self.generated_features.append(sin_feature)
125
135
  self.generated_features.append(cos_feature)
126
136
 
137
+ # Apply transformations using vectorized operations
138
+ add_cyclical_features(df, "second", 60) # Seconds in a minute
139
+ add_cyclical_features(df, "minute", 60) # Minutes in an hour
140
+ add_cyclical_features(df, "hour", 24) # Hours in a day
141
+ add_cyclical_features(df, "day", df["days_in_month"]) # Days in the specific month
142
+ add_cyclical_features(df, "month", 12) # Months in a year
143
+
144
+ # Extract quarter information
145
+ df["quarter"] = df[self.date_column].dt.quarter
146
+
147
+ # Apply transformations for quarters
148
+ add_cyclical_features(df, "quarter", 4) # Quarters in a year
149
+ # seconds_in_day = 60 * 60 * 24
150
+ # orders = [1, 2, 24, 48]
151
+ # for order in orders:
152
+ # sin_feature = f"datetime_time_sin_{order}"
153
+ # cos_feature = f"datetime_time_cos_{order}"
154
+ # df[sin_feature] = np.round(np.sin(2 * np.pi * order * df[seconds] / seconds_in_day), 10)
155
+ # df[cos_feature] = np.round(np.cos(2 * np.pi * order * df[seconds] / seconds_in_day), 10)
156
+ # self.generated_features.append(sin_feature)
157
+ # self.generated_features.append(cos_feature)
158
+
159
+ df.drop(columns=["second", "minute", "hour", "day", "month", "days_in_month", "quarter"])
160
+
127
161
  df.drop(columns=seconds, inplace=True)
128
162
 
129
163
  if keep_time:
@@ -1 +0,0 @@
1
- __version__ = "1.2.28"
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes