upgini 1.1.290__py3-none-any.whl → 1.1.290a3232.post1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
upgini/__about__.py CHANGED
@@ -1 +1 @@
1
- __version__ = "1.1.290"
1
+ __version__ = "1.1.290a3232-1"
@@ -4,7 +4,7 @@ from upgini.autofe.binary import Add, Divide, Max, Min, Multiply, Sim, Subtract
4
4
  from upgini.autofe.date import DateDiff, DateDiffType2, DateListDiff, DateListDiffBounded, DatePercentile
5
5
  from upgini.autofe.groupby import GroupByThenAgg, GroupByThenRank
6
6
  from upgini.autofe.operand import Operand
7
- from upgini.autofe.unary import Abs, Floor, Freq, Log, Residual, Sigmoid, Sqrt, Square
7
+ from upgini.autofe.unary import Abs, Floor, Freq, Log, Residual, Scale, Sigmoid, Sqrt, Square
8
8
  from upgini.autofe.vector import Mean, Sum
9
9
 
10
10
  ALL_OPERANDS: Dict[str, Operand] = {
@@ -50,6 +50,7 @@ ALL_OPERANDS: Dict[str, Operand] = {
50
50
  DateListDiffBounded(diff_unit="Y", aggregation="count", lower_bound=45, upper_bound=60),
51
51
  DateListDiffBounded(diff_unit="Y", aggregation="count", lower_bound=60),
52
52
  DatePercentile(),
53
+ Scale(),
53
54
  ]
54
55
  }
55
56
 
upgini/autofe/unary.py CHANGED
@@ -1,5 +1,6 @@
1
1
  import numpy as np
2
2
  import pandas as pd
3
+ from sklearn.preprocessing import robust_scale
3
4
 
4
5
  from upgini.autofe.operand import PandasOperand, VectorizableMixin
5
6
 
@@ -111,3 +112,15 @@ class Freq(PandasOperand):
111
112
  def calculate_unary(self, data: pd.Series) -> pd.Series:
112
113
  value_counts = data.value_counts(normalize=True)
113
114
  return self._loc(data, value_counts)
115
+
116
+
117
+ class Scale(PandasOperand, VectorizableMixin):
118
+ name = "scale"
119
+ is_unary = True
120
+ output_type = "float"
121
+
122
+ def calculate_unary(self, data: pd.Series) -> pd.Series:
123
+ return pd.Series(robust_scale(data), index=data.index, name=data.name)
124
+
125
+ def calculate_group(self, data: pd.DataFrame, **kwargs) -> pd.DataFrame:
126
+ return pd.DataFrame(robust_scale(data), index=data.index, columns=data.columns)
@@ -82,19 +82,19 @@ class DateTimeSearchKeyConverter:
82
82
  elif isinstance(df[self.date_column].values[0], datetime.date):
83
83
  df[self.date_column] = pd.to_datetime(df[self.date_column], errors="coerce")
84
84
  elif is_period_dtype(df[self.date_column]):
85
- df[self.date_column] = df[self.date_column].dt.to_timestamp()
85
+ df[self.date_column] = pd.to_datetime(df[self.date_column].astype("string"))
86
86
  elif is_numeric_dtype(df[self.date_column]):
87
87
  # 315532801 - 2524608001 - seconds
88
88
  # 315532801000 - 2524608001000 - milliseconds
89
89
  # 315532801000000 - 2524608001000000 - microseconds
90
90
  # 315532801000000000 - 2524608001000000000 - nanoseconds
91
- if df[self.date_column].apply(lambda x: 10 ** 16 < x).all():
91
+ if df[self.date_column].apply(lambda x: 10**16 < x).all():
92
92
  df[self.date_column] = pd.to_datetime(df[self.date_column], unit="ns")
93
- elif df[self.date_column].apply(lambda x: 10 ** 14 < x < 10 ** 16).all():
93
+ elif df[self.date_column].apply(lambda x: 10**14 < x < 10**16).all():
94
94
  df[self.date_column] = pd.to_datetime(df[self.date_column], unit="us")
95
- elif df[self.date_column].apply(lambda x: 10 ** 11 < x < 10 ** 14).all():
95
+ elif df[self.date_column].apply(lambda x: 10**11 < x < 10**14).all():
96
96
  df[self.date_column] = pd.to_datetime(df[self.date_column], unit="ms")
97
- elif df[self.date_column].apply(lambda x: 0 < x < 10 ** 11).all():
97
+ elif df[self.date_column].apply(lambda x: 0 < x < 10**11).all():
98
98
  df[self.date_column] = pd.to_datetime(df[self.date_column], unit="s")
99
99
  else:
100
100
  msg = self.bundle.get("unsupported_date_type").format(self.date_column)
@@ -185,10 +185,7 @@ def is_time_series(df: pd.DataFrame, date_col: str) -> bool:
185
185
  def is_blocked_time_series(df: pd.DataFrame, date_col: str, search_keys: List[str]) -> bool:
186
186
  df = df.copy()
187
187
  seconds = "datetime_seconds"
188
- if is_period_dtype(df[date_col]):
189
- df[date_col] = df[date_col].dt.to_timestamp()
190
- else:
191
- df[date_col] = pd.to_datetime(df[date_col])
188
+ df[date_col] = pd.to_datetime(df[date_col])
192
189
  df[date_col] = df[date_col].dt.tz_localize(None)
193
190
  df[seconds] = (df[date_col] - df[date_col].dt.floor("D")).dt.seconds
194
191
 
@@ -251,9 +248,7 @@ def validate_dates_distribution(
251
248
  if col in search_keys:
252
249
  continue
253
250
  try:
254
- if is_period_dtype(X[col]):
255
- pass
256
- elif pd.__version__ >= "2.0.0":
251
+ if pd.__version__ >= "2.0.0":
257
252
  # Format mixed to avoid massive warnings
258
253
  pd.to_datetime(X[col], format="mixed")
259
254
  else:
@@ -266,9 +261,7 @@ def validate_dates_distribution(
266
261
  if maybe_date_col is None:
267
262
  return
268
263
 
269
- if is_period_dtype(X[maybe_date_col]):
270
- dates = X[maybe_date_col].dt.to_timestamp().dt.date
271
- elif pd.__version__ >= "2.0.0":
264
+ if pd.__version__ >= "2.0.0":
272
265
  dates = pd.to_datetime(X[maybe_date_col], format="mixed").dt.date
273
266
  else:
274
267
  dates = pd.to_datetime(X[maybe_date_col]).dt.date
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: upgini
3
- Version: 1.1.290
3
+ Version: 1.1.290a3232.post1
4
4
  Summary: Intelligent data search & enrichment for Machine Learning
5
5
  Project-URL: Bug Reports, https://github.com/upgini/upgini/issues
6
6
  Project-URL: Homepage, https://upgini.com/
@@ -1,4 +1,4 @@
1
- upgini/__about__.py,sha256=14vdpqxzfW78z-avOfaHW_WatncVnDB1LrWtzjXSghM,24
1
+ upgini/__about__.py,sha256=SDsUMCDGdgycHBSyFToeEcF3TmvsEtCl-63CS7TpVBc,31
2
2
  upgini/__init__.py,sha256=asENHgEVHQBIkV-e_0IhE_ZWqkCG6398U3ZLrNzAH6k,407
3
3
  upgini/ads.py,sha256=nvuRxRx5MHDMgPr9SiU-fsqRdFaBv8p4_v1oqiysKpc,2714
4
4
  upgini/dataset.py,sha256=7TLVVhGtjgx_9yaiaIUK3kZSe_R9wg5dY0d4F5qCGM4,45636
@@ -13,13 +13,13 @@ upgini/version_validator.py,sha256=ddSKUK_-eGJB3NgrqOMoWJU-OxQ253WsNLp8aqJkaIM,1
13
13
  upgini/ads_management/__init__.py,sha256=qzyisOToVRP-tquAJD1PblZhNtMrOB8FiyF9JvfkvgE,50
14
14
  upgini/ads_management/ads_manager.py,sha256=igVbN2jz80Umb2BUJixmJVj-zx8unoKpecVo-R-nGdw,2648
15
15
  upgini/autofe/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
16
- upgini/autofe/all_operands.py,sha256=7UyvmmqGSqQu4kDgoFwQRKY__b9xKDk3Fpp2-H8A7AA,2399
16
+ upgini/autofe/all_operands.py,sha256=RHz7uLrcrcXeli8R4DhGZ2NhwGhNyWJhmSpFT_F3l5A,2423
17
17
  upgini/autofe/binary.py,sha256=441BRuqMsxlxuw4c8rMZB6h5EpRdVMk-bVa03U7T5Hg,3973
18
18
  upgini/autofe/date.py,sha256=qzk0NT332Q0vR1eRwTuNiMSrGE3ulh6Ic3QLBZqSdvw,7284
19
19
  upgini/autofe/feature.py,sha256=_V9B74B3ue7eAYXSOt9JKhVC9klkAKks22MwnBRye_w,12487
20
20
  upgini/autofe/groupby.py,sha256=4WjDzQxqpZxB79Ih4ihMMI5GDxaFqiH6ZelfV82ClT4,3091
21
21
  upgini/autofe/operand.py,sha256=JjEVT1U3kY9NDjUPMdoki7Oa8hMDG0-_h_NklVjIFyc,2882
22
- upgini/autofe/unary.py,sha256=v-l3aiE5hj6kurvh6adCQL8W3X9u9a7RVbS_WPR2qlw,3146
22
+ upgini/autofe/unary.py,sha256=1EgesKM8M1Lm2Z5VrlgXj3aI0Z88hZnJDbuPaYJyyj4,3614
23
23
  upgini/autofe/vector.py,sha256=dLxfAstJs-gw_OQ1xxoxcM6pVzORlV0HVzdzt7cLXVQ,606
24
24
  upgini/data_source/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
25
25
  upgini/data_source/data_source_publisher.py,sha256=B4fJ1owDCF5ZZ0Ca9ywi_CXVt4iPvABh5BGTnXdXmHk,16635
@@ -41,7 +41,7 @@ upgini/utils/blocked_time_series.py,sha256=Uqr3vp4YqNclj2-PzEYqVy763GSXHn86sbpIl
41
41
  upgini/utils/country_utils.py,sha256=yE8oRgMpXuJxPfQm4fioY6dg6700HgVnHSk4Cv9sUyM,6511
42
42
  upgini/utils/custom_loss_utils.py,sha256=kieNZYBYZm5ZGBltF1F_jOSF4ea6C29rYuCyiDcqVNY,3857
43
43
  upgini/utils/cv_utils.py,sha256=w6FQb9nO8BWDx88EF83NpjPLarK4eR4ia0Wg0kLBJC4,3525
44
- upgini/utils/datetime_utils.py,sha256=Ujmu1ouwSFtG5SywQXJlmtDnGigAnIWPdE5Vx5NvgUM,10951
44
+ upgini/utils/datetime_utils.py,sha256=-LsDTThsGKsTZ57V1uNiHtLcoTtqktk5tui4WnqggJo,10673
45
45
  upgini/utils/deduplicate_utils.py,sha256=6AbARehUCghJZ4PppFtrej2s3gFRruh41MEm6mzakHs,8607
46
46
  upgini/utils/display_utils.py,sha256=A2ouB5eiZ-Kyt9ykYxkLQwyoRPrdYeJymwNTiajtFXs,10990
47
47
  upgini/utils/email_utils.py,sha256=PLufTO97Pg9PPsNqB9agcM6M98MIxKUgIgNn2mVwSQ0,3520
@@ -56,7 +56,7 @@ upgini/utils/sklearn_ext.py,sha256=13jQS_k7v0aUtudXV6nGUEWjttPQzAW9AFYL5wgEz9k,4
56
56
  upgini/utils/target_utils.py,sha256=Y96_PJ5cC-WsEbeqg20v9uqywDQobLoTb-xoP7S3o4E,7807
57
57
  upgini/utils/track_info.py,sha256=G5Lu1xxakg2_TQjKZk4b5SvrHsATTXNVV3NbvWtT8k8,5663
58
58
  upgini/utils/warning_counter.py,sha256=dIWBB4dI5XRRJZudvIlqlIYKEiwLLPcXarsZuYRt338,227
59
- upgini-1.1.290.dist-info/METADATA,sha256=hCW_dnkYOifgc0LJpt9mktkSzt5Y038uJLk8k_cdIQM,48117
60
- upgini-1.1.290.dist-info/WHEEL,sha256=zEMcRr9Kr03x1ozGwg5v9NQBKn3kndp6LSoSlVg-jhU,87
61
- upgini-1.1.290.dist-info/licenses/LICENSE,sha256=5RRzgvdJUu3BUDfv4bzVU6FqKgwHlIay63pPCSmSgzw,1514
62
- upgini-1.1.290.dist-info/RECORD,,
59
+ upgini-1.1.290a3232.post1.dist-info/METADATA,sha256=xp8PeULHddcPJBU_-5296G5KMf3gU03lRwqlYErPD2Q,48128
60
+ upgini-1.1.290a3232.post1.dist-info/WHEEL,sha256=zEMcRr9Kr03x1ozGwg5v9NQBKn3kndp6LSoSlVg-jhU,87
61
+ upgini-1.1.290a3232.post1.dist-info/licenses/LICENSE,sha256=5RRzgvdJUu3BUDfv4bzVU6FqKgwHlIay63pPCSmSgzw,1514
62
+ upgini-1.1.290a3232.post1.dist-info/RECORD,,