upgini 1.2.81a3832.dev16__py3-none-any.whl → 1.2.81a3853.dev1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
upgini/__about__.py CHANGED
@@ -1 +1 @@
1
- __version__ = "1.2.81a3832.dev16"
1
+ __version__ = "1.2.81a3853.dev1"
upgini/autofe/feature.py CHANGED
@@ -161,7 +161,7 @@ class Feature:
161
161
  if self.cached_display_name is not None and cache:
162
162
  return self.cached_display_name
163
163
 
164
- should_stack_op = not isinstance(self.children[0], Column) if self.op.is_unary else False
164
+ should_stack_op = not isinstance(self.children[0], Column) if self.op.is_unary or self.op.is_vector else False
165
165
  components = []
166
166
 
167
167
  if self.alias:
@@ -60,12 +60,14 @@ class EWMAVolatility(VolatilityBase, ParametrizedOperator):
60
60
  return res
61
61
 
62
62
  def _aggregate(self, ts: pd.DataFrame) -> pd.DataFrame:
63
- return ts.apply(self._ewma_vol)
63
+ return ts.apply(self._ewma_vol).iloc[:, [-1]]
64
64
 
65
65
  def _ewma_vol(self, x):
66
- x = pd.DataFrame(x).iloc[:, -1]
67
- returns = self._get_returns(x, f"{self.step_size}{self.step_unit}")
68
- return returns.ewm(span=self.window_size).std()
66
+ return_series = isinstance(x, pd.Series)
67
+ x = pd.DataFrame(x)
68
+ returns = self._get_returns(x.iloc[:, -1], f"{self.step_size}{self.step_unit}")
69
+ x.iloc[:, -1] = returns.ewm(span=self.window_size).std()
70
+ return x.iloc[:, -1] if return_series else x
69
71
 
70
72
 
71
73
  class RollingVolBase(VolatilityBase):
@@ -453,6 +453,7 @@ class FeaturesEnricher(TransformerMixin):
453
453
  """
454
454
  trace_id = str(uuid.uuid4())
455
455
  start_time = time.time()
456
+ auto_fe_parameters = AutoFEParameters() if auto_fe_parameters is None else auto_fe_parameters
456
457
  search_progress = SearchProgress(0.0, ProgressStage.START_FIT)
457
458
  if progress_callback is not None:
458
459
  progress_callback(search_progress)
@@ -607,6 +608,7 @@ class FeaturesEnricher(TransformerMixin):
607
608
  """
608
609
 
609
610
  self.warning_counter.reset()
611
+ auto_fe_parameters = AutoFEParameters() if auto_fe_parameters is None else auto_fe_parameters
610
612
  trace_id = str(uuid.uuid4())
611
613
  start_time = time.time()
612
614
  with MDC(trace_id=trace_id):
@@ -2379,6 +2381,25 @@ if response.status_code == 200:
2379
2381
  df[columns_for_system_record_id], index=False
2380
2382
  ).astype("float64")
2381
2383
 
2384
+ features_not_to_pass = []
2385
+ if add_fit_system_record_id:
2386
+ df = self.__add_fit_system_record_id(
2387
+ df,
2388
+ search_keys,
2389
+ SYSTEM_RECORD_ID,
2390
+ TARGET,
2391
+ columns_renaming,
2392
+ silent=True,
2393
+ )
2394
+ df = df.rename(columns={SYSTEM_RECORD_ID: SORT_ID})
2395
+ features_not_to_pass.append(SORT_ID)
2396
+
2397
+ system_columns_with_original_index = [ENTITY_SYSTEM_RECORD_ID] + generated_features
2398
+ if add_fit_system_record_id:
2399
+ system_columns_with_original_index.append(SORT_ID)
2400
+
2401
+ df_before_explode = df[system_columns_with_original_index].copy()
2402
+
2382
2403
  # Explode multiple search keys
2383
2404
  df, unnest_search_keys = self._explode_multiple_search_keys(df, search_keys, columns_renaming)
2384
2405
 
@@ -2426,25 +2447,13 @@ if response.status_code == 200:
2426
2447
  meaning_types.update({col: FileColumnMeaningType.FEATURE for col in features_for_transform})
2427
2448
  meaning_types.update({col: key.value for col, key in search_keys.items()})
2428
2449
 
2429
- features_not_to_pass = [
2450
+ features_not_to_pass.extend([
2430
2451
  c
2431
2452
  for c in df.columns
2432
2453
  if c not in search_keys.keys()
2433
2454
  and c not in features_for_transform
2434
2455
  and c not in [ENTITY_SYSTEM_RECORD_ID, SEARCH_KEY_UNNEST]
2435
- ]
2436
-
2437
- if add_fit_system_record_id:
2438
- df = self.__add_fit_system_record_id(
2439
- df,
2440
- search_keys,
2441
- SYSTEM_RECORD_ID,
2442
- TARGET,
2443
- columns_renaming,
2444
- silent=True,
2445
- )
2446
- df = df.rename(columns={SYSTEM_RECORD_ID: SORT_ID})
2447
- features_not_to_pass.append(SORT_ID)
2456
+ ])
2448
2457
 
2449
2458
  if DateTimeSearchKeyConverter.DATETIME_COL in df.columns:
2450
2459
  df = df.drop(columns=DateTimeSearchKeyConverter.DATETIME_COL)
@@ -2460,10 +2469,6 @@ if response.status_code == 200:
2460
2469
  meaning_types[SEARCH_KEY_UNNEST] = FileColumnMeaningType.UNNEST_KEY
2461
2470
 
2462
2471
  df = df.reset_index(drop=True)
2463
- system_columns_with_original_index = [SYSTEM_RECORD_ID, ENTITY_SYSTEM_RECORD_ID] + generated_features
2464
- if add_fit_system_record_id:
2465
- system_columns_with_original_index.append(SORT_ID)
2466
- df_with_original_index = df[system_columns_with_original_index].copy()
2467
2472
 
2468
2473
  combined_search_keys = combine_search_keys(search_keys.keys())
2469
2474
 
@@ -2571,7 +2576,7 @@ if response.status_code == 200:
2571
2576
  combined_df = pd.concat(
2572
2577
  [
2573
2578
  validated_Xy.reset_index(drop=True),
2574
- df_with_original_index.reset_index(drop=True),
2579
+ df_before_explode.reset_index(drop=True),
2575
2580
  ],
2576
2581
  axis=1,
2577
2582
  ).set_index(validated_Xy.index)
@@ -2728,7 +2733,7 @@ if response.status_code == 200:
2728
2733
  importance_threshold: Optional[float],
2729
2734
  max_features: Optional[int],
2730
2735
  remove_outliers_calc_metrics: Optional[bool],
2731
- auto_fe_parameters: Optional[AutoFEParameters] = None,
2736
+ auto_fe_parameters: AutoFEParameters,
2732
2737
  progress_callback: Optional[Callable[[SearchProgress], Any]] = None,
2733
2738
  search_id_callback: Optional[Callable[[str], Any]] = None,
2734
2739
  ):
upgini/metrics.py CHANGED
@@ -368,6 +368,7 @@ class EstimatorWrapper:
368
368
  self.converted_to_str = []
369
369
  self.converted_to_numeric = []
370
370
  for c in x.columns:
371
+
371
372
  if _get_unique_count(x[c]) < 2:
372
373
  self.logger.warning(f"Remove feature {c} because it has less than 2 unique values")
373
374
  if c in self.cat_features:
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: upgini
3
- Version: 1.2.81a3832.dev16
3
+ Version: 1.2.81a3853.dev1
4
4
  Summary: Intelligent data search & enrichment for Machine Learning
5
5
  Project-URL: Bug Reports, https://github.com/upgini/upgini/issues
6
6
  Project-URL: Homepage, https://upgini.com/
@@ -1,12 +1,12 @@
1
- upgini/__about__.py,sha256=Wtqf6woIkVhwgW_fiUm7UjHlXkCw_59KumFHUwoY2rU,34
1
+ upgini/__about__.py,sha256=lTpfELfQnKZizte-HRZ9wE8F3tEopfpe9cTE8ZbJymk,33
2
2
  upgini/__init__.py,sha256=LXSfTNU0HnlOkE69VCxkgIKDhWP-JFo_eBQ71OxTr5Y,261
3
3
  upgini/ads.py,sha256=nvuRxRx5MHDMgPr9SiU-fsqRdFaBv8p4_v1oqiysKpc,2714
4
4
  upgini/dataset.py,sha256=aspri7ZAgwkNNUiIgQ1GRXvw8XQii3F4RfNXSrF4wrw,35365
5
5
  upgini/errors.py,sha256=2b_Wbo0OYhLUbrZqdLIx5jBnAsiD1Mcenh-VjR4HCTw,950
6
- upgini/features_enricher.py,sha256=AGF2u2mbFL4KIdqZECiSkGuMhfluamJOveqhYnkKfQM,212614
6
+ upgini/features_enricher.py,sha256=cWbEA2lOt51x62NrLkyxu1G8I4KQo_2aOgqt3Ypyr1M,212819
7
7
  upgini/http.py,sha256=AfaJ3c8z_tK2hZFEehNybDKE0mp1tYcyAP_l0_p8bLQ,43933
8
8
  upgini/metadata.py,sha256=zt_9k0iQbWXuiRZcel4ORNPdQKt6Ou69ucZD_E1Q46o,12341
9
- upgini/metrics.py,sha256=nVt4zJKt7y1xD1ga9698QKlJQfXv93lARjUMC1E1_U4,43163
9
+ upgini/metrics.py,sha256=3cip0_L6-OFew74KsRwzxJDU6UFq05h2v7IsyHLcMRc,43164
10
10
  upgini/search_task.py,sha256=Q5HjBpLIB3OCxAD1zNv5yQ3ZNJx696WCK_-H35_y7Rs,17912
11
11
  upgini/spinner.py,sha256=4iMd-eIe_BnkqFEMIliULTbj6rNI2HkN_VJ4qYe0cUc,1118
12
12
  upgini/version_validator.py,sha256=DvbaAvuYFoJqYt0fitpsk6Xcv-H1BYDJYHUMxaKSH_Y,1509
@@ -16,7 +16,7 @@ upgini/autofe/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
16
16
  upgini/autofe/all_operators.py,sha256=rdjF5eaE4bC6Q4eu_el5Z7ekYt8DjOFermz2bePPbUc,333
17
17
  upgini/autofe/binary.py,sha256=oOEECc4nRzZN2tYaiqx8F2XHnfWpk1bVvb7ZkZJ0lO8,7709
18
18
  upgini/autofe/date.py,sha256=MM1S-6imNSzCDOhbNnmsc_bwSqUWBcS8vWAdHF8j1kY,11134
19
- upgini/autofe/feature.py,sha256=G_YgnsauIoaMgByx9JXDPiKc4nqs0pwWZUfvoIGMKxY,15305
19
+ upgini/autofe/feature.py,sha256=HYg6ngZXp9t-mzQN5KFCXJ8_EGGTg8TwTuw1W_uhzqI,15326
20
20
  upgini/autofe/groupby.py,sha256=IYmQV9uoCdRcpkeWZj_kI3ObzoNCNx3ff3h8sTL01tk,3603
21
21
  upgini/autofe/operator.py,sha256=EOffJw6vKXpEh5yymqb1RFNJPxGxmnHdFRo9dB5SCFo,4969
22
22
  upgini/autofe/unary.py,sha256=Sx11IoHRh5nwyALzjgG9GQOrVNIs8NZ1JzunAJuN66A,5731
@@ -29,7 +29,7 @@ upgini/autofe/timeseries/delta.py,sha256=h0YhmI1TlPJnjwFpN_GQxLb6r59DQuucnG5tQAX
29
29
  upgini/autofe/timeseries/lag.py,sha256=LfQtg484vuqM0mgY4Wft1swHX_Srq7OKKgZswCXoiXI,1882
30
30
  upgini/autofe/timeseries/roll.py,sha256=zADKXU-eYWQnQ5R3am1yEal8uU6Tm0jLAixwPb_aCHg,2794
31
31
  upgini/autofe/timeseries/trend.py,sha256=K1_iw2ko_LIUU8YCUgrvN3n0MkHtsi7-63-8x9er1k4,2129
32
- upgini/autofe/timeseries/volatility.py,sha256=9shUmIKjpWTHVYjj80YBsk0XheBJ9uBuLv5NW9Mchnk,7953
32
+ upgini/autofe/timeseries/volatility.py,sha256=SvZfhM_ZAWCNpTf87WjSnZsnlblARgruDlu4By4Zvhc,8078
33
33
  upgini/data_source/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
34
34
  upgini/data_source/data_source_publisher.py,sha256=4S9qwlAklD8vg9tUU_c1pHE2_glUHAh15-wr5hMwKFw,22879
35
35
  upgini/mdc/__init__.py,sha256=iHJlXQg6xRM1-ZOUtaPSJqw5SpQDszvxp4LyqviNLIQ,1027
@@ -70,7 +70,7 @@ upgini/utils/target_utils.py,sha256=LRN840dzx78-wg7ftdxAkp2c1eu8-JDvkACiRThm4HE,
70
70
  upgini/utils/track_info.py,sha256=G5Lu1xxakg2_TQjKZk4b5SvrHsATTXNVV3NbvWtT8k8,5663
71
71
  upgini/utils/ts_utils.py,sha256=26vhC0pN7vLXK6R09EEkMK3Lwb9IVPH7LRdqFIQ3kPs,1383
72
72
  upgini/utils/warning_counter.py,sha256=-GRY8EUggEBKODPSuXAkHn9KnEQwAORC0mmz_tim-PM,254
73
- upgini-1.2.81a3832.dev16.dist-info/METADATA,sha256=jE8_Lb2S-QjU7tcbGPQcoxiCCu-AcuMyHpoxW3KEvv0,49173
74
- upgini-1.2.81a3832.dev16.dist-info/WHEEL,sha256=1yFddiXMmvYK7QYTqtRNtX66WJ0Mz8PYEiEUoOUUxRY,87
75
- upgini-1.2.81a3832.dev16.dist-info/licenses/LICENSE,sha256=5RRzgvdJUu3BUDfv4bzVU6FqKgwHlIay63pPCSmSgzw,1514
76
- upgini-1.2.81a3832.dev16.dist-info/RECORD,,
73
+ upgini-1.2.81a3853.dev1.dist-info/METADATA,sha256=0X9NO-JymZWgNv0-YsNoj_cVfSxsIZjI_IurJLrKR20,49172
74
+ upgini-1.2.81a3853.dev1.dist-info/WHEEL,sha256=zEMcRr9Kr03x1ozGwg5v9NQBKn3kndp6LSoSlVg-jhU,87
75
+ upgini-1.2.81a3853.dev1.dist-info/licenses/LICENSE,sha256=5RRzgvdJUu3BUDfv4bzVU6FqKgwHlIay63pPCSmSgzw,1514
76
+ upgini-1.2.81a3853.dev1.dist-info/RECORD,,
@@ -1,4 +1,4 @@
1
1
  Wheel-Version: 1.0
2
- Generator: hatchling 1.25.0
2
+ Generator: hatchling 1.24.2
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any