upgini 1.2.81a3832.dev16__py3-none-any.whl → 1.2.81a3853.dev1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- upgini/__about__.py +1 -1
- upgini/autofe/feature.py +1 -1
- upgini/autofe/timeseries/volatility.py +6 -4
- upgini/features_enricher.py +25 -20
- upgini/metrics.py +1 -0
- {upgini-1.2.81a3832.dev16.dist-info → upgini-1.2.81a3853.dev1.dist-info}/METADATA +1 -1
- {upgini-1.2.81a3832.dev16.dist-info → upgini-1.2.81a3853.dev1.dist-info}/RECORD +9 -9
- {upgini-1.2.81a3832.dev16.dist-info → upgini-1.2.81a3853.dev1.dist-info}/WHEEL +1 -1
- {upgini-1.2.81a3832.dev16.dist-info → upgini-1.2.81a3853.dev1.dist-info}/licenses/LICENSE +0 -0
upgini/__about__.py
CHANGED
@@ -1 +1 @@
|
|
1
|
-
__version__ = "1.2.81a3832.dev16"
|
1
|
+
__version__ = "1.2.81a3853.dev1"
|
upgini/autofe/feature.py
CHANGED
@@ -161,7 +161,7 @@ class Feature:
|
|
161
161
|
if self.cached_display_name is not None and cache:
|
162
162
|
return self.cached_display_name
|
163
163
|
|
164
|
-
should_stack_op = not isinstance(self.children[0], Column) if self.op.is_unary else False
|
164
|
+
should_stack_op = not isinstance(self.children[0], Column) if self.op.is_unary or self.op.is_vector else False
|
165
165
|
components = []
|
166
166
|
|
167
167
|
if self.alias:
|
@@ -60,12 +60,14 @@ class EWMAVolatility(VolatilityBase, ParametrizedOperator):
|
|
60
60
|
return res
|
61
61
|
|
62
62
|
def _aggregate(self, ts: pd.DataFrame) -> pd.DataFrame:
|
63
|
-
return ts.apply(self._ewma_vol)
|
63
|
+
return ts.apply(self._ewma_vol).iloc[:, [-1]]
|
64
64
|
|
65
65
|
def _ewma_vol(self, x):
|
66
|
-
|
67
|
-
|
68
|
-
|
66
|
+
return_series = isinstance(x, pd.Series)
|
67
|
+
x = pd.DataFrame(x)
|
68
|
+
returns = self._get_returns(x.iloc[:, -1], f"{self.step_size}{self.step_unit}")
|
69
|
+
x.iloc[:, -1] = returns.ewm(span=self.window_size).std()
|
70
|
+
return x.iloc[:, -1] if return_series else x
|
69
71
|
|
70
72
|
|
71
73
|
class RollingVolBase(VolatilityBase):
|
upgini/features_enricher.py
CHANGED
@@ -453,6 +453,7 @@ class FeaturesEnricher(TransformerMixin):
|
|
453
453
|
"""
|
454
454
|
trace_id = str(uuid.uuid4())
|
455
455
|
start_time = time.time()
|
456
|
+
auto_fe_parameters = AutoFEParameters() if auto_fe_parameters is None else auto_fe_parameters
|
456
457
|
search_progress = SearchProgress(0.0, ProgressStage.START_FIT)
|
457
458
|
if progress_callback is not None:
|
458
459
|
progress_callback(search_progress)
|
@@ -607,6 +608,7 @@ class FeaturesEnricher(TransformerMixin):
|
|
607
608
|
"""
|
608
609
|
|
609
610
|
self.warning_counter.reset()
|
611
|
+
auto_fe_parameters = AutoFEParameters() if auto_fe_parameters is None else auto_fe_parameters
|
610
612
|
trace_id = str(uuid.uuid4())
|
611
613
|
start_time = time.time()
|
612
614
|
with MDC(trace_id=trace_id):
|
@@ -2379,6 +2381,25 @@ if response.status_code == 200:
|
|
2379
2381
|
df[columns_for_system_record_id], index=False
|
2380
2382
|
).astype("float64")
|
2381
2383
|
|
2384
|
+
features_not_to_pass = []
|
2385
|
+
if add_fit_system_record_id:
|
2386
|
+
df = self.__add_fit_system_record_id(
|
2387
|
+
df,
|
2388
|
+
search_keys,
|
2389
|
+
SYSTEM_RECORD_ID,
|
2390
|
+
TARGET,
|
2391
|
+
columns_renaming,
|
2392
|
+
silent=True,
|
2393
|
+
)
|
2394
|
+
df = df.rename(columns={SYSTEM_RECORD_ID: SORT_ID})
|
2395
|
+
features_not_to_pass.append(SORT_ID)
|
2396
|
+
|
2397
|
+
system_columns_with_original_index = [ENTITY_SYSTEM_RECORD_ID] + generated_features
|
2398
|
+
if add_fit_system_record_id:
|
2399
|
+
system_columns_with_original_index.append(SORT_ID)
|
2400
|
+
|
2401
|
+
df_before_explode = df[system_columns_with_original_index].copy()
|
2402
|
+
|
2382
2403
|
# Explode multiple search keys
|
2383
2404
|
df, unnest_search_keys = self._explode_multiple_search_keys(df, search_keys, columns_renaming)
|
2384
2405
|
|
@@ -2426,25 +2447,13 @@ if response.status_code == 200:
|
|
2426
2447
|
meaning_types.update({col: FileColumnMeaningType.FEATURE for col in features_for_transform})
|
2427
2448
|
meaning_types.update({col: key.value for col, key in search_keys.items()})
|
2428
2449
|
|
2429
|
-
features_not_to_pass = [
|
2450
|
+
features_not_to_pass.extend([
|
2430
2451
|
c
|
2431
2452
|
for c in df.columns
|
2432
2453
|
if c not in search_keys.keys()
|
2433
2454
|
and c not in features_for_transform
|
2434
2455
|
and c not in [ENTITY_SYSTEM_RECORD_ID, SEARCH_KEY_UNNEST]
|
2435
|
-
]
|
2436
|
-
|
2437
|
-
if add_fit_system_record_id:
|
2438
|
-
df = self.__add_fit_system_record_id(
|
2439
|
-
df,
|
2440
|
-
search_keys,
|
2441
|
-
SYSTEM_RECORD_ID,
|
2442
|
-
TARGET,
|
2443
|
-
columns_renaming,
|
2444
|
-
silent=True,
|
2445
|
-
)
|
2446
|
-
df = df.rename(columns={SYSTEM_RECORD_ID: SORT_ID})
|
2447
|
-
features_not_to_pass.append(SORT_ID)
|
2456
|
+
])
|
2448
2457
|
|
2449
2458
|
if DateTimeSearchKeyConverter.DATETIME_COL in df.columns:
|
2450
2459
|
df = df.drop(columns=DateTimeSearchKeyConverter.DATETIME_COL)
|
@@ -2460,10 +2469,6 @@ if response.status_code == 200:
|
|
2460
2469
|
meaning_types[SEARCH_KEY_UNNEST] = FileColumnMeaningType.UNNEST_KEY
|
2461
2470
|
|
2462
2471
|
df = df.reset_index(drop=True)
|
2463
|
-
system_columns_with_original_index = [SYSTEM_RECORD_ID, ENTITY_SYSTEM_RECORD_ID] + generated_features
|
2464
|
-
if add_fit_system_record_id:
|
2465
|
-
system_columns_with_original_index.append(SORT_ID)
|
2466
|
-
df_with_original_index = df[system_columns_with_original_index].copy()
|
2467
2472
|
|
2468
2473
|
combined_search_keys = combine_search_keys(search_keys.keys())
|
2469
2474
|
|
@@ -2571,7 +2576,7 @@ if response.status_code == 200:
|
|
2571
2576
|
combined_df = pd.concat(
|
2572
2577
|
[
|
2573
2578
|
validated_Xy.reset_index(drop=True),
|
2574
|
-
|
2579
|
+
df_before_explode.reset_index(drop=True),
|
2575
2580
|
],
|
2576
2581
|
axis=1,
|
2577
2582
|
).set_index(validated_Xy.index)
|
@@ -2728,7 +2733,7 @@ if response.status_code == 200:
|
|
2728
2733
|
importance_threshold: Optional[float],
|
2729
2734
|
max_features: Optional[int],
|
2730
2735
|
remove_outliers_calc_metrics: Optional[bool],
|
2731
|
-
auto_fe_parameters:
|
2736
|
+
auto_fe_parameters: AutoFEParameters,
|
2732
2737
|
progress_callback: Optional[Callable[[SearchProgress], Any]] = None,
|
2733
2738
|
search_id_callback: Optional[Callable[[str], Any]] = None,
|
2734
2739
|
):
|
upgini/metrics.py
CHANGED
@@ -368,6 +368,7 @@ class EstimatorWrapper:
|
|
368
368
|
self.converted_to_str = []
|
369
369
|
self.converted_to_numeric = []
|
370
370
|
for c in x.columns:
|
371
|
+
|
371
372
|
if _get_unique_count(x[c]) < 2:
|
372
373
|
self.logger.warning(f"Remove feature {c} because it has less than 2 unique values")
|
373
374
|
if c in self.cat_features:
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.3
|
2
2
|
Name: upgini
|
3
|
-
Version: 1.2.81a3832.dev16
|
3
|
+
Version: 1.2.81a3853.dev1
|
4
4
|
Summary: Intelligent data search & enrichment for Machine Learning
|
5
5
|
Project-URL: Bug Reports, https://github.com/upgini/upgini/issues
|
6
6
|
Project-URL: Homepage, https://upgini.com/
|
@@ -1,12 +1,12 @@
|
|
1
|
-
upgini/__about__.py,sha256=
|
1
|
+
upgini/__about__.py,sha256=lTpfELfQnKZizte-HRZ9wE8F3tEopfpe9cTE8ZbJymk,33
|
2
2
|
upgini/__init__.py,sha256=LXSfTNU0HnlOkE69VCxkgIKDhWP-JFo_eBQ71OxTr5Y,261
|
3
3
|
upgini/ads.py,sha256=nvuRxRx5MHDMgPr9SiU-fsqRdFaBv8p4_v1oqiysKpc,2714
|
4
4
|
upgini/dataset.py,sha256=aspri7ZAgwkNNUiIgQ1GRXvw8XQii3F4RfNXSrF4wrw,35365
|
5
5
|
upgini/errors.py,sha256=2b_Wbo0OYhLUbrZqdLIx5jBnAsiD1Mcenh-VjR4HCTw,950
|
6
|
-
upgini/features_enricher.py,sha256=
|
6
|
+
upgini/features_enricher.py,sha256=cWbEA2lOt51x62NrLkyxu1G8I4KQo_2aOgqt3Ypyr1M,212819
|
7
7
|
upgini/http.py,sha256=AfaJ3c8z_tK2hZFEehNybDKE0mp1tYcyAP_l0_p8bLQ,43933
|
8
8
|
upgini/metadata.py,sha256=zt_9k0iQbWXuiRZcel4ORNPdQKt6Ou69ucZD_E1Q46o,12341
|
9
|
-
upgini/metrics.py,sha256=
|
9
|
+
upgini/metrics.py,sha256=3cip0_L6-OFew74KsRwzxJDU6UFq05h2v7IsyHLcMRc,43164
|
10
10
|
upgini/search_task.py,sha256=Q5HjBpLIB3OCxAD1zNv5yQ3ZNJx696WCK_-H35_y7Rs,17912
|
11
11
|
upgini/spinner.py,sha256=4iMd-eIe_BnkqFEMIliULTbj6rNI2HkN_VJ4qYe0cUc,1118
|
12
12
|
upgini/version_validator.py,sha256=DvbaAvuYFoJqYt0fitpsk6Xcv-H1BYDJYHUMxaKSH_Y,1509
|
@@ -16,7 +16,7 @@ upgini/autofe/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
16
16
|
upgini/autofe/all_operators.py,sha256=rdjF5eaE4bC6Q4eu_el5Z7ekYt8DjOFermz2bePPbUc,333
|
17
17
|
upgini/autofe/binary.py,sha256=oOEECc4nRzZN2tYaiqx8F2XHnfWpk1bVvb7ZkZJ0lO8,7709
|
18
18
|
upgini/autofe/date.py,sha256=MM1S-6imNSzCDOhbNnmsc_bwSqUWBcS8vWAdHF8j1kY,11134
|
19
|
-
upgini/autofe/feature.py,sha256=
|
19
|
+
upgini/autofe/feature.py,sha256=HYg6ngZXp9t-mzQN5KFCXJ8_EGGTg8TwTuw1W_uhzqI,15326
|
20
20
|
upgini/autofe/groupby.py,sha256=IYmQV9uoCdRcpkeWZj_kI3ObzoNCNx3ff3h8sTL01tk,3603
|
21
21
|
upgini/autofe/operator.py,sha256=EOffJw6vKXpEh5yymqb1RFNJPxGxmnHdFRo9dB5SCFo,4969
|
22
22
|
upgini/autofe/unary.py,sha256=Sx11IoHRh5nwyALzjgG9GQOrVNIs8NZ1JzunAJuN66A,5731
|
@@ -29,7 +29,7 @@ upgini/autofe/timeseries/delta.py,sha256=h0YhmI1TlPJnjwFpN_GQxLb6r59DQuucnG5tQAX
|
|
29
29
|
upgini/autofe/timeseries/lag.py,sha256=LfQtg484vuqM0mgY4Wft1swHX_Srq7OKKgZswCXoiXI,1882
|
30
30
|
upgini/autofe/timeseries/roll.py,sha256=zADKXU-eYWQnQ5R3am1yEal8uU6Tm0jLAixwPb_aCHg,2794
|
31
31
|
upgini/autofe/timeseries/trend.py,sha256=K1_iw2ko_LIUU8YCUgrvN3n0MkHtsi7-63-8x9er1k4,2129
|
32
|
-
upgini/autofe/timeseries/volatility.py,sha256=
|
32
|
+
upgini/autofe/timeseries/volatility.py,sha256=SvZfhM_ZAWCNpTf87WjSnZsnlblARgruDlu4By4Zvhc,8078
|
33
33
|
upgini/data_source/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
34
34
|
upgini/data_source/data_source_publisher.py,sha256=4S9qwlAklD8vg9tUU_c1pHE2_glUHAh15-wr5hMwKFw,22879
|
35
35
|
upgini/mdc/__init__.py,sha256=iHJlXQg6xRM1-ZOUtaPSJqw5SpQDszvxp4LyqviNLIQ,1027
|
@@ -70,7 +70,7 @@ upgini/utils/target_utils.py,sha256=LRN840dzx78-wg7ftdxAkp2c1eu8-JDvkACiRThm4HE,
|
|
70
70
|
upgini/utils/track_info.py,sha256=G5Lu1xxakg2_TQjKZk4b5SvrHsATTXNVV3NbvWtT8k8,5663
|
71
71
|
upgini/utils/ts_utils.py,sha256=26vhC0pN7vLXK6R09EEkMK3Lwb9IVPH7LRdqFIQ3kPs,1383
|
72
72
|
upgini/utils/warning_counter.py,sha256=-GRY8EUggEBKODPSuXAkHn9KnEQwAORC0mmz_tim-PM,254
|
73
|
-
upgini-1.2.
|
74
|
-
upgini-1.2.
|
75
|
-
upgini-1.2.
|
76
|
-
upgini-1.2.
|
73
|
+
upgini-1.2.81a3853.dev1.dist-info/METADATA,sha256=0X9NO-JymZWgNv0-YsNoj_cVfSxsIZjI_IurJLrKR20,49172
|
74
|
+
upgini-1.2.81a3853.dev1.dist-info/WHEEL,sha256=zEMcRr9Kr03x1ozGwg5v9NQBKn3kndp6LSoSlVg-jhU,87
|
75
|
+
upgini-1.2.81a3853.dev1.dist-info/licenses/LICENSE,sha256=5RRzgvdJUu3BUDfv4bzVU6FqKgwHlIay63pPCSmSgzw,1514
|
76
|
+
upgini-1.2.81a3853.dev1.dist-info/RECORD,,
|
File without changes
|