upgini 1.2.38a3769.dev2__py3-none-any.whl → 1.2.38a3769.dev4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of upgini might be problematic. Click here for more details.
- upgini/__about__.py +1 -1
- upgini/features_enricher.py +5 -1
- upgini/utils/target_utils.py +5 -3
- {upgini-1.2.38a3769.dev2.dist-info → upgini-1.2.38a3769.dev4.dist-info}/METADATA +1 -1
- {upgini-1.2.38a3769.dev2.dist-info → upgini-1.2.38a3769.dev4.dist-info}/RECORD +7 -7
- {upgini-1.2.38a3769.dev2.dist-info → upgini-1.2.38a3769.dev4.dist-info}/WHEEL +0 -0
- {upgini-1.2.38a3769.dev2.dist-info → upgini-1.2.38a3769.dev4.dist-info}/licenses/LICENSE +0 -0
upgini/__about__.py
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
__version__ = "1.2.38a3769.dev2"
|
|
1
|
+
__version__ = "1.2.38a3769.dev4"
|
upgini/features_enricher.py
CHANGED
|
@@ -932,6 +932,7 @@ class FeaturesEnricher(TransformerMixin):
|
|
|
932
932
|
cat_features, search_keys_for_metrics = self._get_client_cat_features(
|
|
933
933
|
estimator, validated_X, self.search_keys
|
|
934
934
|
)
|
|
935
|
+
search_keys_for_metrics.extend([c for c in self.id_columns if c not in search_keys_for_metrics])
|
|
935
936
|
|
|
936
937
|
prepared_data = self._prepare_data_for_metrics(
|
|
937
938
|
trace_id=trace_id,
|
|
@@ -2656,7 +2657,10 @@ class FeaturesEnricher(TransformerMixin):
|
|
|
2656
2657
|
self.__adjust_cv(df)
|
|
2657
2658
|
|
|
2658
2659
|
if self.id_columns is not None and self.cv is not None and self.cv.is_time_series():
|
|
2659
|
-
|
|
2660
|
+
reverse_renaming = {v: k for k, v in self.fit_columns_renaming.items()}
|
|
2661
|
+
id_columns = [reverse_renaming[col] for col in self.id_columns if col in reverse_renaming]
|
|
2662
|
+
self.fit_search_keys.update({col: SearchKey.CUSTOM_KEY for col in id_columns})
|
|
2663
|
+
self.runtime_parameters.properties["id_columns"] = ",".join(id_columns)
|
|
2660
2664
|
|
|
2661
2665
|
df, fintech_warnings = remove_fintech_duplicates(
|
|
2662
2666
|
df, self.fit_search_keys, date_format=self.date_format, logger=self.logger, bundle=self.bundle
|
upgini/utils/target_utils.py
CHANGED
|
@@ -295,6 +295,8 @@ def balance_undersample_time_series(
|
|
|
295
295
|
|
|
296
296
|
random_state = np.random.RandomState(random_state)
|
|
297
297
|
|
|
298
|
+
if not id_columns:
|
|
299
|
+
id_columns = [date_column]
|
|
298
300
|
ids_sort = df.groupby(id_columns)[date_column].aggregate(["max", "count"]).T.to_dict()
|
|
299
301
|
ids_sort = {
|
|
300
302
|
ensure_tuple(k): (
|
|
@@ -306,7 +308,7 @@ def balance_undersample_time_series(
|
|
|
306
308
|
id_counts.index = [ensure_tuple(i) for i in id_counts.index]
|
|
307
309
|
id_counts = id_counts.sort_index(key=lambda x: [ids_sort[y] for y in x], ascending=False).cumsum()
|
|
308
310
|
id_counts = id_counts[id_counts <= sample_size]
|
|
309
|
-
min_different_ids = int(len(df[id_columns].drop_duplicates()) * min_different_ids_ratio)
|
|
311
|
+
min_different_ids = max(int(len(df[id_columns].drop_duplicates()) * min_different_ids_ratio), 1)
|
|
310
312
|
|
|
311
313
|
def id_mask(sample_index: pd.Index) -> pd.Index:
|
|
312
314
|
if isinstance(sample_index, pd.MultiIndex):
|
|
@@ -317,10 +319,10 @@ def balance_undersample_time_series(
|
|
|
317
319
|
if len(id_counts) < min_different_ids:
|
|
318
320
|
if logger is not None:
|
|
319
321
|
logger.info(
|
|
320
|
-
f"Different ids count {len(id_counts)} is less than min different ids {min_different_ids}, sampling time window"
|
|
322
|
+
f"Different ids count {len(id_counts)} for sample size {sample_size} is less than min different ids {min_different_ids}, sampling time window"
|
|
321
323
|
)
|
|
322
324
|
date_counts = df.groupby(id_columns)[date_column].nunique().sort_values(ascending=False)
|
|
323
|
-
ids_to_sample = date_counts.index[:min_different_ids]
|
|
325
|
+
ids_to_sample = date_counts.index[:min_different_ids] if len(id_counts) > 0 else date_counts.index
|
|
324
326
|
mask = id_mask(ids_to_sample)
|
|
325
327
|
df = df[mask]
|
|
326
328
|
sample_date_counts = df[date_column].value_counts().sort_index(ascending=False).cumsum()
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.3
|
|
2
2
|
Name: upgini
|
|
3
|
-
Version: 1.2.38a3769.dev2
|
|
3
|
+
Version: 1.2.38a3769.dev4
|
|
4
4
|
Summary: Intelligent data search & enrichment for Machine Learning
|
|
5
5
|
Project-URL: Bug Reports, https://github.com/upgini/upgini/issues
|
|
6
6
|
Project-URL: Homepage, https://upgini.com/
|
|
@@ -1,9 +1,9 @@
|
|
|
1
|
-
upgini/__about__.py,sha256=
|
|
1
|
+
upgini/__about__.py,sha256=LD7zYM0Dg1LG9nVQ26_22PqIAzyHE6_ydgUMfXgIB6o,33
|
|
2
2
|
upgini/__init__.py,sha256=LXSfTNU0HnlOkE69VCxkgIKDhWP-JFo_eBQ71OxTr5Y,261
|
|
3
3
|
upgini/ads.py,sha256=nvuRxRx5MHDMgPr9SiU-fsqRdFaBv8p4_v1oqiysKpc,2714
|
|
4
4
|
upgini/dataset.py,sha256=zYPSQ73ch6k5EWxZlh1KrjL0gMkmAwl7Nkgrz6zxywY,33161
|
|
5
5
|
upgini/errors.py,sha256=2b_Wbo0OYhLUbrZqdLIx5jBnAsiD1Mcenh-VjR4HCTw,950
|
|
6
|
-
upgini/features_enricher.py,sha256=
|
|
6
|
+
upgini/features_enricher.py,sha256=ia40Oq2WZMVCfsTPmsjtMgsz0j8zOdoqlQxyW_-5NPA,195924
|
|
7
7
|
upgini/http.py,sha256=plZGTGoi1h2edd8Cnjt4eYB8t4NbBGnZz7DtPTByiNc,42885
|
|
8
8
|
upgini/lazy_import.py,sha256=74gQ8JuA48BGRLxAo7lNHNKY2D2emMxrUxKGdxVGhuY,1012
|
|
9
9
|
upgini/metadata.py,sha256=-ibqiNjD7dTagqg53FoEJNEqvAYbwgfyn9PGTRQ_YKU,12054
|
|
@@ -56,10 +56,10 @@ upgini/utils/phone_utils.py,sha256=IrbztLuOJBiePqqxllfABWfYlfAjYevPhXKipl95wUI,1
|
|
|
56
56
|
upgini/utils/postal_code_utils.py,sha256=5M0sUqH2DAr33kARWCTXR-ACyzWbjDq_-0mmEml6ZcU,1716
|
|
57
57
|
upgini/utils/progress_bar.py,sha256=N-Sfdah2Hg8lXP_fV9EfUTXz_PyRt4lo9fAHoUDOoLc,1550
|
|
58
58
|
upgini/utils/sklearn_ext.py,sha256=13jQS_k7v0aUtudXV6nGUEWjttPQzAW9AFYL5wgEz9k,44511
|
|
59
|
-
upgini/utils/target_utils.py,sha256=
|
|
59
|
+
upgini/utils/target_utils.py,sha256=RlpKGss9kMibVSlA8iZuO_qxmyeplqzn7X8g6hiGGGs,14341
|
|
60
60
|
upgini/utils/track_info.py,sha256=G5Lu1xxakg2_TQjKZk4b5SvrHsATTXNVV3NbvWtT8k8,5663
|
|
61
61
|
upgini/utils/warning_counter.py,sha256=-GRY8EUggEBKODPSuXAkHn9KnEQwAORC0mmz_tim-PM,254
|
|
62
|
-
upgini-1.2.38a3769.
|
|
63
|
-
upgini-1.2.38a3769.dev2.dist-info/WHEEL,sha256=zEMcRr9Kr03x1ozGwg5v9NQBKn3kndp6LSoSlVg-jhU,87
|
|
64
|
-
upgini-1.2.38a3769.dev2.dist-info/licenses/LICENSE,sha256=5RRzgvdJUu3BUDfv4bzVU6FqKgwHlIay63pPCSmSgzw,1514
|
|
65
|
-
upgini-1.2.38a3769.dev2.dist-info/RECORD,,
|
|
62
|
+
upgini-1.2.38a3769.dev4.dist-info/METADATA,sha256=eJSDG0irBysdfvDWt44W70ePfymnLAdMnxbUam-IcBQ,48604
|
|
63
|
+
upgini-1.2.38a3769.dev4.dist-info/WHEEL,sha256=zEMcRr9Kr03x1ozGwg5v9NQBKn3kndp6LSoSlVg-jhU,87
|
|
64
|
+
upgini-1.2.38a3769.dev4.dist-info/licenses/LICENSE,sha256=5RRzgvdJUu3BUDfv4bzVU6FqKgwHlIay63pPCSmSgzw,1514
|
|
65
|
+
upgini-1.2.38a3769.dev4.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|