upgini 1.2.38a3769.dev2__py3-none-any.whl → 1.2.38a3769.dev3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of upgini might be problematic. Click here for more details.
- upgini/__about__.py +1 -1
- upgini/utils/target_utils.py +3 -3
- {upgini-1.2.38a3769.dev2.dist-info → upgini-1.2.38a3769.dev3.dist-info}/METADATA +1 -1
- {upgini-1.2.38a3769.dev2.dist-info → upgini-1.2.38a3769.dev3.dist-info}/RECORD +6 -6
- {upgini-1.2.38a3769.dev2.dist-info → upgini-1.2.38a3769.dev3.dist-info}/WHEEL +0 -0
- {upgini-1.2.38a3769.dev2.dist-info → upgini-1.2.38a3769.dev3.dist-info}/licenses/LICENSE +0 -0
upgini/__about__.py
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
__version__ = "1.2.38a3769.dev2"
|
|
1
|
+
__version__ = "1.2.38a3769.dev3"
|
upgini/utils/target_utils.py
CHANGED
|
@@ -306,7 +306,7 @@ def balance_undersample_time_series(
|
|
|
306
306
|
id_counts.index = [ensure_tuple(i) for i in id_counts.index]
|
|
307
307
|
id_counts = id_counts.sort_index(key=lambda x: [ids_sort[y] for y in x], ascending=False).cumsum()
|
|
308
308
|
id_counts = id_counts[id_counts <= sample_size]
|
|
309
|
-
min_different_ids = int(len(df[id_columns].drop_duplicates()) * min_different_ids_ratio)
|
|
309
|
+
min_different_ids = max(int(len(df[id_columns].drop_duplicates()) * min_different_ids_ratio), 1)
|
|
310
310
|
|
|
311
311
|
def id_mask(sample_index: pd.Index) -> pd.Index:
|
|
312
312
|
if isinstance(sample_index, pd.MultiIndex):
|
|
@@ -317,10 +317,10 @@ def balance_undersample_time_series(
|
|
|
317
317
|
if len(id_counts) < min_different_ids:
|
|
318
318
|
if logger is not None:
|
|
319
319
|
logger.info(
|
|
320
|
-
f"Different ids count {len(id_counts)} is less than min different ids {min_different_ids}, sampling time window"
|
|
320
|
+
f"Different ids count {len(id_counts)} for sample size {sample_size} is less than min different ids {min_different_ids}, sampling time window"
|
|
321
321
|
)
|
|
322
322
|
date_counts = df.groupby(id_columns)[date_column].nunique().sort_values(ascending=False)
|
|
323
|
-
ids_to_sample = date_counts.index[:min_different_ids]
|
|
323
|
+
ids_to_sample = date_counts.index[:min_different_ids] if len(id_counts) > 0 else date_counts.index
|
|
324
324
|
mask = id_mask(ids_to_sample)
|
|
325
325
|
df = df[mask]
|
|
326
326
|
sample_date_counts = df[date_column].value_counts().sort_index(ascending=False).cumsum()
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.3
|
|
2
2
|
Name: upgini
|
|
3
|
-
Version: 1.2.38a3769.dev2
|
|
3
|
+
Version: 1.2.38a3769.dev3
|
|
4
4
|
Summary: Intelligent data search & enrichment for Machine Learning
|
|
5
5
|
Project-URL: Bug Reports, https://github.com/upgini/upgini/issues
|
|
6
6
|
Project-URL: Homepage, https://upgini.com/
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
upgini/__about__.py,sha256=
|
|
1
|
+
upgini/__about__.py,sha256=sQ7NNr0lfG3UfxCnX2sMNRntUVR0zW-NHhIgizLV7ls,33
|
|
2
2
|
upgini/__init__.py,sha256=LXSfTNU0HnlOkE69VCxkgIKDhWP-JFo_eBQ71OxTr5Y,261
|
|
3
3
|
upgini/ads.py,sha256=nvuRxRx5MHDMgPr9SiU-fsqRdFaBv8p4_v1oqiysKpc,2714
|
|
4
4
|
upgini/dataset.py,sha256=zYPSQ73ch6k5EWxZlh1KrjL0gMkmAwl7Nkgrz6zxywY,33161
|
|
@@ -56,10 +56,10 @@ upgini/utils/phone_utils.py,sha256=IrbztLuOJBiePqqxllfABWfYlfAjYevPhXKipl95wUI,1
|
|
|
56
56
|
upgini/utils/postal_code_utils.py,sha256=5M0sUqH2DAr33kARWCTXR-ACyzWbjDq_-0mmEml6ZcU,1716
|
|
57
57
|
upgini/utils/progress_bar.py,sha256=N-Sfdah2Hg8lXP_fV9EfUTXz_PyRt4lo9fAHoUDOoLc,1550
|
|
58
58
|
upgini/utils/sklearn_ext.py,sha256=13jQS_k7v0aUtudXV6nGUEWjttPQzAW9AFYL5wgEz9k,44511
|
|
59
|
-
upgini/utils/target_utils.py,sha256=
|
|
59
|
+
upgini/utils/target_utils.py,sha256=i_EsluRZG3LKrqv9NmhvEha9Uwp8JQjRUmokeo240Is,14283
|
|
60
60
|
upgini/utils/track_info.py,sha256=G5Lu1xxakg2_TQjKZk4b5SvrHsATTXNVV3NbvWtT8k8,5663
|
|
61
61
|
upgini/utils/warning_counter.py,sha256=-GRY8EUggEBKODPSuXAkHn9KnEQwAORC0mmz_tim-PM,254
|
|
62
|
-
upgini-1.2.38a3769.
|
|
63
|
-
upgini-1.2.38a3769.
|
|
64
|
-
upgini-1.2.38a3769.
|
|
65
|
-
upgini-1.2.38a3769.
|
|
62
|
+
upgini-1.2.38a3769.dev3.dist-info/METADATA,sha256=AeaVPfRIc-RCuzozwXSgurTpHXE21yR_tpsBjCra3KA,48604
|
|
63
|
+
upgini-1.2.38a3769.dev3.dist-info/WHEEL,sha256=zEMcRr9Kr03x1ozGwg5v9NQBKn3kndp6LSoSlVg-jhU,87
|
|
64
|
+
upgini-1.2.38a3769.dev3.dist-info/licenses/LICENSE,sha256=5RRzgvdJUu3BUDfv4bzVU6FqKgwHlIay63pPCSmSgzw,1514
|
|
65
|
+
upgini-1.2.38a3769.dev3.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|