upgini 1.2.38a3769.dev1__py3-none-any.whl → 1.2.38a3769.dev2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of upgini might be problematic. Click here for more details.
- upgini/__about__.py +1 -1
- upgini/dataset.py +1 -0
- upgini/features_enricher.py +3 -2
- upgini/utils/target_utils.py +11 -1
- {upgini-1.2.38a3769.dev1.dist-info → upgini-1.2.38a3769.dev2.dist-info}/METADATA +1 -1
- {upgini-1.2.38a3769.dev1.dist-info → upgini-1.2.38a3769.dev2.dist-info}/RECORD +8 -8
- {upgini-1.2.38a3769.dev1.dist-info → upgini-1.2.38a3769.dev2.dist-info}/WHEEL +0 -0
- {upgini-1.2.38a3769.dev1.dist-info → upgini-1.2.38a3769.dev2.dist-info}/licenses/LICENSE +0 -0
upgini/__about__.py
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
__version__ = "1.2.38a3769.dev1"
|
|
1
|
+
__version__ = "1.2.38a3769.dev2"
|
upgini/dataset.py
CHANGED
upgini/features_enricher.py
CHANGED
|
@@ -281,8 +281,6 @@ class FeaturesEnricher(TransformerMixin):
|
|
|
281
281
|
|
|
282
282
|
self.search_keys = search_keys or {}
|
|
283
283
|
self.id_columns = id_columns
|
|
284
|
-
if id_columns is not None:
|
|
285
|
-
self.search_keys.update({col: SearchKey.CUSTOM_KEY for col in id_columns})
|
|
286
284
|
self.country_code = country_code
|
|
287
285
|
self.__validate_search_keys(search_keys, search_id)
|
|
288
286
|
|
|
@@ -2657,6 +2655,9 @@ class FeaturesEnricher(TransformerMixin):
|
|
|
2657
2655
|
|
|
2658
2656
|
self.__adjust_cv(df)
|
|
2659
2657
|
|
|
2658
|
+
if self.id_columns is not None and self.cv is not None and self.cv.is_time_series():
|
|
2659
|
+
self.search_keys.update({col: SearchKey.CUSTOM_KEY for col in self.id_columns})
|
|
2660
|
+
|
|
2660
2661
|
df, fintech_warnings = remove_fintech_duplicates(
|
|
2661
2662
|
df, self.fit_search_keys, date_format=self.date_format, logger=self.logger, bundle=self.bundle
|
|
2662
2663
|
)
|
upgini/utils/target_utils.py
CHANGED
|
@@ -246,6 +246,7 @@ def balance_undersample_forced(
|
|
|
246
246
|
id_columns=id_columns,
|
|
247
247
|
date_column=date_column,
|
|
248
248
|
sample_size=sample_size,
|
|
249
|
+
random_state=random_state,
|
|
249
250
|
logger=logger,
|
|
250
251
|
)
|
|
251
252
|
elif task_type in [ModelTaskType.MULTICLASS, ModelTaskType.REGRESSION]:
|
|
@@ -284,14 +285,23 @@ def balance_undersample_time_series(
|
|
|
284
285
|
id_columns: List[str],
|
|
285
286
|
date_column: str,
|
|
286
287
|
sample_size: int,
|
|
288
|
+
random_state: int = 42,
|
|
287
289
|
min_different_ids_ratio: float = TS_MIN_DIFFERENT_IDS_RATIO,
|
|
290
|
+
prefer_recent_dates: bool = True,
|
|
288
291
|
logger: Optional[logging.Logger] = None,
|
|
289
292
|
):
|
|
290
293
|
def ensure_tuple(x):
|
|
291
294
|
return tuple([x]) if not isinstance(x, tuple) else x
|
|
292
295
|
|
|
296
|
+
random_state = np.random.RandomState(random_state)
|
|
297
|
+
|
|
293
298
|
ids_sort = df.groupby(id_columns)[date_column].aggregate(["max", "count"]).T.to_dict()
|
|
294
|
-
ids_sort = {
|
|
299
|
+
ids_sort = {
|
|
300
|
+
ensure_tuple(k): (
|
|
301
|
+
(v["max"], v["count"], random_state.rand()) if prefer_recent_dates else (v["count"], random_state.rand())
|
|
302
|
+
)
|
|
303
|
+
for k, v in ids_sort.items()
|
|
304
|
+
}
|
|
295
305
|
id_counts = df[id_columns].value_counts()
|
|
296
306
|
id_counts.index = [ensure_tuple(i) for i in id_counts.index]
|
|
297
307
|
id_counts = id_counts.sort_index(key=lambda x: [ids_sort[y] for y in x], ascending=False).cumsum()
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.3
|
|
2
2
|
Name: upgini
|
|
3
|
-
Version: 1.2.38a3769.dev1
|
|
3
|
+
Version: 1.2.38a3769.dev2
|
|
4
4
|
Summary: Intelligent data search & enrichment for Machine Learning
|
|
5
5
|
Project-URL: Bug Reports, https://github.com/upgini/upgini/issues
|
|
6
6
|
Project-URL: Homepage, https://upgini.com/
|
|
@@ -1,9 +1,9 @@
|
|
|
1
|
-
upgini/__about__.py,sha256=
|
|
1
|
+
upgini/__about__.py,sha256=Pi7t905kMODW13sjgsQdvH0UCQF5An8AOCl5peng2eA,33
|
|
2
2
|
upgini/__init__.py,sha256=LXSfTNU0HnlOkE69VCxkgIKDhWP-JFo_eBQ71OxTr5Y,261
|
|
3
3
|
upgini/ads.py,sha256=nvuRxRx5MHDMgPr9SiU-fsqRdFaBv8p4_v1oqiysKpc,2714
|
|
4
|
-
upgini/dataset.py,sha256=
|
|
4
|
+
upgini/dataset.py,sha256=zYPSQ73ch6k5EWxZlh1KrjL0gMkmAwl7Nkgrz6zxywY,33161
|
|
5
5
|
upgini/errors.py,sha256=2b_Wbo0OYhLUbrZqdLIx5jBnAsiD1Mcenh-VjR4HCTw,950
|
|
6
|
-
upgini/features_enricher.py,sha256=
|
|
6
|
+
upgini/features_enricher.py,sha256=m7z3iWSEj0ORUVnp65I0b_427SITjNnBvn8hdebS_xE,195541
|
|
7
7
|
upgini/http.py,sha256=plZGTGoi1h2edd8Cnjt4eYB8t4NbBGnZz7DtPTByiNc,42885
|
|
8
8
|
upgini/lazy_import.py,sha256=74gQ8JuA48BGRLxAo7lNHNKY2D2emMxrUxKGdxVGhuY,1012
|
|
9
9
|
upgini/metadata.py,sha256=-ibqiNjD7dTagqg53FoEJNEqvAYbwgfyn9PGTRQ_YKU,12054
|
|
@@ -56,10 +56,10 @@ upgini/utils/phone_utils.py,sha256=IrbztLuOJBiePqqxllfABWfYlfAjYevPhXKipl95wUI,1
|
|
|
56
56
|
upgini/utils/postal_code_utils.py,sha256=5M0sUqH2DAr33kARWCTXR-ACyzWbjDq_-0mmEml6ZcU,1716
|
|
57
57
|
upgini/utils/progress_bar.py,sha256=N-Sfdah2Hg8lXP_fV9EfUTXz_PyRt4lo9fAHoUDOoLc,1550
|
|
58
58
|
upgini/utils/sklearn_ext.py,sha256=13jQS_k7v0aUtudXV6nGUEWjttPQzAW9AFYL5wgEz9k,44511
|
|
59
|
-
upgini/utils/target_utils.py,sha256=
|
|
59
|
+
upgini/utils/target_utils.py,sha256=AosrXCtfbNEUbc7fzyt1udKIZ-vvt-ngwx9SkOclWWQ,14200
|
|
60
60
|
upgini/utils/track_info.py,sha256=G5Lu1xxakg2_TQjKZk4b5SvrHsATTXNVV3NbvWtT8k8,5663
|
|
61
61
|
upgini/utils/warning_counter.py,sha256=-GRY8EUggEBKODPSuXAkHn9KnEQwAORC0mmz_tim-PM,254
|
|
62
|
-
upgini-1.2.38a3769.
|
|
63
|
-
upgini-1.2.38a3769.
|
|
64
|
-
upgini-1.2.38a3769.
|
|
65
|
-
upgini-1.2.38a3769.
|
|
62
|
+
upgini-1.2.38a3769.dev2.dist-info/METADATA,sha256=CifOyMPTP6xdb1WljZK3BuIb5FomL6xgRGbrq1sC_04,48604
|
|
63
|
+
upgini-1.2.38a3769.dev2.dist-info/WHEEL,sha256=zEMcRr9Kr03x1ozGwg5v9NQBKn3kndp6LSoSlVg-jhU,87
|
|
64
|
+
upgini-1.2.38a3769.dev2.dist-info/licenses/LICENSE,sha256=5RRzgvdJUu3BUDfv4bzVU6FqKgwHlIay63pPCSmSgzw,1514
|
|
65
|
+
upgini-1.2.38a3769.dev2.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|