upgini 1.2.38a3769.dev3__py3-none-any.whl → 1.2.38a3769.dev5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of upgini might be problematic. Click here for more details.
- upgini/__about__.py +1 -1
- upgini/dataset.py +4 -1
- upgini/features_enricher.py +7 -1
- upgini/utils/target_utils.py +2 -0
- {upgini-1.2.38a3769.dev3.dist-info → upgini-1.2.38a3769.dev5.dist-info}/METADATA +1 -1
- {upgini-1.2.38a3769.dev3.dist-info → upgini-1.2.38a3769.dev5.dist-info}/RECORD +8 -8
- {upgini-1.2.38a3769.dev3.dist-info → upgini-1.2.38a3769.dev5.dist-info}/WHEEL +0 -0
- {upgini-1.2.38a3769.dev3.dist-info → upgini-1.2.38a3769.dev5.dist-info}/licenses/LICENSE +0 -0
upgini/__about__.py
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
__version__ = "1.2.38a3769.dev3"
|
|
1
|
+
__version__ = "1.2.38a3769.dev5"
|
upgini/dataset.py
CHANGED
|
@@ -77,6 +77,7 @@ class Dataset: # (pd.DataFrame):
|
|
|
77
77
|
unnest_search_keys: Optional[Dict[str, str]] = None,
|
|
78
78
|
model_task_type: Optional[ModelTaskType] = None,
|
|
79
79
|
cv_type: Optional[CVType] = None,
|
|
80
|
+
id_columns: Optional[List[str]] = None,
|
|
80
81
|
random_state: Optional[int] = None,
|
|
81
82
|
rest_client: Optional[_RestClient] = None,
|
|
82
83
|
logger: Optional[logging.Logger] = None,
|
|
@@ -120,6 +121,7 @@ class Dataset: # (pd.DataFrame):
|
|
|
120
121
|
self.random_state = random_state
|
|
121
122
|
self.columns_renaming: Dict[str, str] = {}
|
|
122
123
|
self.imbalanced: bool = False
|
|
124
|
+
self.id_columns = id_columns
|
|
123
125
|
if logger is not None:
|
|
124
126
|
self.logger = logger
|
|
125
127
|
else:
|
|
@@ -230,6 +232,7 @@ class Dataset: # (pd.DataFrame):
|
|
|
230
232
|
target_column=target_column,
|
|
231
233
|
task_type=self.task_type,
|
|
232
234
|
cv_type=self.cv_type,
|
|
235
|
+
id_columns=self.id_columns,
|
|
233
236
|
random_state=self.random_state,
|
|
234
237
|
sample_size=self.FORCE_SAMPLE_SIZE,
|
|
235
238
|
logger=self.logger,
|
|
@@ -305,7 +308,7 @@ class Dataset: # (pd.DataFrame):
|
|
|
305
308
|
if self.cv_type is not None and self.cv_type.is_time_series():
|
|
306
309
|
resampled_data = balance_undersample_time_series(
|
|
307
310
|
df=self.data,
|
|
308
|
-
id_columns=
|
|
311
|
+
id_columns=self.id_columns,
|
|
309
312
|
date_column=next(
|
|
310
313
|
k
|
|
311
314
|
for k, v in self.meaning_types.items()
|
upgini/features_enricher.py
CHANGED
|
@@ -932,6 +932,7 @@ class FeaturesEnricher(TransformerMixin):
|
|
|
932
932
|
cat_features, search_keys_for_metrics = self._get_client_cat_features(
|
|
933
933
|
estimator, validated_X, self.search_keys
|
|
934
934
|
)
|
|
935
|
+
search_keys_for_metrics.extend([c for c in self.id_columns or [] if c not in search_keys_for_metrics])
|
|
935
936
|
|
|
936
937
|
prepared_data = self._prepare_data_for_metrics(
|
|
937
938
|
trace_id=trace_id,
|
|
@@ -2299,6 +2300,7 @@ class FeaturesEnricher(TransformerMixin):
|
|
|
2299
2300
|
meaning_types=meaning_types,
|
|
2300
2301
|
search_keys=combined_search_keys,
|
|
2301
2302
|
unnest_search_keys=unnest_search_keys,
|
|
2303
|
+
id_columns=self.id_columns,
|
|
2302
2304
|
date_format=self.date_format,
|
|
2303
2305
|
rest_client=self.rest_client,
|
|
2304
2306
|
logger=self.logger,
|
|
@@ -2656,7 +2658,10 @@ class FeaturesEnricher(TransformerMixin):
|
|
|
2656
2658
|
self.__adjust_cv(df)
|
|
2657
2659
|
|
|
2658
2660
|
if self.id_columns is not None and self.cv is not None and self.cv.is_time_series():
|
|
2659
|
-
|
|
2661
|
+
reverse_renaming = {v: k for k, v in self.fit_columns_renaming.items()}
|
|
2662
|
+
id_columns = [reverse_renaming[col] for col in self.id_columns if col in reverse_renaming]
|
|
2663
|
+
self.fit_search_keys.update({col: SearchKey.CUSTOM_KEY for col in id_columns})
|
|
2664
|
+
self.runtime_parameters.properties["id_columns"] = ",".join(id_columns)
|
|
2660
2665
|
|
|
2661
2666
|
df, fintech_warnings = remove_fintech_duplicates(
|
|
2662
2667
|
df, self.fit_search_keys, date_format=self.date_format, logger=self.logger, bundle=self.bundle
|
|
@@ -2780,6 +2785,7 @@ class FeaturesEnricher(TransformerMixin):
|
|
|
2780
2785
|
unnest_search_keys=unnest_search_keys,
|
|
2781
2786
|
model_task_type=self.model_task_type,
|
|
2782
2787
|
cv_type=self.cv,
|
|
2788
|
+
id_columns=self.id_columns,
|
|
2783
2789
|
date_format=self.date_format,
|
|
2784
2790
|
random_state=self.random_state,
|
|
2785
2791
|
rest_client=self.rest_client,
|
upgini/utils/target_utils.py
CHANGED
|
@@ -295,6 +295,8 @@ def balance_undersample_time_series(
|
|
|
295
295
|
|
|
296
296
|
random_state = np.random.RandomState(random_state)
|
|
297
297
|
|
|
298
|
+
if not id_columns:
|
|
299
|
+
id_columns = [date_column]
|
|
298
300
|
ids_sort = df.groupby(id_columns)[date_column].aggregate(["max", "count"]).T.to_dict()
|
|
299
301
|
ids_sort = {
|
|
300
302
|
ensure_tuple(k): (
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.3
|
|
2
2
|
Name: upgini
|
|
3
|
-
Version: 1.2.38a3769.dev3
|
|
3
|
+
Version: 1.2.38a3769.dev5
|
|
4
4
|
Summary: Intelligent data search & enrichment for Machine Learning
|
|
5
5
|
Project-URL: Bug Reports, https://github.com/upgini/upgini/issues
|
|
6
6
|
Project-URL: Homepage, https://upgini.com/
|
|
@@ -1,9 +1,9 @@
|
|
|
1
|
-
upgini/__about__.py,sha256=
|
|
1
|
+
upgini/__about__.py,sha256=bj9nQpQPQBgyZ975N_D4PWwEYJJKbsfpt1gs-e4tMio,33
|
|
2
2
|
upgini/__init__.py,sha256=LXSfTNU0HnlOkE69VCxkgIKDhWP-JFo_eBQ71OxTr5Y,261
|
|
3
3
|
upgini/ads.py,sha256=nvuRxRx5MHDMgPr9SiU-fsqRdFaBv8p4_v1oqiysKpc,2714
|
|
4
|
-
upgini/dataset.py,sha256
|
|
4
|
+
upgini/dataset.py,sha256=-3FeDMADnHxGb70rKFY_U96NCQO-TEUAXFicFl25CtY,33222
|
|
5
5
|
upgini/errors.py,sha256=2b_Wbo0OYhLUbrZqdLIx5jBnAsiD1Mcenh-VjR4HCTw,950
|
|
6
|
-
upgini/features_enricher.py,sha256=
|
|
6
|
+
upgini/features_enricher.py,sha256=usNAM9eNFa19OeyPuMaaCp_4HMLeuYrkG4gBU6MwANg,196014
|
|
7
7
|
upgini/http.py,sha256=plZGTGoi1h2edd8Cnjt4eYB8t4NbBGnZz7DtPTByiNc,42885
|
|
8
8
|
upgini/lazy_import.py,sha256=74gQ8JuA48BGRLxAo7lNHNKY2D2emMxrUxKGdxVGhuY,1012
|
|
9
9
|
upgini/metadata.py,sha256=-ibqiNjD7dTagqg53FoEJNEqvAYbwgfyn9PGTRQ_YKU,12054
|
|
@@ -56,10 +56,10 @@ upgini/utils/phone_utils.py,sha256=IrbztLuOJBiePqqxllfABWfYlfAjYevPhXKipl95wUI,1
|
|
|
56
56
|
upgini/utils/postal_code_utils.py,sha256=5M0sUqH2DAr33kARWCTXR-ACyzWbjDq_-0mmEml6ZcU,1716
|
|
57
57
|
upgini/utils/progress_bar.py,sha256=N-Sfdah2Hg8lXP_fV9EfUTXz_PyRt4lo9fAHoUDOoLc,1550
|
|
58
58
|
upgini/utils/sklearn_ext.py,sha256=13jQS_k7v0aUtudXV6nGUEWjttPQzAW9AFYL5wgEz9k,44511
|
|
59
|
-
upgini/utils/target_utils.py,sha256=
|
|
59
|
+
upgini/utils/target_utils.py,sha256=RlpKGss9kMibVSlA8iZuO_qxmyeplqzn7X8g6hiGGGs,14341
|
|
60
60
|
upgini/utils/track_info.py,sha256=G5Lu1xxakg2_TQjKZk4b5SvrHsATTXNVV3NbvWtT8k8,5663
|
|
61
61
|
upgini/utils/warning_counter.py,sha256=-GRY8EUggEBKODPSuXAkHn9KnEQwAORC0mmz_tim-PM,254
|
|
62
|
-
upgini-1.2.38a3769.
|
|
63
|
-
upgini-1.2.38a3769.dev3.dist-info/WHEEL,sha256=zEMcRr9Kr03x1ozGwg5v9NQBKn3kndp6LSoSlVg-jhU,87
|
|
64
|
-
upgini-1.2.38a3769.dev3.dist-info/licenses/LICENSE,sha256=5RRzgvdJUu3BUDfv4bzVU6FqKgwHlIay63pPCSmSgzw,1514
|
|
65
|
-
upgini-1.2.38a3769.dev3.dist-info/RECORD,,
|
|
62
|
+
upgini-1.2.38a3769.dev5.dist-info/METADATA,sha256=ldrVhkIorzNJE1GYHBfBQkXGi8upTruz5sa9s-DTld4,48604
|
|
63
|
+
upgini-1.2.38a3769.dev5.dist-info/WHEEL,sha256=zEMcRr9Kr03x1ozGwg5v9NQBKn3kndp6LSoSlVg-jhU,87
|
|
64
|
+
upgini-1.2.38a3769.dev5.dist-info/licenses/LICENSE,sha256=5RRzgvdJUu3BUDfv4bzVU6FqKgwHlIay63pPCSmSgzw,1514
|
|
65
|
+
upgini-1.2.38a3769.dev5.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|