upgini 1.2.38a3769.dev4__py3-none-any.whl → 1.2.38a3769.dev6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- upgini/__about__.py +1 -1
- upgini/dataset.py +4 -1
- upgini/features_enricher.py +8 -3
- {upgini-1.2.38a3769.dev4.dist-info → upgini-1.2.38a3769.dev6.dist-info}/METADATA +1 -1
- {upgini-1.2.38a3769.dev4.dist-info → upgini-1.2.38a3769.dev6.dist-info}/RECORD +7 -7
- {upgini-1.2.38a3769.dev4.dist-info → upgini-1.2.38a3769.dev6.dist-info}/WHEEL +0 -0
- {upgini-1.2.38a3769.dev4.dist-info → upgini-1.2.38a3769.dev6.dist-info}/licenses/LICENSE +0 -0
upgini/__about__.py
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
__version__ = "1.2.38a3769.dev4"
|
|
1
|
+
__version__ = "1.2.38a3769.dev6"
|
upgini/dataset.py
CHANGED
|
@@ -77,6 +77,7 @@ class Dataset: # (pd.DataFrame):
|
|
|
77
77
|
unnest_search_keys: Optional[Dict[str, str]] = None,
|
|
78
78
|
model_task_type: Optional[ModelTaskType] = None,
|
|
79
79
|
cv_type: Optional[CVType] = None,
|
|
80
|
+
id_columns: Optional[List[str]] = None,
|
|
80
81
|
random_state: Optional[int] = None,
|
|
81
82
|
rest_client: Optional[_RestClient] = None,
|
|
82
83
|
logger: Optional[logging.Logger] = None,
|
|
@@ -120,6 +121,7 @@ class Dataset: # (pd.DataFrame):
|
|
|
120
121
|
self.random_state = random_state
|
|
121
122
|
self.columns_renaming: Dict[str, str] = {}
|
|
122
123
|
self.imbalanced: bool = False
|
|
124
|
+
self.id_columns = id_columns
|
|
123
125
|
if logger is not None:
|
|
124
126
|
self.logger = logger
|
|
125
127
|
else:
|
|
@@ -230,6 +232,7 @@ class Dataset: # (pd.DataFrame):
|
|
|
230
232
|
target_column=target_column,
|
|
231
233
|
task_type=self.task_type,
|
|
232
234
|
cv_type=self.cv_type,
|
|
235
|
+
id_columns=self.id_columns,
|
|
233
236
|
random_state=self.random_state,
|
|
234
237
|
sample_size=self.FORCE_SAMPLE_SIZE,
|
|
235
238
|
logger=self.logger,
|
|
@@ -305,7 +308,7 @@ class Dataset: # (pd.DataFrame):
|
|
|
305
308
|
if self.cv_type is not None and self.cv_type.is_time_series():
|
|
306
309
|
resampled_data = balance_undersample_time_series(
|
|
307
310
|
df=self.data,
|
|
308
|
-
id_columns=
|
|
311
|
+
id_columns=self.id_columns,
|
|
309
312
|
date_column=next(
|
|
310
313
|
k
|
|
311
314
|
for k, v in self.meaning_types.items()
|
upgini/features_enricher.py
CHANGED
|
@@ -932,7 +932,7 @@ class FeaturesEnricher(TransformerMixin):
|
|
|
932
932
|
cat_features, search_keys_for_metrics = self._get_client_cat_features(
|
|
933
933
|
estimator, validated_X, self.search_keys
|
|
934
934
|
)
|
|
935
|
-
search_keys_for_metrics.extend([c for c in self.id_columns if c not in search_keys_for_metrics])
|
|
935
|
+
search_keys_for_metrics.extend([c for c in self.id_columns or [] if c not in search_keys_for_metrics])
|
|
936
936
|
|
|
937
937
|
prepared_data = self._prepare_data_for_metrics(
|
|
938
938
|
trace_id=trace_id,
|
|
@@ -2300,6 +2300,7 @@ class FeaturesEnricher(TransformerMixin):
|
|
|
2300
2300
|
meaning_types=meaning_types,
|
|
2301
2301
|
search_keys=combined_search_keys,
|
|
2302
2302
|
unnest_search_keys=unnest_search_keys,
|
|
2303
|
+
id_columns=self.__get_renamed_id_columns(),
|
|
2303
2304
|
date_format=self.date_format,
|
|
2304
2305
|
rest_client=self.rest_client,
|
|
2305
2306
|
logger=self.logger,
|
|
@@ -2657,8 +2658,7 @@ class FeaturesEnricher(TransformerMixin):
|
|
|
2657
2658
|
self.__adjust_cv(df)
|
|
2658
2659
|
|
|
2659
2660
|
if self.id_columns is not None and self.cv is not None and self.cv.is_time_series():
|
|
2660
|
-
|
|
2661
|
-
id_columns = [reverse_renaming[col] for col in self.id_columns if col in reverse_renaming]
|
|
2661
|
+
id_columns = self.__get_renamed_id_columns()
|
|
2662
2662
|
self.fit_search_keys.update({col: SearchKey.CUSTOM_KEY for col in id_columns})
|
|
2663
2663
|
self.runtime_parameters.properties["id_columns"] = ",".join(id_columns)
|
|
2664
2664
|
|
|
@@ -2784,6 +2784,7 @@ class FeaturesEnricher(TransformerMixin):
|
|
|
2784
2784
|
unnest_search_keys=unnest_search_keys,
|
|
2785
2785
|
model_task_type=self.model_task_type,
|
|
2786
2786
|
cv_type=self.cv,
|
|
2787
|
+
id_columns=self.__get_renamed_id_columns(),
|
|
2787
2788
|
date_format=self.date_format,
|
|
2788
2789
|
random_state=self.random_state,
|
|
2789
2790
|
rest_client=self.rest_client,
|
|
@@ -2943,6 +2944,10 @@ class FeaturesEnricher(TransformerMixin):
|
|
|
2943
2944
|
def __should_add_date_column(self):
|
|
2944
2945
|
return self.add_date_if_missing or (self.cv is not None and self.cv.is_time_series())
|
|
2945
2946
|
|
|
2947
|
+
def __get_renamed_id_columns(self):
|
|
2948
|
+
reverse_renaming = {v: k for k, v in self.fit_columns_renaming.items()}
|
|
2949
|
+
return [reverse_renaming.get(c) or c for c in self.id_columns]
|
|
2950
|
+
|
|
2946
2951
|
def __adjust_cv(self, df: pd.DataFrame):
|
|
2947
2952
|
date_column = SearchKey.find_key(self.fit_search_keys, [SearchKey.DATE, SearchKey.DATETIME])
|
|
2948
2953
|
# Check Multivariate time series
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.3
|
|
2
2
|
Name: upgini
|
|
3
|
-
Version: 1.2.38a3769.dev4
|
|
3
|
+
Version: 1.2.38a3769.dev6
|
|
4
4
|
Summary: Intelligent data search & enrichment for Machine Learning
|
|
5
5
|
Project-URL: Bug Reports, https://github.com/upgini/upgini/issues
|
|
6
6
|
Project-URL: Homepage, https://upgini.com/
|
|
@@ -1,9 +1,9 @@
|
|
|
1
|
-
upgini/__about__.py,sha256=
|
|
1
|
+
upgini/__about__.py,sha256=BkueeegvmmuEH9qyBrb5l1xILgdxX8JrEA3EC3ANEDk,33
|
|
2
2
|
upgini/__init__.py,sha256=LXSfTNU0HnlOkE69VCxkgIKDhWP-JFo_eBQ71OxTr5Y,261
|
|
3
3
|
upgini/ads.py,sha256=nvuRxRx5MHDMgPr9SiU-fsqRdFaBv8p4_v1oqiysKpc,2714
|
|
4
|
-
upgini/dataset.py,sha256
|
|
4
|
+
upgini/dataset.py,sha256=-3FeDMADnHxGb70rKFY_U96NCQO-TEUAXFicFl25CtY,33222
|
|
5
5
|
upgini/errors.py,sha256=2b_Wbo0OYhLUbrZqdLIx5jBnAsiD1Mcenh-VjR4HCTw,950
|
|
6
|
-
upgini/features_enricher.py,sha256=
|
|
6
|
+
upgini/features_enricher.py,sha256=saA9lh32FC6uQ1Y537WLCNR0YgZ9AguleGG97uR0CrY,196108
|
|
7
7
|
upgini/http.py,sha256=plZGTGoi1h2edd8Cnjt4eYB8t4NbBGnZz7DtPTByiNc,42885
|
|
8
8
|
upgini/lazy_import.py,sha256=74gQ8JuA48BGRLxAo7lNHNKY2D2emMxrUxKGdxVGhuY,1012
|
|
9
9
|
upgini/metadata.py,sha256=-ibqiNjD7dTagqg53FoEJNEqvAYbwgfyn9PGTRQ_YKU,12054
|
|
@@ -59,7 +59,7 @@ upgini/utils/sklearn_ext.py,sha256=13jQS_k7v0aUtudXV6nGUEWjttPQzAW9AFYL5wgEz9k,4
|
|
|
59
59
|
upgini/utils/target_utils.py,sha256=RlpKGss9kMibVSlA8iZuO_qxmyeplqzn7X8g6hiGGGs,14341
|
|
60
60
|
upgini/utils/track_info.py,sha256=G5Lu1xxakg2_TQjKZk4b5SvrHsATTXNVV3NbvWtT8k8,5663
|
|
61
61
|
upgini/utils/warning_counter.py,sha256=-GRY8EUggEBKODPSuXAkHn9KnEQwAORC0mmz_tim-PM,254
|
|
62
|
-
upgini-1.2.38a3769.
|
|
63
|
-
upgini-1.2.38a3769.dev4.dist-info/WHEEL,sha256=zEMcRr9Kr03x1ozGwg5v9NQBKn3kndp6LSoSlVg-jhU,87
|
|
64
|
-
upgini-1.2.38a3769.dev4.dist-info/licenses/LICENSE,sha256=5RRzgvdJUu3BUDfv4bzVU6FqKgwHlIay63pPCSmSgzw,1514
|
|
65
|
-
upgini-1.2.38a3769.dev4.dist-info/RECORD,,
|
|
62
|
+
upgini-1.2.38a3769.dev6.dist-info/METADATA,sha256=ZhLqyvPmIEibbeefPmaH1Rl2rVqaINTUw48JAZd6MVk,48604
|
|
63
|
+
upgini-1.2.38a3769.dev6.dist-info/WHEEL,sha256=zEMcRr9Kr03x1ozGwg5v9NQBKn3kndp6LSoSlVg-jhU,87
|
|
64
|
+
upgini-1.2.38a3769.dev6.dist-info/licenses/LICENSE,sha256=5RRzgvdJUu3BUDfv4bzVU6FqKgwHlIay63pPCSmSgzw,1514
|
|
65
|
+
upgini-1.2.38a3769.dev6.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|