upgini 1.2.57a3675.dev5__py3-none-any.whl → 1.2.58__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of upgini might be problematic. Click here for more details.
- upgini/__about__.py +1 -1
- upgini/data_source/data_source_publisher.py +0 -1
- upgini/dataset.py +8 -16
- upgini/features_enricher.py +6 -11
- upgini/resource_bundle/strings.properties +0 -1
- upgini/utils/email_utils.py +6 -6
- {upgini-1.2.57a3675.dev5.dist-info → upgini-1.2.58.dist-info}/METADATA +1 -1
- {upgini-1.2.57a3675.dev5.dist-info → upgini-1.2.58.dist-info}/RECORD +10 -10
- {upgini-1.2.57a3675.dev5.dist-info → upgini-1.2.58.dist-info}/WHEEL +0 -0
- {upgini-1.2.57a3675.dev5.dist-info → upgini-1.2.58.dist-info}/licenses/LICENSE +0 -0
upgini/__about__.py
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
__version__ = "1.2.57a3675.dev5"
|
|
1
|
+
__version__ = "1.2.58"
|
|
@@ -386,7 +386,6 @@ class DataSourcePublisher:
|
|
|
386
386
|
search_keys = [k.value.value for k in search_keys] if search_keys else None
|
|
387
387
|
request = {"bqTableId": bq_table_id, "searchKeys": search_keys}
|
|
388
388
|
task_id = self._rest_client.upload_online(request, trace_id)
|
|
389
|
-
print(f"Uploading online task created. task_id={task_id}")
|
|
390
389
|
with Spinner():
|
|
391
390
|
status_response = self._rest_client.poll_ads_management_task_status(task_id, trace_id)
|
|
392
391
|
while status_response["status"] not in self.FINAL_STATUSES:
|
upgini/dataset.py
CHANGED
|
@@ -587,23 +587,15 @@ class Dataset: # (pd.DataFrame):
|
|
|
587
587
|
if (
|
|
588
588
|
runtime_parameters is not None
|
|
589
589
|
and runtime_parameters.properties is not None
|
|
590
|
+
and "generate_features" in runtime_parameters.properties
|
|
590
591
|
):
|
|
591
|
-
|
|
592
|
-
|
|
593
|
-
|
|
594
|
-
for
|
|
595
|
-
|
|
596
|
-
|
|
597
|
-
|
|
598
|
-
runtime_parameters.properties["generate_features"] = ",".join(renamed_generate_features)
|
|
599
|
-
if "columns_for_online_api" in runtime_parameters.properties:
|
|
600
|
-
columns_for_online_api = runtime_parameters.properties["columns_for_online_api"].split(",")
|
|
601
|
-
renamed_columns_for_online_api = []
|
|
602
|
-
for f in columns_for_online_api:
|
|
603
|
-
for new_column, orig_column in self.columns_renaming.items():
|
|
604
|
-
if f == orig_column:
|
|
605
|
-
renamed_columns_for_online_api.append(new_column)
|
|
606
|
-
runtime_parameters.properties["columns_for_online_api"] = ",".join(renamed_columns_for_online_api)
|
|
592
|
+
generate_features = runtime_parameters.properties["generate_features"].split(",")
|
|
593
|
+
renamed_generate_features = []
|
|
594
|
+
for f in generate_features:
|
|
595
|
+
for new_column, orig_column in self.columns_renaming.items():
|
|
596
|
+
if f == orig_column:
|
|
597
|
+
renamed_generate_features.append(new_column)
|
|
598
|
+
runtime_parameters.properties["generate_features"] = ",".join(renamed_generate_features)
|
|
607
599
|
|
|
608
600
|
return runtime_parameters
|
|
609
601
|
|
upgini/features_enricher.py
CHANGED
|
@@ -222,7 +222,6 @@ class FeaturesEnricher(TransformerMixin):
|
|
|
222
222
|
loss: Optional[str] = None,
|
|
223
223
|
detect_missing_search_keys: bool = True,
|
|
224
224
|
generate_features: Optional[List[str]] = None,
|
|
225
|
-
columns_for_online_api: Optional[List[str]] = None,
|
|
226
225
|
round_embeddings: Optional[int] = None,
|
|
227
226
|
logs_enabled: bool = True,
|
|
228
227
|
raise_validation_error: bool = True,
|
|
@@ -346,9 +345,6 @@ class FeaturesEnricher(TransformerMixin):
|
|
|
346
345
|
self.logger.error(msg)
|
|
347
346
|
raise ValidationError(msg)
|
|
348
347
|
self.runtime_parameters.properties["round_embeddings"] = round_embeddings
|
|
349
|
-
self.columns_for_online_api = columns_for_online_api
|
|
350
|
-
if columns_for_online_api is not None:
|
|
351
|
-
self.runtime_parameters.properties["columns_for_online_api"] = ",".join(columns_for_online_api)
|
|
352
348
|
maybe_downsampling_limit = self.runtime_parameters.properties.get("downsampling_limit")
|
|
353
349
|
if maybe_downsampling_limit is not None:
|
|
354
350
|
Dataset.FIT_SAMPLE_THRESHOLD = int(maybe_downsampling_limit)
|
|
@@ -2624,18 +2620,17 @@ if response.status_code == 200:
|
|
|
2624
2620
|
checked_generate_features = []
|
|
2625
2621
|
for gen_feature in self.generate_features:
|
|
2626
2622
|
if gen_feature not in x_columns:
|
|
2627
|
-
|
|
2628
|
-
|
|
2623
|
+
if gen_feature == self._get_phone_column(self.search_keys):
|
|
2624
|
+
raise ValidationError(
|
|
2625
|
+
self.bundle.get("missing_generate_feature").format(gen_feature, x_columns)
|
|
2626
|
+
)
|
|
2627
|
+
else:
|
|
2628
|
+
self.__log_warning(self.bundle.get("missing_generate_feature").format(gen_feature, x_columns))
|
|
2629
2629
|
else:
|
|
2630
2630
|
checked_generate_features.append(gen_feature)
|
|
2631
2631
|
self.generate_features = checked_generate_features
|
|
2632
2632
|
self.runtime_parameters.properties["generate_features"] = ",".join(self.generate_features)
|
|
2633
2633
|
|
|
2634
|
-
if self.columns_for_online_api is not None and len(self.columns_for_online_api) > 0:
|
|
2635
|
-
for column in self.columns_for_online_api:
|
|
2636
|
-
if column not in validated_X.columns:
|
|
2637
|
-
raise ValidationError(self.bundle.get("missing_column_for_online_api").format(column))
|
|
2638
|
-
|
|
2639
2634
|
if self.id_columns is not None:
|
|
2640
2635
|
for id_column in self.id_columns:
|
|
2641
2636
|
if id_column not in validated_X.columns:
|
|
@@ -111,7 +111,6 @@ x_is_empty=X is empty
|
|
|
111
111
|
y_is_empty=y is empty
|
|
112
112
|
x_contains_reserved_column_name=Column name {} is reserved. Please rename column and try again
|
|
113
113
|
missing_generate_feature=Feature {} specified in `generate_features` is not present in input columns: {}
|
|
114
|
-
missing_column_for_online_api=Column {} specified in `columns_for_online_api` is not present in input columns: {}
|
|
115
114
|
x_unstable_by_date=Your training sample is unstable in number of rows per date. It is recommended to redesign the training sample
|
|
116
115
|
train_unstable_target=Your training sample contains an unstable target event, PSI = {}. This will lead to unstable scoring on deferred samples. It is recommended to redesign the training sample
|
|
117
116
|
eval_unstable_target=Your training and evaluation samples have a difference in target distribution. PSI = {}. The results will be unstable. It is recommended to redesign the training and evaluation samples
|
upgini/utils/email_utils.py
CHANGED
|
@@ -116,17 +116,17 @@ class EmailSearchKeyConverter:
|
|
|
116
116
|
else:
|
|
117
117
|
df[self.hem_column] = df[self.hem_column].astype("string").str.lower()
|
|
118
118
|
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
119
|
+
del self.search_keys[self.email_column]
|
|
120
|
+
if self.email_column in self.unnest_search_keys:
|
|
121
|
+
self.unnest_search_keys.remove(self.email_column)
|
|
122
122
|
|
|
123
123
|
one_domain_name = self.email_column + self.ONE_DOMAIN_SUFFIX
|
|
124
124
|
df[one_domain_name] = df[self.email_column].apply(self._email_to_one_domain)
|
|
125
125
|
self.columns_renaming[one_domain_name] = original_email_column
|
|
126
126
|
self.search_keys[one_domain_name] = SearchKey.EMAIL_ONE_DOMAIN
|
|
127
127
|
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
128
|
+
if self.email_converted_to_hem:
|
|
129
|
+
df = df.drop(columns=self.email_column)
|
|
130
|
+
del self.columns_renaming[self.email_column]
|
|
131
131
|
|
|
132
132
|
return df
|
|
@@ -1,9 +1,9 @@
|
|
|
1
|
-
upgini/__about__.py,sha256=
|
|
1
|
+
upgini/__about__.py,sha256=hEEF6GcdtkrwOFvTZT6RsY7a2xF_63NKZKmfJ4hm0-M,23
|
|
2
2
|
upgini/__init__.py,sha256=LXSfTNU0HnlOkE69VCxkgIKDhWP-JFo_eBQ71OxTr5Y,261
|
|
3
3
|
upgini/ads.py,sha256=nvuRxRx5MHDMgPr9SiU-fsqRdFaBv8p4_v1oqiysKpc,2714
|
|
4
|
-
upgini/dataset.py,sha256=
|
|
4
|
+
upgini/dataset.py,sha256=vT4JyHmafLNbj54SySXr93f5hNS6-t94aFslbBy-7No,33535
|
|
5
5
|
upgini/errors.py,sha256=2b_Wbo0OYhLUbrZqdLIx5jBnAsiD1Mcenh-VjR4HCTw,950
|
|
6
|
-
upgini/features_enricher.py,sha256=
|
|
6
|
+
upgini/features_enricher.py,sha256=FkAKQV_XOXTobwOXpdy9BPfRkL4fkgoNa2B6NniiCrs,201554
|
|
7
7
|
upgini/http.py,sha256=ud0Cp7h0jNeHuuZGpU_1dAAEiabGoJjGxc1X5oeBQr4,43496
|
|
8
8
|
upgini/lazy_import.py,sha256=74gQ8JuA48BGRLxAo7lNHNKY2D2emMxrUxKGdxVGhuY,1012
|
|
9
9
|
upgini/metadata.py,sha256=Jh6YTaS00m_nbaOY_owvlSyn9zgkErkqu8iTr9ZjKI8,12279
|
|
@@ -23,14 +23,14 @@ upgini/autofe/operand.py,sha256=8Ttrfxv_H91dMbS7J55zxluzAJHfGXU_Y2xCh4OHwb8,4774
|
|
|
23
23
|
upgini/autofe/unary.py,sha256=T3E7F3dA_7o_rkdCFq7JV6nHLzcoHLHQTcxO7y5Opa4,4646
|
|
24
24
|
upgini/autofe/vector.py,sha256=udkg4pP7IIeLjt0Cg6rzEKUmGaubOnqsEz3bz9R6E44,7110
|
|
25
25
|
upgini/data_source/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
26
|
-
upgini/data_source/data_source_publisher.py,sha256=
|
|
26
|
+
upgini/data_source/data_source_publisher.py,sha256=X-8aGtVgzGmxyXkMVBoBLIGDMb4lYQaGZbxDnOd4A3Q,22516
|
|
27
27
|
upgini/mdc/__init__.py,sha256=aM08nIWFc2gWdWUa3_IuEnNND0cQPkBGnYpRMnfFN8k,1019
|
|
28
28
|
upgini/mdc/context.py,sha256=3u1B-jXt7tXEvNcV3qmR9SDCseudnY7KYsLclBdwVLk,1405
|
|
29
29
|
upgini/normalizer/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
30
30
|
upgini/normalizer/normalize_utils.py,sha256=Ft2MwSgVoBilXAORAOYAuwPD79GOLfwn4qQE3IUFzzg,7218
|
|
31
31
|
upgini/resource_bundle/__init__.py,sha256=S5F2G47pnJd2LDpmFsjDqEwiKkP8Hm-hcseDbMka6Ko,8345
|
|
32
32
|
upgini/resource_bundle/exceptions.py,sha256=5fRvx0_vWdE1-7HcSgF0tckB4A9AKyf5RiinZkInTsI,621
|
|
33
|
-
upgini/resource_bundle/strings.properties,sha256=
|
|
33
|
+
upgini/resource_bundle/strings.properties,sha256=0_KAExIi1u48N1CQ13LKJS3bgDlRs-MPOyU3VxcE-qY,27350
|
|
34
34
|
upgini/resource_bundle/strings_widget.properties,sha256=gOdqvZWntP2LCza_tyVk1_yRYcG4c04K9sQOAVhF_gw,1577
|
|
35
35
|
upgini/sampler/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
36
36
|
upgini/sampler/base.py,sha256=7GpjYqjOp58vYcJLiX__1R5wjUlyQbxvHJ2klFnup_M,6389
|
|
@@ -46,7 +46,7 @@ upgini/utils/cv_utils.py,sha256=w6FQb9nO8BWDx88EF83NpjPLarK4eR4ia0Wg0kLBJC4,3525
|
|
|
46
46
|
upgini/utils/datetime_utils.py,sha256=RVAk4_rakK8X9zjybK3-rj0to0e3elye8tnBuA4wTWU,13491
|
|
47
47
|
upgini/utils/deduplicate_utils.py,sha256=SMZx9IKIhWI5HqXepfKiQb3uDJrogQZtG6jcWuMo5Z4,8855
|
|
48
48
|
upgini/utils/display_utils.py,sha256=DsBjJ8jEYAh8BPgfAbzq5imoGFV6IACP20PQ78BQCX0,11964
|
|
49
|
-
upgini/utils/email_utils.py,sha256=
|
|
49
|
+
upgini/utils/email_utils.py,sha256=GbnhHJn1nhUBytmK6PophYqaoq4t7Lp6i0-O0Gd3RV8,5265
|
|
50
50
|
upgini/utils/fallback_progress_bar.py,sha256=PDaKb8dYpVZaWMroNcOHsTc3pSjgi9mOm0--cOFTwJ0,1074
|
|
51
51
|
upgini/utils/feature_info.py,sha256=0rOXSyCj-sw-8migWP0ge8qrOzGU50dQvH0JUJUrDfQ,6766
|
|
52
52
|
upgini/utils/features_validator.py,sha256=lEfmk4DoxZ4ooOE1HC0ZXtUb_lFKRFHIrnFULZ4_rL8,3746
|
|
@@ -59,7 +59,7 @@ upgini/utils/sklearn_ext.py,sha256=13jQS_k7v0aUtudXV6nGUEWjttPQzAW9AFYL5wgEz9k,4
|
|
|
59
59
|
upgini/utils/target_utils.py,sha256=RlpKGss9kMibVSlA8iZuO_qxmyeplqzn7X8g6hiGGGs,14341
|
|
60
60
|
upgini/utils/track_info.py,sha256=G5Lu1xxakg2_TQjKZk4b5SvrHsATTXNVV3NbvWtT8k8,5663
|
|
61
61
|
upgini/utils/warning_counter.py,sha256=-GRY8EUggEBKODPSuXAkHn9KnEQwAORC0mmz_tim-PM,254
|
|
62
|
-
upgini-1.2.
|
|
63
|
-
upgini-1.2.
|
|
64
|
-
upgini-1.2.
|
|
65
|
-
upgini-1.2.
|
|
62
|
+
upgini-1.2.58.dist-info/METADATA,sha256=kjEbHr6BxnFeGwzavSJmuRCaQw1HaH0JQYivX96Uabk,49055
|
|
63
|
+
upgini-1.2.58.dist-info/WHEEL,sha256=zEMcRr9Kr03x1ozGwg5v9NQBKn3kndp6LSoSlVg-jhU,87
|
|
64
|
+
upgini-1.2.58.dist-info/licenses/LICENSE,sha256=5RRzgvdJUu3BUDfv4bzVU6FqKgwHlIay63pPCSmSgzw,1514
|
|
65
|
+
upgini-1.2.58.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|