upgini 1.1.312a4__py3-none-any.whl → 1.1.312a5__py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of upgini might be problematic. Click here for more details.

upgini/__about__.py CHANGED
@@ -1 +1 @@
1
- __version__ = "1.1.312a4"
1
+ __version__ = "1.1.312a5"
upgini/features_enricher.py CHANGED
@@ -91,7 +91,11 @@ from upgini.utils.display_utils import (
91
91
  prepare_and_show_report,
92
92
  show_request_quote_button,
93
93
  )
94
- from upgini.utils.email_utils import EmailDomainGenerator, EmailSearchKeyConverter, EmailSearchKeyDetector
94
+ from upgini.utils.email_utils import (
95
+ EmailDomainGenerator,
96
+ EmailSearchKeyConverter,
97
+ EmailSearchKeyDetector,
98
+ )
95
99
  from upgini.utils.features_validator import FeaturesValidator
96
100
  from upgini.utils.format import Format
97
101
  from upgini.utils.ip_utils import IpSearchKeyConverter
@@ -1030,7 +1034,7 @@ class FeaturesEnricher(TransformerMixin):
1030
1034
  self.bundle.get("quality_metrics_rows_header"): _num_samples(effective_X),
1031
1035
  }
1032
1036
  if model_task_type in [ModelTaskType.BINARY, ModelTaskType.REGRESSION] and is_numeric_dtype(
1033
- y_sorted
1037
+ effective_y
1034
1038
  ):
1035
1039
  train_metrics[self.bundle.get("quality_metrics_mean_target_header")] = round(
1036
1040
  np.mean(effective_y), 4
@@ -1103,7 +1107,7 @@ class FeaturesEnricher(TransformerMixin):
1103
1107
  # self.bundle.get("quality_metrics_match_rate_header"): eval_hit_rate,
1104
1108
  }
1105
1109
  if model_task_type in [ModelTaskType.BINARY, ModelTaskType.REGRESSION] and is_numeric_dtype(
1106
- eval_y_sorted
1110
+ effective_eval_set[idx][1]
1107
1111
  ):
1108
1112
  eval_metrics[self.bundle.get("quality_metrics_mean_target_header")] = round(
1109
1113
  np.mean(effective_eval_set[idx][1]), 4
@@ -1363,6 +1367,7 @@ class FeaturesEnricher(TransformerMixin):
1363
1367
  importance_threshold,
1364
1368
  max_features,
1365
1369
  )
1370
+ filtered_enriched_features = [c for c in filtered_enriched_features if c not in client_features]
1366
1371
 
1367
1372
  X_sorted, y_sorted = self._sort_by_system_record_id(X_sampled, y_sampled, self.cv)
1368
1373
  enriched_X_sorted, enriched_y_sorted = self._sort_by_system_record_id(enriched_X, y_sampled, self.cv)
@@ -2217,7 +2222,9 @@ class FeaturesEnricher(TransformerMixin):
2217
2222
  result = enrich()
2218
2223
 
2219
2224
  filtered_columns = self.__filtered_enriched_features(importance_threshold, max_features)
2220
- existing_filtered_columns = [c for c in filtered_columns if c in result.columns]
2225
+ existing_filtered_columns = [
2226
+ c for c in filtered_columns if c in result.columns and c not in validated_X.columns
2227
+ ]
2221
2228
  selecting_columns = validated_X.columns.tolist() + generated_features + existing_filtered_columns
2222
2229
  if add_fit_system_record_id:
2223
2230
  selecting_columns.append(SORT_ID)
@@ -2430,9 +2437,7 @@ class FeaturesEnricher(TransformerMixin):
2430
2437
 
2431
2438
  email_columns = SearchKey.find_all_keys(self.fit_search_keys, SearchKey.EMAIL)
2432
2439
  if email_columns:
2433
- generator = EmailDomainGenerator(
2434
- email_columns
2435
- )
2440
+ generator = EmailDomainGenerator(email_columns)
2436
2441
  df = generator.generate(df)
2437
2442
  self.fit_generated_features.extend(generator.generated_features)
2438
2443
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: upgini
3
- Version: 1.1.312a4
3
+ Version: 1.1.312a5
4
4
  Summary: Intelligent data search & enrichment for Machine Learning
5
5
  Project-URL: Bug Reports, https://github.com/upgini/upgini/issues
6
6
  Project-URL: Homepage, https://upgini.com/
@@ -1,9 +1,9 @@
1
- upgini/__about__.py,sha256=OhL45U2LeBzyliA8yt6RNbJt0Lv5GjiINZYAIDhLhzg,26
1
+ upgini/__about__.py,sha256=AYWzaD5lVHd4s3vopg53TBKzkKgWzJVi62VrmfO7rRU,26
2
2
  upgini/__init__.py,sha256=Xs0YFVBu1KUdtZzbStGRPQtLt3YLzJnjx5nIUBlX8BE,415
3
3
  upgini/ads.py,sha256=nvuRxRx5MHDMgPr9SiU-fsqRdFaBv8p4_v1oqiysKpc,2714
4
4
  upgini/dataset.py,sha256=c6jghh32P9_2CspELYCOsmNIOiShuCADnCCJ8Jj2t50,30834
5
5
  upgini/errors.py,sha256=2b_Wbo0OYhLUbrZqdLIx5jBnAsiD1Mcenh-VjR4HCTw,950
6
- upgini/features_enricher.py,sha256=FfFlFW3BArv2rQWGCs-SXrDDDcjQTwwJxzRysZlJfq0,186961
6
+ upgini/features_enricher.py,sha256=LqGOMObkFsAm58sBL3UhTmc7TOnDQmLivxl3jbXh-n0,187132
7
7
  upgini/http.py,sha256=a4Epc9YLIJBuYk4t8E_2-QDLBtJFqKO35jn2SnYQZCg,42920
8
8
  upgini/lazy_import.py,sha256=EwoM0msNGbSmWBhGbrLDny1DSnOlvTxCjmMKPxYlDms,610
9
9
  upgini/metadata.py,sha256=YQ-1HZGyPOksP2iM50ff_pMHXLyzvpChqSfNh8Z0ke4,10833
@@ -57,7 +57,7 @@ upgini/utils/sklearn_ext.py,sha256=13jQS_k7v0aUtudXV6nGUEWjttPQzAW9AFYL5wgEz9k,4
57
57
  upgini/utils/target_utils.py,sha256=Y96_PJ5cC-WsEbeqg20v9uqywDQobLoTb-xoP7S3o4E,7807
58
58
  upgini/utils/track_info.py,sha256=G5Lu1xxakg2_TQjKZk4b5SvrHsATTXNVV3NbvWtT8k8,5663
59
59
  upgini/utils/warning_counter.py,sha256=dIWBB4dI5XRRJZudvIlqlIYKEiwLLPcXarsZuYRt338,227
60
- upgini-1.1.312a4.dist-info/METADATA,sha256=E_22GljWuDI_8hVGc09IVXufsmdscmuDypm3imVekDE,48155
61
- upgini-1.1.312a4.dist-info/WHEEL,sha256=1yFddiXMmvYK7QYTqtRNtX66WJ0Mz8PYEiEUoOUUxRY,87
62
- upgini-1.1.312a4.dist-info/licenses/LICENSE,sha256=5RRzgvdJUu3BUDfv4bzVU6FqKgwHlIay63pPCSmSgzw,1514
63
- upgini-1.1.312a4.dist-info/RECORD,,
60
+ upgini-1.1.312a5.dist-info/METADATA,sha256=MdhSWCWMCuajyC7B1bVWfVFOs7b5iIsmD7m3Z48egng,48155
61
+ upgini-1.1.312a5.dist-info/WHEEL,sha256=1yFddiXMmvYK7QYTqtRNtX66WJ0Mz8PYEiEUoOUUxRY,87
62
+ upgini-1.1.312a5.dist-info/licenses/LICENSE,sha256=5RRzgvdJUu3BUDfv4bzVU6FqKgwHlIay63pPCSmSgzw,1514
63
+ upgini-1.1.312a5.dist-info/RECORD,,