upgini 1.2.57a3675.dev4__tar.gz → 1.2.58__tar.gz

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of upgini might be problematic. Click here for more details.

Files changed (67)
  1. {upgini-1.2.57a3675.dev4 → upgini-1.2.58}/PKG-INFO +2 -2
  2. {upgini-1.2.57a3675.dev4 → upgini-1.2.58}/pyproject.toml +1 -1
  3. upgini-1.2.58/src/upgini/__about__.py +1 -0
  4. {upgini-1.2.57a3675.dev4 → upgini-1.2.58}/src/upgini/data_source/data_source_publisher.py +0 -1
  5. {upgini-1.2.57a3675.dev4 → upgini-1.2.58}/src/upgini/dataset.py +8 -16
  6. {upgini-1.2.57a3675.dev4 → upgini-1.2.58}/src/upgini/features_enricher.py +6 -11
  7. {upgini-1.2.57a3675.dev4 → upgini-1.2.58}/src/upgini/resource_bundle/strings.properties +0 -1
  8. {upgini-1.2.57a3675.dev4 → upgini-1.2.58}/src/upgini/utils/email_utils.py +6 -6
  9. upgini-1.2.57a3675.dev4/src/upgini/__about__.py +0 -1
  10. {upgini-1.2.57a3675.dev4 → upgini-1.2.58}/.gitignore +0 -0
  11. {upgini-1.2.57a3675.dev4 → upgini-1.2.58}/LICENSE +0 -0
  12. {upgini-1.2.57a3675.dev4 → upgini-1.2.58}/README.md +0 -0
  13. {upgini-1.2.57a3675.dev4 → upgini-1.2.58}/src/upgini/__init__.py +0 -0
  14. {upgini-1.2.57a3675.dev4 → upgini-1.2.58}/src/upgini/ads.py +0 -0
  15. {upgini-1.2.57a3675.dev4 → upgini-1.2.58}/src/upgini/ads_management/__init__.py +0 -0
  16. {upgini-1.2.57a3675.dev4 → upgini-1.2.58}/src/upgini/ads_management/ads_manager.py +0 -0
  17. {upgini-1.2.57a3675.dev4 → upgini-1.2.58}/src/upgini/autofe/__init__.py +0 -0
  18. {upgini-1.2.57a3675.dev4 → upgini-1.2.58}/src/upgini/autofe/all_operands.py +0 -0
  19. {upgini-1.2.57a3675.dev4 → upgini-1.2.58}/src/upgini/autofe/binary.py +0 -0
  20. {upgini-1.2.57a3675.dev4 → upgini-1.2.58}/src/upgini/autofe/date.py +0 -0
  21. {upgini-1.2.57a3675.dev4 → upgini-1.2.58}/src/upgini/autofe/feature.py +0 -0
  22. {upgini-1.2.57a3675.dev4 → upgini-1.2.58}/src/upgini/autofe/groupby.py +0 -0
  23. {upgini-1.2.57a3675.dev4 → upgini-1.2.58}/src/upgini/autofe/operand.py +0 -0
  24. {upgini-1.2.57a3675.dev4 → upgini-1.2.58}/src/upgini/autofe/unary.py +0 -0
  25. {upgini-1.2.57a3675.dev4 → upgini-1.2.58}/src/upgini/autofe/vector.py +0 -0
  26. {upgini-1.2.57a3675.dev4 → upgini-1.2.58}/src/upgini/data_source/__init__.py +0 -0
  27. {upgini-1.2.57a3675.dev4 → upgini-1.2.58}/src/upgini/errors.py +0 -0
  28. {upgini-1.2.57a3675.dev4 → upgini-1.2.58}/src/upgini/http.py +0 -0
  29. {upgini-1.2.57a3675.dev4 → upgini-1.2.58}/src/upgini/lazy_import.py +0 -0
  30. {upgini-1.2.57a3675.dev4 → upgini-1.2.58}/src/upgini/mdc/__init__.py +0 -0
  31. {upgini-1.2.57a3675.dev4 → upgini-1.2.58}/src/upgini/mdc/context.py +0 -0
  32. {upgini-1.2.57a3675.dev4 → upgini-1.2.58}/src/upgini/metadata.py +0 -0
  33. {upgini-1.2.57a3675.dev4 → upgini-1.2.58}/src/upgini/metrics.py +0 -0
  34. {upgini-1.2.57a3675.dev4 → upgini-1.2.58}/src/upgini/normalizer/__init__.py +0 -0
  35. {upgini-1.2.57a3675.dev4 → upgini-1.2.58}/src/upgini/normalizer/normalize_utils.py +0 -0
  36. {upgini-1.2.57a3675.dev4 → upgini-1.2.58}/src/upgini/resource_bundle/__init__.py +0 -0
  37. {upgini-1.2.57a3675.dev4 → upgini-1.2.58}/src/upgini/resource_bundle/exceptions.py +0 -0
  38. {upgini-1.2.57a3675.dev4 → upgini-1.2.58}/src/upgini/resource_bundle/strings_widget.properties +0 -0
  39. {upgini-1.2.57a3675.dev4 → upgini-1.2.58}/src/upgini/sampler/__init__.py +0 -0
  40. {upgini-1.2.57a3675.dev4 → upgini-1.2.58}/src/upgini/sampler/base.py +0 -0
  41. {upgini-1.2.57a3675.dev4 → upgini-1.2.58}/src/upgini/sampler/random_under_sampler.py +0 -0
  42. {upgini-1.2.57a3675.dev4 → upgini-1.2.58}/src/upgini/sampler/utils.py +0 -0
  43. {upgini-1.2.57a3675.dev4 → upgini-1.2.58}/src/upgini/search_task.py +0 -0
  44. {upgini-1.2.57a3675.dev4 → upgini-1.2.58}/src/upgini/spinner.py +0 -0
  45. {upgini-1.2.57a3675.dev4 → upgini-1.2.58}/src/upgini/utils/Roboto-Regular.ttf +0 -0
  46. {upgini-1.2.57a3675.dev4 → upgini-1.2.58}/src/upgini/utils/__init__.py +0 -0
  47. {upgini-1.2.57a3675.dev4 → upgini-1.2.58}/src/upgini/utils/base_search_key_detector.py +0 -0
  48. {upgini-1.2.57a3675.dev4 → upgini-1.2.58}/src/upgini/utils/blocked_time_series.py +0 -0
  49. {upgini-1.2.57a3675.dev4 → upgini-1.2.58}/src/upgini/utils/country_utils.py +0 -0
  50. {upgini-1.2.57a3675.dev4 → upgini-1.2.58}/src/upgini/utils/custom_loss_utils.py +0 -0
  51. {upgini-1.2.57a3675.dev4 → upgini-1.2.58}/src/upgini/utils/cv_utils.py +0 -0
  52. {upgini-1.2.57a3675.dev4 → upgini-1.2.58}/src/upgini/utils/datetime_utils.py +0 -0
  53. {upgini-1.2.57a3675.dev4 → upgini-1.2.58}/src/upgini/utils/deduplicate_utils.py +0 -0
  54. {upgini-1.2.57a3675.dev4 → upgini-1.2.58}/src/upgini/utils/display_utils.py +0 -0
  55. {upgini-1.2.57a3675.dev4 → upgini-1.2.58}/src/upgini/utils/fallback_progress_bar.py +0 -0
  56. {upgini-1.2.57a3675.dev4 → upgini-1.2.58}/src/upgini/utils/feature_info.py +0 -0
  57. {upgini-1.2.57a3675.dev4 → upgini-1.2.58}/src/upgini/utils/features_validator.py +0 -0
  58. {upgini-1.2.57a3675.dev4 → upgini-1.2.58}/src/upgini/utils/format.py +0 -0
  59. {upgini-1.2.57a3675.dev4 → upgini-1.2.58}/src/upgini/utils/ip_utils.py +0 -0
  60. {upgini-1.2.57a3675.dev4 → upgini-1.2.58}/src/upgini/utils/phone_utils.py +0 -0
  61. {upgini-1.2.57a3675.dev4 → upgini-1.2.58}/src/upgini/utils/postal_code_utils.py +0 -0
  62. {upgini-1.2.57a3675.dev4 → upgini-1.2.58}/src/upgini/utils/progress_bar.py +0 -0
  63. {upgini-1.2.57a3675.dev4 → upgini-1.2.58}/src/upgini/utils/sklearn_ext.py +0 -0
  64. {upgini-1.2.57a3675.dev4 → upgini-1.2.58}/src/upgini/utils/target_utils.py +0 -0
  65. {upgini-1.2.57a3675.dev4 → upgini-1.2.58}/src/upgini/utils/track_info.py +0 -0
  66. {upgini-1.2.57a3675.dev4 → upgini-1.2.58}/src/upgini/utils/warning_counter.py +0 -0
  67. {upgini-1.2.57a3675.dev4 → upgini-1.2.58}/src/upgini/version_validator.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: upgini
3
- Version: 1.2.57a3675.dev4
3
+ Version: 1.2.58
4
4
  Summary: Intelligent data search & enrichment for Machine Learning
5
5
  Project-URL: Bug Reports, https://github.com/upgini/upgini/issues
6
6
  Project-URL: Homepage, https://upgini.com/
@@ -34,7 +34,7 @@ Requires-Dist: pydantic<3.0.0,>1.0.0
34
34
  Requires-Dist: pyjwt>=2.8.0
35
35
  Requires-Dist: python-bidi==0.4.2
36
36
  Requires-Dist: python-dateutil>=2.8.0
37
- Requires-Dist: python-json-logger>=2.0.2
37
+ Requires-Dist: python-json-logger>=3.3.0
38
38
  Requires-Dist: requests>=2.8.0
39
39
  Requires-Dist: scikit-learn>=1.3.0
40
40
  Requires-Dist: xhtml2pdf<0.3.0,>=0.2.11
@@ -43,7 +43,7 @@ dependencies = [
43
43
  "pydantic>1.0.0,<3.0.0",
44
44
  "pyjwt>=2.8.0",
45
45
  "python-dateutil>=2.8.0",
46
- "python-json-logger>=2.0.2",
46
+ "python-json-logger>=3.3.0",
47
47
  "requests>=2.8.0",
48
48
  "scikit-learn>=1.3.0",
49
49
  "python-bidi==0.4.2",
@@ -0,0 +1 @@
1
+ __version__ = "1.2.58"
@@ -386,7 +386,6 @@ class DataSourcePublisher:
386
386
  search_keys = [k.value.value for k in search_keys] if search_keys else None
387
387
  request = {"bqTableId": bq_table_id, "searchKeys": search_keys}
388
388
  task_id = self._rest_client.upload_online(request, trace_id)
389
- print(f"Uploading online task created. task_id={task_id}")
390
389
  with Spinner():
391
390
  status_response = self._rest_client.poll_ads_management_task_status(task_id, trace_id)
392
391
  while status_response["status"] not in self.FINAL_STATUSES:
@@ -587,23 +587,15 @@ class Dataset: # (pd.DataFrame):
587
587
  if (
588
588
  runtime_parameters is not None
589
589
  and runtime_parameters.properties is not None
590
+ and "generate_features" in runtime_parameters.properties
590
591
  ):
591
- if "generate_features" in runtime_parameters.properties:
592
- generate_features = runtime_parameters.properties["generate_features"].split(",")
593
- renamed_generate_features = []
594
- for f in generate_features:
595
- for new_column, orig_column in self.columns_renaming.items():
596
- if f == orig_column:
597
- renamed_generate_features.append(new_column)
598
- runtime_parameters.properties["generate_features"] = ",".join(renamed_generate_features)
599
- if "columns_for_online_api" in runtime_parameters.properties:
600
- columns_for_online_api = runtime_parameters.properties["columns_for_online_api"].split(",")
601
- renamed_columns_for_online_api = []
602
- for f in columns_for_online_api:
603
- for new_column, orig_column in self.columns_renaming.items():
604
- if f == orig_column:
605
- renamed_columns_for_online_api.append(new_column)
606
- runtime_parameters.properties["columns_for_online_api"] = ",".join(renamed_columns_for_online_api)
592
+ generate_features = runtime_parameters.properties["generate_features"].split(",")
593
+ renamed_generate_features = []
594
+ for f in generate_features:
595
+ for new_column, orig_column in self.columns_renaming.items():
596
+ if f == orig_column:
597
+ renamed_generate_features.append(new_column)
598
+ runtime_parameters.properties["generate_features"] = ",".join(renamed_generate_features)
607
599
 
608
600
  return runtime_parameters
609
601
 
@@ -222,7 +222,6 @@ class FeaturesEnricher(TransformerMixin):
222
222
  loss: Optional[str] = None,
223
223
  detect_missing_search_keys: bool = True,
224
224
  generate_features: Optional[List[str]] = None,
225
- columns_for_online_api: Optional[List[str]] = None,
226
225
  round_embeddings: Optional[int] = None,
227
226
  logs_enabled: bool = True,
228
227
  raise_validation_error: bool = True,
@@ -346,9 +345,6 @@ class FeaturesEnricher(TransformerMixin):
346
345
  self.logger.error(msg)
347
346
  raise ValidationError(msg)
348
347
  self.runtime_parameters.properties["round_embeddings"] = round_embeddings
349
- self.columns_for_online_api = columns_for_online_api
350
- if columns_for_online_api is not None:
351
- self.runtime_parameters.properties["columns_for_online_api"] = ",".join(columns_for_online_api)
352
348
  maybe_downsampling_limit = self.runtime_parameters.properties.get("downsampling_limit")
353
349
  if maybe_downsampling_limit is not None:
354
350
  Dataset.FIT_SAMPLE_THRESHOLD = int(maybe_downsampling_limit)
@@ -2624,18 +2620,17 @@ if response.status_code == 200:
2624
2620
  checked_generate_features = []
2625
2621
  for gen_feature in self.generate_features:
2626
2622
  if gen_feature not in x_columns:
2627
- msg = self.bundle.get("missing_generate_feature").format(gen_feature, x_columns)
2628
- self.__log_warning(msg)
2623
+ if gen_feature == self._get_phone_column(self.search_keys):
2624
+ raise ValidationError(
2625
+ self.bundle.get("missing_generate_feature").format(gen_feature, x_columns)
2626
+ )
2627
+ else:
2628
+ self.__log_warning(self.bundle.get("missing_generate_feature").format(gen_feature, x_columns))
2629
2629
  else:
2630
2630
  checked_generate_features.append(gen_feature)
2631
2631
  self.generate_features = checked_generate_features
2632
2632
  self.runtime_parameters.properties["generate_features"] = ",".join(self.generate_features)
2633
2633
 
2634
- if self.columns_for_online_api is not None and len(self.columns_for_online_api) > 0:
2635
- for column in self.columns_for_online_api:
2636
- if column not in validated_X.columns:
2637
- raise ValidationError(self.bundle.get("missing_column_for_online_api").format(column))
2638
-
2639
2634
  if self.id_columns is not None:
2640
2635
  for id_column in self.id_columns:
2641
2636
  if id_column not in validated_X.columns:
@@ -111,7 +111,6 @@ x_is_empty=X is empty
111
111
  y_is_empty=y is empty
112
112
  x_contains_reserved_column_name=Column name {} is reserved. Please rename column and try again
113
113
  missing_generate_feature=Feature {} specified in `generate_features` is not present in input columns: {}
114
- missing_column_for_online_api=Column {} specified in `columns_for_online_api` is not present in input columns: {}
115
114
  x_unstable_by_date=Your training sample is unstable in number of rows per date. It is recommended to redesign the training sample
116
115
  train_unstable_target=Your training sample contains an unstable target event, PSI = {}. This will lead to unstable scoring on deferred samples. It is recommended to redesign the training sample
117
116
  eval_unstable_target=Your training and evaluation samples have a difference in target distribution. PSI = {}. The results will be unstable. It is recommended to redesign the training and evaluation samples
@@ -116,17 +116,17 @@ class EmailSearchKeyConverter:
116
116
  else:
117
117
  df[self.hem_column] = df[self.hem_column].astype("string").str.lower()
118
118
 
119
- # del self.search_keys[self.email_column]
120
- # if self.email_column in self.unnest_search_keys:
121
- # self.unnest_search_keys.remove(self.email_column)
119
+ del self.search_keys[self.email_column]
120
+ if self.email_column in self.unnest_search_keys:
121
+ self.unnest_search_keys.remove(self.email_column)
122
122
 
123
123
  one_domain_name = self.email_column + self.ONE_DOMAIN_SUFFIX
124
124
  df[one_domain_name] = df[self.email_column].apply(self._email_to_one_domain)
125
125
  self.columns_renaming[one_domain_name] = original_email_column
126
126
  self.search_keys[one_domain_name] = SearchKey.EMAIL_ONE_DOMAIN
127
127
 
128
- # if self.email_converted_to_hem:
129
- # df = df.drop(columns=self.email_column)
130
- # del self.columns_renaming[self.email_column]
128
+ if self.email_converted_to_hem:
129
+ df = df.drop(columns=self.email_column)
130
+ del self.columns_renaming[self.email_column]
131
131
 
132
132
  return df
@@ -1 +0,0 @@
1
- __version__ = "1.2.57a3675.dev4"
File without changes
File without changes
File without changes