upgini 1.2.25a1__tar.gz → 1.2.27__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of upgini might be problematic. Click here for more details.

Files changed (67)
  1. {upgini-1.2.25a1 → upgini-1.2.27}/PKG-INFO +1 -1
  2. {upgini-1.2.25a1 → upgini-1.2.27}/pyproject.toml +0 -6
  3. upgini-1.2.27/src/upgini/__about__.py +1 -0
  4. upgini-1.2.27/src/upgini/__init__.py +5 -0
  5. {upgini-1.2.25a1 → upgini-1.2.27}/src/upgini/features_enricher.py +17 -14
  6. {upgini-1.2.25a1 → upgini-1.2.27}/src/upgini/resource_bundle/strings.properties +1 -1
  7. {upgini-1.2.25a1 → upgini-1.2.27}/src/upgini/utils/display_utils.py +0 -1
  8. {upgini-1.2.25a1 → upgini-1.2.27}/src/upgini/utils/target_utils.py +18 -15
  9. upgini-1.2.25a1/src/upgini/__about__.py +0 -1
  10. upgini-1.2.25a1/src/upgini/__init__.py +0 -13
  11. {upgini-1.2.25a1 → upgini-1.2.27}/.gitignore +0 -0
  12. {upgini-1.2.25a1 → upgini-1.2.27}/LICENSE +0 -0
  13. {upgini-1.2.25a1 → upgini-1.2.27}/README.md +0 -0
  14. {upgini-1.2.25a1 → upgini-1.2.27}/src/upgini/ads.py +0 -0
  15. {upgini-1.2.25a1 → upgini-1.2.27}/src/upgini/ads_management/__init__.py +0 -0
  16. {upgini-1.2.25a1 → upgini-1.2.27}/src/upgini/ads_management/ads_manager.py +0 -0
  17. {upgini-1.2.25a1 → upgini-1.2.27}/src/upgini/autofe/__init__.py +0 -0
  18. {upgini-1.2.25a1 → upgini-1.2.27}/src/upgini/autofe/all_operands.py +0 -0
  19. {upgini-1.2.25a1 → upgini-1.2.27}/src/upgini/autofe/binary.py +0 -0
  20. {upgini-1.2.25a1 → upgini-1.2.27}/src/upgini/autofe/date.py +0 -0
  21. {upgini-1.2.25a1 → upgini-1.2.27}/src/upgini/autofe/feature.py +0 -0
  22. {upgini-1.2.25a1 → upgini-1.2.27}/src/upgini/autofe/groupby.py +0 -0
  23. {upgini-1.2.25a1 → upgini-1.2.27}/src/upgini/autofe/operand.py +0 -0
  24. {upgini-1.2.25a1 → upgini-1.2.27}/src/upgini/autofe/unary.py +0 -0
  25. {upgini-1.2.25a1 → upgini-1.2.27}/src/upgini/autofe/vector.py +0 -0
  26. {upgini-1.2.25a1 → upgini-1.2.27}/src/upgini/data_source/__init__.py +0 -0
  27. {upgini-1.2.25a1 → upgini-1.2.27}/src/upgini/data_source/data_source_publisher.py +0 -0
  28. {upgini-1.2.25a1 → upgini-1.2.27}/src/upgini/dataset.py +0 -0
  29. {upgini-1.2.25a1 → upgini-1.2.27}/src/upgini/errors.py +0 -0
  30. {upgini-1.2.25a1 → upgini-1.2.27}/src/upgini/http.py +0 -0
  31. {upgini-1.2.25a1 → upgini-1.2.27}/src/upgini/lazy_import.py +0 -0
  32. {upgini-1.2.25a1 → upgini-1.2.27}/src/upgini/mdc/__init__.py +0 -0
  33. {upgini-1.2.25a1 → upgini-1.2.27}/src/upgini/mdc/context.py +0 -0
  34. {upgini-1.2.25a1 → upgini-1.2.27}/src/upgini/metadata.py +0 -0
  35. {upgini-1.2.25a1 → upgini-1.2.27}/src/upgini/metrics.py +0 -0
  36. {upgini-1.2.25a1 → upgini-1.2.27}/src/upgini/normalizer/__init__.py +0 -0
  37. {upgini-1.2.25a1 → upgini-1.2.27}/src/upgini/normalizer/normalize_utils.py +0 -0
  38. {upgini-1.2.25a1 → upgini-1.2.27}/src/upgini/resource_bundle/__init__.py +0 -0
  39. {upgini-1.2.25a1 → upgini-1.2.27}/src/upgini/resource_bundle/exceptions.py +0 -0
  40. {upgini-1.2.25a1 → upgini-1.2.27}/src/upgini/resource_bundle/strings_widget.properties +0 -0
  41. {upgini-1.2.25a1 → upgini-1.2.27}/src/upgini/sampler/__init__.py +0 -0
  42. {upgini-1.2.25a1 → upgini-1.2.27}/src/upgini/sampler/base.py +0 -0
  43. {upgini-1.2.25a1 → upgini-1.2.27}/src/upgini/sampler/random_under_sampler.py +0 -0
  44. {upgini-1.2.25a1 → upgini-1.2.27}/src/upgini/sampler/utils.py +0 -0
  45. {upgini-1.2.25a1 → upgini-1.2.27}/src/upgini/search_task.py +0 -0
  46. {upgini-1.2.25a1 → upgini-1.2.27}/src/upgini/spinner.py +0 -0
  47. {upgini-1.2.25a1 → upgini-1.2.27}/src/upgini/utils/Roboto-Regular.ttf +0 -0
  48. {upgini-1.2.25a1 → upgini-1.2.27}/src/upgini/utils/__init__.py +0 -0
  49. {upgini-1.2.25a1 → upgini-1.2.27}/src/upgini/utils/base_search_key_detector.py +0 -0
  50. {upgini-1.2.25a1 → upgini-1.2.27}/src/upgini/utils/blocked_time_series.py +0 -0
  51. {upgini-1.2.25a1 → upgini-1.2.27}/src/upgini/utils/country_utils.py +0 -0
  52. {upgini-1.2.25a1 → upgini-1.2.27}/src/upgini/utils/custom_loss_utils.py +0 -0
  53. {upgini-1.2.25a1 → upgini-1.2.27}/src/upgini/utils/cv_utils.py +0 -0
  54. {upgini-1.2.25a1 → upgini-1.2.27}/src/upgini/utils/datetime_utils.py +0 -0
  55. {upgini-1.2.25a1 → upgini-1.2.27}/src/upgini/utils/deduplicate_utils.py +0 -0
  56. {upgini-1.2.25a1 → upgini-1.2.27}/src/upgini/utils/email_utils.py +0 -0
  57. {upgini-1.2.25a1 → upgini-1.2.27}/src/upgini/utils/fallback_progress_bar.py +0 -0
  58. {upgini-1.2.25a1 → upgini-1.2.27}/src/upgini/utils/features_validator.py +0 -0
  59. {upgini-1.2.25a1 → upgini-1.2.27}/src/upgini/utils/format.py +0 -0
  60. {upgini-1.2.25a1 → upgini-1.2.27}/src/upgini/utils/ip_utils.py +0 -0
  61. {upgini-1.2.25a1 → upgini-1.2.27}/src/upgini/utils/phone_utils.py +0 -0
  62. {upgini-1.2.25a1 → upgini-1.2.27}/src/upgini/utils/postal_code_utils.py +0 -0
  63. {upgini-1.2.25a1 → upgini-1.2.27}/src/upgini/utils/progress_bar.py +0 -0
  64. {upgini-1.2.25a1 → upgini-1.2.27}/src/upgini/utils/sklearn_ext.py +0 -0
  65. {upgini-1.2.25a1 → upgini-1.2.27}/src/upgini/utils/track_info.py +0 -0
  66. {upgini-1.2.25a1 → upgini-1.2.27}/src/upgini/utils/warning_counter.py +0 -0
  67. {upgini-1.2.25a1 → upgini-1.2.27}/src/upgini/version_validator.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: upgini
3
- Version: 1.2.25a1
3
+ Version: 1.2.27
4
4
  Summary: Intelligent data search & enrichment for Machine Learning
5
5
  Project-URL: Bug Reports, https://github.com/upgini/upgini/issues
6
6
  Project-URL: Homepage, https://upgini.com/
@@ -52,12 +52,6 @@ dependencies = [
52
52
  "levenshtein>=0.25.1",
53
53
  ]
54
54
 
55
- [tool.setuptools]
56
- include-package-data = true
57
-
58
- [tool.setuptools.package-data]
59
- "upgini" = ["utils/Roboto-Regular.ttf"]
60
-
61
55
  [project.urls]
62
56
  "Bug Reports" = "https://github.com/upgini/upgini/issues"
63
57
  Homepage = "https://upgini.com/"
@@ -0,0 +1 @@
1
+ __version__ = "1.2.27"
@@ -0,0 +1,5 @@
1
+ from upgini.features_enricher import FeaturesEnricher # noqa: F401
2
+ from upgini.metadata import SearchKey, CVType, RuntimeParameters, ModelTaskType # noqa: F401
3
+ import warnings
4
+
5
+ warnings.filterwarnings("ignore", category=UserWarning, module="_distutils_hack")
@@ -2546,9 +2546,11 @@ class FeaturesEnricher(TransformerMixin):
2546
2546
  self.fit_generated_features.extend(generator.generated_features)
2547
2547
 
2548
2548
  # Checks that need validated date
2549
-
2550
- if not is_dates_distribution_valid(df, self.fit_search_keys):
2551
- self.__log_warning(bundle.get("x_unstable_by_date"))
2549
+ try:
2550
+ if not is_dates_distribution_valid(df, self.fit_search_keys):
2551
+ self.__log_warning(bundle.get("x_unstable_by_date"))
2552
+ except Exception:
2553
+ self.logger.exception("Failed to check dates distribution validity")
2552
2554
 
2553
2555
  if (
2554
2556
  is_numeric_dtype(df[self.TARGET_NAME])
@@ -3194,9 +3196,8 @@ class FeaturesEnricher(TransformerMixin):
3194
3196
 
3195
3197
  return df
3196
3198
 
3197
- @staticmethod
3198
3199
  def _add_current_date_as_key(
3199
- df: pd.DataFrame, search_keys: Dict[str, SearchKey], logger: logging.Logger, bundle: ResourceBundle
3200
+ self, df: pd.DataFrame, search_keys: Dict[str, SearchKey], logger: logging.Logger, bundle: ResourceBundle
3200
3201
  ) -> pd.DataFrame:
3201
3202
  if (
3202
3203
  set(search_keys.values()) == {SearchKey.PHONE}
@@ -3204,9 +3205,7 @@ class FeaturesEnricher(TransformerMixin):
3204
3205
  or set(search_keys.values()) == {SearchKey.HEM}
3205
3206
  or set(search_keys.values()) == {SearchKey.COUNTRY, SearchKey.POSTAL_CODE}
3206
3207
  ):
3207
- msg = bundle.get("current_date_added")
3208
- print(msg)
3209
- logger.warning(msg)
3208
+ self.__log_warning(bundle.get("current_date_added"))
3210
3209
  df[FeaturesEnricher.CURRENT_DATE] = datetime.date.today()
3211
3210
  search_keys[FeaturesEnricher.CURRENT_DATE] = SearchKey.DATE
3212
3211
  converter = DateTimeSearchKeyConverter(FeaturesEnricher.CURRENT_DATE)
@@ -4041,15 +4040,19 @@ class FeaturesEnricher(TransformerMixin):
4041
4040
  half_train = round(len(train) / 2)
4042
4041
  part1 = train[:half_train]
4043
4042
  part2 = train[half_train:]
4044
- train_psi = calculate_psi(part1[self.TARGET_NAME], part2[self.TARGET_NAME])
4045
- if train_psi > 0.2:
4046
- self.__log_warning(self.bundle.get("train_unstable_target").format(train_psi))
4043
+ train_psi_result = calculate_psi(part1[self.TARGET_NAME], part2[self.TARGET_NAME])
4044
+ if isinstance(train_psi_result, Exception):
4045
+ self.logger.exception("Failed to calculate train PSI", train_psi_result)
4046
+ elif train_psi_result > 0.2:
4047
+ self.__log_warning(self.bundle.get("train_unstable_target").format(train_psi_result))
4047
4048
 
4048
4049
  # 2. Check train-test PSI
4049
4050
  if eval1 is not None:
4050
- train_test_psi = calculate_psi(train[self.TARGET_NAME], eval1[self.TARGET_NAME])
4051
- if train_test_psi > 0.2:
4052
- self.__log_warning(self.bundle.get("eval_unstable_target").format(train_test_psi))
4051
+ train_test_psi_result = calculate_psi(train[self.TARGET_NAME], eval1[self.TARGET_NAME])
4052
+ if isinstance(train_test_psi_result, Exception):
4053
+ self.logger.exception("Failed to calculate test PSI", train_test_psi_result)
4054
+ elif train_test_psi_result > 0.2:
4055
+ self.__log_warning(self.bundle.get("eval_unstable_target").format(train_test_psi_result))
4053
4056
 
4054
4057
  def _dump_python_libs(self):
4055
4058
  try:
@@ -9,7 +9,7 @@ search_stopped=Search request stopped
9
9
  polling_search_task=\nRunning search request, search_id={}
10
10
  polling_unregister_information=We'll send email notification once it's completed, just use your personal api_key from profile.upgini.com
11
11
  ads_upload_finish=Thank you for your submission!\nWe'll check your data sharing proposal and get back to you
12
- demo_dataset_info=Demo training dataset detected. Registration for an API key is not required.
12
+ demo_dataset_info=Demo training dataset detected. Registration for an API key is not required.\n
13
13
  transform_usage_info=You use Trial access to Upgini data enrichment. Limit for Trial: {} rows. You have already enriched: {} rows.
14
14
  transform_usage_warning=You are trying to launch enrichment for {} rows, which will exceed the rest limit {}.
15
15
 
@@ -169,7 +169,6 @@ def make_html_report(
169
169
  from pkg_resources import resource_filename
170
170
  font_path = resource_filename('upgini.utils', 'Roboto-Regular.ttf')
171
171
 
172
- print(font_path)
173
172
  return f"""<html>
174
173
  <head>
175
174
  <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
@@ -3,7 +3,7 @@ from typing import Optional, Union
3
3
 
4
4
  import numpy as np
5
5
  import pandas as pd
6
- from pandas.api.types import is_numeric_dtype
6
+ from pandas.api.types import is_numeric_dtype, is_bool_dtype
7
7
 
8
8
  from upgini.errors import ValidationError
9
9
  from upgini.metadata import SYSTEM_RECORD_ID, ModelTaskType
@@ -229,22 +229,25 @@ def balance_undersample(
229
229
  return resampled_data
230
230
 
231
231
 
232
- def calculate_psi(expected: pd.Series, actual: pd.Series) -> float:
233
- df = pd.concat([expected, actual])
232
+ def calculate_psi(expected: pd.Series, actual: pd.Series) -> Union[float, Exception]:
233
+ try:
234
+ df = pd.concat([expected, actual])
234
235
 
235
- # Define the bins for the target variable
236
- df_min = df.min()
237
- df_max = df.max()
238
- bins = [df_min, (df_min + df_max) / 2, df_max]
236
+ if is_bool_dtype(df):
237
+ df = np.where(df, 1, 0)
239
238
 
240
- # Calculate the base distribution
241
- train_distribution = expected.value_counts(bins=bins, normalize=True).sort_index().values
239
+ # Define the bins for the target variable
240
+ df_min = df.min()
241
+ df_max = df.max()
242
+ bins = [df_min, (df_min + df_max) / 2, df_max]
242
243
 
243
- # Calculate the target distribution
244
- test_distribution = actual.value_counts(bins=bins, normalize=True).sort_index().values
244
+ # Calculate the base distribution
245
+ train_distribution = expected.value_counts(bins=bins, normalize=True).sort_index().values
245
246
 
246
- # Calculate the PSI
247
- try:
247
+ # Calculate the target distribution
248
+ test_distribution = actual.value_counts(bins=bins, normalize=True).sort_index().values
249
+
250
+ # Calculate the PSI
248
251
  return np.sum((train_distribution - test_distribution) * np.log(train_distribution / test_distribution))
249
- except Exception:
250
- return np.nan
252
+ except Exception as e:
253
+ return e
@@ -1 +0,0 @@
1
- __version__ = "1.2.25a1"
@@ -1,13 +0,0 @@
1
- import os
2
-
3
- from upgini.features_enricher import FeaturesEnricher # noqa: F401
4
- from upgini.metadata import SearchKey, CVType, RuntimeParameters, ModelTaskType # noqa: F401
5
- # from .lazy_import import LazyImport
6
-
7
- os.environ["SETUPTOOLS_USE_DISTUTILS"] = "stdlib"
8
-
9
- # FeaturesEnricher = LazyImport("upgini.features_enricher", "FeaturesEnricher")
10
- # SearchKey = LazyImport("upgini.metadata", "SearchKey")
11
- # RuntimeParameters = LazyImport("upgini.metadata", "RuntimeParameters")
12
- # CVType = LazyImport("upgini.metadata", "CVType")
13
- # ModelTaskType = LazyImport("upgini.metadata", "ModelTaskType")
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes