upgini 1.2.142a2__tar.gz → 1.2.143__tar.gz

This diff shows the changes between two publicly released versions of the package, as they appear in their respective public registries. It is provided for informational purposes only.

Potentially problematic release.


This version of upgini might be problematic. Click here for more details.

Files changed (82)
  1. {upgini-1.2.142a2 → upgini-1.2.143}/PKG-INFO +1 -1
  2. upgini-1.2.143/src/upgini/__about__.py +1 -0
  3. {upgini-1.2.142a2 → upgini-1.2.143}/src/upgini/features_enricher.py +5 -5
  4. {upgini-1.2.142a2 → upgini-1.2.143}/src/upgini/utils/postal_code_utils.py +31 -2
  5. upgini-1.2.142a2/src/upgini/__about__.py +0 -1
  6. {upgini-1.2.142a2 → upgini-1.2.143}/.gitignore +0 -0
  7. {upgini-1.2.142a2 → upgini-1.2.143}/LICENSE +0 -0
  8. {upgini-1.2.142a2 → upgini-1.2.143}/README.md +0 -0
  9. {upgini-1.2.142a2 → upgini-1.2.143}/pyproject.toml +0 -0
  10. {upgini-1.2.142a2 → upgini-1.2.143}/src/upgini/__init__.py +0 -0
  11. {upgini-1.2.142a2 → upgini-1.2.143}/src/upgini/ads.py +0 -0
  12. {upgini-1.2.142a2 → upgini-1.2.143}/src/upgini/ads_management/__init__.py +0 -0
  13. {upgini-1.2.142a2 → upgini-1.2.143}/src/upgini/ads_management/ads_manager.py +0 -0
  14. {upgini-1.2.142a2 → upgini-1.2.143}/src/upgini/autofe/__init__.py +0 -0
  15. {upgini-1.2.142a2 → upgini-1.2.143}/src/upgini/autofe/all_operators.py +0 -0
  16. {upgini-1.2.142a2 → upgini-1.2.143}/src/upgini/autofe/binary.py +0 -0
  17. {upgini-1.2.142a2 → upgini-1.2.143}/src/upgini/autofe/date.py +0 -0
  18. {upgini-1.2.142a2 → upgini-1.2.143}/src/upgini/autofe/feature.py +0 -0
  19. {upgini-1.2.142a2 → upgini-1.2.143}/src/upgini/autofe/groupby.py +0 -0
  20. {upgini-1.2.142a2 → upgini-1.2.143}/src/upgini/autofe/operator.py +0 -0
  21. {upgini-1.2.142a2 → upgini-1.2.143}/src/upgini/autofe/timeseries/__init__.py +0 -0
  22. {upgini-1.2.142a2 → upgini-1.2.143}/src/upgini/autofe/timeseries/base.py +0 -0
  23. {upgini-1.2.142a2 → upgini-1.2.143}/src/upgini/autofe/timeseries/cross.py +0 -0
  24. {upgini-1.2.142a2 → upgini-1.2.143}/src/upgini/autofe/timeseries/delta.py +0 -0
  25. {upgini-1.2.142a2 → upgini-1.2.143}/src/upgini/autofe/timeseries/lag.py +0 -0
  26. {upgini-1.2.142a2 → upgini-1.2.143}/src/upgini/autofe/timeseries/roll.py +0 -0
  27. {upgini-1.2.142a2 → upgini-1.2.143}/src/upgini/autofe/timeseries/trend.py +0 -0
  28. {upgini-1.2.142a2 → upgini-1.2.143}/src/upgini/autofe/timeseries/volatility.py +0 -0
  29. {upgini-1.2.142a2 → upgini-1.2.143}/src/upgini/autofe/unary.py +0 -0
  30. {upgini-1.2.142a2 → upgini-1.2.143}/src/upgini/autofe/utils.py +0 -0
  31. {upgini-1.2.142a2 → upgini-1.2.143}/src/upgini/autofe/vector.py +0 -0
  32. {upgini-1.2.142a2 → upgini-1.2.143}/src/upgini/data_source/__init__.py +0 -0
  33. {upgini-1.2.142a2 → upgini-1.2.143}/src/upgini/data_source/data_source_publisher.py +0 -0
  34. {upgini-1.2.142a2 → upgini-1.2.143}/src/upgini/dataset.py +0 -0
  35. {upgini-1.2.142a2 → upgini-1.2.143}/src/upgini/errors.py +0 -0
  36. {upgini-1.2.142a2 → upgini-1.2.143}/src/upgini/http.py +0 -0
  37. {upgini-1.2.142a2 → upgini-1.2.143}/src/upgini/mdc/__init__.py +0 -0
  38. {upgini-1.2.142a2 → upgini-1.2.143}/src/upgini/mdc/context.py +0 -0
  39. {upgini-1.2.142a2 → upgini-1.2.143}/src/upgini/metadata.py +0 -0
  40. {upgini-1.2.142a2 → upgini-1.2.143}/src/upgini/metrics.py +0 -0
  41. {upgini-1.2.142a2 → upgini-1.2.143}/src/upgini/normalizer/__init__.py +0 -0
  42. {upgini-1.2.142a2 → upgini-1.2.143}/src/upgini/normalizer/normalize_utils.py +0 -0
  43. {upgini-1.2.142a2 → upgini-1.2.143}/src/upgini/resource_bundle/__init__.py +0 -0
  44. {upgini-1.2.142a2 → upgini-1.2.143}/src/upgini/resource_bundle/exceptions.py +0 -0
  45. {upgini-1.2.142a2 → upgini-1.2.143}/src/upgini/resource_bundle/strings.properties +0 -0
  46. {upgini-1.2.142a2 → upgini-1.2.143}/src/upgini/resource_bundle/strings_widget.properties +0 -0
  47. {upgini-1.2.142a2 → upgini-1.2.143}/src/upgini/sampler/__init__.py +0 -0
  48. {upgini-1.2.142a2 → upgini-1.2.143}/src/upgini/sampler/base.py +0 -0
  49. {upgini-1.2.142a2 → upgini-1.2.143}/src/upgini/sampler/random_under_sampler.py +0 -0
  50. {upgini-1.2.142a2 → upgini-1.2.143}/src/upgini/sampler/utils.py +0 -0
  51. {upgini-1.2.142a2 → upgini-1.2.143}/src/upgini/search_task.py +0 -0
  52. {upgini-1.2.142a2 → upgini-1.2.143}/src/upgini/spinner.py +0 -0
  53. {upgini-1.2.142a2 → upgini-1.2.143}/src/upgini/utils/Roboto-Regular.ttf +0 -0
  54. {upgini-1.2.142a2 → upgini-1.2.143}/src/upgini/utils/__init__.py +0 -0
  55. {upgini-1.2.142a2 → upgini-1.2.143}/src/upgini/utils/base_search_key_detector.py +0 -0
  56. {upgini-1.2.142a2 → upgini-1.2.143}/src/upgini/utils/blocked_time_series.py +0 -0
  57. {upgini-1.2.142a2 → upgini-1.2.143}/src/upgini/utils/config.py +0 -0
  58. {upgini-1.2.142a2 → upgini-1.2.143}/src/upgini/utils/country_utils.py +0 -0
  59. {upgini-1.2.142a2 → upgini-1.2.143}/src/upgini/utils/custom_loss_utils.py +0 -0
  60. {upgini-1.2.142a2 → upgini-1.2.143}/src/upgini/utils/cv_utils.py +0 -0
  61. {upgini-1.2.142a2 → upgini-1.2.143}/src/upgini/utils/datetime_utils.py +0 -0
  62. {upgini-1.2.142a2 → upgini-1.2.143}/src/upgini/utils/deduplicate_utils.py +0 -0
  63. {upgini-1.2.142a2 → upgini-1.2.143}/src/upgini/utils/display_utils.py +0 -0
  64. {upgini-1.2.142a2 → upgini-1.2.143}/src/upgini/utils/email_utils.py +0 -0
  65. {upgini-1.2.142a2 → upgini-1.2.143}/src/upgini/utils/fallback_progress_bar.py +0 -0
  66. {upgini-1.2.142a2 → upgini-1.2.143}/src/upgini/utils/feature_info.py +0 -0
  67. {upgini-1.2.142a2 → upgini-1.2.143}/src/upgini/utils/features_validator.py +0 -0
  68. {upgini-1.2.142a2 → upgini-1.2.143}/src/upgini/utils/format.py +0 -0
  69. {upgini-1.2.142a2 → upgini-1.2.143}/src/upgini/utils/hash_utils.py +0 -0
  70. {upgini-1.2.142a2 → upgini-1.2.143}/src/upgini/utils/ip_utils.py +0 -0
  71. {upgini-1.2.142a2 → upgini-1.2.143}/src/upgini/utils/mstats.py +0 -0
  72. {upgini-1.2.142a2 → upgini-1.2.143}/src/upgini/utils/phone_utils.py +0 -0
  73. {upgini-1.2.142a2 → upgini-1.2.143}/src/upgini/utils/progress_bar.py +0 -0
  74. {upgini-1.2.142a2 → upgini-1.2.143}/src/upgini/utils/psi.py +0 -0
  75. {upgini-1.2.142a2 → upgini-1.2.143}/src/upgini/utils/sample_utils.py +0 -0
  76. {upgini-1.2.142a2 → upgini-1.2.143}/src/upgini/utils/sklearn_ext.py +0 -0
  77. {upgini-1.2.142a2 → upgini-1.2.143}/src/upgini/utils/sort.py +0 -0
  78. {upgini-1.2.142a2 → upgini-1.2.143}/src/upgini/utils/target_utils.py +0 -0
  79. {upgini-1.2.142a2 → upgini-1.2.143}/src/upgini/utils/track_info.py +0 -0
  80. {upgini-1.2.142a2 → upgini-1.2.143}/src/upgini/utils/ts_utils.py +0 -0
  81. {upgini-1.2.142a2 → upgini-1.2.143}/src/upgini/utils/warning_counter.py +0 -0
  82. {upgini-1.2.142a2 → upgini-1.2.143}/src/upgini/version_validator.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: upgini
3
- Version: 1.2.142a2
3
+ Version: 1.2.143
4
4
  Summary: Intelligent data search & enrichment for Machine Learning
5
5
  Project-URL: Bug Reports, https://github.com/upgini/upgini/issues
6
6
  Project-URL: Homepage, https://upgini.com/
@@ -0,0 +1 @@
1
+ __version__ = "1.2.143"
@@ -2553,9 +2553,7 @@ if response.status_code == 200:
2553
2553
 
2554
2554
  self.__validate_search_keys(search_keys, self.search_id)
2555
2555
 
2556
- validated_X, validated_y, validated_eval_set = self._validate_train_eval(
2557
- X, y, eval_set=None, is_transform=True
2558
- )
2556
+ validated_X, validated_y, validated_eval_set = self._validate_train_eval(X, y, eval_set=None, is_transform=True)
2559
2557
  df = self.__combine_train_and_eval_sets(validated_X, validated_y, validated_eval_set)
2560
2558
 
2561
2559
  validated_Xy = df.copy()
@@ -4784,7 +4782,7 @@ if response.status_code == 200:
4784
4782
  maybe_date = [k for k, v in valid_search_keys.items() if v in [SearchKey.DATE, SearchKey.DATETIME]]
4785
4783
  if (self.cv is None or self.cv == CVType.k_fold) and len(maybe_date) > 0 and not silent_mode:
4786
4784
  date_column = next(iter(maybe_date))
4787
- if x[date_column].nunique() > 0.9 * _num_samples(x):
4785
+ if x[date_column].nunique() > 0.9 * _num_samples(x) and not is_transform:
4788
4786
  msg = self.bundle.get("date_search_without_time_series")
4789
4787
  self.__log_warning(msg)
4790
4788
 
@@ -5025,7 +5023,9 @@ if response.status_code == 200:
5025
5023
  f"File x.parquet was already uploaded with digest {x_digest_sha256}, skipping"
5026
5024
  )
5027
5025
  else:
5028
- self.rest_client.dump_input_file(f"{tmp_dir}/x.parquet", "x.parquet", x_digest_sha256)
5026
+ self.rest_client.dump_input_file(
5027
+ trace_id_, f"{tmp_dir}/x.parquet", "x.parquet", x_digest_sha256
5028
+ )
5029
5029
 
5030
5030
  if y_ is not None:
5031
5031
  if isinstance(y_, pd.Series):
@@ -4,16 +4,45 @@ from pandas.api.types import (
4
4
  is_object_dtype,
5
5
  is_string_dtype,
6
6
  )
7
+ import re
7
8
 
8
9
  from upgini.utils.base_search_key_detector import BaseSearchKeyDetector
9
10
 
10
11
 
11
12
  class PostalCodeSearchKeyDetector(BaseSearchKeyDetector):
13
+ postal_pattern = re.compile(r'^[A-Za-z0-9][A-Za-z0-9\s\-]{1,9}$')
14
+
12
15
  def _is_search_key_by_name(self, column_name: str) -> bool:
13
- return str(column_name).lower() in ["zip", "zipcode", "zip_code", "postal_code", "postalcode"]
16
+ return "zip" in str(column_name).lower() or "postal" in str(column_name).lower()
14
17
 
15
18
  def _is_search_key_by_values(self, column: pd.Series) -> bool:
16
- return False
19
+ """
20
+ # Fast two-step check whether the column looks like a postal code.
21
+ # Returns True if, after removing missing values, values remain,
22
+ # and all of them match the common characteristics of a postal code.
23
+ """
24
+ s = column.copy().dropna().astype(str).str.strip()
25
+ s = s[s != ""] # remove empty strings
26
+ if s.empty:
27
+ return False
28
+
29
+ # remove suffix ".0" (often after float)
30
+ s = s.str.replace(r"\.0$", "", regex=True)
31
+
32
+ # --- Step 1: fast filtering ---
33
+ mask_len = s.str.len().between(2, 10)
34
+ mask_digit = s.str.contains(r'\d', regex=True)
35
+ mask_chars = ~s.str.contains(r'[^A-Za-z0-9\s\-]', regex=True)
36
+ fast_mask = mask_len & mask_digit & mask_chars
37
+
38
+ # if any of them failed the fast check, return False
39
+ if not fast_mask.all():
40
+ return False
41
+
42
+ # --- Step 2: regex check ---
43
+ # only if the first step passed
44
+ valid_mask = s.apply(lambda x: bool(self.postal_pattern.fullmatch(x)))
45
+ return valid_mask.all()
17
46
 
18
47
 
19
48
  class PostalCodeSearchKeyConverter:
@@ -1 +0,0 @@
1
- __version__ = "1.2.142a2"
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes