upgini 1.1.277__tar.gz → 1.1.278a2__tar.gz

This diff compares the contents of two publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.

Potentially problematic release.


This version of upgini might be problematic. Click here for more details.

Files changed (86):
  1. {upgini-1.1.277/src/upgini.egg-info → upgini-1.1.278a2}/PKG-INFO +1 -1
  2. {upgini-1.1.277 → upgini-1.1.278a2}/setup.py +1 -1
  3. {upgini-1.1.277 → upgini-1.1.278a2}/src/upgini/dataset.py +11 -2
  4. {upgini-1.1.277 → upgini-1.1.278a2}/src/upgini/features_enricher.py +213 -100
  5. {upgini-1.1.277 → upgini-1.1.278a2}/src/upgini/metadata.py +10 -2
  6. {upgini-1.1.277 → upgini-1.1.278a2}/src/upgini/metrics.py +1 -1
  7. {upgini-1.1.277 → upgini-1.1.278a2}/src/upgini/resource_bundle/strings.properties +1 -0
  8. upgini-1.1.278a2/src/upgini/utils/base_search_key_detector.py +27 -0
  9. {upgini-1.1.277 → upgini-1.1.278a2}/src/upgini/utils/datetime_utils.py +2 -2
  10. {upgini-1.1.277 → upgini-1.1.278a2}/src/upgini/utils/deduplicate_utils.py +11 -1
  11. {upgini-1.1.277 → upgini-1.1.278a2}/src/upgini/utils/email_utils.py +5 -0
  12. {upgini-1.1.277 → upgini-1.1.278a2/src/upgini.egg-info}/PKG-INFO +1 -1
  13. {upgini-1.1.277 → upgini-1.1.278a2}/src/upgini.egg-info/SOURCES.txt +0 -1
  14. {upgini-1.1.277 → upgini-1.1.278a2}/tests/test_country_utils.py +4 -4
  15. {upgini-1.1.277 → upgini-1.1.278a2}/tests/test_email_utils.py +8 -10
  16. {upgini-1.1.277 → upgini-1.1.278a2}/tests/test_etalon_validation.py +21 -2
  17. {upgini-1.1.277 → upgini-1.1.278a2}/tests/test_features_enricher.py +11 -2
  18. {upgini-1.1.277 → upgini-1.1.278a2}/tests/test_phone_utils.py +6 -6
  19. {upgini-1.1.277 → upgini-1.1.278a2}/tests/test_postal_code_utils.py +6 -6
  20. upgini-1.1.277/src/upgini/fingerprint.js +0 -8
  21. upgini-1.1.277/src/upgini/utils/base_search_key_detector.py +0 -25
  22. {upgini-1.1.277 → upgini-1.1.278a2}/LICENSE +0 -0
  23. {upgini-1.1.277 → upgini-1.1.278a2}/README.md +0 -0
  24. {upgini-1.1.277 → upgini-1.1.278a2}/pyproject.toml +0 -0
  25. {upgini-1.1.277 → upgini-1.1.278a2}/setup.cfg +0 -0
  26. {upgini-1.1.277 → upgini-1.1.278a2}/src/upgini/__init__.py +0 -0
  27. {upgini-1.1.277 → upgini-1.1.278a2}/src/upgini/ads.py +0 -0
  28. {upgini-1.1.277 → upgini-1.1.278a2}/src/upgini/ads_management/__init__.py +0 -0
  29. {upgini-1.1.277 → upgini-1.1.278a2}/src/upgini/ads_management/ads_manager.py +0 -0
  30. {upgini-1.1.277 → upgini-1.1.278a2}/src/upgini/autofe/__init__.py +0 -0
  31. {upgini-1.1.277 → upgini-1.1.278a2}/src/upgini/autofe/all_operands.py +0 -0
  32. {upgini-1.1.277 → upgini-1.1.278a2}/src/upgini/autofe/binary.py +0 -0
  33. {upgini-1.1.277 → upgini-1.1.278a2}/src/upgini/autofe/date.py +0 -0
  34. {upgini-1.1.277 → upgini-1.1.278a2}/src/upgini/autofe/feature.py +0 -0
  35. {upgini-1.1.277 → upgini-1.1.278a2}/src/upgini/autofe/groupby.py +0 -0
  36. {upgini-1.1.277 → upgini-1.1.278a2}/src/upgini/autofe/operand.py +0 -0
  37. {upgini-1.1.277 → upgini-1.1.278a2}/src/upgini/autofe/unary.py +0 -0
  38. {upgini-1.1.277 → upgini-1.1.278a2}/src/upgini/autofe/vector.py +0 -0
  39. {upgini-1.1.277 → upgini-1.1.278a2}/src/upgini/data_source/__init__.py +0 -0
  40. {upgini-1.1.277 → upgini-1.1.278a2}/src/upgini/data_source/data_source_publisher.py +0 -0
  41. {upgini-1.1.277 → upgini-1.1.278a2}/src/upgini/errors.py +0 -0
  42. {upgini-1.1.277 → upgini-1.1.278a2}/src/upgini/http.py +0 -0
  43. {upgini-1.1.277 → upgini-1.1.278a2}/src/upgini/mdc/__init__.py +0 -0
  44. {upgini-1.1.277 → upgini-1.1.278a2}/src/upgini/mdc/context.py +0 -0
  45. {upgini-1.1.277 → upgini-1.1.278a2}/src/upgini/normalizer/__init__.py +0 -0
  46. {upgini-1.1.277 → upgini-1.1.278a2}/src/upgini/normalizer/phone_normalizer.py +0 -0
  47. {upgini-1.1.277 → upgini-1.1.278a2}/src/upgini/resource_bundle/__init__.py +0 -0
  48. {upgini-1.1.277 → upgini-1.1.278a2}/src/upgini/resource_bundle/exceptions.py +0 -0
  49. {upgini-1.1.277 → upgini-1.1.278a2}/src/upgini/resource_bundle/strings_widget.properties +0 -0
  50. {upgini-1.1.277 → upgini-1.1.278a2}/src/upgini/sampler/__init__.py +0 -0
  51. {upgini-1.1.277 → upgini-1.1.278a2}/src/upgini/sampler/base.py +0 -0
  52. {upgini-1.1.277 → upgini-1.1.278a2}/src/upgini/sampler/random_under_sampler.py +0 -0
  53. {upgini-1.1.277 → upgini-1.1.278a2}/src/upgini/sampler/utils.py +0 -0
  54. {upgini-1.1.277 → upgini-1.1.278a2}/src/upgini/search_task.py +0 -0
  55. {upgini-1.1.277 → upgini-1.1.278a2}/src/upgini/spinner.py +0 -0
  56. {upgini-1.1.277 → upgini-1.1.278a2}/src/upgini/utils/__init__.py +0 -0
  57. {upgini-1.1.277 → upgini-1.1.278a2}/src/upgini/utils/blocked_time_series.py +0 -0
  58. {upgini-1.1.277 → upgini-1.1.278a2}/src/upgini/utils/country_utils.py +0 -0
  59. {upgini-1.1.277 → upgini-1.1.278a2}/src/upgini/utils/custom_loss_utils.py +0 -0
  60. {upgini-1.1.277 → upgini-1.1.278a2}/src/upgini/utils/cv_utils.py +0 -0
  61. {upgini-1.1.277 → upgini-1.1.278a2}/src/upgini/utils/display_utils.py +0 -0
  62. {upgini-1.1.277 → upgini-1.1.278a2}/src/upgini/utils/fallback_progress_bar.py +0 -0
  63. {upgini-1.1.277 → upgini-1.1.278a2}/src/upgini/utils/features_validator.py +0 -0
  64. {upgini-1.1.277 → upgini-1.1.278a2}/src/upgini/utils/format.py +0 -0
  65. {upgini-1.1.277 → upgini-1.1.278a2}/src/upgini/utils/ip_utils.py +0 -0
  66. {upgini-1.1.277 → upgini-1.1.278a2}/src/upgini/utils/phone_utils.py +0 -0
  67. {upgini-1.1.277 → upgini-1.1.278a2}/src/upgini/utils/postal_code_utils.py +0 -0
  68. {upgini-1.1.277 → upgini-1.1.278a2}/src/upgini/utils/progress_bar.py +0 -0
  69. {upgini-1.1.277 → upgini-1.1.278a2}/src/upgini/utils/sklearn_ext.py +0 -0
  70. {upgini-1.1.277 → upgini-1.1.278a2}/src/upgini/utils/target_utils.py +0 -0
  71. {upgini-1.1.277 → upgini-1.1.278a2}/src/upgini/utils/track_info.py +0 -0
  72. {upgini-1.1.277 → upgini-1.1.278a2}/src/upgini/utils/warning_counter.py +0 -0
  73. {upgini-1.1.277 → upgini-1.1.278a2}/src/upgini/version_validator.py +0 -0
  74. {upgini-1.1.277 → upgini-1.1.278a2}/src/upgini.egg-info/dependency_links.txt +0 -0
  75. {upgini-1.1.277 → upgini-1.1.278a2}/src/upgini.egg-info/requires.txt +0 -0
  76. {upgini-1.1.277 → upgini-1.1.278a2}/src/upgini.egg-info/top_level.txt +0 -0
  77. {upgini-1.1.277 → upgini-1.1.278a2}/tests/test_autofe_operands.py +0 -0
  78. {upgini-1.1.277 → upgini-1.1.278a2}/tests/test_binary_dataset.py +0 -0
  79. {upgini-1.1.277 → upgini-1.1.278a2}/tests/test_blocked_time_series.py +0 -0
  80. {upgini-1.1.277 → upgini-1.1.278a2}/tests/test_categorical_dataset.py +0 -0
  81. {upgini-1.1.277 → upgini-1.1.278a2}/tests/test_continuous_dataset.py +0 -0
  82. {upgini-1.1.277 → upgini-1.1.278a2}/tests/test_custom_loss_utils.py +0 -0
  83. {upgini-1.1.277 → upgini-1.1.278a2}/tests/test_datetime_utils.py +0 -0
  84. {upgini-1.1.277 → upgini-1.1.278a2}/tests/test_metrics.py +0 -0
  85. {upgini-1.1.277 → upgini-1.1.278a2}/tests/test_target_utils.py +0 -0
  86. {upgini-1.1.277 → upgini-1.1.278a2}/tests/test_widget.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: upgini
3
- Version: 1.1.277
3
+ Version: 1.1.278a2
4
4
  Summary: Intelligent data search & enrichment for Machine Learning
5
5
  Home-page: https://upgini.com/
6
6
  Author: Upgini Developers
@@ -40,7 +40,7 @@ def send_log(msg: str):
40
40
 
41
41
 
42
42
  here = Path(__file__).parent.resolve()
43
- version = "1.1.277"
43
+ version = "1.1.278a2"
44
44
  try:
45
45
  send_log(f"Start setup PyLib version {version}")
46
46
  setup(
@@ -23,7 +23,9 @@ from pandas.api.types import (
23
23
  from upgini.errors import ValidationError
24
24
  from upgini.http import ProgressStage, SearchProgress, _RestClient
25
25
  from upgini.metadata import (
26
+ ENTITY_SYSTEM_RECORD_ID,
26
27
  EVAL_SET_INDEX,
28
+ SEARCH_KEY_UNNEST,
27
29
  SYSTEM_COLUMNS,
28
30
  SYSTEM_RECORD_ID,
29
31
  TARGET,
@@ -79,6 +81,7 @@ class Dataset: # (pd.DataFrame):
79
81
  path: Optional[str] = None,
80
82
  meaning_types: Optional[Dict[str, FileColumnMeaningType]] = None,
81
83
  search_keys: Optional[List[Tuple[str, ...]]] = None,
84
+ unnest_search_keys: Optional[Dict[str, str]] = None,
82
85
  model_task_type: Optional[ModelTaskType] = None,
83
86
  random_state: Optional[int] = None,
84
87
  rest_client: Optional[_RestClient] = None,
@@ -113,6 +116,7 @@ class Dataset: # (pd.DataFrame):
113
116
  self.description = description
114
117
  self.meaning_types = meaning_types
115
118
  self.search_keys = search_keys
119
+ self.unnest_search_keys = unnest_search_keys
116
120
  self.ignore_columns = []
117
121
  self.hierarchical_group_keys = []
118
122
  self.hierarchical_subgroup_keys = []
@@ -172,7 +176,7 @@ class Dataset: # (pd.DataFrame):
172
176
  new_columns = []
173
177
  dup_counter = 0
174
178
  for column in self.data.columns:
175
- if column in [TARGET, EVAL_SET_INDEX, SYSTEM_RECORD_ID]:
179
+ if column in [TARGET, EVAL_SET_INDEX, SYSTEM_RECORD_ID, ENTITY_SYSTEM_RECORD_ID, SEARCH_KEY_UNNEST]:
176
180
  self.columns_renaming[column] = column
177
181
  new_columns.append(column)
178
182
  continue
@@ -353,7 +357,9 @@ class Dataset: # (pd.DataFrame):
353
357
 
354
358
  if is_string_dtype(self.data[postal_code]) or is_object_dtype(self.data[postal_code]):
355
359
  try:
356
- self.data[postal_code] = self.data[postal_code].astype("float64").astype("Int64").astype("string")
360
+ self.data[postal_code] = (
361
+ self.data[postal_code].astype("string").astype("Float64").astype("Int64").astype("string")
362
+ )
357
363
  except Exception:
358
364
  pass
359
365
  elif is_float_dtype(self.data[postal_code]):
@@ -803,6 +809,9 @@ class Dataset: # (pd.DataFrame):
803
809
  meaningType=meaning_type,
804
810
  minMaxValues=min_max_values,
805
811
  )
812
+ if self.unnest_search_keys and column_meta.originalName in self.unnest_search_keys:
813
+ column_meta.isUnnest = True
814
+ column_meta.unnestKeyNames = self.unnest_search_keys[column_meta.originalName]
806
815
 
807
816
  columns.append(column_meta)
808
817