upgini 1.1.278a2__tar.gz → 1.1.279a1__tar.gz

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of upgini might be problematic. Click here for more details.

Files changed (86)
  1. {upgini-1.1.278a2/src/upgini.egg-info → upgini-1.1.279a1}/PKG-INFO +1 -1
  2. {upgini-1.1.278a2 → upgini-1.1.279a1}/setup.py +1 -1
  3. {upgini-1.1.278a2 → upgini-1.1.279a1}/src/upgini/dataset.py +2 -11
  4. {upgini-1.1.278a2 → upgini-1.1.279a1}/src/upgini/features_enricher.py +100 -213
  5. upgini-1.1.279a1/src/upgini/fingerprint.js +8 -0
  6. {upgini-1.1.278a2 → upgini-1.1.279a1}/src/upgini/metadata.py +2 -10
  7. {upgini-1.1.278a2 → upgini-1.1.279a1}/src/upgini/metrics.py +1 -1
  8. {upgini-1.1.278a2 → upgini-1.1.279a1}/src/upgini/resource_bundle/strings.properties +0 -1
  9. upgini-1.1.279a1/src/upgini/utils/base_search_key_detector.py +25 -0
  10. {upgini-1.1.278a2 → upgini-1.1.279a1}/src/upgini/utils/datetime_utils.py +9 -10
  11. {upgini-1.1.278a2 → upgini-1.1.279a1}/src/upgini/utils/deduplicate_utils.py +1 -11
  12. {upgini-1.1.278a2 → upgini-1.1.279a1}/src/upgini/utils/email_utils.py +0 -5
  13. {upgini-1.1.278a2 → upgini-1.1.279a1/src/upgini.egg-info}/PKG-INFO +1 -1
  14. {upgini-1.1.278a2 → upgini-1.1.279a1}/src/upgini.egg-info/SOURCES.txt +1 -0
  15. {upgini-1.1.278a2 → upgini-1.1.279a1}/tests/test_country_utils.py +4 -4
  16. {upgini-1.1.278a2 → upgini-1.1.279a1}/tests/test_email_utils.py +10 -8
  17. {upgini-1.1.278a2 → upgini-1.1.279a1}/tests/test_etalon_validation.py +2 -21
  18. {upgini-1.1.278a2 → upgini-1.1.279a1}/tests/test_features_enricher.py +2 -11
  19. {upgini-1.1.278a2 → upgini-1.1.279a1}/tests/test_phone_utils.py +6 -6
  20. {upgini-1.1.278a2 → upgini-1.1.279a1}/tests/test_postal_code_utils.py +6 -6
  21. upgini-1.1.278a2/src/upgini/utils/base_search_key_detector.py +0 -27
  22. {upgini-1.1.278a2 → upgini-1.1.279a1}/LICENSE +0 -0
  23. {upgini-1.1.278a2 → upgini-1.1.279a1}/README.md +0 -0
  24. {upgini-1.1.278a2 → upgini-1.1.279a1}/pyproject.toml +0 -0
  25. {upgini-1.1.278a2 → upgini-1.1.279a1}/setup.cfg +0 -0
  26. {upgini-1.1.278a2 → upgini-1.1.279a1}/src/upgini/__init__.py +0 -0
  27. {upgini-1.1.278a2 → upgini-1.1.279a1}/src/upgini/ads.py +0 -0
  28. {upgini-1.1.278a2 → upgini-1.1.279a1}/src/upgini/ads_management/__init__.py +0 -0
  29. {upgini-1.1.278a2 → upgini-1.1.279a1}/src/upgini/ads_management/ads_manager.py +0 -0
  30. {upgini-1.1.278a2 → upgini-1.1.279a1}/src/upgini/autofe/__init__.py +0 -0
  31. {upgini-1.1.278a2 → upgini-1.1.279a1}/src/upgini/autofe/all_operands.py +0 -0
  32. {upgini-1.1.278a2 → upgini-1.1.279a1}/src/upgini/autofe/binary.py +0 -0
  33. {upgini-1.1.278a2 → upgini-1.1.279a1}/src/upgini/autofe/date.py +0 -0
  34. {upgini-1.1.278a2 → upgini-1.1.279a1}/src/upgini/autofe/feature.py +0 -0
  35. {upgini-1.1.278a2 → upgini-1.1.279a1}/src/upgini/autofe/groupby.py +0 -0
  36. {upgini-1.1.278a2 → upgini-1.1.279a1}/src/upgini/autofe/operand.py +0 -0
  37. {upgini-1.1.278a2 → upgini-1.1.279a1}/src/upgini/autofe/unary.py +0 -0
  38. {upgini-1.1.278a2 → upgini-1.1.279a1}/src/upgini/autofe/vector.py +0 -0
  39. {upgini-1.1.278a2 → upgini-1.1.279a1}/src/upgini/data_source/__init__.py +0 -0
  40. {upgini-1.1.278a2 → upgini-1.1.279a1}/src/upgini/data_source/data_source_publisher.py +0 -0
  41. {upgini-1.1.278a2 → upgini-1.1.279a1}/src/upgini/errors.py +0 -0
  42. {upgini-1.1.278a2 → upgini-1.1.279a1}/src/upgini/http.py +0 -0
  43. {upgini-1.1.278a2 → upgini-1.1.279a1}/src/upgini/mdc/__init__.py +0 -0
  44. {upgini-1.1.278a2 → upgini-1.1.279a1}/src/upgini/mdc/context.py +0 -0
  45. {upgini-1.1.278a2 → upgini-1.1.279a1}/src/upgini/normalizer/__init__.py +0 -0
  46. {upgini-1.1.278a2 → upgini-1.1.279a1}/src/upgini/normalizer/phone_normalizer.py +0 -0
  47. {upgini-1.1.278a2 → upgini-1.1.279a1}/src/upgini/resource_bundle/__init__.py +0 -0
  48. {upgini-1.1.278a2 → upgini-1.1.279a1}/src/upgini/resource_bundle/exceptions.py +0 -0
  49. {upgini-1.1.278a2 → upgini-1.1.279a1}/src/upgini/resource_bundle/strings_widget.properties +0 -0
  50. {upgini-1.1.278a2 → upgini-1.1.279a1}/src/upgini/sampler/__init__.py +0 -0
  51. {upgini-1.1.278a2 → upgini-1.1.279a1}/src/upgini/sampler/base.py +0 -0
  52. {upgini-1.1.278a2 → upgini-1.1.279a1}/src/upgini/sampler/random_under_sampler.py +0 -0
  53. {upgini-1.1.278a2 → upgini-1.1.279a1}/src/upgini/sampler/utils.py +0 -0
  54. {upgini-1.1.278a2 → upgini-1.1.279a1}/src/upgini/search_task.py +0 -0
  55. {upgini-1.1.278a2 → upgini-1.1.279a1}/src/upgini/spinner.py +0 -0
  56. {upgini-1.1.278a2 → upgini-1.1.279a1}/src/upgini/utils/__init__.py +0 -0
  57. {upgini-1.1.278a2 → upgini-1.1.279a1}/src/upgini/utils/blocked_time_series.py +0 -0
  58. {upgini-1.1.278a2 → upgini-1.1.279a1}/src/upgini/utils/country_utils.py +0 -0
  59. {upgini-1.1.278a2 → upgini-1.1.279a1}/src/upgini/utils/custom_loss_utils.py +0 -0
  60. {upgini-1.1.278a2 → upgini-1.1.279a1}/src/upgini/utils/cv_utils.py +0 -0
  61. {upgini-1.1.278a2 → upgini-1.1.279a1}/src/upgini/utils/display_utils.py +0 -0
  62. {upgini-1.1.278a2 → upgini-1.1.279a1}/src/upgini/utils/fallback_progress_bar.py +0 -0
  63. {upgini-1.1.278a2 → upgini-1.1.279a1}/src/upgini/utils/features_validator.py +0 -0
  64. {upgini-1.1.278a2 → upgini-1.1.279a1}/src/upgini/utils/format.py +0 -0
  65. {upgini-1.1.278a2 → upgini-1.1.279a1}/src/upgini/utils/ip_utils.py +0 -0
  66. {upgini-1.1.278a2 → upgini-1.1.279a1}/src/upgini/utils/phone_utils.py +0 -0
  67. {upgini-1.1.278a2 → upgini-1.1.279a1}/src/upgini/utils/postal_code_utils.py +0 -0
  68. {upgini-1.1.278a2 → upgini-1.1.279a1}/src/upgini/utils/progress_bar.py +0 -0
  69. {upgini-1.1.278a2 → upgini-1.1.279a1}/src/upgini/utils/sklearn_ext.py +0 -0
  70. {upgini-1.1.278a2 → upgini-1.1.279a1}/src/upgini/utils/target_utils.py +0 -0
  71. {upgini-1.1.278a2 → upgini-1.1.279a1}/src/upgini/utils/track_info.py +0 -0
  72. {upgini-1.1.278a2 → upgini-1.1.279a1}/src/upgini/utils/warning_counter.py +0 -0
  73. {upgini-1.1.278a2 → upgini-1.1.279a1}/src/upgini/version_validator.py +0 -0
  74. {upgini-1.1.278a2 → upgini-1.1.279a1}/src/upgini.egg-info/dependency_links.txt +0 -0
  75. {upgini-1.1.278a2 → upgini-1.1.279a1}/src/upgini.egg-info/requires.txt +0 -0
  76. {upgini-1.1.278a2 → upgini-1.1.279a1}/src/upgini.egg-info/top_level.txt +0 -0
  77. {upgini-1.1.278a2 → upgini-1.1.279a1}/tests/test_autofe_operands.py +0 -0
  78. {upgini-1.1.278a2 → upgini-1.1.279a1}/tests/test_binary_dataset.py +0 -0
  79. {upgini-1.1.278a2 → upgini-1.1.279a1}/tests/test_blocked_time_series.py +0 -0
  80. {upgini-1.1.278a2 → upgini-1.1.279a1}/tests/test_categorical_dataset.py +0 -0
  81. {upgini-1.1.278a2 → upgini-1.1.279a1}/tests/test_continuous_dataset.py +0 -0
  82. {upgini-1.1.278a2 → upgini-1.1.279a1}/tests/test_custom_loss_utils.py +0 -0
  83. {upgini-1.1.278a2 → upgini-1.1.279a1}/tests/test_datetime_utils.py +0 -0
  84. {upgini-1.1.278a2 → upgini-1.1.279a1}/tests/test_metrics.py +0 -0
  85. {upgini-1.1.278a2 → upgini-1.1.279a1}/tests/test_target_utils.py +0 -0
  86. {upgini-1.1.278a2 → upgini-1.1.279a1}/tests/test_widget.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: upgini
3
- Version: 1.1.278a2
3
+ Version: 1.1.279a1
4
4
  Summary: Intelligent data search & enrichment for Machine Learning
5
5
  Home-page: https://upgini.com/
6
6
  Author: Upgini Developers
@@ -40,7 +40,7 @@ def send_log(msg: str):
40
40
 
41
41
 
42
42
  here = Path(__file__).parent.resolve()
43
- version = "1.1.278a2"
43
+ version = "1.1.279a1"
44
44
  try:
45
45
  send_log(f"Start setup PyLib version {version}")
46
46
  setup(
@@ -23,9 +23,7 @@ from pandas.api.types import (
23
23
  from upgini.errors import ValidationError
24
24
  from upgini.http import ProgressStage, SearchProgress, _RestClient
25
25
  from upgini.metadata import (
26
- ENTITY_SYSTEM_RECORD_ID,
27
26
  EVAL_SET_INDEX,
28
- SEARCH_KEY_UNNEST,
29
27
  SYSTEM_COLUMNS,
30
28
  SYSTEM_RECORD_ID,
31
29
  TARGET,
@@ -81,7 +79,6 @@ class Dataset: # (pd.DataFrame):
81
79
  path: Optional[str] = None,
82
80
  meaning_types: Optional[Dict[str, FileColumnMeaningType]] = None,
83
81
  search_keys: Optional[List[Tuple[str, ...]]] = None,
84
- unnest_search_keys: Optional[Dict[str, str]] = None,
85
82
  model_task_type: Optional[ModelTaskType] = None,
86
83
  random_state: Optional[int] = None,
87
84
  rest_client: Optional[_RestClient] = None,
@@ -116,7 +113,6 @@ class Dataset: # (pd.DataFrame):
116
113
  self.description = description
117
114
  self.meaning_types = meaning_types
118
115
  self.search_keys = search_keys
119
- self.unnest_search_keys = unnest_search_keys
120
116
  self.ignore_columns = []
121
117
  self.hierarchical_group_keys = []
122
118
  self.hierarchical_subgroup_keys = []
@@ -176,7 +172,7 @@ class Dataset: # (pd.DataFrame):
176
172
  new_columns = []
177
173
  dup_counter = 0
178
174
  for column in self.data.columns:
179
- if column in [TARGET, EVAL_SET_INDEX, SYSTEM_RECORD_ID, ENTITY_SYSTEM_RECORD_ID, SEARCH_KEY_UNNEST]:
175
+ if column in [TARGET, EVAL_SET_INDEX, SYSTEM_RECORD_ID]:
180
176
  self.columns_renaming[column] = column
181
177
  new_columns.append(column)
182
178
  continue
@@ -357,9 +353,7 @@ class Dataset: # (pd.DataFrame):
357
353
 
358
354
  if is_string_dtype(self.data[postal_code]) or is_object_dtype(self.data[postal_code]):
359
355
  try:
360
- self.data[postal_code] = (
361
- self.data[postal_code].astype("string").astype("Float64").astype("Int64").astype("string")
362
- )
356
+ self.data[postal_code] = self.data[postal_code].astype("float64").astype("Int64").astype("string")
363
357
  except Exception:
364
358
  pass
365
359
  elif is_float_dtype(self.data[postal_code]):
@@ -809,9 +803,6 @@ class Dataset: # (pd.DataFrame):
809
803
  meaningType=meaning_type,
810
804
  minMaxValues=min_max_values,
811
805
  )
812
- if self.unnest_search_keys and column_meta.originalName in self.unnest_search_keys:
813
- column_meta.isUnnest = True
814
- column_meta.unnestKeyNames = self.unnest_search_keys[column_meta.originalName]
815
806
 
816
807
  columns.append(column_meta)
817
808