upgini 1.1.275a1__tar.gz → 1.1.275a99__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of upgini might be problematic. Click here for more details.

Files changed (86)
  1. {upgini-1.1.275a1/src/upgini.egg-info → upgini-1.1.275a99}/PKG-INFO +2 -2
  2. {upgini-1.1.275a1 → upgini-1.1.275a99}/setup.py +2 -2
  3. {upgini-1.1.275a1 → upgini-1.1.275a99}/src/upgini/autofe/date.py +9 -2
  4. {upgini-1.1.275a1 → upgini-1.1.275a99}/src/upgini/data_source/data_source_publisher.py +1 -1
  5. {upgini-1.1.275a1 → upgini-1.1.275a99}/src/upgini/dataset.py +2 -10
  6. {upgini-1.1.275a1 → upgini-1.1.275a99}/src/upgini/features_enricher.py +150 -218
  7. {upgini-1.1.275a1 → upgini-1.1.275a99}/src/upgini/metadata.py +1 -9
  8. {upgini-1.1.275a1 → upgini-1.1.275a99}/src/upgini/metrics.py +12 -0
  9. {upgini-1.1.275a1 → upgini-1.1.275a99}/src/upgini/normalizer/phone_normalizer.py +2 -2
  10. {upgini-1.1.275a1 → upgini-1.1.275a99}/src/upgini/resource_bundle/strings.properties +2 -2
  11. upgini-1.1.275a99/src/upgini/utils/base_search_key_detector.py +25 -0
  12. {upgini-1.1.275a1 → upgini-1.1.275a99}/src/upgini/utils/datetime_utils.py +3 -0
  13. {upgini-1.1.275a1 → upgini-1.1.275a99}/src/upgini/utils/deduplicate_utils.py +1 -11
  14. {upgini-1.1.275a1 → upgini-1.1.275a99}/src/upgini/utils/email_utils.py +0 -5
  15. {upgini-1.1.275a1 → upgini-1.1.275a99}/src/upgini/utils/features_validator.py +2 -1
  16. {upgini-1.1.275a1 → upgini-1.1.275a99}/src/upgini/utils/track_info.py +25 -13
  17. {upgini-1.1.275a1 → upgini-1.1.275a99/src/upgini.egg-info}/PKG-INFO +2 -2
  18. {upgini-1.1.275a1 → upgini-1.1.275a99}/src/upgini.egg-info/requires.txt +1 -1
  19. {upgini-1.1.275a1 → upgini-1.1.275a99}/tests/test_autofe_operands.py +2 -1
  20. {upgini-1.1.275a1 → upgini-1.1.275a99}/tests/test_country_utils.py +4 -4
  21. {upgini-1.1.275a1 → upgini-1.1.275a99}/tests/test_email_utils.py +10 -8
  22. {upgini-1.1.275a1 → upgini-1.1.275a99}/tests/test_etalon_validation.py +2 -21
  23. {upgini-1.1.275a1 → upgini-1.1.275a99}/tests/test_features_enricher.py +18 -23
  24. {upgini-1.1.275a1 → upgini-1.1.275a99}/tests/test_phone_utils.py +6 -6
  25. {upgini-1.1.275a1 → upgini-1.1.275a99}/tests/test_postal_code_utils.py +6 -6
  26. upgini-1.1.275a1/src/upgini/utils/base_search_key_detector.py +0 -27
  27. {upgini-1.1.275a1 → upgini-1.1.275a99}/LICENSE +0 -0
  28. {upgini-1.1.275a1 → upgini-1.1.275a99}/README.md +0 -0
  29. {upgini-1.1.275a1 → upgini-1.1.275a99}/pyproject.toml +0 -0
  30. {upgini-1.1.275a1 → upgini-1.1.275a99}/setup.cfg +0 -0
  31. {upgini-1.1.275a1 → upgini-1.1.275a99}/src/upgini/__init__.py +0 -0
  32. {upgini-1.1.275a1 → upgini-1.1.275a99}/src/upgini/ads.py +0 -0
  33. {upgini-1.1.275a1 → upgini-1.1.275a99}/src/upgini/ads_management/__init__.py +0 -0
  34. {upgini-1.1.275a1 → upgini-1.1.275a99}/src/upgini/ads_management/ads_manager.py +0 -0
  35. {upgini-1.1.275a1 → upgini-1.1.275a99}/src/upgini/autofe/__init__.py +0 -0
  36. {upgini-1.1.275a1 → upgini-1.1.275a99}/src/upgini/autofe/all_operands.py +0 -0
  37. {upgini-1.1.275a1 → upgini-1.1.275a99}/src/upgini/autofe/binary.py +0 -0
  38. {upgini-1.1.275a1 → upgini-1.1.275a99}/src/upgini/autofe/feature.py +0 -0
  39. {upgini-1.1.275a1 → upgini-1.1.275a99}/src/upgini/autofe/groupby.py +0 -0
  40. {upgini-1.1.275a1 → upgini-1.1.275a99}/src/upgini/autofe/operand.py +0 -0
  41. {upgini-1.1.275a1 → upgini-1.1.275a99}/src/upgini/autofe/unary.py +0 -0
  42. {upgini-1.1.275a1 → upgini-1.1.275a99}/src/upgini/autofe/vector.py +0 -0
  43. {upgini-1.1.275a1 → upgini-1.1.275a99}/src/upgini/data_source/__init__.py +0 -0
  44. {upgini-1.1.275a1 → upgini-1.1.275a99}/src/upgini/errors.py +0 -0
  45. {upgini-1.1.275a1 → upgini-1.1.275a99}/src/upgini/fingerprint.js +0 -0
  46. {upgini-1.1.275a1 → upgini-1.1.275a99}/src/upgini/http.py +0 -0
  47. {upgini-1.1.275a1 → upgini-1.1.275a99}/src/upgini/mdc/__init__.py +0 -0
  48. {upgini-1.1.275a1 → upgini-1.1.275a99}/src/upgini/mdc/context.py +0 -0
  49. {upgini-1.1.275a1 → upgini-1.1.275a99}/src/upgini/normalizer/__init__.py +0 -0
  50. {upgini-1.1.275a1 → upgini-1.1.275a99}/src/upgini/resource_bundle/__init__.py +0 -0
  51. {upgini-1.1.275a1 → upgini-1.1.275a99}/src/upgini/resource_bundle/exceptions.py +0 -0
  52. {upgini-1.1.275a1 → upgini-1.1.275a99}/src/upgini/resource_bundle/strings_widget.properties +0 -0
  53. {upgini-1.1.275a1 → upgini-1.1.275a99}/src/upgini/sampler/__init__.py +0 -0
  54. {upgini-1.1.275a1 → upgini-1.1.275a99}/src/upgini/sampler/base.py +0 -0
  55. {upgini-1.1.275a1 → upgini-1.1.275a99}/src/upgini/sampler/random_under_sampler.py +0 -0
  56. {upgini-1.1.275a1 → upgini-1.1.275a99}/src/upgini/sampler/utils.py +0 -0
  57. {upgini-1.1.275a1 → upgini-1.1.275a99}/src/upgini/search_task.py +0 -0
  58. {upgini-1.1.275a1 → upgini-1.1.275a99}/src/upgini/spinner.py +0 -0
  59. {upgini-1.1.275a1 → upgini-1.1.275a99}/src/upgini/utils/__init__.py +0 -0
  60. {upgini-1.1.275a1 → upgini-1.1.275a99}/src/upgini/utils/blocked_time_series.py +0 -0
  61. {upgini-1.1.275a1 → upgini-1.1.275a99}/src/upgini/utils/country_utils.py +0 -0
  62. {upgini-1.1.275a1 → upgini-1.1.275a99}/src/upgini/utils/custom_loss_utils.py +0 -0
  63. {upgini-1.1.275a1 → upgini-1.1.275a99}/src/upgini/utils/cv_utils.py +0 -0
  64. {upgini-1.1.275a1 → upgini-1.1.275a99}/src/upgini/utils/display_utils.py +0 -0
  65. {upgini-1.1.275a1 → upgini-1.1.275a99}/src/upgini/utils/fallback_progress_bar.py +0 -0
  66. {upgini-1.1.275a1 → upgini-1.1.275a99}/src/upgini/utils/format.py +0 -0
  67. {upgini-1.1.275a1 → upgini-1.1.275a99}/src/upgini/utils/ip_utils.py +0 -0
  68. {upgini-1.1.275a1 → upgini-1.1.275a99}/src/upgini/utils/phone_utils.py +0 -0
  69. {upgini-1.1.275a1 → upgini-1.1.275a99}/src/upgini/utils/postal_code_utils.py +0 -0
  70. {upgini-1.1.275a1 → upgini-1.1.275a99}/src/upgini/utils/progress_bar.py +0 -0
  71. {upgini-1.1.275a1 → upgini-1.1.275a99}/src/upgini/utils/sklearn_ext.py +0 -0
  72. {upgini-1.1.275a1 → upgini-1.1.275a99}/src/upgini/utils/target_utils.py +0 -0
  73. {upgini-1.1.275a1 → upgini-1.1.275a99}/src/upgini/utils/warning_counter.py +0 -0
  74. {upgini-1.1.275a1 → upgini-1.1.275a99}/src/upgini/version_validator.py +0 -0
  75. {upgini-1.1.275a1 → upgini-1.1.275a99}/src/upgini.egg-info/SOURCES.txt +0 -0
  76. {upgini-1.1.275a1 → upgini-1.1.275a99}/src/upgini.egg-info/dependency_links.txt +0 -0
  77. {upgini-1.1.275a1 → upgini-1.1.275a99}/src/upgini.egg-info/top_level.txt +0 -0
  78. {upgini-1.1.275a1 → upgini-1.1.275a99}/tests/test_binary_dataset.py +0 -0
  79. {upgini-1.1.275a1 → upgini-1.1.275a99}/tests/test_blocked_time_series.py +0 -0
  80. {upgini-1.1.275a1 → upgini-1.1.275a99}/tests/test_categorical_dataset.py +0 -0
  81. {upgini-1.1.275a1 → upgini-1.1.275a99}/tests/test_continuous_dataset.py +0 -0
  82. {upgini-1.1.275a1 → upgini-1.1.275a99}/tests/test_custom_loss_utils.py +0 -0
  83. {upgini-1.1.275a1 → upgini-1.1.275a99}/tests/test_datetime_utils.py +0 -0
  84. {upgini-1.1.275a1 → upgini-1.1.275a99}/tests/test_metrics.py +0 -0
  85. {upgini-1.1.275a1 → upgini-1.1.275a99}/tests/test_target_utils.py +0 -0
  86. {upgini-1.1.275a1 → upgini-1.1.275a99}/tests/test_widget.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: upgini
3
- Version: 1.1.275a1
3
+ Version: 1.1.275a99
4
4
  Summary: Intelligent data search & enrichment for Machine Learning
5
5
  Home-page: https://upgini.com/
6
6
  Author: Upgini Developers
@@ -28,7 +28,7 @@ Description-Content-Type: text/markdown
28
28
  License-File: LICENSE
29
29
  Requires-Dist: python-dateutil>=2.8.0
30
30
  Requires-Dist: requests>=2.8.0
31
- Requires-Dist: pandas<2.0.0,>=1.1.0
31
+ Requires-Dist: pandas<3.0.0,>=1.1.0
32
32
  Requires-Dist: numpy>=1.19.0
33
33
  Requires-Dist: scikit-learn>=1.3.0
34
34
  Requires-Dist: pydantic<2.0.0,>=1.8.2
@@ -40,7 +40,7 @@ def send_log(msg: str):
40
40
 
41
41
 
42
42
  here = Path(__file__).parent.resolve()
43
- version = "1.1.275a1"
43
+ version = "1.1.275a99"
44
44
  try:
45
45
  send_log(f"Start setup PyLib version {version}")
46
46
  setup(
@@ -77,7 +77,7 @@ try:
77
77
  install_requires=[
78
78
  "python-dateutil>=2.8.0",
79
79
  "requests>=2.8.0",
80
- "pandas>=1.1.0,<2.0.0",
80
+ "pandas>=1.1.0,<3.0.0",
81
81
  "numpy>=1.19.0",
82
82
  "scikit-learn>=1.3.0",
83
83
  "pydantic>=1.8.2,<2.0.0",
@@ -2,6 +2,7 @@ from typing import Any, Optional, Union
2
2
  import numpy as np
3
3
  import pandas as pd
4
4
  from pydantic import BaseModel
5
+ from pandas.core.arrays.timedeltas import TimedeltaArray
5
6
 
6
7
  from upgini.autofe.operand import PandasOperand
7
8
 
@@ -46,6 +47,7 @@ class DateDiffType2(PandasOperand, DateDiffMixin):
46
47
  future = right + (left.dt.year - right.dt.year).apply(
47
48
  lambda y: np.datetime64("NaT") if np.isnan(y) else pd.tseries.offsets.DateOffset(years=y)
48
49
  )
50
+ future = pd.to_datetime(future)
49
51
  before = future[future < left]
50
52
  future[future < left] = before + pd.tseries.offsets.DateOffset(years=1)
51
53
  diff = (future - left) / np.timedelta64(1, self.diff_unit)
@@ -72,8 +74,13 @@ class DateListDiff(PandasOperand, DateDiffMixin):
72
74
 
73
75
  return pd.Series(left - right.values).apply(lambda x: self._agg(self._diff(x)))
74
76
 
75
- def _diff(self, x):
76
- x = x / np.timedelta64(1, self.diff_unit)
77
+ def _diff(self, x: TimedeltaArray):
78
+ if self.diff_unit == "Y":
79
+ x = (x / 365 / 24 / 60 / 60 / 10**9).astype(int)
80
+ elif self.diff_unit == "M":
81
+ raise Exception("Unsupported difference unit: Month")
82
+ else:
83
+ x = x / np.timedelta64(1, self.diff_unit)
77
84
  return x[x > 0]
78
85
 
79
86
  def _agg(self, x):
@@ -48,6 +48,7 @@ class DataSourcePublisher:
48
48
  data_table_uri: str,
49
49
  search_keys: Dict[str, SearchKey],
50
50
  update_frequency: str,
51
+ exclude_from_autofe_generation: Optional[List[str]],
51
52
  secondary_search_keys: Optional[Dict[str, SearchKey]] = None,
52
53
  sort_column: Optional[str] = None,
53
54
  date_format: Optional[str] = None,
@@ -57,7 +58,6 @@ class DataSourcePublisher:
57
58
  join_date_abs_limit_days: Optional[int] = None,
58
59
  features_for_embeddings: Optional[List[str]] = DEFAULT_GENERATE_EMBEDDINGS,
59
60
  data_table_id_to_replace: Optional[str] = None,
60
- exclude_from_autofe_generation: Optional[List[str]] = None,
61
61
  _force_generation=False,
62
62
  _silent=False,
63
63
  ) -> str:
@@ -22,9 +22,7 @@ from pandas.api.types import (
22
22
  from upgini.errors import ValidationError
23
23
  from upgini.http import ProgressStage, SearchProgress, _RestClient
24
24
  from upgini.metadata import (
25
- ENTITY_SYSTEM_RECORD_ID,
26
25
  EVAL_SET_INDEX,
27
- SEARCH_KEY_UNNEST,
28
26
  SYSTEM_COLUMNS,
29
27
  SYSTEM_RECORD_ID,
30
28
  TARGET,
@@ -80,7 +78,6 @@ class Dataset: # (pd.DataFrame):
80
78
  path: Optional[str] = None,
81
79
  meaning_types: Optional[Dict[str, FileColumnMeaningType]] = None,
82
80
  search_keys: Optional[List[Tuple[str, ...]]] = None,
83
- unnest_search_keys: Optional[List[str]] = None,
84
81
  model_task_type: Optional[ModelTaskType] = None,
85
82
  random_state: Optional[int] = None,
86
83
  rest_client: Optional[_RestClient] = None,
@@ -115,7 +112,6 @@ class Dataset: # (pd.DataFrame):
115
112
  self.description = description
116
113
  self.meaning_types = meaning_types
117
114
  self.search_keys = search_keys
118
- self.unnest_search_keys = unnest_search_keys
119
115
  self.ignore_columns = []
120
116
  self.hierarchical_group_keys = []
121
117
  self.hierarchical_subgroup_keys = []
@@ -175,7 +171,7 @@ class Dataset: # (pd.DataFrame):
175
171
  new_columns = []
176
172
  dup_counter = 0
177
173
  for column in self.data.columns:
178
- if column in [TARGET, EVAL_SET_INDEX, SYSTEM_RECORD_ID, ENTITY_SYSTEM_RECORD_ID, SEARCH_KEY_UNNEST]:
174
+ if column in [TARGET, EVAL_SET_INDEX, SYSTEM_RECORD_ID]:
179
175
  self.columns_renaming[column] = column
180
176
  new_columns.append(column)
181
177
  continue
@@ -356,9 +352,7 @@ class Dataset: # (pd.DataFrame):
356
352
 
357
353
  if is_string_dtype(self.data[postal_code]):
358
354
  try:
359
- self.data[postal_code] = (
360
- self.data[postal_code].astype("string").astype("Float64").astype("Int64").astype("string")
361
- )
355
+ self.data[postal_code] = self.data[postal_code].astype("float64").astype("Int64").astype("string")
362
356
  except Exception:
363
357
  pass
364
358
  elif is_float_dtype(self.data[postal_code]):
@@ -808,8 +802,6 @@ class Dataset: # (pd.DataFrame):
808
802
  meaningType=meaning_type,
809
803
  minMaxValues=min_max_values,
810
804
  )
811
- if self.unnest_search_keys and column_meta.originalName in self.unnest_search_keys:
812
- column_meta.isUnnest = True
813
805
 
814
806
  columns.append(column_meta)
815
807