upgini 1.1.290a3232.post1__tar.gz → 1.1.291__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of upgini might be problematic. Click here for more details.

Files changed (64)
  1. {upgini-1.1.290a3232.post1 → upgini-1.1.291}/PKG-INFO +1 -1
  2. upgini-1.1.291/src/upgini/__about__.py +1 -0
  3. {upgini-1.1.290a3232.post1 → upgini-1.1.291}/src/upgini/autofe/all_operands.py +2 -2
  4. {upgini-1.1.290a3232.post1 → upgini-1.1.291}/src/upgini/autofe/binary.py +7 -0
  5. {upgini-1.1.290a3232.post1 → upgini-1.1.291}/src/upgini/autofe/operand.py +1 -0
  6. {upgini-1.1.290a3232.post1 → upgini-1.1.291}/src/upgini/autofe/unary.py +8 -7
  7. {upgini-1.1.290a3232.post1 → upgini-1.1.291}/src/upgini/utils/datetime_utils.py +15 -8
  8. upgini-1.1.290a3232.post1/src/upgini/__about__.py +0 -1
  9. {upgini-1.1.290a3232.post1 → upgini-1.1.291}/.gitignore +0 -0
  10. {upgini-1.1.290a3232.post1 → upgini-1.1.291}/LICENSE +0 -0
  11. {upgini-1.1.290a3232.post1 → upgini-1.1.291}/README.md +0 -0
  12. {upgini-1.1.290a3232.post1 → upgini-1.1.291}/pyproject.toml +0 -0
  13. {upgini-1.1.290a3232.post1 → upgini-1.1.291}/src/upgini/__init__.py +0 -0
  14. {upgini-1.1.290a3232.post1 → upgini-1.1.291}/src/upgini/ads.py +0 -0
  15. {upgini-1.1.290a3232.post1 → upgini-1.1.291}/src/upgini/ads_management/__init__.py +0 -0
  16. {upgini-1.1.290a3232.post1 → upgini-1.1.291}/src/upgini/ads_management/ads_manager.py +0 -0
  17. {upgini-1.1.290a3232.post1 → upgini-1.1.291}/src/upgini/autofe/__init__.py +0 -0
  18. {upgini-1.1.290a3232.post1 → upgini-1.1.291}/src/upgini/autofe/date.py +0 -0
  19. {upgini-1.1.290a3232.post1 → upgini-1.1.291}/src/upgini/autofe/feature.py +0 -0
  20. {upgini-1.1.290a3232.post1 → upgini-1.1.291}/src/upgini/autofe/groupby.py +0 -0
  21. {upgini-1.1.290a3232.post1 → upgini-1.1.291}/src/upgini/autofe/vector.py +0 -0
  22. {upgini-1.1.290a3232.post1 → upgini-1.1.291}/src/upgini/data_source/__init__.py +0 -0
  23. {upgini-1.1.290a3232.post1 → upgini-1.1.291}/src/upgini/data_source/data_source_publisher.py +0 -0
  24. {upgini-1.1.290a3232.post1 → upgini-1.1.291}/src/upgini/dataset.py +0 -0
  25. {upgini-1.1.290a3232.post1 → upgini-1.1.291}/src/upgini/errors.py +0 -0
  26. {upgini-1.1.290a3232.post1 → upgini-1.1.291}/src/upgini/features_enricher.py +0 -0
  27. {upgini-1.1.290a3232.post1 → upgini-1.1.291}/src/upgini/http.py +0 -0
  28. {upgini-1.1.290a3232.post1 → upgini-1.1.291}/src/upgini/mdc/__init__.py +0 -0
  29. {upgini-1.1.290a3232.post1 → upgini-1.1.291}/src/upgini/mdc/context.py +0 -0
  30. {upgini-1.1.290a3232.post1 → upgini-1.1.291}/src/upgini/metadata.py +0 -0
  31. {upgini-1.1.290a3232.post1 → upgini-1.1.291}/src/upgini/metrics.py +0 -0
  32. {upgini-1.1.290a3232.post1 → upgini-1.1.291}/src/upgini/normalizer/__init__.py +0 -0
  33. {upgini-1.1.290a3232.post1 → upgini-1.1.291}/src/upgini/normalizer/phone_normalizer.py +0 -0
  34. {upgini-1.1.290a3232.post1 → upgini-1.1.291}/src/upgini/resource_bundle/__init__.py +0 -0
  35. {upgini-1.1.290a3232.post1 → upgini-1.1.291}/src/upgini/resource_bundle/exceptions.py +0 -0
  36. {upgini-1.1.290a3232.post1 → upgini-1.1.291}/src/upgini/resource_bundle/strings.properties +0 -0
  37. {upgini-1.1.290a3232.post1 → upgini-1.1.291}/src/upgini/resource_bundle/strings_widget.properties +0 -0
  38. {upgini-1.1.290a3232.post1 → upgini-1.1.291}/src/upgini/sampler/__init__.py +0 -0
  39. {upgini-1.1.290a3232.post1 → upgini-1.1.291}/src/upgini/sampler/base.py +0 -0
  40. {upgini-1.1.290a3232.post1 → upgini-1.1.291}/src/upgini/sampler/random_under_sampler.py +0 -0
  41. {upgini-1.1.290a3232.post1 → upgini-1.1.291}/src/upgini/sampler/utils.py +0 -0
  42. {upgini-1.1.290a3232.post1 → upgini-1.1.291}/src/upgini/search_task.py +0 -0
  43. {upgini-1.1.290a3232.post1 → upgini-1.1.291}/src/upgini/spinner.py +0 -0
  44. {upgini-1.1.290a3232.post1 → upgini-1.1.291}/src/upgini/utils/__init__.py +0 -0
  45. {upgini-1.1.290a3232.post1 → upgini-1.1.291}/src/upgini/utils/base_search_key_detector.py +0 -0
  46. {upgini-1.1.290a3232.post1 → upgini-1.1.291}/src/upgini/utils/blocked_time_series.py +0 -0
  47. {upgini-1.1.290a3232.post1 → upgini-1.1.291}/src/upgini/utils/country_utils.py +0 -0
  48. {upgini-1.1.290a3232.post1 → upgini-1.1.291}/src/upgini/utils/custom_loss_utils.py +0 -0
  49. {upgini-1.1.290a3232.post1 → upgini-1.1.291}/src/upgini/utils/cv_utils.py +0 -0
  50. {upgini-1.1.290a3232.post1 → upgini-1.1.291}/src/upgini/utils/deduplicate_utils.py +0 -0
  51. {upgini-1.1.290a3232.post1 → upgini-1.1.291}/src/upgini/utils/display_utils.py +0 -0
  52. {upgini-1.1.290a3232.post1 → upgini-1.1.291}/src/upgini/utils/email_utils.py +0 -0
  53. {upgini-1.1.290a3232.post1 → upgini-1.1.291}/src/upgini/utils/fallback_progress_bar.py +0 -0
  54. {upgini-1.1.290a3232.post1 → upgini-1.1.291}/src/upgini/utils/features_validator.py +0 -0
  55. {upgini-1.1.290a3232.post1 → upgini-1.1.291}/src/upgini/utils/format.py +0 -0
  56. {upgini-1.1.290a3232.post1 → upgini-1.1.291}/src/upgini/utils/ip_utils.py +0 -0
  57. {upgini-1.1.290a3232.post1 → upgini-1.1.291}/src/upgini/utils/phone_utils.py +0 -0
  58. {upgini-1.1.290a3232.post1 → upgini-1.1.291}/src/upgini/utils/postal_code_utils.py +0 -0
  59. {upgini-1.1.290a3232.post1 → upgini-1.1.291}/src/upgini/utils/progress_bar.py +0 -0
  60. {upgini-1.1.290a3232.post1 → upgini-1.1.291}/src/upgini/utils/sklearn_ext.py +0 -0
  61. {upgini-1.1.290a3232.post1 → upgini-1.1.291}/src/upgini/utils/target_utils.py +0 -0
  62. {upgini-1.1.290a3232.post1 → upgini-1.1.291}/src/upgini/utils/track_info.py +0 -0
  63. {upgini-1.1.290a3232.post1 → upgini-1.1.291}/src/upgini/utils/warning_counter.py +0 -0
  64. {upgini-1.1.290a3232.post1 → upgini-1.1.291}/src/upgini/version_validator.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: upgini
3
- Version: 1.1.290a3232.post1
3
+ Version: 1.1.291
4
4
  Summary: Intelligent data search & enrichment for Machine Learning
5
5
  Project-URL: Bug Reports, https://github.com/upgini/upgini/issues
6
6
  Project-URL: Homepage, https://upgini.com/
@@ -0,0 +1 @@
1
+ __version__ = "1.1.291"
@@ -4,7 +4,7 @@ from upgini.autofe.binary import Add, Divide, Max, Min, Multiply, Sim, Subtract
4
4
  from upgini.autofe.date import DateDiff, DateDiffType2, DateListDiff, DateListDiffBounded, DatePercentile
5
5
  from upgini.autofe.groupby import GroupByThenAgg, GroupByThenRank
6
6
  from upgini.autofe.operand import Operand
7
- from upgini.autofe.unary import Abs, Floor, Freq, Log, Residual, Scale, Sigmoid, Sqrt, Square
7
+ from upgini.autofe.unary import Abs, Floor, Freq, Log, Residual, Norm, Sigmoid, Sqrt, Square
8
8
  from upgini.autofe.vector import Mean, Sum
9
9
 
10
10
  ALL_OPERANDS: Dict[str, Operand] = {
@@ -50,7 +50,7 @@ ALL_OPERANDS: Dict[str, Operand] = {
50
50
  DateListDiffBounded(diff_unit="Y", aggregation="count", lower_bound=45, upper_bound=60),
51
51
  DateListDiffBounded(diff_unit="Y", aggregation="count", lower_bound=60),
52
52
  DatePercentile(),
53
- Scale(),
53
+ Norm(),
54
54
  ]
55
55
  }
56
56
 
@@ -9,6 +9,7 @@ from upgini.autofe.operand import PandasOperand, VectorizableMixin
9
9
  class Min(PandasOperand):
10
10
  name = "min"
11
11
  is_binary = True
12
+ is_symmetrical = True
12
13
  has_symmetry_importance = True
13
14
 
14
15
  def calculate_binary(self, left: pd.Series, right: pd.Series) -> pd.Series:
@@ -18,6 +19,7 @@ class Min(PandasOperand):
18
19
  class Max(PandasOperand):
19
20
  name = "max"
20
21
  is_binary = True
22
+ is_symmetrical = True
21
23
  has_symmetry_importance = True
22
24
 
23
25
  def calculate_binary(self, left: pd.Series, right: pd.Series) -> pd.Series:
@@ -28,6 +30,7 @@ class Add(PandasOperand, VectorizableMixin):
28
30
  name = "+"
29
31
  alias = "add"
30
32
  is_binary = True
33
+ is_symmetrical = True
31
34
  has_symmetry_importance = True
32
35
  is_vectorizable = True
33
36
 
@@ -46,6 +49,7 @@ class Subtract(PandasOperand, VectorizableMixin):
46
49
  name = "-"
47
50
  alias = "sub"
48
51
  is_binary = True
52
+ is_symmetrical = True
49
53
  has_symmetry_importance = True
50
54
  is_vectorizable = True
51
55
 
@@ -64,6 +68,7 @@ class Multiply(PandasOperand, VectorizableMixin):
64
68
  name = "*"
65
69
  alias = "mul"
66
70
  is_binary = True
71
+ is_symmetrical = True
67
72
  has_symmetry_importance = True
68
73
  is_vectorizable = True
69
74
 
@@ -112,6 +117,7 @@ class Combine(PandasOperand):
112
117
  class CombineThenFreq(PandasOperand):
113
118
  name = "CombineThenFreq"
114
119
  is_binary = True
120
+ is_symmetrical = True
115
121
  has_symmetry_importance = True
116
122
  output_type = "float"
117
123
  is_distribution_dependent = True
@@ -128,6 +134,7 @@ class Sim(PandasOperand):
128
134
  name = "sim"
129
135
  is_binary = True
130
136
  output_type = "float"
137
+ is_symmetrical = True
131
138
  has_symmetry_importance = True
132
139
 
133
140
  def calculate_binary(self, left: pd.Series, right: pd.Series) -> pd.Series:
@@ -10,6 +10,7 @@ class Operand(BaseModel):
10
10
  name: str
11
11
  alias: Optional[str]
12
12
  is_unary: bool = False
13
+ is_symmetrical: bool = False
13
14
  has_symmetry_importance: bool = False
14
15
  input_type: Optional[str]
15
16
  output_type: Optional[str]
@@ -1,6 +1,6 @@
1
1
  import numpy as np
2
2
  import pandas as pd
3
- from sklearn.preprocessing import robust_scale
3
+ from sklearn.preprocessing import Normalizer
4
4
 
5
5
  from upgini.autofe.operand import PandasOperand, VectorizableMixin
6
6
 
@@ -114,13 +114,14 @@ class Freq(PandasOperand):
114
114
  return self._loc(data, value_counts)
115
115
 
116
116
 
117
- class Scale(PandasOperand, VectorizableMixin):
118
- name = "scale"
117
+ class Norm(PandasOperand):
118
+ name = "norm"
119
119
  is_unary = True
120
120
  output_type = "float"
121
121
 
122
122
  def calculate_unary(self, data: pd.Series) -> pd.Series:
123
- return pd.Series(robust_scale(data), index=data.index, name=data.name)
124
-
125
- def calculate_group(self, data: pd.DataFrame, **kwargs) -> pd.DataFrame:
126
- return pd.DataFrame(robust_scale(data), index=data.index, columns=data.columns)
123
+ data_dropna = data.dropna()
124
+ normalized_data = Normalizer().transform(data_dropna.to_frame().T).T
125
+ normalized_data = pd.Series(normalized_data[:, 0], index=data_dropna.index, name=data.name)
126
+ normalized_data = normalized_data.reindex(data.index)
127
+ return normalized_data
@@ -82,19 +82,19 @@ class DateTimeSearchKeyConverter:
82
82
  elif isinstance(df[self.date_column].values[0], datetime.date):
83
83
  df[self.date_column] = pd.to_datetime(df[self.date_column], errors="coerce")
84
84
  elif is_period_dtype(df[self.date_column]):
85
- df[self.date_column] = pd.to_datetime(df[self.date_column].astype("string"))
85
+ df[self.date_column] = df[self.date_column].dt.to_timestamp()
86
86
  elif is_numeric_dtype(df[self.date_column]):
87
87
  # 315532801 - 2524608001 - seconds
88
88
  # 315532801000 - 2524608001000 - milliseconds
89
89
  # 315532801000000 - 2524608001000000 - microseconds
90
90
  # 315532801000000000 - 2524608001000000000 - nanoseconds
91
- if df[self.date_column].apply(lambda x: 10**16 < x).all():
91
+ if df[self.date_column].apply(lambda x: 10 ** 16 < x).all():
92
92
  df[self.date_column] = pd.to_datetime(df[self.date_column], unit="ns")
93
- elif df[self.date_column].apply(lambda x: 10**14 < x < 10**16).all():
93
+ elif df[self.date_column].apply(lambda x: 10 ** 14 < x < 10 ** 16).all():
94
94
  df[self.date_column] = pd.to_datetime(df[self.date_column], unit="us")
95
- elif df[self.date_column].apply(lambda x: 10**11 < x < 10**14).all():
95
+ elif df[self.date_column].apply(lambda x: 10 ** 11 < x < 10 ** 14).all():
96
96
  df[self.date_column] = pd.to_datetime(df[self.date_column], unit="ms")
97
- elif df[self.date_column].apply(lambda x: 0 < x < 10 * 11).all():
97
+ elif df[self.date_column].apply(lambda x: 0 < x < 10 ** 11).all():
98
98
  df[self.date_column] = pd.to_datetime(df[self.date_column], unit="s")
99
99
  else:
100
100
  msg = self.bundle.get("unsupported_date_type").format(self.date_column)
@@ -185,7 +185,10 @@ def is_time_series(df: pd.DataFrame, date_col: str) -> bool:
185
185
  def is_blocked_time_series(df: pd.DataFrame, date_col: str, search_keys: List[str]) -> bool:
186
186
  df = df.copy()
187
187
  seconds = "datetime_seconds"
188
- df[date_col] = pd.to_datetime(df[date_col])
188
+ if is_period_dtype(df[date_col]):
189
+ df[date_col] = df[date_col].dt.to_timestamp()
190
+ else:
191
+ df[date_col] = pd.to_datetime(df[date_col])
189
192
  df[date_col] = df[date_col].dt.tz_localize(None)
190
193
  df[seconds] = (df[date_col] - df[date_col].dt.floor("D")).dt.seconds
191
194
 
@@ -248,7 +251,9 @@ def validate_dates_distribution(
248
251
  if col in search_keys:
249
252
  continue
250
253
  try:
251
- if pd.__version__ >= "2.0.0":
254
+ if is_period_dtype(X[col]):
255
+ pass
256
+ elif pd.__version__ >= "2.0.0":
252
257
  # Format mixed to avoid massive warnings
253
258
  pd.to_datetime(X[col], format="mixed")
254
259
  else:
@@ -261,7 +266,9 @@ def validate_dates_distribution(
261
266
  if maybe_date_col is None:
262
267
  return
263
268
 
264
- if pd.__version__ >= "2.0.0":
269
+ if is_period_dtype(X[maybe_date_col]):
270
+ dates = X[maybe_date_col].dt.to_timestamp().dt.date
271
+ elif pd.__version__ >= "2.0.0":
265
272
  dates = pd.to_datetime(X[maybe_date_col], format="mixed").dt.date
266
273
  else:
267
274
  dates = pd.to_datetime(X[maybe_date_col]).dt.date
@@ -1 +0,0 @@
1
- __version__ = "1.1.290a3232-1"
File without changes
File without changes
File without changes