upgini 1.1.299__tar.gz → 1.1.299a3511.dev6__tar.gz

This diff shows the changes between two publicly available versions of the package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.

Potentially problematic release.


This version of upgini might be problematic. Click here for more details.

Files changed (66)
  1. {upgini-1.1.299 → upgini-1.1.299a3511.dev6}/PKG-INFO +5 -3
  2. {upgini-1.1.299 → upgini-1.1.299a3511.dev6}/README.md +2 -2
  3. {upgini-1.1.299 → upgini-1.1.299a3511.dev6}/pyproject.toml +3 -0
  4. upgini-1.1.299a3511.dev6/src/upgini/__about__.py +1 -0
  5. {upgini-1.1.299 → upgini-1.1.299a3511.dev6}/src/upgini/autofe/all_operands.py +26 -7
  6. {upgini-1.1.299 → upgini-1.1.299a3511.dev6}/src/upgini/autofe/binary.py +91 -2
  7. {upgini-1.1.299 → upgini-1.1.299a3511.dev6}/src/upgini/autofe/date.py +16 -3
  8. {upgini-1.1.299 → upgini-1.1.299a3511.dev6}/src/upgini/autofe/feature.py +3 -2
  9. {upgini-1.1.299 → upgini-1.1.299a3511.dev6}/src/upgini/autofe/unary.py +7 -0
  10. {upgini-1.1.299 → upgini-1.1.299a3511.dev6}/src/upgini/dataset.py +2 -11
  11. {upgini-1.1.299 → upgini-1.1.299a3511.dev6}/src/upgini/features_enricher.py +101 -222
  12. {upgini-1.1.299 → upgini-1.1.299a3511.dev6}/src/upgini/metadata.py +2 -10
  13. {upgini-1.1.299 → upgini-1.1.299a3511.dev6}/src/upgini/metrics.py +1 -1
  14. {upgini-1.1.299 → upgini-1.1.299a3511.dev6}/src/upgini/resource_bundle/strings.properties +0 -1
  15. upgini-1.1.299a3511.dev6/src/upgini/utils/base_search_key_detector.py +25 -0
  16. {upgini-1.1.299 → upgini-1.1.299a3511.dev6}/src/upgini/utils/deduplicate_utils.py +1 -11
  17. {upgini-1.1.299 → upgini-1.1.299a3511.dev6}/src/upgini/utils/email_utils.py +0 -5
  18. upgini-1.1.299/src/upgini/__about__.py +0 -1
  19. upgini-1.1.299/src/upgini/utils/base_search_key_detector.py +0 -27
  20. {upgini-1.1.299 → upgini-1.1.299a3511.dev6}/.gitignore +0 -0
  21. {upgini-1.1.299 → upgini-1.1.299a3511.dev6}/LICENSE +0 -0
  22. {upgini-1.1.299 → upgini-1.1.299a3511.dev6}/src/upgini/__init__.py +0 -0
  23. {upgini-1.1.299 → upgini-1.1.299a3511.dev6}/src/upgini/ads.py +0 -0
  24. {upgini-1.1.299 → upgini-1.1.299a3511.dev6}/src/upgini/ads_management/__init__.py +0 -0
  25. {upgini-1.1.299 → upgini-1.1.299a3511.dev6}/src/upgini/ads_management/ads_manager.py +0 -0
  26. {upgini-1.1.299 → upgini-1.1.299a3511.dev6}/src/upgini/autofe/__init__.py +0 -0
  27. {upgini-1.1.299 → upgini-1.1.299a3511.dev6}/src/upgini/autofe/groupby.py +0 -0
  28. {upgini-1.1.299 → upgini-1.1.299a3511.dev6}/src/upgini/autofe/operand.py +0 -0
  29. {upgini-1.1.299 → upgini-1.1.299a3511.dev6}/src/upgini/autofe/vector.py +0 -0
  30. {upgini-1.1.299 → upgini-1.1.299a3511.dev6}/src/upgini/data_source/__init__.py +0 -0
  31. {upgini-1.1.299 → upgini-1.1.299a3511.dev6}/src/upgini/data_source/data_source_publisher.py +0 -0
  32. {upgini-1.1.299 → upgini-1.1.299a3511.dev6}/src/upgini/errors.py +0 -0
  33. {upgini-1.1.299 → upgini-1.1.299a3511.dev6}/src/upgini/http.py +0 -0
  34. {upgini-1.1.299 → upgini-1.1.299a3511.dev6}/src/upgini/lazy_import.py +0 -0
  35. {upgini-1.1.299 → upgini-1.1.299a3511.dev6}/src/upgini/mdc/__init__.py +0 -0
  36. {upgini-1.1.299 → upgini-1.1.299a3511.dev6}/src/upgini/mdc/context.py +0 -0
  37. {upgini-1.1.299 → upgini-1.1.299a3511.dev6}/src/upgini/normalizer/__init__.py +0 -0
  38. {upgini-1.1.299 → upgini-1.1.299a3511.dev6}/src/upgini/normalizer/phone_normalizer.py +0 -0
  39. {upgini-1.1.299 → upgini-1.1.299a3511.dev6}/src/upgini/resource_bundle/__init__.py +0 -0
  40. {upgini-1.1.299 → upgini-1.1.299a3511.dev6}/src/upgini/resource_bundle/exceptions.py +0 -0
  41. {upgini-1.1.299 → upgini-1.1.299a3511.dev6}/src/upgini/resource_bundle/strings_widget.properties +0 -0
  42. {upgini-1.1.299 → upgini-1.1.299a3511.dev6}/src/upgini/sampler/__init__.py +0 -0
  43. {upgini-1.1.299 → upgini-1.1.299a3511.dev6}/src/upgini/sampler/base.py +0 -0
  44. {upgini-1.1.299 → upgini-1.1.299a3511.dev6}/src/upgini/sampler/random_under_sampler.py +0 -0
  45. {upgini-1.1.299 → upgini-1.1.299a3511.dev6}/src/upgini/sampler/utils.py +0 -0
  46. {upgini-1.1.299 → upgini-1.1.299a3511.dev6}/src/upgini/search_task.py +0 -0
  47. {upgini-1.1.299 → upgini-1.1.299a3511.dev6}/src/upgini/spinner.py +0 -0
  48. {upgini-1.1.299 → upgini-1.1.299a3511.dev6}/src/upgini/utils/__init__.py +0 -0
  49. {upgini-1.1.299 → upgini-1.1.299a3511.dev6}/src/upgini/utils/blocked_time_series.py +0 -0
  50. {upgini-1.1.299 → upgini-1.1.299a3511.dev6}/src/upgini/utils/country_utils.py +0 -0
  51. {upgini-1.1.299 → upgini-1.1.299a3511.dev6}/src/upgini/utils/custom_loss_utils.py +0 -0
  52. {upgini-1.1.299 → upgini-1.1.299a3511.dev6}/src/upgini/utils/cv_utils.py +0 -0
  53. {upgini-1.1.299 → upgini-1.1.299a3511.dev6}/src/upgini/utils/datetime_utils.py +0 -0
  54. {upgini-1.1.299 → upgini-1.1.299a3511.dev6}/src/upgini/utils/display_utils.py +0 -0
  55. {upgini-1.1.299 → upgini-1.1.299a3511.dev6}/src/upgini/utils/fallback_progress_bar.py +0 -0
  56. {upgini-1.1.299 → upgini-1.1.299a3511.dev6}/src/upgini/utils/features_validator.py +0 -0
  57. {upgini-1.1.299 → upgini-1.1.299a3511.dev6}/src/upgini/utils/format.py +0 -0
  58. {upgini-1.1.299 → upgini-1.1.299a3511.dev6}/src/upgini/utils/ip_utils.py +0 -0
  59. {upgini-1.1.299 → upgini-1.1.299a3511.dev6}/src/upgini/utils/phone_utils.py +0 -0
  60. {upgini-1.1.299 → upgini-1.1.299a3511.dev6}/src/upgini/utils/postal_code_utils.py +0 -0
  61. {upgini-1.1.299 → upgini-1.1.299a3511.dev6}/src/upgini/utils/progress_bar.py +0 -0
  62. {upgini-1.1.299 → upgini-1.1.299a3511.dev6}/src/upgini/utils/sklearn_ext.py +0 -0
  63. {upgini-1.1.299 → upgini-1.1.299a3511.dev6}/src/upgini/utils/target_utils.py +0 -0
  64. {upgini-1.1.299 → upgini-1.1.299a3511.dev6}/src/upgini/utils/track_info.py +0 -0
  65. {upgini-1.1.299 → upgini-1.1.299a3511.dev6}/src/upgini/utils/warning_counter.py +0 -0
  66. {upgini-1.1.299 → upgini-1.1.299a3511.dev6}/src/upgini/version_validator.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: upgini
3
- Version: 1.1.299
3
+ Version: 1.1.299a3511.dev6
4
4
  Summary: Intelligent data search & enrichment for Machine Learning
5
5
  Project-URL: Bug Reports, https://github.com/upgini/upgini/issues
6
6
  Project-URL: Homepage, https://upgini.com/
@@ -26,6 +26,8 @@ Requires-Python: <3.11,>=3.8
26
26
  Requires-Dist: catboost>=1.0.3
27
27
  Requires-Dist: fastparquet>=0.8.1
28
28
  Requires-Dist: ipywidgets>=8.1.0
29
+ Requires-Dist: jarowinkler>=2.0.0
30
+ Requires-Dist: levenshtein>=0.25.1
29
31
  Requires-Dist: lightgbm>=3.3.2
30
32
  Requires-Dist: numpy>=1.19.0
31
33
  Requires-Dist: pandas<3.0.0,>=1.1.0
@@ -131,7 +133,7 @@ Description-Content-Type: text/markdown
131
133
  |Consumer Confidence index| 44 |22|-|Monthly|date, country|No
132
134
  |World economic indicators|191 |41|-|Monthly|date, country|No
133
135
  |Markets data|-|17|-|Monthly|date, datetime|No
134
- |World mobile & fixed broadband network coverage and performance |167|-|3|Monthly|country, postal/ZIP code|No
136
+ |World mobile & fixed broadband network coverage and perfomance |167|-|3|Monthly|country, postal/ZIP code|No
135
137
  |World demographic data |90|-|2|Annual|country, postal/ZIP code|No
136
138
  |World house prices |44|-|3|Annual|country, postal/ZIP code|No
137
139
  |Public social media profile data |104|-|-|Monthly|date, email/HEM, phone |Yes
@@ -840,4 +842,4 @@ Some convenient ways to start contributing are:
840
842
  - [More perks for registered users](https://profile.upgini.com)
841
843
 
842
844
  <sup>😔 Found mistype or a bug in code snippet? Our bad! <a href="https://github.com/upgini/upgini/issues/new?assignees=&title=readme%2Fbug">
843
- Please report it here</a></sup>
845
+ Please report it here</a></sup>
@@ -90,7 +90,7 @@
90
90
  |Consumer Confidence index| 44 |22|-|Monthly|date, country|No
91
91
  |World economic indicators|191 |41|-|Monthly|date, country|No
92
92
  |Markets data|-|17|-|Monthly|date, datetime|No
93
- |World mobile & fixed broadband network coverage and performance |167|-|3|Monthly|country, postal/ZIP code|No
93
+ |World mobile & fixed broadband network coverage and perfomance |167|-|3|Monthly|country, postal/ZIP code|No
94
94
  |World demographic data |90|-|2|Annual|country, postal/ZIP code|No
95
95
  |World house prices |44|-|3|Annual|country, postal/ZIP code|No
96
96
  |Public social media profile data |104|-|-|Monthly|date, email/HEM, phone |Yes
@@ -799,4 +799,4 @@ Some convenient ways to start contributing are:
799
799
  - [More perks for registered users](https://profile.upgini.com)
800
800
 
801
801
  <sup>😔 Found mistype or a bug in code snippet? Our bad! <a href="https://github.com/upgini/upgini/issues/new?assignees=&title=readme%2Fbug">
802
- Please report it here</a></sup>
802
+ Please report it here</a></sup>
@@ -49,6 +49,9 @@ dependencies = [
49
49
  "scikit-learn>=1.3.0",
50
50
  "python-bidi==0.4.2",
51
51
  "xhtml2pdf==0.2.11",
52
+ "jarowinkler>=2.0.0",
53
+ "levenshtein>=0.25.1",
54
+ "python-bidi==0.4.2",
52
55
  ]
53
56
 
54
57
  [project.urls]
@@ -0,0 +1 @@
1
+ __version__ = "1.1.299a3511.dev6"
@@ -1,6 +1,20 @@
1
1
  from typing import Dict
2
2
 
3
- from upgini.autofe.binary import Add, Divide, Max, Min, Multiply, Sim, Subtract
3
+ from upgini.autofe.binary import (
4
+ Add,
5
+ Combine,
6
+ CombineThenFreq,
7
+ Distance,
8
+ Divide,
9
+ JaroWinklerSim1,
10
+ JaroWinklerSim2,
11
+ LevenshteinSim,
12
+ Max,
13
+ Min,
14
+ Multiply,
15
+ Sim,
16
+ Subtract,
17
+ )
4
18
  from upgini.autofe.date import (
5
19
  DateDiff,
6
20
  DateDiffType2,
@@ -9,9 +23,9 @@ from upgini.autofe.date import (
9
23
  DatePercentile,
10
24
  DatePercentileMethod2,
11
25
  )
12
- from upgini.autofe.groupby import GroupByThenAgg, GroupByThenRank
26
+ from upgini.autofe.groupby import GroupByThenAgg, GroupByThenFreq, GroupByThenNUnique, GroupByThenRank
13
27
  from upgini.autofe.operand import Operand
14
- from upgini.autofe.unary import Abs, Floor, Freq, Log, Residual, Norm, Sigmoid, Sqrt, Square
28
+ from upgini.autofe.unary import Abs, Embeddings, Floor, Freq, Log, Residual, Norm, Sigmoid, Sqrt, Square
15
29
  from upgini.autofe.vector import Mean, Sum
16
30
 
17
31
  ALL_OPERANDS: Dict[str, Operand] = {
@@ -39,10 +53,10 @@ ALL_OPERANDS: Dict[str, Operand] = {
39
53
  GroupByThenAgg(name="GroupByThenMedian", agg="median"),
40
54
  GroupByThenAgg(name="GroupByThenStd", output_type="float", agg="std"),
41
55
  GroupByThenRank(),
42
- Operand(name="Combine", has_symmetry_importance=True, output_type="object", is_categorical=True),
43
- Operand(name="CombineThenFreq", has_symmetry_importance=True, output_type="float"),
44
- Operand(name="GroupByThenNUnique", output_type="int", is_vectorizable=True, is_grouping=True),
45
- Operand(name="GroupByThenFreq", output_type="float", is_grouping=True),
56
+ Combine(),
57
+ CombineThenFreq(),
58
+ GroupByThenNUnique(),
59
+ GroupByThenFreq(),
46
60
  Sim(),
47
61
  DateDiff(),
48
62
  DateDiffType2(),
@@ -59,6 +73,11 @@ ALL_OPERANDS: Dict[str, Operand] = {
59
73
  DatePercentile(),
60
74
  DatePercentileMethod2(),
61
75
  Norm(),
76
+ JaroWinklerSim1(),
77
+ JaroWinklerSim2(),
78
+ LevenshteinSim(),
79
+ Distance(),
80
+ Embeddings(),
62
81
  ]
63
82
  }
64
83
 
@@ -1,7 +1,11 @@
1
+ import abc
2
+ from typing import Optional
3
+ import Levenshtein
1
4
  import numpy as np
2
5
  import pandas as pd
3
6
  from numpy import dot
4
7
  from numpy.linalg import norm
8
+ from jarowinkler import jarowinkler_similarity
5
9
 
6
10
  from upgini.autofe.operand import PandasOperand, VectorizableMixin
7
11
 
@@ -130,7 +134,25 @@ class CombineThenFreq(PandasOperand):
130
134
  self._loc(temp, value_counts)
131
135
 
132
136
 
133
- class Sim(PandasOperand):
137
+ class Distance(PandasOperand):
138
+ name = "dist"
139
+ is_binary = True
140
+ output_type = "float"
141
+ is_symmetrical = True
142
+ has_symmetry_importance = True
143
+
144
+ def calculate_binary(self, left: pd.Series, right: pd.Series) -> pd.Series:
145
+ return pd.Series(
146
+ 1 - self.__dot(left, right) / (self.__dot(left, left) * self.__dot(right, right)), index=left.index
147
+ )
148
+
149
+ # row-wise dot product
150
+ def __dot(self, left: pd.Series, right: pd.Series) -> pd.Series:
151
+ return (left * right).apply(np.sum)
152
+
153
+
154
+ # Left for backward compatibility
155
+ class Sim(Distance):
134
156
  name = "sim"
135
157
  is_binary = True
136
158
  output_type = "float"
@@ -138,4 +160,71 @@ class Sim(PandasOperand):
138
160
  has_symmetry_importance = True
139
161
 
140
162
  def calculate_binary(self, left: pd.Series, right: pd.Series) -> pd.Series:
141
- return dot(left, right) / (norm(left) * norm(right))
163
+ return 1 - super().calculate_binary(left, right)
164
+
165
+
166
+ class StringSim(PandasOperand, abc.ABC):
167
+ def calculate_binary(self, left: pd.Series, right: pd.Series) -> pd.Series:
168
+ sims = []
169
+ for i in left.index:
170
+ left_i = self._prepare_value(left.get(i))
171
+ right_i = self._prepare_value(right.get(i))
172
+ if left_i is not None and right_i is not None:
173
+ sims.append(self._similarity(left_i, right_i))
174
+ else:
175
+ sims.append(None)
176
+
177
+ return pd.Series(sims, index=left.index)
178
+
179
+ @abc.abstractmethod
180
+ def _prepare_value(self, value: Optional[str]) -> Optional[str]:
181
+ pass
182
+
183
+ @abc.abstractmethod
184
+ def _similarity(self, left: str, right: str) -> float:
185
+ pass
186
+
187
+
188
+ class JaroWinklerSim1(StringSim):
189
+ name = "sim_jw1"
190
+ is_binary = True
191
+ input_type = "string"
192
+ output_type = "float"
193
+ is_symmetrical = True
194
+ has_symmetry_importance = True
195
+
196
+ def _prepare_value(self, value: Optional[str]) -> Optional[str]:
197
+ return value
198
+
199
+ def _similarity(self, left: str, right: str) -> float:
200
+ return jarowinkler_similarity(left, right)
201
+
202
+
203
+ class JaroWinklerSim2(StringSim):
204
+ name = "sim_jw2"
205
+ is_binary = True
206
+ input_type = "string"
207
+ output_type = "float"
208
+ is_symmetrical = True
209
+ has_symmetry_importance = True
210
+
211
+ def _prepare_value(self, value: Optional[str]) -> Optional[str]:
212
+ return value[::-1] if value is not None else None
213
+
214
+ def _similarity(self, left: str, right: str) -> float:
215
+ return jarowinkler_similarity(left, right)
216
+
217
+
218
+ class LevenshteinSim(StringSim):
219
+ name = "sim_lv"
220
+ is_binary = True
221
+ input_type = "string"
222
+ output_type = "float"
223
+ is_symmetrical = True
224
+ has_symmetry_importance = True
225
+
226
+ def _prepare_value(self, value: Optional[str]) -> Optional[str]:
227
+ return value
228
+
229
+ def _similarity(self, left: str, right: str) -> float:
230
+ return 1 - Levenshtein.distance(left, right) / max(len(left), len(right))
@@ -43,6 +43,8 @@ class DateDiff(PandasOperand, DateDiffMixin):
43
43
  is_binary = True
44
44
  has_symmetry_importance = True
45
45
 
46
+ replace_negative: bool = False
47
+
46
48
  def get_params(self) -> Dict[str, Optional[str]]:
47
49
  res = super().get_params()
48
50
  res.update(
@@ -50,6 +52,7 @@ class DateDiff(PandasOperand, DateDiffMixin):
50
52
  "diff_unit": self.diff_unit,
51
53
  "left_unit": self.left_unit,
52
54
  "right_unit": self.right_unit,
55
+ "replace_negative": self.replace_negative,
53
56
  }
54
57
  )
55
58
  return res
@@ -61,7 +64,8 @@ class DateDiff(PandasOperand, DateDiffMixin):
61
64
  return self.__replace_negative(diff)
62
65
 
63
66
  def __replace_negative(self, x: Union[pd.DataFrame, pd.Series]):
64
- x[x < 0] = None
67
+ if self.replace_negative:
68
+ x[x < 0] = None
65
69
  return x
66
70
 
67
71
 
@@ -101,13 +105,19 @@ _ext_aggregations = {"nunique": (lambda x: len(np.unique(x)), 0), "count": (len,
101
105
  class DateListDiff(PandasOperand, DateDiffMixin):
102
106
  is_binary = True
103
107
  has_symmetry_importance = True
108
+
104
109
  aggregation: str
110
+ replace_negative: bool = False
105
111
 
106
112
  def get_params(self) -> Dict[str, Optional[str]]:
107
113
  res = super().get_params()
108
114
  res.update(
109
115
  {
110
116
  "aggregation": self.aggregation,
117
+ "diff_unit": self.diff_unit,
118
+ "left_unit": self.left_unit,
119
+ "right_unit": self.right_unit,
120
+ "replace_negative": self.replace_negative,
111
121
  }
112
122
  )
113
123
  return res
@@ -125,7 +135,7 @@ class DateListDiff(PandasOperand, DateDiffMixin):
125
135
 
126
136
  def _diff(self, x: TimedeltaArray):
127
137
  x = self._convert_diff_to_unit(x)
128
- return x[x > 0]
138
+ return x[x > 0] if self.replace_negative else x
129
139
 
130
140
  def _agg(self, x):
131
141
  method = getattr(np, self.aggregation, None)
@@ -157,7 +167,10 @@ class DateListDiffBounded(DateListDiff):
157
167
  super().__init__(**data)
158
168
 
159
169
  def _agg(self, x):
160
- x = x[(x >= (self.lower_bound or -np.inf)) & (x < (self.upper_bound or np.inf))]
170
+ x = x[
171
+ (x >= (self.lower_bound if self.lower_bound is not None else -np.inf))
172
+ & (x < (self.upper_bound if self.upper_bound is not None else np.inf))
173
+ ]
161
174
  return super()._agg(x)
162
175
 
163
176
 
@@ -140,8 +140,9 @@ class Feature:
140
140
 
141
141
  if self.alias:
142
142
  components = ["f_autofe", self.alias]
143
- elif shorten and not self.op.is_unary:
144
- components = ["f_autofe", self.get_op_display_name()]
143
+ elif shorten and not (self.op.is_unary and all(isinstance(c, Column) for c in self.children)):
144
+ prev_name = [self.children[0].get_op_display_name()] if self.op.is_unary else []
145
+ components = ["f_autofe"] + prev_name + [self.get_op_display_name()]
145
146
  else:
146
147
  components = ["f_" + "_f_".join(self.get_columns(**kwargs))] + [
147
148
  "autofe",
@@ -125,3 +125,10 @@ class Norm(PandasOperand):
125
125
  normalized_data = pd.Series(normalized_data[:, 0], index=data_dropna.index, name=data.name)
126
126
  normalized_data = normalized_data.reindex(data.index)
127
127
  return normalized_data
128
+
129
+
130
+ class Embeddings(PandasOperand):
131
+ name = "emb"
132
+ is_unary = True
133
+ input_type = "string"
134
+ output_type = "vector"
@@ -23,9 +23,7 @@ from pandas.api.types import (
23
23
  from upgini.errors import ValidationError
24
24
  from upgini.http import ProgressStage, SearchProgress, _RestClient
25
25
  from upgini.metadata import (
26
- ENTITY_SYSTEM_RECORD_ID,
27
26
  EVAL_SET_INDEX,
28
- SEARCH_KEY_UNNEST,
29
27
  SYSTEM_COLUMNS,
30
28
  SYSTEM_RECORD_ID,
31
29
  TARGET,
@@ -81,7 +79,6 @@ class Dataset: # (pd.DataFrame):
81
79
  path: Optional[str] = None,
82
80
  meaning_types: Optional[Dict[str, FileColumnMeaningType]] = None,
83
81
  search_keys: Optional[List[Tuple[str, ...]]] = None,
84
- unnest_search_keys: Optional[Dict[str, str]] = None,
85
82
  model_task_type: Optional[ModelTaskType] = None,
86
83
  random_state: Optional[int] = None,
87
84
  rest_client: Optional[_RestClient] = None,
@@ -116,7 +113,6 @@ class Dataset: # (pd.DataFrame):
116
113
  self.description = description
117
114
  self.meaning_types = meaning_types
118
115
  self.search_keys = search_keys
119
- self.unnest_search_keys = unnest_search_keys
120
116
  self.ignore_columns = []
121
117
  self.hierarchical_group_keys = []
122
118
  self.hierarchical_subgroup_keys = []
@@ -176,7 +172,7 @@ class Dataset: # (pd.DataFrame):
176
172
  new_columns = []
177
173
  dup_counter = 0
178
174
  for column in self.data.columns:
179
- if column in [TARGET, EVAL_SET_INDEX, SYSTEM_RECORD_ID, ENTITY_SYSTEM_RECORD_ID, SEARCH_KEY_UNNEST]:
175
+ if column in [TARGET, EVAL_SET_INDEX, SYSTEM_RECORD_ID]:
180
176
  self.columns_renaming[column] = column
181
177
  new_columns.append(column)
182
178
  continue
@@ -357,9 +353,7 @@ class Dataset: # (pd.DataFrame):
357
353
 
358
354
  if is_string_dtype(self.data[postal_code]) or is_object_dtype(self.data[postal_code]):
359
355
  try:
360
- self.data[postal_code] = (
361
- self.data[postal_code].astype("string").astype("Float64").astype("Int64").astype("string")
362
- )
356
+ self.data[postal_code] = self.data[postal_code].astype("float64").astype("Int64").astype("string")
363
357
  except Exception:
364
358
  pass
365
359
  elif is_float_dtype(self.data[postal_code]):
@@ -809,9 +803,6 @@ class Dataset: # (pd.DataFrame):
809
803
  meaningType=meaning_type,
810
804
  minMaxValues=min_max_values,
811
805
  )
812
- if self.unnest_search_keys and column_meta.originalName in self.unnest_search_keys:
813
- column_meta.isUnnest = True
814
- column_meta.unnestKeyNames = self.unnest_search_keys[column_meta.originalName]
815
806
 
816
807
  columns.append(column_meta)
817
808