upgini 1.1.309__tar.gz → 1.1.309a3511.dev1__tar.gz

This diff shows the differences between two publicly available versions of a package that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the package versions as they appear in their respective public registries.

Potentially problematic release.


This version of upgini might be problematic. Click here for more details.

Files changed (65)
  1. {upgini-1.1.309 → upgini-1.1.309a3511.dev1}/PKG-INFO +3 -1
  2. {upgini-1.1.309 → upgini-1.1.309a3511.dev1}/pyproject.toml +3 -0
  3. upgini-1.1.309a3511.dev1/src/upgini/__about__.py +1 -0
  4. {upgini-1.1.309 → upgini-1.1.309a3511.dev1}/src/upgini/autofe/all_operands.py +33 -7
  5. {upgini-1.1.309 → upgini-1.1.309a3511.dev1}/src/upgini/autofe/binary.py +93 -2
  6. {upgini-1.1.309 → upgini-1.1.309a3511.dev1}/src/upgini/autofe/date.py +16 -3
  7. {upgini-1.1.309 → upgini-1.1.309a3511.dev1}/src/upgini/autofe/feature.py +24 -11
  8. {upgini-1.1.309 → upgini-1.1.309a3511.dev1}/src/upgini/autofe/unary.py +7 -0
  9. {upgini-1.1.309 → upgini-1.1.309a3511.dev1}/src/upgini/data_source/data_source_publisher.py +5 -21
  10. upgini-1.1.309/src/upgini/__about__.py +0 -1
  11. {upgini-1.1.309 → upgini-1.1.309a3511.dev1}/.gitignore +0 -0
  12. {upgini-1.1.309 → upgini-1.1.309a3511.dev1}/LICENSE +0 -0
  13. {upgini-1.1.309 → upgini-1.1.309a3511.dev1}/README.md +0 -0
  14. {upgini-1.1.309 → upgini-1.1.309a3511.dev1}/src/upgini/__init__.py +0 -0
  15. {upgini-1.1.309 → upgini-1.1.309a3511.dev1}/src/upgini/ads.py +0 -0
  16. {upgini-1.1.309 → upgini-1.1.309a3511.dev1}/src/upgini/ads_management/__init__.py +0 -0
  17. {upgini-1.1.309 → upgini-1.1.309a3511.dev1}/src/upgini/ads_management/ads_manager.py +0 -0
  18. {upgini-1.1.309 → upgini-1.1.309a3511.dev1}/src/upgini/autofe/__init__.py +0 -0
  19. {upgini-1.1.309 → upgini-1.1.309a3511.dev1}/src/upgini/autofe/groupby.py +0 -0
  20. {upgini-1.1.309 → upgini-1.1.309a3511.dev1}/src/upgini/autofe/operand.py +0 -0
  21. {upgini-1.1.309 → upgini-1.1.309a3511.dev1}/src/upgini/autofe/vector.py +0 -0
  22. {upgini-1.1.309 → upgini-1.1.309a3511.dev1}/src/upgini/data_source/__init__.py +0 -0
  23. {upgini-1.1.309 → upgini-1.1.309a3511.dev1}/src/upgini/dataset.py +0 -0
  24. {upgini-1.1.309 → upgini-1.1.309a3511.dev1}/src/upgini/errors.py +0 -0
  25. {upgini-1.1.309 → upgini-1.1.309a3511.dev1}/src/upgini/features_enricher.py +0 -0
  26. {upgini-1.1.309 → upgini-1.1.309a3511.dev1}/src/upgini/http.py +0 -0
  27. {upgini-1.1.309 → upgini-1.1.309a3511.dev1}/src/upgini/lazy_import.py +0 -0
  28. {upgini-1.1.309 → upgini-1.1.309a3511.dev1}/src/upgini/mdc/__init__.py +0 -0
  29. {upgini-1.1.309 → upgini-1.1.309a3511.dev1}/src/upgini/mdc/context.py +0 -0
  30. {upgini-1.1.309 → upgini-1.1.309a3511.dev1}/src/upgini/metadata.py +0 -0
  31. {upgini-1.1.309 → upgini-1.1.309a3511.dev1}/src/upgini/metrics.py +0 -0
  32. {upgini-1.1.309 → upgini-1.1.309a3511.dev1}/src/upgini/normalizer/__init__.py +0 -0
  33. {upgini-1.1.309 → upgini-1.1.309a3511.dev1}/src/upgini/normalizer/phone_normalizer.py +0 -0
  34. {upgini-1.1.309 → upgini-1.1.309a3511.dev1}/src/upgini/resource_bundle/__init__.py +0 -0
  35. {upgini-1.1.309 → upgini-1.1.309a3511.dev1}/src/upgini/resource_bundle/exceptions.py +0 -0
  36. {upgini-1.1.309 → upgini-1.1.309a3511.dev1}/src/upgini/resource_bundle/strings.properties +0 -0
  37. {upgini-1.1.309 → upgini-1.1.309a3511.dev1}/src/upgini/resource_bundle/strings_widget.properties +0 -0
  38. {upgini-1.1.309 → upgini-1.1.309a3511.dev1}/src/upgini/sampler/__init__.py +0 -0
  39. {upgini-1.1.309 → upgini-1.1.309a3511.dev1}/src/upgini/sampler/base.py +0 -0
  40. {upgini-1.1.309 → upgini-1.1.309a3511.dev1}/src/upgini/sampler/random_under_sampler.py +0 -0
  41. {upgini-1.1.309 → upgini-1.1.309a3511.dev1}/src/upgini/sampler/utils.py +0 -0
  42. {upgini-1.1.309 → upgini-1.1.309a3511.dev1}/src/upgini/search_task.py +0 -0
  43. {upgini-1.1.309 → upgini-1.1.309a3511.dev1}/src/upgini/spinner.py +0 -0
  44. {upgini-1.1.309 → upgini-1.1.309a3511.dev1}/src/upgini/utils/__init__.py +0 -0
  45. {upgini-1.1.309 → upgini-1.1.309a3511.dev1}/src/upgini/utils/base_search_key_detector.py +0 -0
  46. {upgini-1.1.309 → upgini-1.1.309a3511.dev1}/src/upgini/utils/blocked_time_series.py +0 -0
  47. {upgini-1.1.309 → upgini-1.1.309a3511.dev1}/src/upgini/utils/country_utils.py +0 -0
  48. {upgini-1.1.309 → upgini-1.1.309a3511.dev1}/src/upgini/utils/custom_loss_utils.py +0 -0
  49. {upgini-1.1.309 → upgini-1.1.309a3511.dev1}/src/upgini/utils/cv_utils.py +0 -0
  50. {upgini-1.1.309 → upgini-1.1.309a3511.dev1}/src/upgini/utils/datetime_utils.py +0 -0
  51. {upgini-1.1.309 → upgini-1.1.309a3511.dev1}/src/upgini/utils/deduplicate_utils.py +0 -0
  52. {upgini-1.1.309 → upgini-1.1.309a3511.dev1}/src/upgini/utils/display_utils.py +0 -0
  53. {upgini-1.1.309 → upgini-1.1.309a3511.dev1}/src/upgini/utils/email_utils.py +0 -0
  54. {upgini-1.1.309 → upgini-1.1.309a3511.dev1}/src/upgini/utils/fallback_progress_bar.py +0 -0
  55. {upgini-1.1.309 → upgini-1.1.309a3511.dev1}/src/upgini/utils/features_validator.py +0 -0
  56. {upgini-1.1.309 → upgini-1.1.309a3511.dev1}/src/upgini/utils/format.py +0 -0
  57. {upgini-1.1.309 → upgini-1.1.309a3511.dev1}/src/upgini/utils/ip_utils.py +0 -0
  58. {upgini-1.1.309 → upgini-1.1.309a3511.dev1}/src/upgini/utils/phone_utils.py +0 -0
  59. {upgini-1.1.309 → upgini-1.1.309a3511.dev1}/src/upgini/utils/postal_code_utils.py +0 -0
  60. {upgini-1.1.309 → upgini-1.1.309a3511.dev1}/src/upgini/utils/progress_bar.py +0 -0
  61. {upgini-1.1.309 → upgini-1.1.309a3511.dev1}/src/upgini/utils/sklearn_ext.py +0 -0
  62. {upgini-1.1.309 → upgini-1.1.309a3511.dev1}/src/upgini/utils/target_utils.py +0 -0
  63. {upgini-1.1.309 → upgini-1.1.309a3511.dev1}/src/upgini/utils/track_info.py +0 -0
  64. {upgini-1.1.309 → upgini-1.1.309a3511.dev1}/src/upgini/utils/warning_counter.py +0 -0
  65. {upgini-1.1.309 → upgini-1.1.309a3511.dev1}/src/upgini/version_validator.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: upgini
3
- Version: 1.1.309
3
+ Version: 1.1.309a3511.dev1
4
4
  Summary: Intelligent data search & enrichment for Machine Learning
5
5
  Project-URL: Bug Reports, https://github.com/upgini/upgini/issues
6
6
  Project-URL: Homepage, https://upgini.com/
@@ -26,6 +26,8 @@ Requires-Python: <3.11,>=3.8
26
26
  Requires-Dist: catboost>=1.0.3
27
27
  Requires-Dist: fastparquet>=0.8.1
28
28
  Requires-Dist: ipywidgets>=8.1.0
29
+ Requires-Dist: jarowinkler>=2.0.0
30
+ Requires-Dist: levenshtein>=0.25.1
29
31
  Requires-Dist: lightgbm>=3.3.2
30
32
  Requires-Dist: numpy>=1.19.0
31
33
  Requires-Dist: pandas<3.0.0,>=1.1.0
@@ -49,6 +49,9 @@ dependencies = [
49
49
  "scikit-learn>=1.3.0",
50
50
  "python-bidi==0.4.2",
51
51
  "xhtml2pdf==0.2.11",
52
+ "jarowinkler>=2.0.0",
53
+ "levenshtein>=0.25.1",
54
+ "python-bidi==0.4.2",
52
55
  ]
53
56
 
54
57
  [project.urls]
@@ -0,0 +1 @@
1
+ __version__ = "1.1.309a3511.dev1"
@@ -1,17 +1,38 @@
1
1
  from typing import Dict
2
2
 
3
- from upgini.autofe.binary import Add, Divide, Max, Min, Multiply, Sim, Subtract
3
+ from upgini.autofe.binary import (
4
+ Add,
5
+ Combine,
6
+ CombineThenFreq,
7
+ Distance,
8
+ Divide,
9
+ JaroWinklerSim1,
10
+ JaroWinklerSim2,
11
+ LevenshteinSim,
12
+ Max,
13
+ Min,
14
+ Multiply,
15
+ Sim,
16
+ Subtract,
17
+ )
4
18
  from upgini.autofe.date import (
19
+ (
5
20
  DateDiff,
21
+
6
22
  DateDiffType2,
23
+
7
24
  DateListDiff,
25
+
8
26
  DateListDiffBounded,
9
27
  DatePercentile,
28
+
29
+ DatePercentileMethod2,
30
+ ),
10
31
  DatePercentileMethod2,
11
32
  )
12
- from upgini.autofe.groupby import GroupByThenAgg, GroupByThenRank
33
+ from upgini.autofe.groupby import GroupByThenAgg, GroupByThenFreq, GroupByThenNUnique, GroupByThenRank
13
34
  from upgini.autofe.operand import Operand
14
- from upgini.autofe.unary import Abs, Floor, Freq, Log, Residual, Norm, Sigmoid, Sqrt, Square
35
+ from upgini.autofe.unary import Abs, Embeddings, Floor, Freq, Log, Residual, Norm, Sigmoid, Sqrt, Square
15
36
  from upgini.autofe.vector import Mean, Sum
16
37
 
17
38
  ALL_OPERANDS: Dict[str, Operand] = {
@@ -39,10 +60,10 @@ ALL_OPERANDS: Dict[str, Operand] = {
39
60
  GroupByThenAgg(name="GroupByThenMedian", agg="median"),
40
61
  GroupByThenAgg(name="GroupByThenStd", output_type="float", agg="std"),
41
62
  GroupByThenRank(),
42
- Operand(name="Combine", has_symmetry_importance=True, output_type="object", is_categorical=True),
43
- Operand(name="CombineThenFreq", has_symmetry_importance=True, output_type="float"),
44
- Operand(name="GroupByThenNUnique", output_type="int", is_vectorizable=True, is_grouping=True),
45
- Operand(name="GroupByThenFreq", output_type="float", is_grouping=True),
63
+ Combine(),
64
+ CombineThenFreq(),
65
+ GroupByThenNUnique(),
66
+ GroupByThenFreq(),
46
67
  Sim(),
47
68
  DateDiff(),
48
69
  DateDiffType2(),
@@ -59,6 +80,11 @@ ALL_OPERANDS: Dict[str, Operand] = {
59
80
  DatePercentile(),
60
81
  DatePercentileMethod2(),
61
82
  Norm(),
83
+ JaroWinklerSim1(),
84
+ JaroWinklerSim2(),
85
+ LevenshteinSim(),
86
+ Distance(),
87
+ Embeddings(),
62
88
  ]
63
89
  }
64
90
 
@@ -1,7 +1,11 @@
1
+ import abc
2
+ from typing import Optional
3
+ import Levenshtein
1
4
  import numpy as np
2
5
  import pandas as pd
3
6
  from numpy import dot
4
7
  from numpy.linalg import norm
8
+ from jarowinkler import jarowinkler_similarity
5
9
 
6
10
  from upgini.autofe.operand import PandasOperand, VectorizableMixin
7
11
 
@@ -130,7 +134,27 @@ class CombineThenFreq(PandasOperand):
130
134
  self._loc(temp, value_counts)
131
135
 
132
136
 
133
- class Sim(PandasOperand):
137
+ class Distance(PandasOperand):
138
+ name = "dist"
139
+ is_binary = True
140
+ output_type = "float"
141
+ is_symmetrical = True
142
+ has_symmetry_importance = True
143
+
144
+ def calculate_binary(self, left: pd.Series, right: pd.Series) -> pd.Series:
145
+ return pd.Series(
146
+ 1 - self.__dot(left, right) / (self.__dot(left, left) * self.__dot(right, right)), index=left.index
147
+ )
148
+
149
+ # row-wise dot product
150
+ def __dot(self, left: pd.Series, right: pd.Series) -> pd.Series:
151
+ res = (left.dropna() * right.dropna()).apply(np.sum)
152
+ res = res.reindex(left.index.union(right.index))
153
+ return res
154
+
155
+
156
+ # Left for backward compatibility
157
+ class Sim(Distance):
134
158
  name = "sim"
135
159
  is_binary = True
136
160
  output_type = "float"
@@ -138,4 +162,71 @@ class Sim(PandasOperand):
138
162
  has_symmetry_importance = True
139
163
 
140
164
  def calculate_binary(self, left: pd.Series, right: pd.Series) -> pd.Series:
141
- return dot(left, right) / (norm(left) * norm(right))
165
+ return 1 - super().calculate_binary(left, right)
166
+
167
+
168
+ class StringSim(PandasOperand, abc.ABC):
169
+ def calculate_binary(self, left: pd.Series, right: pd.Series) -> pd.Series:
170
+ sims = []
171
+ for i in left.index:
172
+ left_i = self._prepare_value(left.get(i))
173
+ right_i = self._prepare_value(right.get(i))
174
+ if left_i is not None and right_i is not None:
175
+ sims.append(self._similarity(left_i, right_i))
176
+ else:
177
+ sims.append(None)
178
+
179
+ return pd.Series(sims, index=left.index)
180
+
181
+ @abc.abstractmethod
182
+ def _prepare_value(self, value: Optional[str]) -> Optional[str]:
183
+ pass
184
+
185
+ @abc.abstractmethod
186
+ def _similarity(self, left: str, right: str) -> float:
187
+ pass
188
+
189
+
190
+ class JaroWinklerSim1(StringSim):
191
+ name = "sim_jw1"
192
+ is_binary = True
193
+ input_type = "string"
194
+ output_type = "float"
195
+ is_symmetrical = True
196
+ has_symmetry_importance = True
197
+
198
+ def _prepare_value(self, value: Optional[str]) -> Optional[str]:
199
+ return value
200
+
201
+ def _similarity(self, left: str, right: str) -> float:
202
+ return jarowinkler_similarity(left, right)
203
+
204
+
205
+ class JaroWinklerSim2(StringSim):
206
+ name = "sim_jw2"
207
+ is_binary = True
208
+ input_type = "string"
209
+ output_type = "float"
210
+ is_symmetrical = True
211
+ has_symmetry_importance = True
212
+
213
+ def _prepare_value(self, value: Optional[str]) -> Optional[str]:
214
+ return value[::-1] if value is not None else None
215
+
216
+ def _similarity(self, left: str, right: str) -> float:
217
+ return jarowinkler_similarity(left, right)
218
+
219
+
220
+ class LevenshteinSim(StringSim):
221
+ name = "sim_lv"
222
+ is_binary = True
223
+ input_type = "string"
224
+ output_type = "float"
225
+ is_symmetrical = True
226
+ has_symmetry_importance = True
227
+
228
+ def _prepare_value(self, value: Optional[str]) -> Optional[str]:
229
+ return value
230
+
231
+ def _similarity(self, left: str, right: str) -> float:
232
+ return 1 - Levenshtein.distance(left, right) / max(len(left), len(right))
@@ -43,6 +43,8 @@ class DateDiff(PandasOperand, DateDiffMixin):
43
43
  is_binary = True
44
44
  has_symmetry_importance = True
45
45
 
46
+ replace_negative: bool = False
47
+
46
48
  def get_params(self) -> Dict[str, Optional[str]]:
47
49
  res = super().get_params()
48
50
  res.update(
@@ -50,6 +52,7 @@ class DateDiff(PandasOperand, DateDiffMixin):
50
52
  "diff_unit": self.diff_unit,
51
53
  "left_unit": self.left_unit,
52
54
  "right_unit": self.right_unit,
55
+ "replace_negative": self.replace_negative,
53
56
  }
54
57
  )
55
58
  return res
@@ -61,7 +64,8 @@ class DateDiff(PandasOperand, DateDiffMixin):
61
64
  return self.__replace_negative(diff)
62
65
 
63
66
  def __replace_negative(self, x: Union[pd.DataFrame, pd.Series]):
64
- x[x < 0] = None
67
+ if self.replace_negative:
68
+ x[x < 0] = None
65
69
  return x
66
70
 
67
71
 
@@ -101,13 +105,19 @@ _ext_aggregations = {"nunique": (lambda x: len(np.unique(x)), 0), "count": (len,
101
105
  class DateListDiff(PandasOperand, DateDiffMixin):
102
106
  is_binary = True
103
107
  has_symmetry_importance = True
108
+
104
109
  aggregation: str
110
+ replace_negative: bool = False
105
111
 
106
112
  def get_params(self) -> Dict[str, Optional[str]]:
107
113
  res = super().get_params()
108
114
  res.update(
109
115
  {
110
116
  "aggregation": self.aggregation,
117
+ "diff_unit": self.diff_unit,
118
+ "left_unit": self.left_unit,
119
+ "right_unit": self.right_unit,
120
+ "replace_negative": self.replace_negative,
111
121
  }
112
122
  )
113
123
  return res
@@ -125,7 +135,7 @@ class DateListDiff(PandasOperand, DateDiffMixin):
125
135
 
126
136
  def _diff(self, x: TimedeltaArray):
127
137
  x = self._convert_diff_to_unit(x)
128
- return x[x > 0]
138
+ return x[x > 0] if self.replace_negative else x
129
139
 
130
140
  def _agg(self, x):
131
141
  method = getattr(np, self.aggregation, None)
@@ -157,7 +167,10 @@ class DateListDiffBounded(DateListDiff):
157
167
  super().__init__(**data)
158
168
 
159
169
  def _agg(self, x):
160
- x = x[(x >= (self.lower_bound or -np.inf)) & (x < (self.upper_bound or np.inf))]
170
+ x = x[
171
+ (x >= (self.lower_bound if self.lower_bound is not None else -np.inf))
172
+ & (x < (self.upper_bound if self.upper_bound is not None else np.inf))
173
+ ]
161
174
  return super()._agg(x)
162
175
 
163
176
 
@@ -138,15 +138,17 @@ class Feature:
138
138
  if self.cached_display_name is not None and cache:
139
139
  return self.cached_display_name
140
140
 
141
+ should_stack_op = not isinstance(self.children[0], Column) if self.op.is_unary else False
142
+ prev_name = [self.children[0].get_op_display_name()] if should_stack_op else []
143
+
141
144
  if self.alias:
142
145
  components = ["f_autofe", self.alias]
143
- elif shorten and not self.op.is_unary:
144
- components = ["f_autofe", self.get_op_display_name()]
146
+ elif shorten and (not self.op.is_unary or should_stack_op):
147
+ components = ["f_autofe"] + prev_name + [self.get_op_display_name()]
145
148
  else:
146
- components = ["f_" + "_f_".join(self.get_columns(**kwargs))] + [
147
- "autofe",
148
- self.get_op_display_name(),
149
- ]
149
+ components = (
150
+ ["f_" + "_f_".join(self.get_columns(**kwargs))] + ["autofe"] + prev_name + [self.get_op_display_name()]
151
+ )
150
152
  components.extend([str(self.display_index)] if self.display_index is not None else [])
151
153
  display_name = "_".join(components)
152
154
 
@@ -237,12 +239,18 @@ class Feature:
237
239
 
238
240
  @staticmethod
239
241
  def from_formula(string: str) -> Union[Column, "Feature"]:
240
- if string[-1] != ")":
241
- return Column(string)
242
242
 
243
243
  def is_trivial_char(c: str) -> bool:
244
244
  return c not in "()+-*/,"
245
245
 
246
+ if string[-1] != ")":
247
+ if all(is_trivial_char(c) for c in string):
248
+ return Column(string)
249
+ else:
250
+ raise ValueError(
251
+ f"Unsupported column name: {string}. Column names should not have characters: ['(', ')', '+', '-', '*', '/', ',']"
252
+ )
253
+
246
254
  def find_prev(string: str) -> int:
247
255
  if string[-1] != ")":
248
256
  return max([(0 if is_trivial_char(c) else i + 1) for i, c in enumerate(string)])
@@ -264,8 +272,11 @@ class Feature:
264
272
  return Feature(find_op(string[: p2 - 1]), [Feature.from_formula(string[p2:-1])])
265
273
  p1 = find_prev(string[: p2 - 1])
266
274
  if string[0] == "(":
275
+ op = find_op(string[p2 - 1])
276
+ if op is None:
277
+ raise ValueError(f"Unsupported operand: {string[p2 - 1]}")
267
278
  return Feature(
268
- find_op(string[p2 - 1]),
279
+ op,
269
280
  [Feature.from_formula(string[p1 : p2 - 1]), Feature.from_formula(string[p2:-1])],
270
281
  )
271
282
  else:
@@ -276,6 +287,8 @@ class Feature:
276
287
  [Feature.from_formula(string[p1 : p2 - 1]), Feature.from_formula(string[p2:-1])],
277
288
  )
278
289
  else:
290
+ if string[p1 - 1] == "(":
291
+ raise ValueError(f"Unsupported operand: {string[: p1 - 1]}")
279
292
  base_features = [
280
293
  Feature.from_formula(string[p2:-1]),
281
294
  Feature.from_formula(string[p1 : p2 - 1]),
@@ -321,10 +334,10 @@ class FeatureGroup:
321
334
  lower_order_names = [ch.get_display_name() for ch in lower_order_children]
322
335
  if any(isinstance(f, Feature) for f in lower_order_children):
323
336
  child_data = pd.concat(
324
- [data[main_column]] + [ch.calculate(data) for ch in lower_order_children],
337
+ [data[main_column or []]] + [ch.calculate(data) for ch in lower_order_children],
325
338
  axis=1,
326
339
  )
327
- child_data.columns = [main_column] + lower_order_names
340
+ child_data.columns = ([main_column] if main_column is not None else []) + lower_order_names
328
341
  else:
329
342
  child_data = data[columns]
330
343
 
@@ -125,3 +125,10 @@ class Norm(PandasOperand):
125
125
  normalized_data = pd.Series(normalized_data[:, 0], index=data_dropna.index, name=data.name)
126
126
  normalized_data = normalized_data.reindex(data.index)
127
127
  return normalized_data
128
+
129
+
130
+ class Embeddings(PandasOperand):
131
+ name = "emb"
132
+ is_unary = True
133
+ input_type = "string"
134
+ output_type = "vector"
@@ -3,7 +3,7 @@ import time
3
3
  import uuid
4
4
  from datetime import datetime
5
5
  from enum import Enum
6
- from typing import Dict, List, Literal, Optional, Union
6
+ from typing import Dict, List, Optional, Union
7
7
 
8
8
  from upgini.errors import HttpError, ValidationError
9
9
  from upgini.http import LoggerFactory, get_rest_client
@@ -47,9 +47,7 @@ class DataSourcePublisher:
47
47
  self,
48
48
  data_table_uri: str,
49
49
  search_keys: Dict[str, SearchKey],
50
- update_frequency: (
51
- Literal["Daily"] | Literal["Weekly"] | Literal["Monthly"] | Literal["Quarterly"] | Literal["Annually"]
52
- ),
50
+ update_frequency: str,
53
51
  exclude_from_autofe_generation: Optional[List[str]],
54
52
  secondary_search_keys: Optional[Dict[str, SearchKey]] = None,
55
53
  sort_column: Optional[str] = None,
@@ -235,17 +233,11 @@ class DataSourcePublisher:
235
233
  self.logger.exception("Failed to register data table")
236
234
  raise
237
235
 
238
- def remove(self, data_table_ids: List[str] | str):
236
+ def remove(self, data_table_ids: List[str]):
239
237
  trace_id = str(uuid.uuid4())
240
238
  with MDC(trace_id=trace_id):
241
239
  try:
242
- if not data_table_ids:
243
- raise ValidationError("Empty data table ids")
244
- if isinstance(data_table_ids, str):
245
- data_table_ids = [data_table_ids]
246
- if not isinstance(data_table_ids, list):
247
- raise ValidationError("Invalid format of data_table_ids argument")
248
- if len(data_table_ids) == 0:
240
+ if data_table_ids is None or len(data_table_ids) == 0:
249
241
  raise ValidationError("Empty data table ids")
250
242
 
251
243
  for data_table_id in data_table_ids:
@@ -274,20 +266,16 @@ class DataSourcePublisher:
274
266
  source_link: Optional[str] = None,
275
267
  update_frequency: Optional[str] = None,
276
268
  client_emails: Optional[List[str]] = None,
277
- date_features: Optional[List[str]] = None,
278
- date_vector_features: Optional[List[str]] = None,
279
269
  ):
280
270
  trace_id = str(uuid.uuid4())
281
271
  with MDC(trace_id=trace_id):
282
272
  try:
283
- if data_table_ids is None:
273
+ if data_table_ids is None or len(data_table_ids) == 0:
284
274
  raise ValidationError("Empty data table ids")
285
275
  if isinstance(data_table_ids, str):
286
276
  data_table_ids = [data_table_ids]
287
277
  if not isinstance(data_table_ids, list):
288
278
  raise ValidationError("data_table_ids should be string or list of strings")
289
- if len(data_table_ids) == 0:
290
- raise ValidationError("Empty data table ids")
291
279
  if update_frequency is not None and update_frequency not in self.ACCEPTABLE_UPDATE_FREQUENCIES:
292
280
  raise ValidationError(
293
281
  f"Invalid update frequency: {update_frequency}. "
@@ -323,10 +311,6 @@ class DataSourcePublisher:
323
311
  request["updateFrequency"] = update_frequency
324
312
  if client_emails is not None:
325
313
  request["clientEmails"] = client_emails
326
- if date_features is not None:
327
- request["dateFeatures"] = date_features
328
- if date_vector_features is not None:
329
- request["dateVectorFeatures"] = date_vector_features
330
314
  self.logger.info(f"Activating data tables with request {request}")
331
315
 
332
316
  self._rest_client.activate_datatables(request, trace_id)
@@ -1 +0,0 @@
1
- __version__ = "1.1.309"
File without changes
File without changes
File without changes