upgini 1.1.296a3521.dev8__tar.gz → 1.1.296a3521.dev10__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of upgini might be problematic. Click here for more details.

Files changed (65)
  1. {upgini-1.1.296a3521.dev8 → upgini-1.1.296a3521.dev10}/PKG-INFO +1 -1
  2. {upgini-1.1.296a3521.dev8 → upgini-1.1.296a3521.dev10}/pyproject.toml +1 -0
  3. upgini-1.1.296a3521.dev10/src/upgini/__about__.py +1 -0
  4. {upgini-1.1.296a3521.dev8 → upgini-1.1.296a3521.dev10}/src/upgini/autofe/all_operands.py +2 -2
  5. {upgini-1.1.296a3521.dev8 → upgini-1.1.296a3521.dev10}/src/upgini/autofe/date.py +4 -2
  6. {upgini-1.1.296a3521.dev8 → upgini-1.1.296a3521.dev10}/src/upgini/autofe/feature.py +1 -5
  7. {upgini-1.1.296a3521.dev8 → upgini-1.1.296a3521.dev10}/src/upgini/data_source/data_source_publisher.py +37 -0
  8. {upgini-1.1.296a3521.dev8 → upgini-1.1.296a3521.dev10}/src/upgini/features_enricher.py +1 -1
  9. upgini-1.1.296a3521.dev8/src/upgini/__about__.py +0 -1
  10. {upgini-1.1.296a3521.dev8 → upgini-1.1.296a3521.dev10}/.gitignore +0 -0
  11. {upgini-1.1.296a3521.dev8 → upgini-1.1.296a3521.dev10}/LICENSE +0 -0
  12. {upgini-1.1.296a3521.dev8 → upgini-1.1.296a3521.dev10}/README.md +0 -0
  13. {upgini-1.1.296a3521.dev8 → upgini-1.1.296a3521.dev10}/src/upgini/__init__.py +0 -0
  14. {upgini-1.1.296a3521.dev8 → upgini-1.1.296a3521.dev10}/src/upgini/ads.py +0 -0
  15. {upgini-1.1.296a3521.dev8 → upgini-1.1.296a3521.dev10}/src/upgini/ads_management/__init__.py +0 -0
  16. {upgini-1.1.296a3521.dev8 → upgini-1.1.296a3521.dev10}/src/upgini/ads_management/ads_manager.py +0 -0
  17. {upgini-1.1.296a3521.dev8 → upgini-1.1.296a3521.dev10}/src/upgini/autofe/__init__.py +0 -0
  18. {upgini-1.1.296a3521.dev8 → upgini-1.1.296a3521.dev10}/src/upgini/autofe/binary.py +0 -0
  19. {upgini-1.1.296a3521.dev8 → upgini-1.1.296a3521.dev10}/src/upgini/autofe/groupby.py +0 -0
  20. {upgini-1.1.296a3521.dev8 → upgini-1.1.296a3521.dev10}/src/upgini/autofe/operand.py +0 -0
  21. {upgini-1.1.296a3521.dev8 → upgini-1.1.296a3521.dev10}/src/upgini/autofe/unary.py +0 -0
  22. {upgini-1.1.296a3521.dev8 → upgini-1.1.296a3521.dev10}/src/upgini/autofe/vector.py +0 -0
  23. {upgini-1.1.296a3521.dev8 → upgini-1.1.296a3521.dev10}/src/upgini/data_source/__init__.py +0 -0
  24. {upgini-1.1.296a3521.dev8 → upgini-1.1.296a3521.dev10}/src/upgini/dataset.py +0 -0
  25. {upgini-1.1.296a3521.dev8 → upgini-1.1.296a3521.dev10}/src/upgini/errors.py +0 -0
  26. {upgini-1.1.296a3521.dev8 → upgini-1.1.296a3521.dev10}/src/upgini/http.py +0 -0
  27. {upgini-1.1.296a3521.dev8 → upgini-1.1.296a3521.dev10}/src/upgini/lazy_import.py +0 -0
  28. {upgini-1.1.296a3521.dev8 → upgini-1.1.296a3521.dev10}/src/upgini/mdc/__init__.py +0 -0
  29. {upgini-1.1.296a3521.dev8 → upgini-1.1.296a3521.dev10}/src/upgini/mdc/context.py +0 -0
  30. {upgini-1.1.296a3521.dev8 → upgini-1.1.296a3521.dev10}/src/upgini/metadata.py +0 -0
  31. {upgini-1.1.296a3521.dev8 → upgini-1.1.296a3521.dev10}/src/upgini/metrics.py +0 -0
  32. {upgini-1.1.296a3521.dev8 → upgini-1.1.296a3521.dev10}/src/upgini/normalizer/__init__.py +0 -0
  33. {upgini-1.1.296a3521.dev8 → upgini-1.1.296a3521.dev10}/src/upgini/normalizer/phone_normalizer.py +0 -0
  34. {upgini-1.1.296a3521.dev8 → upgini-1.1.296a3521.dev10}/src/upgini/resource_bundle/__init__.py +0 -0
  35. {upgini-1.1.296a3521.dev8 → upgini-1.1.296a3521.dev10}/src/upgini/resource_bundle/exceptions.py +0 -0
  36. {upgini-1.1.296a3521.dev8 → upgini-1.1.296a3521.dev10}/src/upgini/resource_bundle/strings.properties +0 -0
  37. {upgini-1.1.296a3521.dev8 → upgini-1.1.296a3521.dev10}/src/upgini/resource_bundle/strings_widget.properties +0 -0
  38. {upgini-1.1.296a3521.dev8 → upgini-1.1.296a3521.dev10}/src/upgini/sampler/__init__.py +0 -0
  39. {upgini-1.1.296a3521.dev8 → upgini-1.1.296a3521.dev10}/src/upgini/sampler/base.py +0 -0
  40. {upgini-1.1.296a3521.dev8 → upgini-1.1.296a3521.dev10}/src/upgini/sampler/random_under_sampler.py +0 -0
  41. {upgini-1.1.296a3521.dev8 → upgini-1.1.296a3521.dev10}/src/upgini/sampler/utils.py +0 -0
  42. {upgini-1.1.296a3521.dev8 → upgini-1.1.296a3521.dev10}/src/upgini/search_task.py +0 -0
  43. {upgini-1.1.296a3521.dev8 → upgini-1.1.296a3521.dev10}/src/upgini/spinner.py +0 -0
  44. {upgini-1.1.296a3521.dev8 → upgini-1.1.296a3521.dev10}/src/upgini/utils/__init__.py +0 -0
  45. {upgini-1.1.296a3521.dev8 → upgini-1.1.296a3521.dev10}/src/upgini/utils/base_search_key_detector.py +0 -0
  46. {upgini-1.1.296a3521.dev8 → upgini-1.1.296a3521.dev10}/src/upgini/utils/blocked_time_series.py +0 -0
  47. {upgini-1.1.296a3521.dev8 → upgini-1.1.296a3521.dev10}/src/upgini/utils/country_utils.py +0 -0
  48. {upgini-1.1.296a3521.dev8 → upgini-1.1.296a3521.dev10}/src/upgini/utils/custom_loss_utils.py +0 -0
  49. {upgini-1.1.296a3521.dev8 → upgini-1.1.296a3521.dev10}/src/upgini/utils/cv_utils.py +0 -0
  50. {upgini-1.1.296a3521.dev8 → upgini-1.1.296a3521.dev10}/src/upgini/utils/datetime_utils.py +0 -0
  51. {upgini-1.1.296a3521.dev8 → upgini-1.1.296a3521.dev10}/src/upgini/utils/deduplicate_utils.py +0 -0
  52. {upgini-1.1.296a3521.dev8 → upgini-1.1.296a3521.dev10}/src/upgini/utils/display_utils.py +0 -0
  53. {upgini-1.1.296a3521.dev8 → upgini-1.1.296a3521.dev10}/src/upgini/utils/email_utils.py +0 -0
  54. {upgini-1.1.296a3521.dev8 → upgini-1.1.296a3521.dev10}/src/upgini/utils/fallback_progress_bar.py +0 -0
  55. {upgini-1.1.296a3521.dev8 → upgini-1.1.296a3521.dev10}/src/upgini/utils/features_validator.py +0 -0
  56. {upgini-1.1.296a3521.dev8 → upgini-1.1.296a3521.dev10}/src/upgini/utils/format.py +0 -0
  57. {upgini-1.1.296a3521.dev8 → upgini-1.1.296a3521.dev10}/src/upgini/utils/ip_utils.py +0 -0
  58. {upgini-1.1.296a3521.dev8 → upgini-1.1.296a3521.dev10}/src/upgini/utils/phone_utils.py +0 -0
  59. {upgini-1.1.296a3521.dev8 → upgini-1.1.296a3521.dev10}/src/upgini/utils/postal_code_utils.py +0 -0
  60. {upgini-1.1.296a3521.dev8 → upgini-1.1.296a3521.dev10}/src/upgini/utils/progress_bar.py +0 -0
  61. {upgini-1.1.296a3521.dev8 → upgini-1.1.296a3521.dev10}/src/upgini/utils/sklearn_ext.py +0 -0
  62. {upgini-1.1.296a3521.dev8 → upgini-1.1.296a3521.dev10}/src/upgini/utils/target_utils.py +0 -0
  63. {upgini-1.1.296a3521.dev8 → upgini-1.1.296a3521.dev10}/src/upgini/utils/track_info.py +0 -0
  64. {upgini-1.1.296a3521.dev8 → upgini-1.1.296a3521.dev10}/src/upgini/utils/warning_counter.py +0 -0
  65. {upgini-1.1.296a3521.dev8 → upgini-1.1.296a3521.dev10}/src/upgini/version_validator.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: upgini
3
- Version: 1.1.296a3521.dev8
3
+ Version: 1.1.296a3521.dev10
4
4
  Summary: Intelligent data search & enrichment for Machine Learning
5
5
  Project-URL: Bug Reports, https://github.com/upgini/upgini/issues
6
6
  Project-URL: Homepage, https://upgini.com/
@@ -47,6 +47,7 @@ dependencies = [
47
47
  "python-json-logger>=2.0.2",
48
48
  "requests>=2.8.0",
49
49
  "scikit-learn>=1.3.0",
50
+ "python-bidi==0.4.2",
50
51
  "xhtml2pdf==0.2.11",
51
52
  "python-bidi==0.4.2",
52
53
  ]
@@ -0,0 +1 @@
1
+ __version__ = "1.1.296a3521.dev10"
@@ -7,7 +7,7 @@ from upgini.autofe.date import (
7
7
  DateListDiff,
8
8
  DateListDiffBounded,
9
9
  DatePercentile,
10
- DatePercentileType2,
10
+ DatePercentileMethod2,
11
11
  )
12
12
  from upgini.autofe.groupby import GroupByThenAgg, GroupByThenRank
13
13
  from upgini.autofe.operand import Operand
@@ -57,7 +57,7 @@ ALL_OPERANDS: Dict[str, Operand] = {
57
57
  DateListDiffBounded(diff_unit="Y", aggregation="count", lower_bound=45, upper_bound=60),
58
58
  DateListDiffBounded(diff_unit="Y", aggregation="count", lower_bound=60),
59
59
  DatePercentile(),
60
- DatePercentileType2(),
60
+ DatePercentileMethod2(),
61
61
  Norm(),
62
62
  ]
63
63
  }
@@ -39,6 +39,7 @@ class DateDiffMixin(BaseModel):
39
39
 
40
40
  class DateDiff(PandasOperand, DateDiffMixin):
41
41
  name = "date_diff"
42
+ alias = "date_diff_type1"
42
43
  is_binary = True
43
44
  has_symmetry_importance = True
44
45
 
@@ -197,6 +198,7 @@ class DatePercentileBase(PandasOperand, abc.ABC):
197
198
 
198
199
  class DatePercentile(DatePercentileBase):
199
200
  name = "date_per"
201
+ alias = "date_per_method1"
200
202
 
201
203
  zero_month: Optional[int]
202
204
  zero_year: Optional[int]
@@ -232,8 +234,8 @@ class DatePercentile(DatePercentileBase):
232
234
  )
233
235
 
234
236
 
235
- class DatePercentileType2(DatePercentileBase):
236
- name = "date_per_type2"
237
+ class DatePercentileMethod2(DatePercentileBase):
238
+ name = "date_per_method2"
237
239
 
238
240
  def _get_bounds(self, date_col: pd.Series) -> pd.Series:
239
241
  pass
@@ -134,9 +134,6 @@ class Feature:
134
134
  for child in self.children:
135
135
  child.delete_data()
136
136
 
137
- def get_name_component(self, **kwargs) -> str:
138
- return "_".join(ch.get_name_component(**kwargs) for ch in self.children) + "_" + self.get_op_display_name()
139
-
140
137
  def get_op_display_name(self) -> str:
141
138
  return self.op.alias or self.op.name.lower()
142
139
 
@@ -149,8 +146,7 @@ class Feature:
149
146
  elif shorten and not self.op.is_unary:
150
147
  components = ["f_autofe", self.get_op_display_name()]
151
148
  else:
152
- child_components = [ch.get_name_component(**kwargs) for ch in self.children]
153
- components = ["f_" + "_f_".join(child_components)] + [
149
+ components = ["f_" + "_f_".join(self.get_columns(**kwargs))] + [
154
150
  "autofe",
155
151
  self.get_op_display_name(),
156
152
  ]
@@ -59,9 +59,35 @@ class DataSourcePublisher:
59
59
  features_for_embeddings: Optional[List[str]] = DEFAULT_GENERATE_EMBEDDINGS,
60
60
  data_table_id_to_replace: Optional[str] = None,
61
61
  keep_features: Optional[List[str]] = None,
62
+ date_features: Optional[List[str]] = None,
63
+ date_vector_features: Optional[List[str]] = None,
62
64
  _force_generation=False,
63
65
  _silent=False,
64
66
  ) -> str:
67
+ """Register new ADS
68
+
69
+ Parameters
70
+ ----------
71
+ data_table_uri - str - table name in format {project_id}.{datasource_name}.{table_name}
72
+
73
+ search_keys - dict with column names as keys and SearchKey as value
74
+
75
+ update_frequency - str - (Monthly, Weekly, Daily, Annually, Quarterly)
76
+
77
+ exclude_from_autofe_generation - optional list of features that should be excluded from AutoFE
78
+
79
+ secondary_search_keys - optional dict of secondary search keys
80
+
81
+ sort_column - optional str - name of unique column that could be used for sort
82
+
83
+ date_format - optional str - format of date if it is present in search keys
84
+
85
+ ...
86
+
87
+ data_table_id_to_replace - optional str - id of registered ADS that should be replaced by new table
88
+
89
+ keep_features - optional list - features that should not be removed from ADS (even if they are personal)
90
+ """
65
91
  trace_id = str(uuid.uuid4())
66
92
 
67
93
  with MDC(trace_id=trace_id):
@@ -124,6 +150,14 @@ class DataSourcePublisher:
124
150
  request["excludeFromGeneration"] = exclude_from_autofe_generation
125
151
  if keep_features is not None:
126
152
  request["keepFeatures"] = keep_features
153
+ if date_features is not None:
154
+ if date_format is None:
155
+ raise ValidationError("date_format should be presented if you use date features")
156
+ request["dateFeatures"] = date_features
157
+ if date_vector_features is not None:
158
+ if date_format is None:
159
+ raise ValidationError("date_format should be presented if you use date vector features")
160
+ request["dateVectorFeatures"] = date_vector_features
127
161
  self.logger.info(f"Start registering data table {request}")
128
162
 
129
163
  task_id = self._rest_client.register_ads(request, trace_id)
@@ -181,6 +215,9 @@ class DataSourcePublisher:
181
215
  msg = f"Data table successfully registered with id: {data_table_id}"
182
216
  self.logger.info(msg)
183
217
  print(msg)
218
+ if "warnings" in status_response and status_response["warnings"]:
219
+ self.logger.warning(status_response["warnings"])
220
+ print(status_response["warnings"])
184
221
  return data_table_id
185
222
  except KeyboardInterrupt:
186
223
  if task_id is not None:
@@ -2870,7 +2870,7 @@ class FeaturesEnricher(TransformerMixin):
2870
2870
  self.logger.info(f"Dates interval is ({min_date}, {max_date})")
2871
2871
 
2872
2872
  except Exception:
2873
- self.logger.exception("Failed to log debug information")
2873
+ self.logger.warning("Failed to log debug information", exc_info=True)
2874
2874
 
2875
2875
  def __handle_index_search_keys(self, df: pd.DataFrame, search_keys: Dict[str, SearchKey]) -> pd.DataFrame:
2876
2876
  index_names = df.index.names if df.index.names != [None] else [DEFAULT_INDEX]
@@ -1 +0,0 @@
1
- __version__ = "1.1.296a3521.dev8"