upgini 1.1.296a3511.dev2__tar.gz → 1.1.296a3511.dev4__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of upgini might be problematic. Click here for more details.

Files changed (65)
  1. {upgini-1.1.296a3511.dev2 → upgini-1.1.296a3511.dev4}/PKG-INFO +1 -1
  2. upgini-1.1.296a3511.dev4/src/upgini/__about__.py +1 -0
  3. {upgini-1.1.296a3511.dev2 → upgini-1.1.296a3511.dev4}/src/upgini/autofe/all_operands.py +2 -1
  4. {upgini-1.1.296a3511.dev2 → upgini-1.1.296a3511.dev4}/src/upgini/autofe/binary.py +11 -1
  5. {upgini-1.1.296a3511.dev2 → upgini-1.1.296a3511.dev4}/src/upgini/autofe/unary.py +7 -0
  6. upgini-1.1.296a3511.dev2/src/upgini/__about__.py +0 -1
  7. {upgini-1.1.296a3511.dev2 → upgini-1.1.296a3511.dev4}/.gitignore +0 -0
  8. {upgini-1.1.296a3511.dev2 → upgini-1.1.296a3511.dev4}/LICENSE +0 -0
  9. {upgini-1.1.296a3511.dev2 → upgini-1.1.296a3511.dev4}/README.md +0 -0
  10. {upgini-1.1.296a3511.dev2 → upgini-1.1.296a3511.dev4}/pyproject.toml +0 -0
  11. {upgini-1.1.296a3511.dev2 → upgini-1.1.296a3511.dev4}/src/upgini/__init__.py +0 -0
  12. {upgini-1.1.296a3511.dev2 → upgini-1.1.296a3511.dev4}/src/upgini/ads.py +0 -0
  13. {upgini-1.1.296a3511.dev2 → upgini-1.1.296a3511.dev4}/src/upgini/ads_management/__init__.py +0 -0
  14. {upgini-1.1.296a3511.dev2 → upgini-1.1.296a3511.dev4}/src/upgini/ads_management/ads_manager.py +0 -0
  15. {upgini-1.1.296a3511.dev2 → upgini-1.1.296a3511.dev4}/src/upgini/autofe/__init__.py +0 -0
  16. {upgini-1.1.296a3511.dev2 → upgini-1.1.296a3511.dev4}/src/upgini/autofe/date.py +0 -0
  17. {upgini-1.1.296a3511.dev2 → upgini-1.1.296a3511.dev4}/src/upgini/autofe/feature.py +0 -0
  18. {upgini-1.1.296a3511.dev2 → upgini-1.1.296a3511.dev4}/src/upgini/autofe/groupby.py +0 -0
  19. {upgini-1.1.296a3511.dev2 → upgini-1.1.296a3511.dev4}/src/upgini/autofe/operand.py +0 -0
  20. {upgini-1.1.296a3511.dev2 → upgini-1.1.296a3511.dev4}/src/upgini/autofe/vector.py +0 -0
  21. {upgini-1.1.296a3511.dev2 → upgini-1.1.296a3511.dev4}/src/upgini/data_source/__init__.py +0 -0
  22. {upgini-1.1.296a3511.dev2 → upgini-1.1.296a3511.dev4}/src/upgini/data_source/data_source_publisher.py +0 -0
  23. {upgini-1.1.296a3511.dev2 → upgini-1.1.296a3511.dev4}/src/upgini/dataset.py +0 -0
  24. {upgini-1.1.296a3511.dev2 → upgini-1.1.296a3511.dev4}/src/upgini/errors.py +0 -0
  25. {upgini-1.1.296a3511.dev2 → upgini-1.1.296a3511.dev4}/src/upgini/features_enricher.py +0 -0
  26. {upgini-1.1.296a3511.dev2 → upgini-1.1.296a3511.dev4}/src/upgini/http.py +0 -0
  27. {upgini-1.1.296a3511.dev2 → upgini-1.1.296a3511.dev4}/src/upgini/lazy_import.py +0 -0
  28. {upgini-1.1.296a3511.dev2 → upgini-1.1.296a3511.dev4}/src/upgini/mdc/__init__.py +0 -0
  29. {upgini-1.1.296a3511.dev2 → upgini-1.1.296a3511.dev4}/src/upgini/mdc/context.py +0 -0
  30. {upgini-1.1.296a3511.dev2 → upgini-1.1.296a3511.dev4}/src/upgini/metadata.py +0 -0
  31. {upgini-1.1.296a3511.dev2 → upgini-1.1.296a3511.dev4}/src/upgini/metrics.py +0 -0
  32. {upgini-1.1.296a3511.dev2 → upgini-1.1.296a3511.dev4}/src/upgini/normalizer/__init__.py +0 -0
  33. {upgini-1.1.296a3511.dev2 → upgini-1.1.296a3511.dev4}/src/upgini/normalizer/phone_normalizer.py +0 -0
  34. {upgini-1.1.296a3511.dev2 → upgini-1.1.296a3511.dev4}/src/upgini/resource_bundle/__init__.py +0 -0
  35. {upgini-1.1.296a3511.dev2 → upgini-1.1.296a3511.dev4}/src/upgini/resource_bundle/exceptions.py +0 -0
  36. {upgini-1.1.296a3511.dev2 → upgini-1.1.296a3511.dev4}/src/upgini/resource_bundle/strings.properties +0 -0
  37. {upgini-1.1.296a3511.dev2 → upgini-1.1.296a3511.dev4}/src/upgini/resource_bundle/strings_widget.properties +0 -0
  38. {upgini-1.1.296a3511.dev2 → upgini-1.1.296a3511.dev4}/src/upgini/sampler/__init__.py +0 -0
  39. {upgini-1.1.296a3511.dev2 → upgini-1.1.296a3511.dev4}/src/upgini/sampler/base.py +0 -0
  40. {upgini-1.1.296a3511.dev2 → upgini-1.1.296a3511.dev4}/src/upgini/sampler/random_under_sampler.py +0 -0
  41. {upgini-1.1.296a3511.dev2 → upgini-1.1.296a3511.dev4}/src/upgini/sampler/utils.py +0 -0
  42. {upgini-1.1.296a3511.dev2 → upgini-1.1.296a3511.dev4}/src/upgini/search_task.py +0 -0
  43. {upgini-1.1.296a3511.dev2 → upgini-1.1.296a3511.dev4}/src/upgini/spinner.py +0 -0
  44. {upgini-1.1.296a3511.dev2 → upgini-1.1.296a3511.dev4}/src/upgini/utils/__init__.py +0 -0
  45. {upgini-1.1.296a3511.dev2 → upgini-1.1.296a3511.dev4}/src/upgini/utils/base_search_key_detector.py +0 -0
  46. {upgini-1.1.296a3511.dev2 → upgini-1.1.296a3511.dev4}/src/upgini/utils/blocked_time_series.py +0 -0
  47. {upgini-1.1.296a3511.dev2 → upgini-1.1.296a3511.dev4}/src/upgini/utils/country_utils.py +0 -0
  48. {upgini-1.1.296a3511.dev2 → upgini-1.1.296a3511.dev4}/src/upgini/utils/custom_loss_utils.py +0 -0
  49. {upgini-1.1.296a3511.dev2 → upgini-1.1.296a3511.dev4}/src/upgini/utils/cv_utils.py +0 -0
  50. {upgini-1.1.296a3511.dev2 → upgini-1.1.296a3511.dev4}/src/upgini/utils/datetime_utils.py +0 -0
  51. {upgini-1.1.296a3511.dev2 → upgini-1.1.296a3511.dev4}/src/upgini/utils/deduplicate_utils.py +0 -0
  52. {upgini-1.1.296a3511.dev2 → upgini-1.1.296a3511.dev4}/src/upgini/utils/display_utils.py +0 -0
  53. {upgini-1.1.296a3511.dev2 → upgini-1.1.296a3511.dev4}/src/upgini/utils/email_utils.py +0 -0
  54. {upgini-1.1.296a3511.dev2 → upgini-1.1.296a3511.dev4}/src/upgini/utils/fallback_progress_bar.py +0 -0
  55. {upgini-1.1.296a3511.dev2 → upgini-1.1.296a3511.dev4}/src/upgini/utils/features_validator.py +0 -0
  56. {upgini-1.1.296a3511.dev2 → upgini-1.1.296a3511.dev4}/src/upgini/utils/format.py +0 -0
  57. {upgini-1.1.296a3511.dev2 → upgini-1.1.296a3511.dev4}/src/upgini/utils/ip_utils.py +0 -0
  58. {upgini-1.1.296a3511.dev2 → upgini-1.1.296a3511.dev4}/src/upgini/utils/phone_utils.py +0 -0
  59. {upgini-1.1.296a3511.dev2 → upgini-1.1.296a3511.dev4}/src/upgini/utils/postal_code_utils.py +0 -0
  60. {upgini-1.1.296a3511.dev2 → upgini-1.1.296a3511.dev4}/src/upgini/utils/progress_bar.py +0 -0
  61. {upgini-1.1.296a3511.dev2 → upgini-1.1.296a3511.dev4}/src/upgini/utils/sklearn_ext.py +0 -0
  62. {upgini-1.1.296a3511.dev2 → upgini-1.1.296a3511.dev4}/src/upgini/utils/target_utils.py +0 -0
  63. {upgini-1.1.296a3511.dev2 → upgini-1.1.296a3511.dev4}/src/upgini/utils/track_info.py +0 -0
  64. {upgini-1.1.296a3511.dev2 → upgini-1.1.296a3511.dev4}/src/upgini/utils/warning_counter.py +0 -0
  65. {upgini-1.1.296a3511.dev2 → upgini-1.1.296a3511.dev4}/src/upgini/version_validator.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: upgini
3
- Version: 1.1.296a3511.dev2
3
+ Version: 1.1.296a3511.dev4
4
4
  Summary: Intelligent data search & enrichment for Machine Learning
5
5
  Project-URL: Bug Reports, https://github.com/upgini/upgini/issues
6
6
  Project-URL: Homepage, https://upgini.com/
@@ -0,0 +1 @@
1
+ __version__ = "1.1.296a3511.dev4"
@@ -18,7 +18,7 @@ from upgini.autofe.binary import (
18
18
  from upgini.autofe.date import DateDiff, DateDiffType2, DateListDiff, DateListDiffBounded, DatePercentile
19
19
  from upgini.autofe.groupby import GroupByThenAgg, GroupByThenFreq, GroupByThenNUnique, GroupByThenRank
20
20
  from upgini.autofe.operand import Operand
21
- from upgini.autofe.unary import Abs, Floor, Freq, Log, Residual, Norm, Sigmoid, Sqrt, Square
21
+ from upgini.autofe.unary import Abs, Embeddings, Floor, Freq, Log, Residual, Norm, Sigmoid, Sqrt, Square
22
22
  from upgini.autofe.vector import Mean, Sum
23
23
 
24
24
  ALL_OPERANDS: Dict[str, Operand] = {
@@ -69,6 +69,7 @@ ALL_OPERANDS: Dict[str, Operand] = {
69
69
  JaroWinklerSim2(),
70
70
  LevenshteinSim(),
71
71
  Distance(),
72
+ Embeddings(),
72
73
  ]
73
74
  }
74
75
 
@@ -142,9 +142,16 @@ class Distance(PandasOperand):
142
142
  has_symmetry_importance = True
143
143
 
144
144
  def calculate_binary(self, left: pd.Series, right: pd.Series) -> pd.Series:
145
- return dot(left, right) / (norm(left) * norm(right))
145
+ return pd.Series(
146
+ 1 - self.__dot(left, right) / (self.__dot(left, left) * self.__dot(right, right)), index=left.index
147
+ )
146
148
 
149
+ # row-wise dot product
150
+ def __dot(self, left: pd.Series, right: pd.Series) -> pd.Series:
151
+ return (left * right).apply(np.sum)
147
152
 
153
+
154
+ # Left for backward compatibility
148
155
  class Sim(Distance):
149
156
  name = "sim"
150
157
  is_binary = True
@@ -152,6 +159,9 @@ class Sim(Distance):
152
159
  is_symmetrical = True
153
160
  has_symmetry_importance = True
154
161
 
162
+ def calculate_binary(self, left: pd.Series, right: pd.Series) -> pd.Series:
163
+ return 1 - super().calculate_binary(left, right)
164
+
155
165
 
156
166
  class StringSim(PandasOperand, abc.ABC):
157
167
  def calculate_binary(self, left: pd.Series, right: pd.Series) -> pd.Series:
@@ -125,3 +125,10 @@ class Norm(PandasOperand):
125
125
  normalized_data = pd.Series(normalized_data[:, 0], index=data_dropna.index, name=data.name)
126
126
  normalized_data = normalized_data.reindex(data.index)
127
127
  return normalized_data
128
+
129
+
130
+ class Embeddings(PandasOperand):
131
+ name = "emb"
132
+ is_unary = True
133
+ input_type = "string"
134
+ output_type = "vector"
@@ -1 +0,0 @@
1
- __version__ = "1.1.296a3511.dev2"