upgini 1.2.99a3922.dev5__tar.gz → 1.2.103__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (80) hide show
  1. {upgini-1.2.99a3922.dev5 → upgini-1.2.103}/PKG-INFO +1 -1
  2. upgini-1.2.103/src/upgini/__about__.py +1 -0
  3. {upgini-1.2.99a3922.dev5 → upgini-1.2.103}/src/upgini/autofe/feature.py +1 -0
  4. {upgini-1.2.99a3922.dev5 → upgini-1.2.103}/src/upgini/autofe/operator.py +3 -0
  5. upgini-1.2.103/src/upgini/autofe/vector.py +75 -0
  6. {upgini-1.2.99a3922.dev5 → upgini-1.2.103}/src/upgini/data_source/data_source_publisher.py +4 -0
  7. {upgini-1.2.99a3922.dev5 → upgini-1.2.103}/src/upgini/features_enricher.py +8 -1
  8. {upgini-1.2.99a3922.dev5 → upgini-1.2.103}/src/upgini/resource_bundle/strings.properties +1 -0
  9. {upgini-1.2.99a3922.dev5 → upgini-1.2.103}/src/upgini/utils/display_utils.py +0 -4
  10. upgini-1.2.99a3922.dev5/src/upgini/__about__.py +0 -1
  11. upgini-1.2.99a3922.dev5/src/upgini/autofe/vector.py +0 -45
  12. {upgini-1.2.99a3922.dev5 → upgini-1.2.103}/.gitignore +0 -0
  13. {upgini-1.2.99a3922.dev5 → upgini-1.2.103}/LICENSE +0 -0
  14. {upgini-1.2.99a3922.dev5 → upgini-1.2.103}/README.md +0 -0
  15. {upgini-1.2.99a3922.dev5 → upgini-1.2.103}/pyproject.toml +0 -0
  16. {upgini-1.2.99a3922.dev5 → upgini-1.2.103}/src/upgini/__init__.py +0 -0
  17. {upgini-1.2.99a3922.dev5 → upgini-1.2.103}/src/upgini/ads.py +0 -0
  18. {upgini-1.2.99a3922.dev5 → upgini-1.2.103}/src/upgini/ads_management/__init__.py +0 -0
  19. {upgini-1.2.99a3922.dev5 → upgini-1.2.103}/src/upgini/ads_management/ads_manager.py +0 -0
  20. {upgini-1.2.99a3922.dev5 → upgini-1.2.103}/src/upgini/autofe/__init__.py +0 -0
  21. {upgini-1.2.99a3922.dev5 → upgini-1.2.103}/src/upgini/autofe/all_operators.py +0 -0
  22. {upgini-1.2.99a3922.dev5 → upgini-1.2.103}/src/upgini/autofe/binary.py +0 -0
  23. {upgini-1.2.99a3922.dev5 → upgini-1.2.103}/src/upgini/autofe/date.py +0 -0
  24. {upgini-1.2.99a3922.dev5 → upgini-1.2.103}/src/upgini/autofe/groupby.py +0 -0
  25. {upgini-1.2.99a3922.dev5 → upgini-1.2.103}/src/upgini/autofe/timeseries/__init__.py +0 -0
  26. {upgini-1.2.99a3922.dev5 → upgini-1.2.103}/src/upgini/autofe/timeseries/base.py +0 -0
  27. {upgini-1.2.99a3922.dev5 → upgini-1.2.103}/src/upgini/autofe/timeseries/cross.py +0 -0
  28. {upgini-1.2.99a3922.dev5 → upgini-1.2.103}/src/upgini/autofe/timeseries/delta.py +0 -0
  29. {upgini-1.2.99a3922.dev5 → upgini-1.2.103}/src/upgini/autofe/timeseries/lag.py +0 -0
  30. {upgini-1.2.99a3922.dev5 → upgini-1.2.103}/src/upgini/autofe/timeseries/roll.py +0 -0
  31. {upgini-1.2.99a3922.dev5 → upgini-1.2.103}/src/upgini/autofe/timeseries/trend.py +0 -0
  32. {upgini-1.2.99a3922.dev5 → upgini-1.2.103}/src/upgini/autofe/timeseries/volatility.py +0 -0
  33. {upgini-1.2.99a3922.dev5 → upgini-1.2.103}/src/upgini/autofe/unary.py +0 -0
  34. {upgini-1.2.99a3922.dev5 → upgini-1.2.103}/src/upgini/autofe/utils.py +0 -0
  35. {upgini-1.2.99a3922.dev5 → upgini-1.2.103}/src/upgini/data_source/__init__.py +0 -0
  36. {upgini-1.2.99a3922.dev5 → upgini-1.2.103}/src/upgini/dataset.py +0 -0
  37. {upgini-1.2.99a3922.dev5 → upgini-1.2.103}/src/upgini/errors.py +0 -0
  38. {upgini-1.2.99a3922.dev5 → upgini-1.2.103}/src/upgini/http.py +0 -0
  39. {upgini-1.2.99a3922.dev5 → upgini-1.2.103}/src/upgini/mdc/__init__.py +0 -0
  40. {upgini-1.2.99a3922.dev5 → upgini-1.2.103}/src/upgini/mdc/context.py +0 -0
  41. {upgini-1.2.99a3922.dev5 → upgini-1.2.103}/src/upgini/metadata.py +0 -0
  42. {upgini-1.2.99a3922.dev5 → upgini-1.2.103}/src/upgini/metrics.py +0 -0
  43. {upgini-1.2.99a3922.dev5 → upgini-1.2.103}/src/upgini/normalizer/__init__.py +0 -0
  44. {upgini-1.2.99a3922.dev5 → upgini-1.2.103}/src/upgini/normalizer/normalize_utils.py +0 -0
  45. {upgini-1.2.99a3922.dev5 → upgini-1.2.103}/src/upgini/resource_bundle/__init__.py +0 -0
  46. {upgini-1.2.99a3922.dev5 → upgini-1.2.103}/src/upgini/resource_bundle/exceptions.py +0 -0
  47. {upgini-1.2.99a3922.dev5 → upgini-1.2.103}/src/upgini/resource_bundle/strings_widget.properties +0 -0
  48. {upgini-1.2.99a3922.dev5 → upgini-1.2.103}/src/upgini/sampler/__init__.py +0 -0
  49. {upgini-1.2.99a3922.dev5 → upgini-1.2.103}/src/upgini/sampler/base.py +0 -0
  50. {upgini-1.2.99a3922.dev5 → upgini-1.2.103}/src/upgini/sampler/random_under_sampler.py +0 -0
  51. {upgini-1.2.99a3922.dev5 → upgini-1.2.103}/src/upgini/sampler/utils.py +0 -0
  52. {upgini-1.2.99a3922.dev5 → upgini-1.2.103}/src/upgini/search_task.py +0 -0
  53. {upgini-1.2.99a3922.dev5 → upgini-1.2.103}/src/upgini/spinner.py +0 -0
  54. {upgini-1.2.99a3922.dev5 → upgini-1.2.103}/src/upgini/utils/Roboto-Regular.ttf +0 -0
  55. {upgini-1.2.99a3922.dev5 → upgini-1.2.103}/src/upgini/utils/__init__.py +0 -0
  56. {upgini-1.2.99a3922.dev5 → upgini-1.2.103}/src/upgini/utils/base_search_key_detector.py +0 -0
  57. {upgini-1.2.99a3922.dev5 → upgini-1.2.103}/src/upgini/utils/blocked_time_series.py +0 -0
  58. {upgini-1.2.99a3922.dev5 → upgini-1.2.103}/src/upgini/utils/country_utils.py +0 -0
  59. {upgini-1.2.99a3922.dev5 → upgini-1.2.103}/src/upgini/utils/custom_loss_utils.py +0 -0
  60. {upgini-1.2.99a3922.dev5 → upgini-1.2.103}/src/upgini/utils/cv_utils.py +0 -0
  61. {upgini-1.2.99a3922.dev5 → upgini-1.2.103}/src/upgini/utils/datetime_utils.py +0 -0
  62. {upgini-1.2.99a3922.dev5 → upgini-1.2.103}/src/upgini/utils/deduplicate_utils.py +0 -0
  63. {upgini-1.2.99a3922.dev5 → upgini-1.2.103}/src/upgini/utils/email_utils.py +0 -0
  64. {upgini-1.2.99a3922.dev5 → upgini-1.2.103}/src/upgini/utils/fallback_progress_bar.py +0 -0
  65. {upgini-1.2.99a3922.dev5 → upgini-1.2.103}/src/upgini/utils/feature_info.py +0 -0
  66. {upgini-1.2.99a3922.dev5 → upgini-1.2.103}/src/upgini/utils/features_validator.py +0 -0
  67. {upgini-1.2.99a3922.dev5 → upgini-1.2.103}/src/upgini/utils/format.py +0 -0
  68. {upgini-1.2.99a3922.dev5 → upgini-1.2.103}/src/upgini/utils/ip_utils.py +0 -0
  69. {upgini-1.2.99a3922.dev5 → upgini-1.2.103}/src/upgini/utils/mstats.py +0 -0
  70. {upgini-1.2.99a3922.dev5 → upgini-1.2.103}/src/upgini/utils/phone_utils.py +0 -0
  71. {upgini-1.2.99a3922.dev5 → upgini-1.2.103}/src/upgini/utils/postal_code_utils.py +0 -0
  72. {upgini-1.2.99a3922.dev5 → upgini-1.2.103}/src/upgini/utils/progress_bar.py +0 -0
  73. {upgini-1.2.99a3922.dev5 → upgini-1.2.103}/src/upgini/utils/sample_utils.py +0 -0
  74. {upgini-1.2.99a3922.dev5 → upgini-1.2.103}/src/upgini/utils/sklearn_ext.py +0 -0
  75. {upgini-1.2.99a3922.dev5 → upgini-1.2.103}/src/upgini/utils/sort.py +0 -0
  76. {upgini-1.2.99a3922.dev5 → upgini-1.2.103}/src/upgini/utils/target_utils.py +0 -0
  77. {upgini-1.2.99a3922.dev5 → upgini-1.2.103}/src/upgini/utils/track_info.py +0 -0
  78. {upgini-1.2.99a3922.dev5 → upgini-1.2.103}/src/upgini/utils/ts_utils.py +0 -0
  79. {upgini-1.2.99a3922.dev5 → upgini-1.2.103}/src/upgini/utils/warning_counter.py +0 -0
  80. {upgini-1.2.99a3922.dev5 → upgini-1.2.103}/src/upgini/version_validator.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: upgini
3
- Version: 1.2.99a3922.dev5
3
+ Version: 1.2.103
4
4
  Summary: Intelligent data search & enrichment for Machine Learning
5
5
  Project-URL: Bug Reports, https://github.com/upgini/upgini/issues
6
6
  Project-URL: Homepage, https://upgini.com/
@@ -0,0 +1 @@
1
+ __version__ = "1.2.103"
@@ -164,6 +164,7 @@ class Feature:
164
164
 
165
165
  def delete_data(self):
166
166
  self.data = None
167
+ self.op.delete_data()
167
168
  for child in self.children:
168
169
  child.delete_data()
169
170
 
@@ -86,6 +86,9 @@ class Operator(BaseModel, metaclass=OperatorRegistry):
86
86
  self._logger = logger
87
87
  return self
88
88
 
89
+ def delete_data(self):
90
+ pass
91
+
89
92
 
90
93
  class ParametrizedOperator(Operator, abc.ABC):
91
94
 
@@ -0,0 +1,75 @@
1
+ from typing import List, Optional
2
+
3
+ import pandas as pd
4
+
5
+ from upgini.autofe.operator import OperatorRegistry, PandasOperator, ParametrizedOperator, VectorizableMixin
6
+
7
+
8
+ class Mean(PandasOperator, VectorizableMixin):
9
+ name: str = "mean"
10
+ output_type: Optional[str] = "float"
11
+ is_vector: bool = True
12
+ group_index: int = 0
13
+
14
+ def calculate_vector(self, data: List[pd.Series]) -> pd.Series:
15
+ return pd.DataFrame(data).T.fillna(0).mean(axis=1)
16
+
17
+
18
+ class Sum(PandasOperator, VectorizableMixin):
19
+ name: str = "sum"
20
+ is_vector: bool = True
21
+ group_index: int = 0
22
+
23
+ def calculate_vector(self, data: List[pd.Series]) -> pd.Series:
24
+ return pd.DataFrame(data).T.fillna(0).sum(axis=1)
25
+
26
+
27
+ class Vectorize(PandasOperator, VectorizableMixin):
28
+ name: str = "vectorize"
29
+ is_vector: bool = True
30
+ group_index: int = 0
31
+
32
+ def calculate_vector(self, data: List[pd.Series]) -> pd.Series:
33
+ return pd.DataFrame(data).T.apply(lambda x: x.to_list(), axis=1)
34
+
35
+
36
+ class OnnxModel(PandasOperator, ParametrizedOperator, metaclass=OperatorRegistry):
37
+ name: str = "onnx"
38
+ score_name: str = "score"
39
+ is_vector: bool = True
40
+ output_type: Optional[str] = "float"
41
+
42
+ def to_formula(self) -> str:
43
+ return f"onnx_{self.score_name}"
44
+
45
+ @classmethod
46
+ def from_formula(cls, formula: str) -> Optional["OnnxModel"]:
47
+ if "(" in formula:
48
+ return None
49
+ if formula.startswith("onnx_"):
50
+ score_name = formula[len("onnx_"):]
51
+ return cls(score_name=score_name)
52
+ elif formula == "onnx": # for OperatorRegistry
53
+ return cls()
54
+ return None
55
+
56
+
57
+ class CatboostModel(PandasOperator, ParametrizedOperator, metaclass=OperatorRegistry):
58
+ name: str = "catboost"
59
+ score_name: str = "score"
60
+ is_vector: bool = True
61
+ output_type: Optional[str] = "float"
62
+
63
+ def to_formula(self) -> str:
64
+ return f"catboost_{self.score_name}"
65
+
66
+ @classmethod
67
+ def from_formula(cls, formula: str) -> Optional["CatboostModel"]:
68
+ if "(" in formula:
69
+ return None
70
+ if formula.startswith("catboost_"):
71
+ score_name = formula[len("catboost_"):]
72
+ return cls(score_name=score_name)
73
+ elif formula == "catboost": # for OperatorRegistry
74
+ return cls()
75
+ return None
@@ -500,6 +500,8 @@ class DataSourcePublisher:
500
500
  name: str,
501
501
  input_names: List[str],
502
502
  search_id: str,
503
+ date_column: Optional[str] = None,
504
+ score_name: Optional[str] = None,
503
505
  model_type: Optional[Literal["ONNX", "CATBOOST"]] = None,
504
506
  description: str = "",
505
507
  ):
@@ -508,6 +510,8 @@ class DataSourcePublisher:
508
510
  metadata = {
509
511
  "modelName": name,
510
512
  "inputNames": input_names,
513
+ "dateColumn": date_column,
514
+ "scoreName": score_name,
511
515
  "searchTaskId": search_id,
512
516
  "modelType": model_type or "ONNX",
513
517
  "description": description,
@@ -3708,6 +3708,11 @@ if response.status_code == 200:
3708
3708
 
3709
3709
  @staticmethod
3710
3710
  def _get_group_columns(df: pd.DataFrame, search_keys: Dict[str, SearchKey]) -> List[str]:
3711
+ search_key_priority = [SearchKey.PHONE, SearchKey.EMAIL, SearchKey.HEM, SearchKey.IP]
3712
+ for key_type in search_key_priority:
3713
+ if key_type in search_keys.values():
3714
+ return [col for col, t in search_keys.items() if t == key_type]
3715
+
3711
3716
  return [
3712
3717
  col
3713
3718
  for col, t in search_keys.items()
@@ -4481,7 +4486,9 @@ if response.status_code == 200:
4481
4486
  sample = df.head(100)
4482
4487
 
4483
4488
  def check_need_detect(search_key: SearchKey):
4484
- return not is_transform or search_key in self.fit_search_keys.values()
4489
+ return not is_transform or (
4490
+ search_key in self.fit_search_keys.values() and search_key not in search_keys.values()
4491
+ )
4485
4492
 
4486
4493
  # if SearchKey.POSTAL_CODE not in search_keys.values() and check_need_detect(SearchKey.POSTAL_CODE):
4487
4494
  if check_need_detect(SearchKey.POSTAL_CODE):
@@ -99,6 +99,7 @@ unsupported_date_type=Unsupported type of date column `{}`. Convert to datetime
99
99
  invalid_postal_code=All values of POSTAL_CODE column `{}` are invalid
100
100
  invalid_country=All values of COUNTRY column `{}` are invalid
101
101
  invalid_ip=All values of IP column `{}` are invalid
102
+ transform_search_keys_not_match_fit_keys=Search keys used in fit call {} are different from the search keys used in transform call. Please use the same search keys as in fit call.
102
103
  # X and y validation
103
104
  unsupported_x_type=Unsupported type of X: {}. Use pandas.DataFrame, pandas.Series or numpy.ndarray or list
104
105
  x_contains_dup_columns=X contains duplicate column names. Please rename or drop duplicates
@@ -152,10 +152,6 @@ def make_html_report(
152
152
  email: Optional[str] = None,
153
153
  search_keys: Optional[List[str]] = None,
154
154
  ) -> str:
155
- # relevant_features_df = relevant_features_df.copy()
156
- # relevant_features_df["Feature name"] = relevant_features_df["Feature name"].apply(
157
- # lambda x: "*" + x if x.contains("_autofe_") else x
158
- # )
159
155
  relevant_datasources_df = relevant_datasources_df.copy()
160
156
  relevant_datasources_df["action"] = (
161
157
  f"""<a href="https://upgini.com/request-a-quote?search-id={search_id}">"""
@@ -1 +0,0 @@
1
- __version__ = "1.2.99a3922.dev5"
@@ -1,45 +0,0 @@
1
- from typing import List, Optional
2
-
3
- import pandas as pd
4
-
5
- from upgini.autofe.operator import OperatorRegistry, PandasOperator, VectorizableMixin
6
-
7
-
8
- class Mean(PandasOperator, VectorizableMixin):
9
- name: str = "mean"
10
- output_type: Optional[str] = "float"
11
- is_vector: bool = True
12
- group_index: int = 0
13
-
14
- def calculate_vector(self, data: List[pd.Series]) -> pd.Series:
15
- return pd.DataFrame(data).T.fillna(0).mean(axis=1)
16
-
17
-
18
- class Sum(PandasOperator, VectorizableMixin):
19
- name: str = "sum"
20
- is_vector: bool = True
21
- group_index: int = 0
22
-
23
- def calculate_vector(self, data: List[pd.Series]) -> pd.Series:
24
- return pd.DataFrame(data).T.fillna(0).sum(axis=1)
25
-
26
-
27
- class Vectorize(PandasOperator, VectorizableMixin):
28
- name: str = "vectorize"
29
- is_vector: bool = True
30
- group_index: int = 0
31
-
32
- def calculate_vector(self, data: List[pd.Series]) -> pd.Series:
33
- return pd.DataFrame(data).T.apply(lambda x: x.to_list(), axis=1)
34
-
35
-
36
- class OnnxModel(PandasOperator, metaclass=OperatorRegistry):
37
- name: str = "onnx"
38
- is_vector: bool = True
39
- output_type: Optional[str] = "float"
40
-
41
-
42
- class CatboostModel(PandasOperator, metaclass=OperatorRegistry):
43
- name: str = "catboost"
44
- is_vector: bool = True
45
- output_type: Optional[str] = "float"
File without changes
File without changes
File without changes