upgini 1.1.278a2__py3-none-any.whl → 1.1.279__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (45)
  1. upgini/__about__.py +1 -0
  2. upgini/ads_management/ads_manager.py +4 -2
  3. upgini/autofe/all_operands.py +3 -2
  4. upgini/autofe/binary.py +2 -1
  5. upgini/autofe/date.py +2 -1
  6. upgini/autofe/feature.py +1 -1
  7. upgini/autofe/groupby.py +3 -1
  8. upgini/autofe/operand.py +4 -3
  9. upgini/autofe/unary.py +2 -1
  10. upgini/autofe/vector.py +2 -0
  11. upgini/dataset.py +6 -15
  12. upgini/errors.py +1 -1
  13. upgini/features_enricher.py +104 -217
  14. upgini/http.py +11 -10
  15. upgini/mdc/__init__.py +1 -3
  16. upgini/mdc/context.py +4 -6
  17. upgini/metadata.py +5 -10
  18. upgini/metrics.py +102 -100
  19. upgini/normalizer/phone_normalizer.py +1 -1
  20. upgini/resource_bundle/__init__.py +5 -5
  21. upgini/resource_bundle/strings.properties +0 -1
  22. upgini/sampler/base.py +1 -4
  23. upgini/sampler/random_under_sampler.py +2 -5
  24. upgini/search_task.py +4 -4
  25. upgini/spinner.py +1 -1
  26. upgini/utils/__init__.py +1 -1
  27. upgini/utils/base_search_key_detector.py +14 -16
  28. upgini/utils/blocked_time_series.py +4 -2
  29. upgini/utils/country_utils.py +1 -1
  30. upgini/utils/custom_loss_utils.py +3 -2
  31. upgini/utils/cv_utils.py +2 -2
  32. upgini/utils/datetime_utils.py +20 -15
  33. upgini/utils/deduplicate_utils.py +1 -11
  34. upgini/utils/email_utils.py +2 -7
  35. upgini/utils/fallback_progress_bar.py +1 -1
  36. upgini/utils/progress_bar.py +1 -1
  37. upgini/utils/sklearn_ext.py +14 -13
  38. upgini/utils/track_info.py +2 -2
  39. upgini/version_validator.py +2 -2
  40. {upgini-1.1.278a2.dist-info → upgini-1.1.279.dist-info}/METADATA +21 -23
  41. upgini-1.1.279.dist-info/RECORD +62 -0
  42. {upgini-1.1.278a2.dist-info → upgini-1.1.279.dist-info}/WHEEL +1 -2
  43. upgini-1.1.278a2.dist-info/RECORD +0 -62
  44. upgini-1.1.278a2.dist-info/top_level.txt +0 -1
  45. {upgini-1.1.278a2.dist-info → upgini-1.1.279.dist-info/licenses}/LICENSE +0 -0
upgini/sampler/base.py CHANGED
@@ -9,13 +9,11 @@ from abc import ABCMeta, abstractmethod
9
9
  from typing import List, Optional
10
10
 
11
11
  import numpy as np
12
-
13
12
  from sklearn.base import BaseEstimator
14
13
  from sklearn.preprocessing import label_binarize
15
14
  from sklearn.utils.multiclass import check_classification_targets
16
15
 
17
- from .utils import check_sampling_strategy, check_target_type
18
- from .utils import ArraysTransformer
16
+ from .utils import ArraysTransformer, check_sampling_strategy, check_target_type
19
17
 
20
18
 
21
19
  class SamplerMixin(BaseEstimator, metaclass=ABCMeta):
@@ -107,7 +105,6 @@ class SamplerMixin(BaseEstimator, metaclass=ABCMeta):
107
105
  The corresponding label of `X_resampled`.
108
106
 
109
107
  """
110
- pass
111
108
 
112
109
  @abstractmethod
113
110
  def _check_X_y(self, X, y, accept_sparse: Optional[List[str]] = None):
@@ -5,13 +5,10 @@
5
5
  # License: MIT
6
6
 
7
7
  import numpy as np
8
-
9
- from sklearn.utils import check_random_state
10
- from sklearn.utils import _safe_indexing
8
+ from sklearn.utils import _safe_indexing, check_random_state
11
9
 
12
10
  from .base import BaseUnderSampler
13
- from .utils import check_target_type
14
- from .utils import _deprecate_positional_args
11
+ from .utils import _deprecate_positional_args, check_target_type
15
12
 
16
13
 
17
14
  class RandomUnderSampler(BaseUnderSampler):
upgini/search_task.py CHANGED
@@ -8,10 +8,10 @@ import pandas as pd
8
8
 
9
9
  from upgini import dataset
10
10
  from upgini.http import (
11
- _RestClient,
12
11
  ProviderTaskSummary,
13
12
  SearchProgress,
14
13
  SearchTaskSummary,
14
+ _RestClient,
15
15
  get_rest_client,
16
16
  is_demo_api_key,
17
17
  )
@@ -295,7 +295,7 @@ class SearchTask:
295
295
  return self.rest_client.get_search_file_metadata(self.search_task_id, trace_id)
296
296
 
297
297
 
298
- @lru_cache()
298
+ @lru_cache
299
299
  def _get_all_initial_raw_features_cached(
300
300
  endpoint: Optional[str],
301
301
  api_key: Optional[str],
@@ -328,7 +328,7 @@ def _get_all_initial_raw_features_cached(
328
328
  return result_df
329
329
 
330
330
 
331
- @lru_cache()
331
+ @lru_cache
332
332
  def _get_all_validation_raw_features_cached(
333
333
  endpoint: Optional[str],
334
334
  api_key: Optional[str],
@@ -357,7 +357,7 @@ def _get_all_validation_raw_features_cached(
357
357
  return result_df
358
358
 
359
359
 
360
- @lru_cache()
360
+ @lru_cache
361
361
  def _get_target_outliers_cached(
362
362
  endpoint: Optional[str],
363
363
  api_key: Optional[str],
upgini/spinner.py CHANGED
@@ -1,6 +1,6 @@
1
1
  import threading
2
- from typing import Optional, List
3
2
  import time
3
+ from typing import List, Optional
4
4
 
5
5
 
6
6
  class Spinner:
upgini/utils/__init__.py CHANGED
@@ -2,7 +2,7 @@ import itertools
2
2
  from typing import List, Tuple
3
3
 
4
4
  import pandas as pd
5
- from pandas.api.types import is_string_dtype, is_object_dtype
5
+ from pandas.api.types import is_object_dtype, is_string_dtype
6
6
 
7
7
 
8
8
  def combine_search_keys(search_keys: List[str]) -> List[Tuple[str]]:
@@ -1,27 +1,25 @@
1
- from typing import List
1
+ from typing import List, Optional
2
2
 
3
3
  import pandas as pd
4
4
 
5
5
 
6
6
  class BaseSearchKeyDetector:
7
7
  def _is_search_key_by_name(self, column_name: str) -> bool:
8
- raise NotImplementedError()
8
+ raise NotImplementedError
9
9
 
10
10
  def _is_search_key_by_values(self, column: pd.Series) -> bool:
11
- raise NotImplementedError()
11
+ raise NotImplementedError
12
12
 
13
- def _get_search_keys_by_name(self, column_names: List[str]) -> List[str]:
14
- return [
15
- column_name
16
- for column_name in column_names
17
- if self._is_search_key_by_name(column_name)
18
- ]
13
+ def _get_search_key_by_name(self, column_names: List[str]) -> Optional[str]:
14
+ for column_name in column_names:
15
+ if self._is_search_key_by_name(column_name):
16
+ return column_name
19
17
 
20
- def get_search_key_columns(self, df: pd.DataFrame, existing_search_keys: List[str]) -> List[str]:
21
- other_columns = [col for col in df.columns if col not in existing_search_keys]
22
- columns_by_names = self._get_search_keys_by_name(other_columns)
23
- columns_by_values = []
24
- for column_name in other_columns:
18
+ def get_search_key_column(self, df: pd.DataFrame) -> Optional[str]:
19
+ maybe_column = self._get_search_key_by_name(df.columns.to_list())
20
+ if maybe_column is not None:
21
+ return maybe_column
22
+
23
+ for column_name in df.columns:
25
24
  if self._is_search_key_by_values(df[column_name]):
26
- columns_by_values.append(column_name)
27
- return list(set(columns_by_names + columns_by_values))
25
+ return column_name
@@ -1,8 +1,10 @@
1
- import numpy as np
2
1
  import numbers
2
+
3
+ import numpy as np
4
+ from sklearn.model_selection import BaseCrossValidator
3
5
  from sklearn.utils import indexable
4
6
  from sklearn.utils.validation import _num_samples
5
- from sklearn.model_selection import BaseCrossValidator
7
+
6
8
  from upgini.resource_bundle import bundle
7
9
 
8
10
 
@@ -1,5 +1,5 @@
1
1
  import pandas as pd
2
- from pandas.api.types import is_string_dtype, is_object_dtype
2
+ from pandas.api.types import is_object_dtype, is_string_dtype
3
3
 
4
4
  from upgini.utils.base_search_key_detector import BaseSearchKeyDetector
5
5
 
@@ -1,6 +1,7 @@
1
- from upgini.metadata import ModelTaskType, RuntimeParameters
2
- from typing import Optional, Dict, Any
3
1
  import logging
2
+ from typing import Any, Dict, Optional
3
+
4
+ from upgini.metadata import ModelTaskType, RuntimeParameters
4
5
  from upgini.resource_bundle import bundle
5
6
 
6
7
 
upgini/utils/cv_utils.py CHANGED
@@ -1,9 +1,9 @@
1
1
  from functools import reduce
2
2
  from typing import Any, Dict, List, Optional, Tuple, Union
3
- import numpy as np
4
3
 
4
+ import numpy as np
5
5
  import pandas as pd
6
- from sklearn.model_selection import BaseCrossValidator, KFold, TimeSeriesSplit, GroupKFold, GroupShuffleSplit
6
+ from sklearn.model_selection import BaseCrossValidator, GroupKFold, GroupShuffleSplit, KFold, TimeSeriesSplit
7
7
 
8
8
  from upgini.metadata import CVType
9
9
  from upgini.utils.blocked_time_series import BlockedTimeSeriesSplit
@@ -208,18 +208,17 @@ def is_blocked_time_series(df: pd.DataFrame, date_col: str, search_keys: List[st
208
208
  if nunique_dates / days_delta < 0.3:
209
209
  return False
210
210
 
211
- def check_differences(group):
212
- data = group.drop(date_col, axis=1)
213
- diffs = data.values[:, None] != data.values
214
- diff_counts = diffs.sum(axis=2)
215
- max_diff = np.max(diff_counts)
216
- return max_diff <= 2
217
-
218
- def is_multiple_rows(group):
211
+ accumulated_changing_columns = set()
212
+
213
+ def check_differences(group: pd.DataFrame):
214
+ changing_columns = group.columns[group.nunique(dropna=False) > 1].to_list()
215
+ accumulated_changing_columns.update(changing_columns)
216
+
217
+ def is_multiple_rows(group: pd.DataFrame) -> bool:
219
218
  return group.shape[0] > 1
220
219
 
221
- grouped = df.groupby(date_col)
222
- dates_with_multiple_rows = len(grouped.apply(is_multiple_rows))
220
+ grouped = df.groupby(date_col)[[c for c in df.columns if c != date_col]]
221
+ dates_with_multiple_rows = grouped.apply(is_multiple_rows).sum()
223
222
 
224
223
  # share of dates with more than one record is more than 99%
225
224
  if dates_with_multiple_rows / nunique_dates < 0.99:
@@ -228,8 +227,8 @@ def is_blocked_time_series(df: pd.DataFrame, date_col: str, search_keys: List[st
228
227
  if df.shape[1] <= 3:
229
228
  return True
230
229
 
231
- is_diff_less_than_two_columns = grouped.apply(check_differences)
232
- return is_diff_less_than_two_columns.all()
230
+ grouped.apply(check_differences)
231
+ return len(accumulated_changing_columns) <= 2
233
232
 
234
233
 
235
234
  def validate_dates_distribution(
@@ -249,8 +248,11 @@ def validate_dates_distribution(
249
248
  if col in search_keys:
250
249
  continue
251
250
  try:
252
- # Format mixed to avoid massive warnings
253
- pd.to_datetime(X[col], format="mixed")
251
+ if pd.__version__ >= "2.0.0":
252
+ # Format mixed to avoid massive warnings
253
+ pd.to_datetime(X[col], format="mixed")
254
+ else:
255
+ pd.to_datetime(X[col])
254
256
  maybe_date_col = col
255
257
  break
256
258
  except Exception:
@@ -259,7 +261,10 @@ def validate_dates_distribution(
259
261
  if maybe_date_col is None:
260
262
  return
261
263
 
262
- dates = pd.to_datetime(X[maybe_date_col]).dt.date
264
+ if pd.__version__ >= "2.0.0":
265
+ dates = pd.to_datetime(X[maybe_date_col], format="mixed").dt.date
266
+ else:
267
+ dates = pd.to_datetime(X[maybe_date_col]).dt.date
263
268
 
264
269
  date_counts = dates.value_counts().sort_index()
265
270
 
@@ -3,15 +3,7 @@ from typing import Dict, List, Optional, Union
3
3
 
4
4
  import pandas as pd
5
5
 
6
- from upgini.metadata import (
7
- ENTITY_SYSTEM_RECORD_ID,
8
- EVAL_SET_INDEX,
9
- SORT_ID,
10
- SYSTEM_RECORD_ID,
11
- TARGET,
12
- ModelTaskType,
13
- SearchKey,
14
- )
6
+ from upgini.metadata import EVAL_SET_INDEX, SORT_ID, SYSTEM_RECORD_ID, TARGET, ModelTaskType, SearchKey
15
7
  from upgini.resource_bundle import ResourceBundle
16
8
  from upgini.utils.datetime_utils import DateTimeSearchKeyConverter
17
9
  from upgini.utils.target_utils import define_task
@@ -151,8 +143,6 @@ def clean_full_duplicates(
151
143
  unique_columns = df.columns.tolist()
152
144
  if SYSTEM_RECORD_ID in unique_columns:
153
145
  unique_columns.remove(SYSTEM_RECORD_ID)
154
- if ENTITY_SYSTEM_RECORD_ID in unique_columns:
155
- unique_columns.remove(ENTITY_SYSTEM_RECORD_ID)
156
146
  if SORT_ID in unique_columns:
157
147
  unique_columns.remove(SORT_ID)
158
148
  if EVAL_SET_INDEX in unique_columns:
@@ -4,10 +4,10 @@ from hashlib import sha256
4
4
  from typing import Dict, List, Optional
5
5
 
6
6
  import pandas as pd
7
- from pandas.api.types import is_string_dtype, is_object_dtype
8
- from upgini.resource_bundle import bundle
7
+ from pandas.api.types import is_object_dtype, is_string_dtype
9
8
 
10
9
  from upgini.metadata import SearchKey
10
+ from upgini.resource_bundle import bundle
11
11
  from upgini.utils.base_search_key_detector import BaseSearchKeyDetector
12
12
 
13
13
  EMAIL_REGEX = re.compile(r"^[a-zA-Z0-9.!#$%&’*+/=?^_`{|}~-]+@[a-zA-Z0-9-]+(?:\.[a-zA-Z0-9-]+)*$")
@@ -38,13 +38,11 @@ class EmailSearchKeyConverter:
38
38
  email_column: str,
39
39
  hem_column: Optional[str],
40
40
  search_keys: Dict[str, SearchKey],
41
- unnest_search_keys: Optional[List[str]] = None,
42
41
  logger: Optional[logging.Logger] = None,
43
42
  ):
44
43
  self.email_column = email_column
45
44
  self.hem_column = hem_column
46
45
  self.search_keys = search_keys
47
- self.unnest_search_keys = unnest_search_keys
48
46
  if logger is not None:
49
47
  self.logger = logger
50
48
  else:
@@ -82,12 +80,9 @@ class EmailSearchKeyConverter:
82
80
  del self.search_keys[self.email_column]
83
81
  return df
84
82
  self.search_keys[self.HEM_COLUMN_NAME] = SearchKey.HEM
85
- self.unnest_search_keys.append(self.HEM_COLUMN_NAME)
86
83
  self.email_converted_to_hem = True
87
84
 
88
85
  del self.search_keys[self.email_column]
89
- if self.email_column in self.unnest_search_keys:
90
- self.unnest_search_keys.remove(self.email_column)
91
86
 
92
87
  df[self.EMAIL_ONE_DOMAIN_COLUMN_NAME] = df[self.email_column].apply(self._email_to_one_domain)
93
88
 
@@ -22,7 +22,7 @@ class CustomFallbackProgressBar:
22
22
  fraction = self.progress / self.total
23
23
  filled = "=" * int(fraction * self.text_width)
24
24
  rest = " " * (self.text_width - len(filled))
25
- return "[{}{}] {}% {} {}".format(filled, rest, self.progress, self._stage, self._eta)
25
+ return f"[{filled}{rest}] {self.progress}% {self._stage} {self._eta}"
26
26
 
27
27
  def display(self):
28
28
  print(self)
@@ -28,7 +28,7 @@ class CustomProgressBar(DisplayObject):
28
28
  fraction = self.progress / self.total
29
29
  filled = "=" * int(fraction * self.text_width)
30
30
  rest = " " * (self.text_width - len(filled))
31
- return "[{}{}] {}% {}".format(filled, rest, self.progress, self._stage)
31
+ return f"[{filled}{rest}] {self.progress}% {self._stage}"
32
32
 
33
33
  def _repr_html_(self):
34
34
  return "<progress style='width:{}' max='{}' value='{}'></progress> {}% {}</br>{}".format(
@@ -20,6 +20,7 @@ from sklearn.metrics._scorer import _MultimetricScorer
20
20
  from sklearn.model_selection import check_cv
21
21
  from sklearn.utils.fixes import np_version, parse_version
22
22
  from sklearn.utils.validation import indexable
23
+
23
24
  # from sklearn.model_selection import cross_validate as original_cross_validate
24
25
 
25
26
  _DEFAULT_TAGS = {
@@ -46,7 +47,7 @@ _DEFAULT_TAGS = {
46
47
 
47
48
  def cross_validate(
48
49
  estimator,
49
- X,
50
+ x,
50
51
  y=None,
51
52
  *,
52
53
  groups=None,
@@ -69,7 +70,7 @@ def cross_validate(
69
70
  estimator : estimator object implementing 'fit'
70
71
  The object to use to fit the data.
71
72
 
72
- X : array-like of shape (n_samples, n_features)
73
+ x : array-like of shape (n_samples, n_features)
73
74
  The data to fit. Can be for example a list, or an array.
74
75
 
75
76
  y : array-like of shape (n_samples,) or (n_samples, n_outputs), \
@@ -250,7 +251,7 @@ def cross_validate(
250
251
 
251
252
  """
252
253
  try:
253
- X, y, groups = indexable(X, y, groups)
254
+ x, y, groups = indexable(x, y, groups)
254
255
 
255
256
  cv = check_cv(cv, y, classifier=is_classifier(estimator))
256
257
 
@@ -267,7 +268,7 @@ def cross_validate(
267
268
  results = parallel(
268
269
  delayed(_fit_and_score)(
269
270
  clone(estimator),
270
- X,
271
+ x,
271
272
  y,
272
273
  scorers,
273
274
  train,
@@ -280,7 +281,7 @@ def cross_validate(
280
281
  return_estimator=return_estimator,
281
282
  error_score=error_score,
282
283
  )
283
- for train, test in cv.split(X, y, groups)
284
+ for train, test in cv.split(x, y, groups)
284
285
  )
285
286
 
286
287
  _warn_about_fit_failures(results, error_score)
@@ -487,7 +488,7 @@ def _fit_and_score(
487
488
  if y_train is None:
488
489
  estimator.fit(X_train, **fit_params)
489
490
  else:
490
- if isinstance(estimator, CatBoostClassifier) or isinstance(estimator, CatBoostRegressor):
491
+ if isinstance(estimator, (CatBoostClassifier, CatBoostRegressor)):
491
492
  fit_params = fit_params.copy()
492
493
  fit_params["eval_set"] = [(X_test, y_test)]
493
494
  estimator.fit(X_train, y_train, **fit_params)
@@ -582,9 +583,11 @@ def _aggregate_score_dicts(scores):
582
583
  """
583
584
 
584
585
  return {
585
- key: np.asarray([score[key] for score in scores])
586
- if isinstance(scores[0][key], numbers.Number)
587
- else [score[key] for score in scores]
586
+ key: (
587
+ np.asarray([score[key] for score in scores])
588
+ if isinstance(scores[0][key], numbers.Number)
589
+ else [score[key] for score in scores]
590
+ )
588
591
  for key in scores[0]
589
592
  }
590
593
 
@@ -969,9 +972,7 @@ def _safe_indexing(X, indices, *, axis=0):
969
972
  return X
970
973
 
971
974
  if axis not in (0, 1):
972
- raise ValueError(
973
- "'axis' should be either 0 (to index rows) or 1 (to index " " column). Got {} instead.".format(axis)
974
- )
975
+ raise ValueError("'axis' should be either 0 (to index rows) or 1 (to index " f" column). Got {axis} instead.")
975
976
 
976
977
  indices_dtype = _determine_key_type(indices)
977
978
 
@@ -982,7 +983,7 @@ def _safe_indexing(X, indices, *, axis=0):
982
983
  raise ValueError(
983
984
  "'X' should be a 2D NumPy array, 2D sparse matrix or pandas "
984
985
  "dataframe when indexing the columns (i.e. 'axis=1'). "
985
- "Got {} instead with {} dimension(s).".format(type(X), X.ndim)
986
+ f"Got {type(X)} instead with {X.ndim} dimension(s)."
986
987
  )
987
988
 
988
989
  if axis == 1 and indices_dtype == "str" and not hasattr(X, "loc"):
@@ -1,8 +1,8 @@
1
- from functools import lru_cache
2
1
  import os
3
2
  import re
4
3
  import socket
5
4
  import sys
5
+ from functools import lru_cache
6
6
  from getpass import getuser
7
7
  from hashlib import sha256
8
8
  from typing import Optional
@@ -51,7 +51,7 @@ def _get_execution_ide() -> str:
51
51
  return "other"
52
52
 
53
53
 
54
- @lru_cache()
54
+ @lru_cache
55
55
  def get_track_metrics(client_ip: Optional[str] = None, client_visitorid: Optional[str] = None) -> dict:
56
56
  # default values
57
57
  track = {"ide": _get_execution_ide()}
@@ -1,7 +1,7 @@
1
1
  import json
2
+ import threading
2
3
 
3
4
  import requests
4
- import threading
5
5
 
6
6
  try:
7
7
  from packaging.version import parse
@@ -10,7 +10,7 @@ except ImportError:
10
10
 
11
11
  import logging
12
12
 
13
- from upgini.http import __version__
13
+ from upgini.__about__ import __version__
14
14
  from upgini.resource_bundle import bundle
15
15
 
16
16
  URL_PATTERN = "https://pypi.python.org/pypi/{package}/json"
@@ -1,14 +1,13 @@
1
- Metadata-Version: 2.1
1
+ Metadata-Version: 2.3
2
2
  Name: upgini
3
- Version: 1.1.278a2
3
+ Version: 1.1.279
4
4
  Summary: Intelligent data search & enrichment for Machine Learning
5
- Home-page: https://upgini.com/
6
- Author: Upgini Developers
7
- Author-email: madewithlove@upgini.com
8
- License: BSD 3-Clause License
9
5
  Project-URL: Bug Reports, https://github.com/upgini/upgini/issues
6
+ Project-URL: Homepage, https://upgini.com/
10
7
  Project-URL: Source, https://github.com/upgini/upgini
11
- Keywords: data science,machine learning,data mining,automl,data search
8
+ Author-email: Upgini Developers <madewithlove@upgini.com>
9
+ License-File: LICENSE
10
+ Keywords: automl,data mining,data science,data search,machine learning
12
11
  Classifier: Development Status :: 5 - Production/Stable
13
12
  Classifier: Intended Audience :: Customer Service
14
13
  Classifier: Intended Audience :: Developers
@@ -23,22 +22,21 @@ Classifier: Programming Language :: Python :: 3.9
23
22
  Classifier: Programming Language :: Python :: 3.10
24
23
  Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
25
24
  Classifier: Topic :: Scientific/Engineering :: Information Analysis
26
- Requires-Python: >=3.8,<3.11
25
+ Requires-Python: <3.11,>=3.8
26
+ Requires-Dist: catboost>=1.0.3
27
+ Requires-Dist: fastparquet>=0.8.1
28
+ Requires-Dist: ipywidgets>=8.1.0
29
+ Requires-Dist: lightgbm>=3.3.2
30
+ Requires-Dist: numpy>=1.19.0
31
+ Requires-Dist: pandas<3.0.0,>=1.1.0
32
+ Requires-Dist: pydantic<2.0.0,>=1.8.2
33
+ Requires-Dist: pyjwt>=2.8.0
34
+ Requires-Dist: python-dateutil>=2.8.0
35
+ Requires-Dist: python-json-logger>=2.0.2
36
+ Requires-Dist: requests>=2.8.0
37
+ Requires-Dist: scikit-learn>=1.3.0
38
+ Requires-Dist: xhtml2pdf==0.2.11
27
39
  Description-Content-Type: text/markdown
28
- License-File: LICENSE
29
- Requires-Dist: python-dateutil >=2.8.0
30
- Requires-Dist: requests >=2.8.0
31
- Requires-Dist: pandas <3.0.0,>=1.1.0
32
- Requires-Dist: numpy >=1.19.0
33
- Requires-Dist: scikit-learn >=1.3.0
34
- Requires-Dist: pydantic <2.0.0,>=1.8.2
35
- Requires-Dist: fastparquet >=0.8.1
36
- Requires-Dist: python-json-logger >=2.0.2
37
- Requires-Dist: catboost >=1.0.3
38
- Requires-Dist: lightgbm >=3.3.2
39
- Requires-Dist: pyjwt >=2.8.0
40
- Requires-Dist: xhtml2pdf ==0.2.11
41
- Requires-Dist: ipywidgets >=8.1.0
42
40
 
43
41
 
44
42
  <!-- <h2 align="center"> <a href="https://upgini.com/">Upgini</a> : low-code feature search and enrichment library for machine learning </h2> -->
@@ -841,4 +839,4 @@ Some convenient ways to start contributing are:
841
839
  - [More perks for registered users](https://profile.upgini.com)
842
840
 
843
841
  <sup>😔 Found mistype or a bug in code snippet? Our bad! <a href="https://github.com/upgini/upgini/issues/new?assignees=&title=readme%2Fbug">
844
- Please report it here.</a></sup>
842
+ Please report it here.</a></sup>
@@ -0,0 +1,62 @@
1
+ upgini/__about__.py,sha256=iqiE7m95oLHgp-tKIBMVrOSu-y0RBxTaA5Ngsqhp5yk,24
2
+ upgini/__init__.py,sha256=asENHgEVHQBIkV-e_0IhE_ZWqkCG6398U3ZLrNzAH6k,407
3
+ upgini/ads.py,sha256=nvuRxRx5MHDMgPr9SiU-fsqRdFaBv8p4_v1oqiysKpc,2714
4
+ upgini/dataset.py,sha256=uiFY-P8te7-zigib1hGWRtW5v0X7chxPM0hJFdixAN8,45623
5
+ upgini/errors.py,sha256=2b_Wbo0OYhLUbrZqdLIx5jBnAsiD1Mcenh-VjR4HCTw,950
6
+ upgini/features_enricher.py,sha256=NU2lpp6ZrJ3oKOOLa6u7DQ5kb64n8mDAFXadWjr219A,176290
7
+ upgini/http.py,sha256=khrYSldpY-HbVLCcApfV1BjBFK6Uyuatb4colKybxgY,42301
8
+ upgini/metadata.py,sha256=CFJekYGD7Ep7pRFH7wCEcsXS4bz83do33FNmtcCY9P4,9729
9
+ upgini/metrics.py,sha256=L4LKSMOK9iKFLaJvTBTKk2tQauMgiJqtfrBclM3fBjs,29670
10
+ upgini/search_task.py,sha256=LtRJ9bCPjMo1gJ-sUDKERhDwGcWKImrzwVFHjkMSQHQ,17071
11
+ upgini/spinner.py,sha256=4iMd-eIe_BnkqFEMIliULTbj6rNI2HkN_VJ4qYe0cUc,1118
12
+ upgini/version_validator.py,sha256=RGg87VweujTNlibgsOuqPLIEiBgIOkuXNVTGuNCD234,1405
13
+ upgini/ads_management/__init__.py,sha256=qzyisOToVRP-tquAJD1PblZhNtMrOB8FiyF9JvfkvgE,50
14
+ upgini/ads_management/ads_manager.py,sha256=igVbN2jz80Umb2BUJixmJVj-zx8unoKpecVo-R-nGdw,2648
15
+ upgini/autofe/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
16
+ upgini/autofe/all_operands.py,sha256=SyKVU-xGMHgoRZvHrCmba2u2Ygc73c1mXFolNSWe8Uo,2357
17
+ upgini/autofe/binary.py,sha256=441BRuqMsxlxuw4c8rMZB6h5EpRdVMk-bVa03U7T5Hg,3973
18
+ upgini/autofe/date.py,sha256=Vy1I92fLLYLhuYKJmtuPBMI8cPxE4Uwk40hqE2F2e1A,4224
19
+ upgini/autofe/feature.py,sha256=ChSuuIbRPGIWnPjKAgZbeAEi7Y_PjSVRyxxx41MyFp0,11845
20
+ upgini/autofe/groupby.py,sha256=4WjDzQxqpZxB79Ih4ihMMI5GDxaFqiH6ZelfV82ClT4,3091
21
+ upgini/autofe/operand.py,sha256=xgEIZuFCfckc6LpBqVu1OVK3JEabm1O-LHUsp83EHKA,2806
22
+ upgini/autofe/unary.py,sha256=v-l3aiE5hj6kurvh6adCQL8W3X9u9a7RVbS_WPR2qlw,3146
23
+ upgini/autofe/vector.py,sha256=dLxfAstJs-gw_OQ1xxoxcM6pVzORlV0HVzdzt7cLXVQ,606
24
+ upgini/data_source/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
25
+ upgini/data_source/data_source_publisher.py,sha256=taRzyGgrPrTTSGw4Y-Ca5k4bf30aiTa68rxqT9zfqeI,16478
26
+ upgini/mdc/__init__.py,sha256=aM08nIWFc2gWdWUa3_IuEnNND0cQPkBGnYpRMnfFN8k,1019
27
+ upgini/mdc/context.py,sha256=3u1B-jXt7tXEvNcV3qmR9SDCseudnY7KYsLclBdwVLk,1405
28
+ upgini/normalizer/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
29
+ upgini/normalizer/phone_normalizer.py,sha256=EzTaahk6myRv6ZXgbyVFGY4kpo_2VlQgOrm5_lfbmNI,9996
30
+ upgini/resource_bundle/__init__.py,sha256=S5F2G47pnJd2LDpmFsjDqEwiKkP8Hm-hcseDbMka6Ko,8345
31
+ upgini/resource_bundle/exceptions.py,sha256=5fRvx0_vWdE1-7HcSgF0tckB4A9AKyf5RiinZkInTsI,621
32
+ upgini/resource_bundle/strings.properties,sha256=1O779a0-Ai0j7W-Z5AznvjuV69YkJvgGhJda-6VMLOQ,26287
33
+ upgini/resource_bundle/strings_widget.properties,sha256=gOdqvZWntP2LCza_tyVk1_yRYcG4c04K9sQOAVhF_gw,1577
34
+ upgini/sampler/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
35
+ upgini/sampler/base.py,sha256=7GpjYqjOp58vYcJLiX__1R5wjUlyQbxvHJ2klFnup_M,6389
36
+ upgini/sampler/random_under_sampler.py,sha256=TIbm7ATo-bCMF-IiS5sZeDC1ad1SYg0eY_rRmg84yIQ,4024
37
+ upgini/sampler/utils.py,sha256=PYOk3kKSnFlyxcpdtDNLBEEhTB4lO_iP7pQHqeUcmAc,20211
38
+ upgini/utils/__init__.py,sha256=O_KgzKiJjW3g4NoqZ7lAxUpoHcBi_gze6r3ndEjCH74,842
39
+ upgini/utils/base_search_key_detector.py,sha256=UNs2uxEcD1N_mOtkx3k6U70DCajW-QEO2vZp41GF0mU,855
40
+ upgini/utils/blocked_time_series.py,sha256=Uqr3vp4YqNclj2-PzEYqVy763GSXHn86sbpIl1UOB4s,3382
41
+ upgini/utils/country_utils.py,sha256=yE8oRgMpXuJxPfQm4fioY6dg6700HgVnHSk4Cv9sUyM,6511
42
+ upgini/utils/custom_loss_utils.py,sha256=bLk3uygqkJBaGkyzfO032d72QASae-dDyEURfFehVJo,3973
43
+ upgini/utils/cv_utils.py,sha256=w6FQb9nO8BWDx88EF83NpjPLarK4eR4ia0Wg0kLBJC4,3525
44
+ upgini/utils/datetime_utils.py,sha256=-LsDTThsGKsTZ57V1uNiHtLcoTtqktk5tui4WnqggJo,10673
45
+ upgini/utils/deduplicate_utils.py,sha256=6AbARehUCghJZ4PppFtrej2s3gFRruh41MEm6mzakHs,8607
46
+ upgini/utils/display_utils.py,sha256=LKoSwjrE0xgS5_cqVhc2og2CQ1UCZ1nTI2VKboIhoQA,10858
47
+ upgini/utils/email_utils.py,sha256=PLufTO97Pg9PPsNqB9agcM6M98MIxKUgIgNn2mVwSQ0,3520
48
+ upgini/utils/fallback_progress_bar.py,sha256=PDaKb8dYpVZaWMroNcOHsTc3pSjgi9mOm0--cOFTwJ0,1074
49
+ upgini/utils/features_validator.py,sha256=PgKNt5dyqfErTvjtRNNUS9g7GFqHBtAtnsfA-V5UO1A,3307
50
+ upgini/utils/format.py,sha256=Yv5cvvSs2bOLUzzNu96Pu33VMDNbabio92QepUj41jU,243
51
+ upgini/utils/ip_utils.py,sha256=Zf3F2cnQmOCH09QLQHetpjMFu1PnD0cTmDymn0SnSy8,1672
52
+ upgini/utils/phone_utils.py,sha256=JNSkF8G6mgsN8Czy11pamaJdsY6rBINEMpi7jbVt_RA,408
53
+ upgini/utils/postal_code_utils.py,sha256=_8CR9tBqsPptQsmMUvnrCAmBaMIQSWH3JfJ4ly3x_zs,409
54
+ upgini/utils/progress_bar.py,sha256=N-Sfdah2Hg8lXP_fV9EfUTXz_PyRt4lo9fAHoUDOoLc,1550
55
+ upgini/utils/sklearn_ext.py,sha256=c23MGSUVfxLnaDWKAxavHgnOtm5dGKkF3YswdWQcFzs,43984
56
+ upgini/utils/target_utils.py,sha256=Y96_PJ5cC-WsEbeqg20v9uqywDQobLoTb-xoP7S3o4E,7807
57
+ upgini/utils/track_info.py,sha256=G5Lu1xxakg2_TQjKZk4b5SvrHsATTXNVV3NbvWtT8k8,5663
58
+ upgini/utils/warning_counter.py,sha256=dIWBB4dI5XRRJZudvIlqlIYKEiwLLPcXarsZuYRt338,227
59
+ upgini-1.1.279.dist-info/METADATA,sha256=A4C2M3EmtRTSAaJXreEzPFZtkp2t-Ixy2NjRqa9uv30,48118
60
+ upgini-1.1.279.dist-info/WHEEL,sha256=zEMcRr9Kr03x1ozGwg5v9NQBKn3kndp6LSoSlVg-jhU,87
61
+ upgini-1.1.279.dist-info/licenses/LICENSE,sha256=5RRzgvdJUu3BUDfv4bzVU6FqKgwHlIay63pPCSmSgzw,1514
62
+ upgini-1.1.279.dist-info/RECORD,,
@@ -1,5 +1,4 @@
1
1
  Wheel-Version: 1.0
2
- Generator: bdist_wheel (0.43.0)
2
+ Generator: hatchling 1.24.2
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
5
-