upgini 1.1.279a2__py3-none-any.whl → 1.1.279a2.dev1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of upgini might be problematic. Click here for more details.

Files changed (44) hide show
  1. upgini/__about__.py +1 -0
  2. upgini/ads_management/ads_manager.py +4 -2
  3. upgini/autofe/all_operands.py +3 -2
  4. upgini/autofe/binary.py +2 -1
  5. upgini/autofe/date.py +2 -1
  6. upgini/autofe/feature.py +1 -1
  7. upgini/autofe/groupby.py +3 -1
  8. upgini/autofe/operand.py +4 -3
  9. upgini/autofe/unary.py +2 -1
  10. upgini/autofe/vector.py +2 -0
  11. upgini/dataset.py +4 -4
  12. upgini/errors.py +1 -1
  13. upgini/features_enricher.py +4 -4
  14. upgini/http.py +11 -10
  15. upgini/mdc/__init__.py +1 -3
  16. upgini/mdc/context.py +4 -6
  17. upgini/metadata.py +3 -0
  18. upgini/metrics.py +101 -99
  19. upgini/normalizer/phone_normalizer.py +1 -1
  20. upgini/resource_bundle/__init__.py +5 -5
  21. upgini/sampler/base.py +1 -4
  22. upgini/sampler/random_under_sampler.py +2 -5
  23. upgini/search_task.py +4 -4
  24. upgini/spinner.py +1 -1
  25. upgini/utils/__init__.py +1 -1
  26. upgini/utils/base_search_key_detector.py +2 -2
  27. upgini/utils/blocked_time_series.py +4 -2
  28. upgini/utils/country_utils.py +1 -1
  29. upgini/utils/custom_loss_utils.py +3 -2
  30. upgini/utils/cv_utils.py +2 -2
  31. upgini/utils/datetime_utils.py +9 -3
  32. upgini/utils/email_utils.py +2 -2
  33. upgini/utils/fallback_progress_bar.py +1 -1
  34. upgini/utils/progress_bar.py +1 -1
  35. upgini/utils/sklearn_ext.py +14 -13
  36. upgini/utils/track_info.py +2 -2
  37. upgini/version_validator.py +2 -2
  38. {upgini-1.1.279a2.dist-info → upgini-1.1.279a2.dev1.dist-info}/METADATA +21 -23
  39. upgini-1.1.279a2.dev1.dist-info/RECORD +62 -0
  40. {upgini-1.1.279a2.dist-info → upgini-1.1.279a2.dev1.dist-info}/WHEEL +1 -2
  41. upgini/fingerprint.js +0 -8
  42. upgini-1.1.279a2.dist-info/RECORD +0 -63
  43. upgini-1.1.279a2.dist-info/top_level.txt +0 -1
  44. {upgini-1.1.279a2.dist-info → upgini-1.1.279a2.dev1.dist-info/licenses}/LICENSE +0 -0
upgini/__about__.py ADDED
@@ -0,0 +1 @@
1
+ __version__ = "1.1.279a2.dev1"
upgini/ads_management/ads_manager.py CHANGED
@@ -1,9 +1,11 @@
1
1
  import time
2
- from typing import Dict, Optional
3
2
  import uuid
3
+ from typing import Dict, Optional
4
+
5
+ import pandas as pd
6
+
4
7
  from upgini.http import get_rest_client
5
8
  from upgini.spinner import Spinner
6
- import pandas as pd
7
9
 
8
10
 
9
11
  class AdsManager:
upgini/autofe/all_operands.py CHANGED
@@ -1,9 +1,10 @@
1
1
  from typing import Dict
2
+
3
+ from upgini.autofe.binary import Add, Divide, Max, Min, Multiply, Sim, Subtract
2
4
  from upgini.autofe.date import DateDiff, DateDiffType2, DateListDiff, DateListDiffBounded
3
5
  from upgini.autofe.groupby import GroupByThenAgg, GroupByThenRank
4
6
  from upgini.autofe.operand import Operand
5
- from upgini.autofe.unary import Abs, Log, Residual, Sqrt, Square, Sigmoid, Floor, Freq
6
- from upgini.autofe.binary import Min, Max, Add, Subtract, Multiply, Divide, Sim
7
+ from upgini.autofe.unary import Abs, Floor, Freq, Log, Residual, Sigmoid, Sqrt, Square
7
8
  from upgini.autofe.vector import Mean, Sum
8
9
 
9
10
  ALL_OPERANDS: Dict[str, Operand] = {
upgini/autofe/binary.py CHANGED
@@ -1,9 +1,10 @@
1
- from upgini.autofe.operand import PandasOperand, VectorizableMixin
2
1
  import numpy as np
3
2
  import pandas as pd
4
3
  from numpy import dot
5
4
  from numpy.linalg import norm
6
5
 
6
+ from upgini.autofe.operand import PandasOperand, VectorizableMixin
7
+
7
8
 
8
9
  class Min(PandasOperand):
9
10
  name = "min"
upgini/autofe/date.py CHANGED
@@ -1,8 +1,9 @@
1
1
  from typing import Any, Optional, Union
2
+
2
3
  import numpy as np
3
4
  import pandas as pd
4
- from pydantic import BaseModel
5
5
  from pandas.core.arrays.timedeltas import TimedeltaArray
6
+ from pydantic import BaseModel
6
7
 
7
8
  from upgini.autofe.operand import PandasOperand
8
9
 
upgini/autofe/feature.py CHANGED
@@ -215,7 +215,7 @@ class Feature:
215
215
  return Column(string)
216
216
 
217
217
  def is_trivial_char(c: str) -> bool:
218
- return not (c in "()+-*/,")
218
+ return c not in "()+-*/,"
219
219
 
220
220
  def find_prev(string: str) -> int:
221
221
  if string[-1] != ")":
upgini/autofe/groupby.py CHANGED
@@ -1,7 +1,9 @@
1
- from upgini.autofe.operand import PandasOperand, VectorizableMixin
2
1
  from typing import Optional
2
+
3
3
  import pandas as pd
4
4
 
5
+ from upgini.autofe.operand import PandasOperand, VectorizableMixin
6
+
5
7
 
6
8
  class GroupByThenAgg(PandasOperand, VectorizableMixin):
7
9
  agg: Optional[str]
upgini/autofe/operand.py CHANGED
@@ -1,8 +1,9 @@
1
- from pydantic import BaseModel
2
- from typing import Dict, List, Optional, Tuple, Union
3
1
  import abc
4
- import pandas as pd
2
+ from typing import Dict, List, Optional, Tuple, Union
3
+
5
4
  import numpy as np
5
+ import pandas as pd
6
+ from pydantic import BaseModel
6
7
 
7
8
 
8
9
  class Operand(BaseModel):
upgini/autofe/unary.py CHANGED
@@ -1,7 +1,8 @@
1
- from upgini.autofe.operand import PandasOperand, VectorizableMixin
2
1
  import numpy as np
3
2
  import pandas as pd
4
3
 
4
+ from upgini.autofe.operand import PandasOperand, VectorizableMixin
5
+
5
6
 
6
7
  class Abs(PandasOperand, VectorizableMixin):
7
8
  name = "abs"
upgini/autofe/vector.py CHANGED
@@ -1,5 +1,7 @@
1
1
  from typing import List
2
+
2
3
  import pandas as pd
4
+
3
5
  from upgini.autofe.operand import PandasOperand, VectorizableMixin
4
6
 
5
7
 
upgini/dataset.py CHANGED
@@ -15,9 +15,9 @@ from pandas.api.types import (
15
15
  is_float_dtype,
16
16
  is_integer_dtype,
17
17
  is_numeric_dtype,
18
+ is_object_dtype,
18
19
  is_period_dtype,
19
20
  is_string_dtype,
20
- is_object_dtype,
21
21
  )
22
22
 
23
23
  from upgini.errors import ValidationError
@@ -95,7 +95,7 @@ class Dataset: # (pd.DataFrame):
95
95
  data = pd.read_csv(path, **kwargs)
96
96
  else:
97
97
  # try different separators: , ; \t ...
98
- with open(path, mode="r") as csvfile:
98
+ with open(path) as csvfile:
99
99
  sep = csv.Sniffer().sniff(csvfile.read(2048)).delimiter
100
100
  kwargs["sep"] = sep
101
101
  data = pd.read_csv(path, **kwargs)
@@ -251,7 +251,7 @@ class Dataset: # (pd.DataFrame):
251
251
  @staticmethod
252
252
  def _ip_to_int(ip: Optional[_BaseAddress]) -> Optional[int]:
253
253
  try:
254
- if isinstance(ip, IPv4Address) or isinstance(ip, IPv6Address):
254
+ if isinstance(ip, (IPv4Address, IPv6Address)):
255
255
  return int(ip)
256
256
  except Exception:
257
257
  pass
@@ -259,7 +259,7 @@ class Dataset: # (pd.DataFrame):
259
259
  @staticmethod
260
260
  def _ip_to_int_str(ip: Optional[_BaseAddress]) -> Optional[str]:
261
261
  try:
262
- if isinstance(ip, IPv4Address) or isinstance(ip, IPv6Address):
262
+ if isinstance(ip, (IPv4Address, IPv6Address)):
263
263
  return str(int(ip))
264
264
  except Exception:
265
265
  pass
upgini/errors.py CHANGED
@@ -16,7 +16,7 @@ class UnauthorizedError(HttpError):
16
16
  """Unauthorized error from REST API."""
17
17
 
18
18
  def __init__(self, message, status_code):
19
- message = "Unauthorized, please check your authorization token ({})".format(message)
19
+ message = f"Unauthorized, please check your authorization token ({message})"
20
20
  super(UnauthorizedError, self).__init__(message, status_code)
21
21
 
22
22
 
upgini/features_enricher.py CHANGED
@@ -2548,7 +2548,7 @@ class FeaturesEnricher(TransformerMixin):
2548
2548
  validated_X = X.copy()
2549
2549
  elif isinstance(X, pd.Series):
2550
2550
  validated_X = X.to_frame()
2551
- elif isinstance(X, np.ndarray) or isinstance(X, list):
2551
+ elif isinstance(X, (list, np.ndarray)):
2552
2552
  validated_X = pd.DataFrame(X)
2553
2553
  renaming = {c: str(c) for c in validated_X.columns}
2554
2554
  validated_X = validated_X.rename(columns=renaming)
@@ -2637,7 +2637,7 @@ class FeaturesEnricher(TransformerMixin):
2637
2637
  validated_eval_X = eval_X.copy()
2638
2638
  elif isinstance(eval_X, pd.Series):
2639
2639
  validated_eval_X = eval_X.to_frame()
2640
- elif isinstance(eval_X, np.ndarray) or isinstance(eval_X, list):
2640
+ elif isinstance(eval_X, (list, np.ndarray)):
2641
2641
  validated_eval_X = pd.DataFrame(eval_X)
2642
2642
  renaming = {c: str(c) for c in validated_eval_X.columns}
2643
2643
  validated_eval_X = validated_eval_X.rename(columns=renaming)
@@ -2819,7 +2819,7 @@ class FeaturesEnricher(TransformerMixin):
2819
2819
  )
2820
2820
 
2821
2821
  def sample(df):
2822
- if isinstance(df, pd.Series) or isinstance(df, pd.DataFrame):
2822
+ if isinstance(df, (pd.DataFrame, pd.Series)):
2823
2823
  return df.head(10)
2824
2824
  else:
2825
2825
  return df[:10]
@@ -3693,7 +3693,7 @@ class FeaturesEnricher(TransformerMixin):
3693
3693
  def sample(inp, sample_index):
3694
3694
  if _num_samples(inp) <= 1000:
3695
3695
  return inp
3696
- if isinstance(inp, pd.DataFrame) or isinstance(inp, pd.Series):
3696
+ if isinstance(inp, (pd.DataFrame, pd.Series)):
3697
3697
  return inp.sample(n=1000, random_state=random_state)
3698
3698
  if isinstance(inp, np.ndarray):
3699
3699
  return inp[sample_index]
upgini/http.py CHANGED
@@ -22,6 +22,7 @@ from pydantic import BaseModel
22
22
  from pythonjsonlogger import jsonlogger
23
23
  from requests.exceptions import RequestException
24
24
 
25
+ from upgini.__about__ import __version__
25
26
  from upgini.errors import (
26
27
  HttpError,
27
28
  UnauthorizedError,
@@ -38,17 +39,17 @@ from upgini.metadata import (
38
39
  from upgini.resource_bundle import bundle
39
40
  from upgini.utils.track_info import get_track_metrics
40
41
 
41
- try:
42
- from importlib_metadata import version # type: ignore
42
+ # try:
43
+ # from importlib.metadata import version # type: ignore
43
44
 
44
- __version__ = version("upgini")
45
- except ImportError:
46
- try:
47
- from importlib.metadata import version # type: ignore
45
+ # __version__ = version("upgini")
46
+ # except ImportError:
47
+ # try:
48
+ # from importlib_metadata import version # type: ignore
48
49
 
49
- __version__ = version("upgini")
50
- except ImportError:
51
- __version__ = "Upgini wasn't installed"
50
+ # __version__ = version("upgini")
51
+ # except ImportError:
52
+ # __version__ = "Upgini wasn't installed"
52
53
 
53
54
  UPGINI_URL: str = "UPGINI_URL"
54
55
  UPGINI_API_KEY: str = "UPGINI_API_KEY"
@@ -925,7 +926,7 @@ def is_demo_api_key(api_token: Optional[str]) -> bool:
925
926
  return api_token is None or api_token == "" or api_token == DEMO_API_KEY
926
927
 
927
928
 
928
- @lru_cache()
929
+ @lru_cache
929
930
  def _get_rest_client(
930
931
  backend_url: str, api_token: str, client_ip: Optional[str] = None, client_visitorid: Optional[str] = None
931
932
  ) -> _RestClient:
upgini/mdc/__init__.py CHANGED
@@ -1,15 +1,13 @@
1
- # -*- coding: utf-8 -*-
2
1
  """
3
2
  .. module: mdc
4
3
  .. moduleauthor:: Aljosha Friemann a.friemann@automate.wtf
5
4
  """
6
- from __future__ import absolute_import, division, print_function, unicode_literals
7
5
 
8
6
  import logging
9
7
 
10
- from upgini.mdc.context import new_log_context, get_mdc_fields
11
8
  from pythonjsonlogger import jsonlogger
12
9
 
10
+ from upgini.mdc.context import get_mdc_fields, new_log_context
13
11
 
14
12
  MDContext = new_log_context
15
13
  MDC = new_log_context
upgini/mdc/context.py CHANGED
@@ -1,4 +1,3 @@
1
- # -*- coding: utf-8 -*-
2
1
  """
3
2
  .. module: TODO
4
3
  :platform: TODO
@@ -7,12 +6,11 @@
7
6
  .. moduleauthor:: Aljosha Friemann a.friemann@automate.wtf
8
7
  """
9
8
 
10
- import time
11
- import uuid
9
+ import collections
12
10
  import logging
13
11
  import threading
14
- import collections
15
-
12
+ import time
13
+ import uuid
16
14
  from contextlib import contextmanager
17
15
 
18
16
  LOGGER = logging.getLogger(__name__)
@@ -32,7 +30,7 @@ def get_mdc_fields():
32
30
 
33
31
  @contextmanager
34
32
  def new_log_context(**kwargs):
35
- context_id = "mdc-{thread}-{context}".format(thread=threading.current_thread().ident, context=uuid.uuid4())
33
+ context_id = f"mdc-{threading.current_thread().ident}-{uuid.uuid4()}"
36
34
 
37
35
  LOGGER.debug("creating context %s", context_id)
38
36
 
upgini/metadata.py CHANGED
@@ -1,3 +1,5 @@
1
+ from __future__ import annotations
2
+
1
3
  from enum import Enum
2
4
  from typing import Dict, List, Optional, Set
3
5
 
@@ -201,6 +203,7 @@ class FileMetadata(BaseModel):
201
203
  for c in self.columns:
202
204
  if c.name == name:
203
205
  return c
206
+ return None
204
207
 
205
208
  def search_types(self) -> Set[SearchKey]:
206
209
  search_keys = set()