upgini 1.1.279a1__py3-none-any.whl → 1.1.279a2.dev1__py3-none-any.whl
This diff shows the changes between publicly available versions of a package that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
Potentially problematic release.
This version of upgini might be problematic. Click here for more details.
- upgini/__about__.py +1 -0
- upgini/ads_management/ads_manager.py +4 -2
- upgini/autofe/all_operands.py +3 -2
- upgini/autofe/binary.py +2 -1
- upgini/autofe/date.py +2 -1
- upgini/autofe/feature.py +1 -1
- upgini/autofe/groupby.py +3 -1
- upgini/autofe/operand.py +4 -3
- upgini/autofe/unary.py +2 -1
- upgini/autofe/vector.py +2 -0
- upgini/dataset.py +4 -4
- upgini/errors.py +1 -1
- upgini/features_enricher.py +4 -4
- upgini/http.py +11 -10
- upgini/mdc/__init__.py +1 -3
- upgini/mdc/context.py +4 -6
- upgini/metadata.py +3 -0
- upgini/metrics.py +101 -99
- upgini/normalizer/phone_normalizer.py +1 -1
- upgini/resource_bundle/__init__.py +5 -5
- upgini/sampler/base.py +1 -4
- upgini/sampler/random_under_sampler.py +2 -5
- upgini/search_task.py +4 -4
- upgini/spinner.py +1 -1
- upgini/utils/__init__.py +1 -1
- upgini/utils/base_search_key_detector.py +2 -2
- upgini/utils/blocked_time_series.py +4 -2
- upgini/utils/country_utils.py +1 -1
- upgini/utils/custom_loss_utils.py +3 -2
- upgini/utils/cv_utils.py +2 -2
- upgini/utils/datetime_utils.py +12 -6
- upgini/utils/email_utils.py +2 -2
- upgini/utils/fallback_progress_bar.py +1 -1
- upgini/utils/progress_bar.py +1 -1
- upgini/utils/sklearn_ext.py +14 -13
- upgini/utils/track_info.py +2 -2
- upgini/version_validator.py +2 -2
- {upgini-1.1.279a1.dist-info → upgini-1.1.279a2.dev1.dist-info}/METADATA +21 -23
- upgini-1.1.279a2.dev1.dist-info/RECORD +62 -0
- {upgini-1.1.279a1.dist-info → upgini-1.1.279a2.dev1.dist-info}/WHEEL +1 -2
- upgini/fingerprint.js +0 -8
- upgini-1.1.279a1.dist-info/RECORD +0 -63
- upgini-1.1.279a1.dist-info/top_level.txt +0 -1
- {upgini-1.1.279a1.dist-info → upgini-1.1.279a2.dev1.dist-info/licenses}/LICENSE +0 -0
upgini/__about__.py
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
__version__ = "1.1.279a2.dev1"
|
upgini/autofe/all_operands.py
CHANGED
|
@@ -1,9 +1,10 @@
|
|
|
1
1
|
from typing import Dict
|
|
2
|
+
|
|
3
|
+
from upgini.autofe.binary import Add, Divide, Max, Min, Multiply, Sim, Subtract
|
|
2
4
|
from upgini.autofe.date import DateDiff, DateDiffType2, DateListDiff, DateListDiffBounded
|
|
3
5
|
from upgini.autofe.groupby import GroupByThenAgg, GroupByThenRank
|
|
4
6
|
from upgini.autofe.operand import Operand
|
|
5
|
-
from upgini.autofe.unary import Abs,
|
|
6
|
-
from upgini.autofe.binary import Min, Max, Add, Subtract, Multiply, Divide, Sim
|
|
7
|
+
from upgini.autofe.unary import Abs, Floor, Freq, Log, Residual, Sigmoid, Sqrt, Square
|
|
7
8
|
from upgini.autofe.vector import Mean, Sum
|
|
8
9
|
|
|
9
10
|
ALL_OPERANDS: Dict[str, Operand] = {
|
upgini/autofe/binary.py
CHANGED
|
@@ -1,9 +1,10 @@
|
|
|
1
|
-
from upgini.autofe.operand import PandasOperand, VectorizableMixin
|
|
2
1
|
import numpy as np
|
|
3
2
|
import pandas as pd
|
|
4
3
|
from numpy import dot
|
|
5
4
|
from numpy.linalg import norm
|
|
6
5
|
|
|
6
|
+
from upgini.autofe.operand import PandasOperand, VectorizableMixin
|
|
7
|
+
|
|
7
8
|
|
|
8
9
|
class Min(PandasOperand):
|
|
9
10
|
name = "min"
|
upgini/autofe/date.py
CHANGED
|
@@ -1,8 +1,9 @@
|
|
|
1
1
|
from typing import Any, Optional, Union
|
|
2
|
+
|
|
2
3
|
import numpy as np
|
|
3
4
|
import pandas as pd
|
|
4
|
-
from pydantic import BaseModel
|
|
5
5
|
from pandas.core.arrays.timedeltas import TimedeltaArray
|
|
6
|
+
from pydantic import BaseModel
|
|
6
7
|
|
|
7
8
|
from upgini.autofe.operand import PandasOperand
|
|
8
9
|
|
upgini/autofe/feature.py
CHANGED
upgini/autofe/groupby.py
CHANGED
|
@@ -1,7 +1,9 @@
|
|
|
1
|
-
from upgini.autofe.operand import PandasOperand, VectorizableMixin
|
|
2
1
|
from typing import Optional
|
|
2
|
+
|
|
3
3
|
import pandas as pd
|
|
4
4
|
|
|
5
|
+
from upgini.autofe.operand import PandasOperand, VectorizableMixin
|
|
6
|
+
|
|
5
7
|
|
|
6
8
|
class GroupByThenAgg(PandasOperand, VectorizableMixin):
|
|
7
9
|
agg: Optional[str]
|
upgini/autofe/operand.py
CHANGED
|
@@ -1,8 +1,9 @@
|
|
|
1
|
-
from pydantic import BaseModel
|
|
2
|
-
from typing import Dict, List, Optional, Tuple, Union
|
|
3
1
|
import abc
|
|
4
|
-
import
|
|
2
|
+
from typing import Dict, List, Optional, Tuple, Union
|
|
3
|
+
|
|
5
4
|
import numpy as np
|
|
5
|
+
import pandas as pd
|
|
6
|
+
from pydantic import BaseModel
|
|
6
7
|
|
|
7
8
|
|
|
8
9
|
class Operand(BaseModel):
|
upgini/autofe/unary.py
CHANGED
upgini/autofe/vector.py
CHANGED
upgini/dataset.py
CHANGED
|
@@ -15,9 +15,9 @@ from pandas.api.types import (
|
|
|
15
15
|
is_float_dtype,
|
|
16
16
|
is_integer_dtype,
|
|
17
17
|
is_numeric_dtype,
|
|
18
|
+
is_object_dtype,
|
|
18
19
|
is_period_dtype,
|
|
19
20
|
is_string_dtype,
|
|
20
|
-
is_object_dtype,
|
|
21
21
|
)
|
|
22
22
|
|
|
23
23
|
from upgini.errors import ValidationError
|
|
@@ -95,7 +95,7 @@ class Dataset: # (pd.DataFrame):
|
|
|
95
95
|
data = pd.read_csv(path, **kwargs)
|
|
96
96
|
else:
|
|
97
97
|
# try different separators: , ; \t ...
|
|
98
|
-
with open(path
|
|
98
|
+
with open(path) as csvfile:
|
|
99
99
|
sep = csv.Sniffer().sniff(csvfile.read(2048)).delimiter
|
|
100
100
|
kwargs["sep"] = sep
|
|
101
101
|
data = pd.read_csv(path, **kwargs)
|
|
@@ -251,7 +251,7 @@ class Dataset: # (pd.DataFrame):
|
|
|
251
251
|
@staticmethod
|
|
252
252
|
def _ip_to_int(ip: Optional[_BaseAddress]) -> Optional[int]:
|
|
253
253
|
try:
|
|
254
|
-
if isinstance(ip, IPv4Address
|
|
254
|
+
if isinstance(ip, (IPv4Address, IPv6Address)):
|
|
255
255
|
return int(ip)
|
|
256
256
|
except Exception:
|
|
257
257
|
pass
|
|
@@ -259,7 +259,7 @@ class Dataset: # (pd.DataFrame):
|
|
|
259
259
|
@staticmethod
|
|
260
260
|
def _ip_to_int_str(ip: Optional[_BaseAddress]) -> Optional[str]:
|
|
261
261
|
try:
|
|
262
|
-
if isinstance(ip, IPv4Address
|
|
262
|
+
if isinstance(ip, (IPv4Address, IPv6Address)):
|
|
263
263
|
return str(int(ip))
|
|
264
264
|
except Exception:
|
|
265
265
|
pass
|
upgini/errors.py
CHANGED
|
@@ -16,7 +16,7 @@ class UnauthorizedError(HttpError):
|
|
|
16
16
|
"""Unauthorized error from REST API."""
|
|
17
17
|
|
|
18
18
|
def __init__(self, message, status_code):
|
|
19
|
-
message = "Unauthorized, please check your authorization token ({})"
|
|
19
|
+
message = f"Unauthorized, please check your authorization token ({message})"
|
|
20
20
|
super(UnauthorizedError, self).__init__(message, status_code)
|
|
21
21
|
|
|
22
22
|
|
upgini/features_enricher.py
CHANGED
|
@@ -2548,7 +2548,7 @@ class FeaturesEnricher(TransformerMixin):
|
|
|
2548
2548
|
validated_X = X.copy()
|
|
2549
2549
|
elif isinstance(X, pd.Series):
|
|
2550
2550
|
validated_X = X.to_frame()
|
|
2551
|
-
elif isinstance(X, np.ndarray)
|
|
2551
|
+
elif isinstance(X, (list, np.ndarray)):
|
|
2552
2552
|
validated_X = pd.DataFrame(X)
|
|
2553
2553
|
renaming = {c: str(c) for c in validated_X.columns}
|
|
2554
2554
|
validated_X = validated_X.rename(columns=renaming)
|
|
@@ -2637,7 +2637,7 @@ class FeaturesEnricher(TransformerMixin):
|
|
|
2637
2637
|
validated_eval_X = eval_X.copy()
|
|
2638
2638
|
elif isinstance(eval_X, pd.Series):
|
|
2639
2639
|
validated_eval_X = eval_X.to_frame()
|
|
2640
|
-
elif isinstance(eval_X, np.ndarray)
|
|
2640
|
+
elif isinstance(eval_X, (list, np.ndarray)):
|
|
2641
2641
|
validated_eval_X = pd.DataFrame(eval_X)
|
|
2642
2642
|
renaming = {c: str(c) for c in validated_eval_X.columns}
|
|
2643
2643
|
validated_eval_X = validated_eval_X.rename(columns=renaming)
|
|
@@ -2819,7 +2819,7 @@ class FeaturesEnricher(TransformerMixin):
|
|
|
2819
2819
|
)
|
|
2820
2820
|
|
|
2821
2821
|
def sample(df):
|
|
2822
|
-
if isinstance(df, pd.
|
|
2822
|
+
if isinstance(df, (pd.DataFrame, pd.Series)):
|
|
2823
2823
|
return df.head(10)
|
|
2824
2824
|
else:
|
|
2825
2825
|
return df[:10]
|
|
@@ -3693,7 +3693,7 @@ class FeaturesEnricher(TransformerMixin):
|
|
|
3693
3693
|
def sample(inp, sample_index):
|
|
3694
3694
|
if _num_samples(inp) <= 1000:
|
|
3695
3695
|
return inp
|
|
3696
|
-
if isinstance(inp, pd.DataFrame
|
|
3696
|
+
if isinstance(inp, (pd.DataFrame, pd.Series)):
|
|
3697
3697
|
return inp.sample(n=1000, random_state=random_state)
|
|
3698
3698
|
if isinstance(inp, np.ndarray):
|
|
3699
3699
|
return inp[sample_index]
|
upgini/http.py
CHANGED
|
@@ -22,6 +22,7 @@ from pydantic import BaseModel
|
|
|
22
22
|
from pythonjsonlogger import jsonlogger
|
|
23
23
|
from requests.exceptions import RequestException
|
|
24
24
|
|
|
25
|
+
from upgini.__about__ import __version__
|
|
25
26
|
from upgini.errors import (
|
|
26
27
|
HttpError,
|
|
27
28
|
UnauthorizedError,
|
|
@@ -38,17 +39,17 @@ from upgini.metadata import (
|
|
|
38
39
|
from upgini.resource_bundle import bundle
|
|
39
40
|
from upgini.utils.track_info import get_track_metrics
|
|
40
41
|
|
|
41
|
-
try:
|
|
42
|
-
|
|
42
|
+
# try:
|
|
43
|
+
# from importlib.metadata import version # type: ignore
|
|
43
44
|
|
|
44
|
-
|
|
45
|
-
except ImportError:
|
|
46
|
-
|
|
47
|
-
|
|
45
|
+
# __version__ = version("upgini")
|
|
46
|
+
# except ImportError:
|
|
47
|
+
# try:
|
|
48
|
+
# from importlib_metadata import version # type: ignore
|
|
48
49
|
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
50
|
+
# __version__ = version("upgini")
|
|
51
|
+
# except ImportError:
|
|
52
|
+
# __version__ = "Upgini wasn't installed"
|
|
52
53
|
|
|
53
54
|
UPGINI_URL: str = "UPGINI_URL"
|
|
54
55
|
UPGINI_API_KEY: str = "UPGINI_API_KEY"
|
|
@@ -925,7 +926,7 @@ def is_demo_api_key(api_token: Optional[str]) -> bool:
|
|
|
925
926
|
return api_token is None or api_token == "" or api_token == DEMO_API_KEY
|
|
926
927
|
|
|
927
928
|
|
|
928
|
-
@lru_cache
|
|
929
|
+
@lru_cache
|
|
929
930
|
def _get_rest_client(
|
|
930
931
|
backend_url: str, api_token: str, client_ip: Optional[str] = None, client_visitorid: Optional[str] = None
|
|
931
932
|
) -> _RestClient:
|
upgini/mdc/__init__.py
CHANGED
|
@@ -1,15 +1,13 @@
|
|
|
1
|
-
# -*- coding: utf-8 -*-
|
|
2
1
|
"""
|
|
3
2
|
.. module: mdc
|
|
4
3
|
.. moduleauthor:: Aljosha Friemann a.friemann@automate.wtf
|
|
5
4
|
"""
|
|
6
|
-
from __future__ import absolute_import, division, print_function, unicode_literals
|
|
7
5
|
|
|
8
6
|
import logging
|
|
9
7
|
|
|
10
|
-
from upgini.mdc.context import new_log_context, get_mdc_fields
|
|
11
8
|
from pythonjsonlogger import jsonlogger
|
|
12
9
|
|
|
10
|
+
from upgini.mdc.context import get_mdc_fields, new_log_context
|
|
13
11
|
|
|
14
12
|
MDContext = new_log_context
|
|
15
13
|
MDC = new_log_context
|
upgini/mdc/context.py
CHANGED
|
@@ -1,4 +1,3 @@
|
|
|
1
|
-
# -*- coding: utf-8 -*-
|
|
2
1
|
"""
|
|
3
2
|
.. module: TODO
|
|
4
3
|
:platform: TODO
|
|
@@ -7,12 +6,11 @@
|
|
|
7
6
|
.. moduleauthor:: Aljosha Friemann a.friemann@automate.wtf
|
|
8
7
|
"""
|
|
9
8
|
|
|
10
|
-
import
|
|
11
|
-
import uuid
|
|
9
|
+
import collections
|
|
12
10
|
import logging
|
|
13
11
|
import threading
|
|
14
|
-
import
|
|
15
|
-
|
|
12
|
+
import time
|
|
13
|
+
import uuid
|
|
16
14
|
from contextlib import contextmanager
|
|
17
15
|
|
|
18
16
|
LOGGER = logging.getLogger(__name__)
|
|
@@ -32,7 +30,7 @@ def get_mdc_fields():
|
|
|
32
30
|
|
|
33
31
|
@contextmanager
|
|
34
32
|
def new_log_context(**kwargs):
|
|
35
|
-
context_id = "mdc-{
|
|
33
|
+
context_id = f"mdc-{threading.current_thread().ident}-{uuid.uuid4()}"
|
|
36
34
|
|
|
37
35
|
LOGGER.debug("creating context %s", context_id)
|
|
38
36
|
|
upgini/metadata.py
CHANGED
|
@@ -1,3 +1,5 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
1
3
|
from enum import Enum
|
|
2
4
|
from typing import Dict, List, Optional, Set
|
|
3
5
|
|
|
@@ -201,6 +203,7 @@ class FileMetadata(BaseModel):
|
|
|
201
203
|
for c in self.columns:
|
|
202
204
|
if c.name == name:
|
|
203
205
|
return c
|
|
206
|
+
return None
|
|
204
207
|
|
|
205
208
|
def search_types(self) -> Set[SearchKey]:
|
|
206
209
|
search_keys = set()
|