upgini 1.1.203a1__tar.gz → 1.1.205__tar.gz
This diff shows the differences between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of upgini might be problematic. Click here for more details.
- {upgini-1.1.203a1/src/upgini.egg-info → upgini-1.1.205}/PKG-INFO +1 -1
- {upgini-1.1.203a1 → upgini-1.1.205}/setup.py +1 -1
- {upgini-1.1.203a1 → upgini-1.1.205}/src/upgini/dataset.py +4 -0
- {upgini-1.1.203a1 → upgini-1.1.205}/src/upgini/features_enricher.py +7 -2
- {upgini-1.1.203a1 → upgini-1.1.205}/src/upgini/http.py +9 -6
- {upgini-1.1.203a1 → upgini-1.1.205}/src/upgini/search_task.py +2 -1
- {upgini-1.1.203a1 → upgini-1.1.205}/src/upgini/utils/display_utils.py +3 -2
- {upgini-1.1.203a1 → upgini-1.1.205}/src/upgini/utils/track_info.py +29 -19
- {upgini-1.1.203a1 → upgini-1.1.205/src/upgini.egg-info}/PKG-INFO +1 -1
- {upgini-1.1.203a1 → upgini-1.1.205}/LICENSE +0 -0
- {upgini-1.1.203a1 → upgini-1.1.205}/README.md +0 -0
- {upgini-1.1.203a1 → upgini-1.1.205}/pyproject.toml +0 -0
- {upgini-1.1.203a1 → upgini-1.1.205}/setup.cfg +0 -0
- {upgini-1.1.203a1 → upgini-1.1.205}/src/upgini/__init__.py +0 -0
- {upgini-1.1.203a1 → upgini-1.1.205}/src/upgini/ads.py +0 -0
- {upgini-1.1.203a1 → upgini-1.1.205}/src/upgini/ads_management/__init__.py +0 -0
- {upgini-1.1.203a1 → upgini-1.1.205}/src/upgini/ads_management/ads_manager.py +0 -0
- {upgini-1.1.203a1 → upgini-1.1.205}/src/upgini/data_source/__init__.py +0 -0
- {upgini-1.1.203a1 → upgini-1.1.205}/src/upgini/data_source/data_source_publisher.py +0 -0
- {upgini-1.1.203a1 → upgini-1.1.205}/src/upgini/errors.py +0 -0
- {upgini-1.1.203a1 → upgini-1.1.205}/src/upgini/fingerprint.js +0 -0
- {upgini-1.1.203a1 → upgini-1.1.205}/src/upgini/mdc/__init__.py +0 -0
- {upgini-1.1.203a1 → upgini-1.1.205}/src/upgini/mdc/context.py +0 -0
- {upgini-1.1.203a1 → upgini-1.1.205}/src/upgini/metadata.py +0 -0
- {upgini-1.1.203a1 → upgini-1.1.205}/src/upgini/metrics.py +0 -0
- {upgini-1.1.203a1 → upgini-1.1.205}/src/upgini/normalizer/__init__.py +0 -0
- {upgini-1.1.203a1 → upgini-1.1.205}/src/upgini/normalizer/phone_normalizer.py +0 -0
- {upgini-1.1.203a1 → upgini-1.1.205}/src/upgini/resource_bundle/__init__.py +0 -0
- {upgini-1.1.203a1 → upgini-1.1.205}/src/upgini/resource_bundle/exceptions.py +0 -0
- {upgini-1.1.203a1 → upgini-1.1.205}/src/upgini/resource_bundle/strings.properties +0 -0
- {upgini-1.1.203a1 → upgini-1.1.205}/src/upgini/sampler/__init__.py +0 -0
- {upgini-1.1.203a1 → upgini-1.1.205}/src/upgini/sampler/base.py +0 -0
- {upgini-1.1.203a1 → upgini-1.1.205}/src/upgini/sampler/random_under_sampler.py +0 -0
- {upgini-1.1.203a1 → upgini-1.1.205}/src/upgini/sampler/utils.py +0 -0
- {upgini-1.1.203a1 → upgini-1.1.205}/src/upgini/spinner.py +0 -0
- {upgini-1.1.203a1 → upgini-1.1.205}/src/upgini/utils/__init__.py +0 -0
- {upgini-1.1.203a1 → upgini-1.1.205}/src/upgini/utils/base_search_key_detector.py +0 -0
- {upgini-1.1.203a1 → upgini-1.1.205}/src/upgini/utils/blocked_time_series.py +0 -0
- {upgini-1.1.203a1 → upgini-1.1.205}/src/upgini/utils/country_utils.py +0 -0
- {upgini-1.1.203a1 → upgini-1.1.205}/src/upgini/utils/custom_loss_utils.py +0 -0
- {upgini-1.1.203a1 → upgini-1.1.205}/src/upgini/utils/cv_utils.py +0 -0
- {upgini-1.1.203a1 → upgini-1.1.205}/src/upgini/utils/datetime_utils.py +0 -0
- {upgini-1.1.203a1 → upgini-1.1.205}/src/upgini/utils/email_utils.py +0 -0
- {upgini-1.1.203a1 → upgini-1.1.205}/src/upgini/utils/fallback_progress_bar.py +0 -0
- {upgini-1.1.203a1 → upgini-1.1.205}/src/upgini/utils/features_validator.py +0 -0
- {upgini-1.1.203a1 → upgini-1.1.205}/src/upgini/utils/format.py +0 -0
- {upgini-1.1.203a1 → upgini-1.1.205}/src/upgini/utils/ip_utils.py +0 -0
- {upgini-1.1.203a1 → upgini-1.1.205}/src/upgini/utils/phone_utils.py +0 -0
- {upgini-1.1.203a1 → upgini-1.1.205}/src/upgini/utils/postal_code_utils.py +0 -0
- {upgini-1.1.203a1 → upgini-1.1.205}/src/upgini/utils/progress_bar.py +0 -0
- {upgini-1.1.203a1 → upgini-1.1.205}/src/upgini/utils/sklearn_ext.py +0 -0
- {upgini-1.1.203a1 → upgini-1.1.205}/src/upgini/utils/target_utils.py +0 -0
- {upgini-1.1.203a1 → upgini-1.1.205}/src/upgini/utils/warning_counter.py +0 -0
- {upgini-1.1.203a1 → upgini-1.1.205}/src/upgini/version_validator.py +0 -0
- {upgini-1.1.203a1 → upgini-1.1.205}/src/upgini.egg-info/SOURCES.txt +0 -0
- {upgini-1.1.203a1 → upgini-1.1.205}/src/upgini.egg-info/dependency_links.txt +0 -0
- {upgini-1.1.203a1 → upgini-1.1.205}/src/upgini.egg-info/requires.txt +0 -0
- {upgini-1.1.203a1 → upgini-1.1.205}/src/upgini.egg-info/top_level.txt +0 -0
- {upgini-1.1.203a1 → upgini-1.1.205}/tests/test_binary_dataset.py +0 -0
- {upgini-1.1.203a1 → upgini-1.1.205}/tests/test_blocked_time_series.py +0 -0
- {upgini-1.1.203a1 → upgini-1.1.205}/tests/test_categorical_dataset.py +0 -0
- {upgini-1.1.203a1 → upgini-1.1.205}/tests/test_continuous_dataset.py +0 -0
- {upgini-1.1.203a1 → upgini-1.1.205}/tests/test_country_utils.py +0 -0
- {upgini-1.1.203a1 → upgini-1.1.205}/tests/test_custom_loss_utils.py +0 -0
- {upgini-1.1.203a1 → upgini-1.1.205}/tests/test_datetime_utils.py +0 -0
- {upgini-1.1.203a1 → upgini-1.1.205}/tests/test_email_utils.py +0 -0
- {upgini-1.1.203a1 → upgini-1.1.205}/tests/test_etalon_validation.py +0 -0
- {upgini-1.1.203a1 → upgini-1.1.205}/tests/test_features_enricher.py +0 -0
- {upgini-1.1.203a1 → upgini-1.1.205}/tests/test_metrics.py +0 -0
- {upgini-1.1.203a1 → upgini-1.1.205}/tests/test_phone_utils.py +0 -0
- {upgini-1.1.203a1 → upgini-1.1.205}/tests/test_postal_code_utils.py +0 -0
- {upgini-1.1.203a1 → upgini-1.1.205}/tests/test_widget.py +0 -0
|
@@ -80,6 +80,7 @@ class Dataset: # (pd.DataFrame):
|
|
|
80
80
|
endpoint: Optional[str] = None,
|
|
81
81
|
api_key: Optional[str] = None,
|
|
82
82
|
logger: Optional[logging.Logger] = None,
|
|
83
|
+
client_ip: Optional[str] = None,
|
|
83
84
|
warning_counter: Optional[WarningCounter] = None,
|
|
84
85
|
**kwargs,
|
|
85
86
|
):
|
|
@@ -123,6 +124,7 @@ class Dataset: # (pd.DataFrame):
|
|
|
123
124
|
else:
|
|
124
125
|
self.logger = logging.getLogger()
|
|
125
126
|
self.logger.setLevel("FATAL")
|
|
127
|
+
self.client_ip = client_ip
|
|
126
128
|
self.warning_counter = warning_counter or WarningCounter()
|
|
127
129
|
|
|
128
130
|
def __len__(self):
|
|
@@ -961,6 +963,7 @@ class Dataset: # (pd.DataFrame):
|
|
|
961
963
|
task_type=self.task_type,
|
|
962
964
|
endpoint=self.endpoint,
|
|
963
965
|
api_key=self.api_key,
|
|
966
|
+
client_ip=self.client_ip,
|
|
964
967
|
)
|
|
965
968
|
|
|
966
969
|
def validation(
|
|
@@ -1030,6 +1033,7 @@ class Dataset: # (pd.DataFrame):
|
|
|
1030
1033
|
initial_search_task_id=initial_search_task_id,
|
|
1031
1034
|
endpoint=self.endpoint,
|
|
1032
1035
|
api_key=self.api_key,
|
|
1036
|
+
client_ip=self.client_ip,
|
|
1033
1037
|
)
|
|
1034
1038
|
|
|
1035
1039
|
def prepare_uploading_file(self, base_path: str) -> str:
|
|
@@ -168,16 +168,18 @@ class FeaturesEnricher(TransformerMixin):
|
|
|
168
168
|
logs_enabled: bool = True,
|
|
169
169
|
raise_validation_error: bool = True,
|
|
170
170
|
exclude_columns: Optional[List[str]] = None,
|
|
171
|
+
client_ip: Optional[str] = None,
|
|
171
172
|
**kwargs,
|
|
172
173
|
):
|
|
173
174
|
self._api_key = api_key or os.environ.get(UPGINI_API_KEY)
|
|
174
175
|
if api_key is not None and not isinstance(api_key, str):
|
|
175
176
|
raise ValidationError(f"api_key should be `string`, but passed: `{api_key}`")
|
|
176
177
|
self.rest_client = get_rest_client(endpoint, self._api_key)
|
|
178
|
+
self.client_ip = client_ip
|
|
177
179
|
|
|
178
180
|
self.logs_enabled = logs_enabled
|
|
179
181
|
if logs_enabled:
|
|
180
|
-
self.logger = LoggerFactory().get_logger(endpoint, self._api_key)
|
|
182
|
+
self.logger = LoggerFactory().get_logger(endpoint, self._api_key, client_ip)
|
|
181
183
|
else:
|
|
182
184
|
self.logger = logging.getLogger()
|
|
183
185
|
self.logger.setLevel("FATAL")
|
|
@@ -222,6 +224,7 @@ class FeaturesEnricher(TransformerMixin):
|
|
|
222
224
|
search_id,
|
|
223
225
|
endpoint=self.endpoint,
|
|
224
226
|
api_key=self._api_key,
|
|
227
|
+
client_ip=client_ip
|
|
225
228
|
)
|
|
226
229
|
|
|
227
230
|
print(bundle.get("search_by_task_id_start"))
|
|
@@ -281,7 +284,7 @@ class FeaturesEnricher(TransformerMixin):
|
|
|
281
284
|
def _set_api_key(self, api_key: str):
|
|
282
285
|
self._api_key = api_key
|
|
283
286
|
if self.logs_enabled:
|
|
284
|
-
self.logger = LoggerFactory().get_logger(self.endpoint, self._api_key)
|
|
287
|
+
self.logger = LoggerFactory().get_logger(self.endpoint, self._api_key, self.client_ip)
|
|
285
288
|
|
|
286
289
|
api_key = property(_get_api_key, _set_api_key)
|
|
287
290
|
|
|
@@ -1584,6 +1587,7 @@ class FeaturesEnricher(TransformerMixin):
|
|
|
1584
1587
|
api_key=self.api_key, # type: ignore
|
|
1585
1588
|
date_format=self.date_format, # type: ignore
|
|
1586
1589
|
logger=self.logger,
|
|
1590
|
+
client_ip=self.client_ip
|
|
1587
1591
|
)
|
|
1588
1592
|
dataset.meaning_types = meaning_types
|
|
1589
1593
|
dataset.search_keys = combined_search_keys
|
|
@@ -1917,6 +1921,7 @@ class FeaturesEnricher(TransformerMixin):
|
|
|
1917
1921
|
date_format=self.date_format, # type: ignore
|
|
1918
1922
|
random_state=self.random_state, # type: ignore
|
|
1919
1923
|
logger=self.logger,
|
|
1924
|
+
client_ip=self.client_ip,
|
|
1920
1925
|
)
|
|
1921
1926
|
dataset.meaning_types = meaning_types
|
|
1922
1927
|
dataset.search_keys = combined_search_keys
|
|
@@ -1,4 +1,6 @@
|
|
|
1
1
|
import base64
|
|
2
|
+
import copy
|
|
3
|
+
import datetime
|
|
2
4
|
import hashlib
|
|
3
5
|
import logging
|
|
4
6
|
import os
|
|
@@ -12,8 +14,6 @@ from http.client import HTTPConnection
|
|
|
12
14
|
from json import dumps
|
|
13
15
|
from typing import Any, Dict, List, Optional, Tuple
|
|
14
16
|
from urllib.parse import urljoin
|
|
15
|
-
import datetime
|
|
16
|
-
import copy
|
|
17
17
|
|
|
18
18
|
import jwt
|
|
19
19
|
import pandas as pd
|
|
@@ -882,17 +882,18 @@ def _get_rest_client(backend_url: str, api_token: str) -> _RestClient:
|
|
|
882
882
|
|
|
883
883
|
|
|
884
884
|
class BackendLogHandler(logging.Handler):
|
|
885
|
-
def __init__(self, rest_client: _RestClient, *args, **kwargs) -> None:
|
|
885
|
+
def __init__(self, rest_client: _RestClient, client_ip: Optional[str] = None, *args, **kwargs) -> None:
|
|
886
886
|
super().__init__(*args, **kwargs)
|
|
887
887
|
self.rest_client = rest_client
|
|
888
888
|
self.track_metrics = None
|
|
889
889
|
self.hostname = "0.0.0.0"
|
|
890
|
+
self.client_ip = client_ip
|
|
890
891
|
|
|
891
892
|
def emit(self, record: logging.LogRecord) -> None:
|
|
892
893
|
def task():
|
|
893
894
|
try:
|
|
894
895
|
if self.track_metrics is None or len(self.track_metrics) == 0:
|
|
895
|
-
self.track_metrics = get_track_metrics()
|
|
896
|
+
self.track_metrics = get_track_metrics(self.client_ip)
|
|
896
897
|
self.hostname = self.track_metrics.get("ip") or "0.0.0.0"
|
|
897
898
|
text = self.format(record)
|
|
898
899
|
tags = self.track_metrics
|
|
@@ -933,7 +934,9 @@ class LoggerFactory:
|
|
|
933
934
|
root.setLevel(logging.INFO)
|
|
934
935
|
root.handlers.clear()
|
|
935
936
|
|
|
936
|
-
def get_logger(
|
|
937
|
+
def get_logger(
|
|
938
|
+
self, backend_url: Optional[str] = None, api_token: Optional[str] = None, client_ip: Optional[str] = None
|
|
939
|
+
) -> logging.Logger:
|
|
937
940
|
url = _resolve_backend_url(backend_url)
|
|
938
941
|
token = resolve_api_token(api_token)
|
|
939
942
|
key = url + token
|
|
@@ -944,7 +947,7 @@ class LoggerFactory:
|
|
|
944
947
|
upgini_logger = logging.getLogger(f"upgini.{hash(key)}")
|
|
945
948
|
upgini_logger.handlers.clear()
|
|
946
949
|
rest_client = get_rest_client(backend_url, api_token)
|
|
947
|
-
datadog_handler = BackendLogHandler(rest_client)
|
|
950
|
+
datadog_handler = BackendLogHandler(rest_client, client_ip)
|
|
948
951
|
json_formatter = jsonlogger.JsonFormatter(
|
|
949
952
|
"%(asctime)s %(threadName)s %(name)s %(levelname)s %(message)s",
|
|
950
953
|
timestamp=True,
|
|
@@ -42,6 +42,7 @@ class SearchTask:
|
|
|
42
42
|
task_type: Optional[ModelTaskType] = None,
|
|
43
43
|
endpoint: Optional[str] = None,
|
|
44
44
|
api_key: Optional[str] = None,
|
|
45
|
+
client_ip: Optional[str] = None,
|
|
45
46
|
):
|
|
46
47
|
self.search_task_id = search_task_id
|
|
47
48
|
self.initial_search_task_id = initial_search_task_id
|
|
@@ -53,7 +54,7 @@ class SearchTask:
|
|
|
53
54
|
self.summary = None
|
|
54
55
|
self.endpoint = endpoint
|
|
55
56
|
self.api_key = api_key
|
|
56
|
-
self.logger = LoggerFactory().get_logger(endpoint, api_key)
|
|
57
|
+
self.logger = LoggerFactory().get_logger(endpoint, api_key, client_ip)
|
|
57
58
|
self.provider_metadata_v2: Optional[List[ProviderTaskMetadataV2]] = None
|
|
58
59
|
self.unused_features_for_generation: Optional[List[str]] = None
|
|
59
60
|
|
|
@@ -135,9 +135,10 @@ def make_html_report(
|
|
|
135
135
|
):
|
|
136
136
|
relevant_datasources_df = relevant_datasources_df.copy()
|
|
137
137
|
relevant_datasources_df["action"] = (
|
|
138
|
-
f"""<a href="https://upgini.com/requet-a-quote?search-id={search_id}"
|
|
138
|
+
f"""<a href="https://upgini.com/requet-a-quote?search-id={search_id}">"""
|
|
139
|
+
"""<button type="button">Request a quote</button></a>"""
|
|
139
140
|
)
|
|
140
|
-
relevant_datasources_df.rename(columns={"action": ""})
|
|
141
|
+
relevant_datasources_df.rename(columns={"action": " "}, inplace=True)
|
|
141
142
|
return f"""<html>
|
|
142
143
|
<head>
|
|
143
144
|
<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
|
|
@@ -5,6 +5,7 @@ import socket
|
|
|
5
5
|
import sys
|
|
6
6
|
from getpass import getuser
|
|
7
7
|
from hashlib import sha256
|
|
8
|
+
from typing import Optional
|
|
8
9
|
from uuid import getnode
|
|
9
10
|
|
|
10
11
|
from requests import get, post
|
|
@@ -46,7 +47,7 @@ def _get_execution_ide() -> str:
|
|
|
46
47
|
|
|
47
48
|
|
|
48
49
|
@lru_cache()
|
|
49
|
-
def get_track_metrics() -> dict:
|
|
50
|
+
def get_track_metrics(client_ip: Optional[str]) -> dict:
|
|
50
51
|
# default values
|
|
51
52
|
track = {"ide": _get_execution_ide()}
|
|
52
53
|
ident_res = "https://api.ipify.org"
|
|
@@ -93,28 +94,34 @@ def get_track_metrics() -> dict:
|
|
|
93
94
|
except Exception as e:
|
|
94
95
|
track["err"] = str(e)
|
|
95
96
|
track["visitorId"] = "None"
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
Javascript
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
97
|
+
if client_ip:
|
|
98
|
+
track["ip"] = client_ip
|
|
99
|
+
else:
|
|
100
|
+
try:
|
|
101
|
+
from google.colab import output # type: ignore
|
|
102
|
+
from IPython.display import Javascript, display
|
|
103
|
+
|
|
104
|
+
display(
|
|
105
|
+
Javascript(
|
|
106
|
+
f"""
|
|
107
|
+
window.clientIP = fetch("{ident_res}")
|
|
108
|
+
.then(response => response.text())
|
|
109
|
+
.then(data => data);
|
|
110
|
+
"""
|
|
111
|
+
)
|
|
107
112
|
)
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
track["ip"] = "0.0.0.0"
|
|
113
|
+
track["ip"] = output.eval_js("window.clientIP", timeout_sec=10)
|
|
114
|
+
except Exception as e:
|
|
115
|
+
track["err"] = str(e)
|
|
116
|
+
track["ip"] = "0.0.0.0"
|
|
113
117
|
|
|
114
118
|
elif track["ide"] == "binder":
|
|
115
119
|
try:
|
|
116
120
|
if "CLIENT_IP" in os.environ.keys():
|
|
117
|
-
|
|
121
|
+
if client_ip:
|
|
122
|
+
track["ip"] = client_ip
|
|
123
|
+
else:
|
|
124
|
+
track["ip"] = os.environ["CLIENT_IP"]
|
|
118
125
|
track["visitorId"] = sha256(os.environ["CLIENT_IP"].encode()).hexdigest()
|
|
119
126
|
except Exception as e:
|
|
120
127
|
track["err"] = str(e)
|
|
@@ -142,7 +149,10 @@ def get_track_metrics() -> dict:
|
|
|
142
149
|
track["visitorId"] = "None"
|
|
143
150
|
else:
|
|
144
151
|
try:
|
|
145
|
-
|
|
152
|
+
if client_ip:
|
|
153
|
+
track["ip"] = client_ip
|
|
154
|
+
else:
|
|
155
|
+
track["ip"] = get(ident_res, timeout=10).text
|
|
146
156
|
track["visitorId"] = sha256(str(getnode()).encode()).hexdigest()
|
|
147
157
|
except Exception as e:
|
|
148
158
|
track["err"] = str(e)
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|