upgini 1.1.241__py3-none-any.whl → 1.1.242__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- upgini/dataset.py +11 -15
- upgini/features_enricher.py +8 -10
- upgini/http.py +9 -4
- upgini/search_task.py +19 -16
- {upgini-1.1.241.dist-info → upgini-1.1.242.dist-info}/METADATA +1 -1
- {upgini-1.1.241.dist-info → upgini-1.1.242.dist-info}/RECORD +9 -9
- {upgini-1.1.241.dist-info → upgini-1.1.242.dist-info}/LICENSE +0 -0
- {upgini-1.1.241.dist-info → upgini-1.1.242.dist-info}/WHEEL +0 -0
- {upgini-1.1.241.dist-info → upgini-1.1.242.dist-info}/top_level.txt +0 -0
upgini/dataset.py
CHANGED
|
@@ -20,7 +20,7 @@ from pandas.api.types import (
|
|
|
20
20
|
from pandas.core.dtypes.common import is_period_dtype
|
|
21
21
|
|
|
22
22
|
from upgini.errors import ValidationError
|
|
23
|
-
from upgini.http import ProgressStage, SearchProgress,
|
|
23
|
+
from upgini.http import ProgressStage, SearchProgress, _RestClient
|
|
24
24
|
from upgini.metadata import (
|
|
25
25
|
EVAL_SET_INDEX,
|
|
26
26
|
SYSTEM_COLUMNS,
|
|
@@ -78,8 +78,7 @@ class Dataset: # (pd.DataFrame):
|
|
|
78
78
|
search_keys: Optional[List[Tuple[str, ...]]] = None,
|
|
79
79
|
model_task_type: Optional[ModelTaskType] = None,
|
|
80
80
|
random_state: Optional[int] = None,
|
|
81
|
-
|
|
82
|
-
api_key: Optional[str] = None,
|
|
81
|
+
rest_client: Optional[_RestClient] = None,
|
|
83
82
|
logger: Optional[logging.Logger] = None,
|
|
84
83
|
warning_counter: Optional[WarningCounter] = None,
|
|
85
84
|
**kwargs,
|
|
@@ -114,8 +113,7 @@ class Dataset: # (pd.DataFrame):
|
|
|
114
113
|
self.hierarchical_subgroup_keys = []
|
|
115
114
|
self.file_upload_id: Optional[str] = None
|
|
116
115
|
self.etalon_def: Optional[Dict[str, str]] = None
|
|
117
|
-
self.
|
|
118
|
-
self.api_key = api_key
|
|
116
|
+
self.rest_client = rest_client
|
|
119
117
|
self.random_state = random_state
|
|
120
118
|
self.columns_renaming: Dict[str, str] = {}
|
|
121
119
|
self.imbalanced: bool = False
|
|
@@ -983,10 +981,10 @@ class Dataset: # (pd.DataFrame):
|
|
|
983
981
|
runtime_parameters=runtime_parameters,
|
|
984
982
|
)
|
|
985
983
|
|
|
986
|
-
if self.file_upload_id is not None and
|
|
984
|
+
if self.file_upload_id is not None and self.rest_client.check_uploaded_file_v2(
|
|
987
985
|
trace_id, self.file_upload_id, file_metadata
|
|
988
986
|
):
|
|
989
|
-
search_task_response =
|
|
987
|
+
search_task_response = self.rest_client.initial_search_without_upload_v2(
|
|
990
988
|
trace_id, self.file_upload_id, file_metadata, file_metrics, search_customization
|
|
991
989
|
)
|
|
992
990
|
else:
|
|
@@ -999,7 +997,7 @@ class Dataset: # (pd.DataFrame):
|
|
|
999
997
|
progress_bar.progress = search_progress.to_progress_bar()
|
|
1000
998
|
if progress_callback is not None:
|
|
1001
999
|
progress_callback(search_progress)
|
|
1002
|
-
search_task_response =
|
|
1000
|
+
search_task_response = self.rest_client.initial_search_v2(
|
|
1003
1001
|
trace_id, parquet_file_path, file_metadata, file_metrics, search_customization
|
|
1004
1002
|
)
|
|
1005
1003
|
# if progress_bar is not None:
|
|
@@ -1015,8 +1013,7 @@ class Dataset: # (pd.DataFrame):
|
|
|
1015
1013
|
extract_features,
|
|
1016
1014
|
accurate_model,
|
|
1017
1015
|
task_type=self.task_type,
|
|
1018
|
-
|
|
1019
|
-
api_key=self.api_key,
|
|
1016
|
+
rest_client=self.rest_client,
|
|
1020
1017
|
logger=self.logger,
|
|
1021
1018
|
)
|
|
1022
1019
|
|
|
@@ -1053,10 +1050,10 @@ class Dataset: # (pd.DataFrame):
|
|
|
1053
1050
|
progress_bar.progress = search_progress.to_progress_bar()
|
|
1054
1051
|
if progress_callback is not None:
|
|
1055
1052
|
progress_callback(search_progress)
|
|
1056
|
-
if self.file_upload_id is not None and
|
|
1053
|
+
if self.file_upload_id is not None and self.rest_client.check_uploaded_file_v2(
|
|
1057
1054
|
trace_id, self.file_upload_id, file_metadata
|
|
1058
1055
|
):
|
|
1059
|
-
search_task_response =
|
|
1056
|
+
search_task_response = self.rest_client.validation_search_without_upload_v2(
|
|
1060
1057
|
trace_id, self.file_upload_id, initial_search_task_id, file_metadata, file_metrics, search_customization
|
|
1061
1058
|
)
|
|
1062
1059
|
else:
|
|
@@ -1065,7 +1062,7 @@ class Dataset: # (pd.DataFrame):
|
|
|
1065
1062
|
# To avoid rate limit
|
|
1066
1063
|
time.sleep(1)
|
|
1067
1064
|
|
|
1068
|
-
search_task_response =
|
|
1065
|
+
search_task_response = self.rest_client.validation_search_v2(
|
|
1069
1066
|
trace_id,
|
|
1070
1067
|
parquet_file_path,
|
|
1071
1068
|
initial_search_task_id,
|
|
@@ -1085,8 +1082,7 @@ class Dataset: # (pd.DataFrame):
|
|
|
1085
1082
|
return_scores,
|
|
1086
1083
|
extract_features,
|
|
1087
1084
|
initial_search_task_id=initial_search_task_id,
|
|
1088
|
-
|
|
1089
|
-
api_key=self.api_key,
|
|
1085
|
+
rest_client=self.rest_client,
|
|
1090
1086
|
logger=self.logger,
|
|
1091
1087
|
)
|
|
1092
1088
|
|
upgini/features_enricher.py
CHANGED
|
@@ -233,7 +233,7 @@ class FeaturesEnricher(TransformerMixin):
|
|
|
233
233
|
self.feature_importances_ = []
|
|
234
234
|
self.search_id = search_id
|
|
235
235
|
if search_id:
|
|
236
|
-
search_task = SearchTask(search_id,
|
|
236
|
+
search_task = SearchTask(search_id, rest_client=self.rest_client, logger=self.logger)
|
|
237
237
|
|
|
238
238
|
print(bundle.get("search_by_task_id_start"))
|
|
239
239
|
trace_id = str(uuid.uuid4())
|
|
@@ -1817,10 +1817,9 @@ class FeaturesEnricher(TransformerMixin):
|
|
|
1817
1817
|
|
|
1818
1818
|
dataset = Dataset(
|
|
1819
1819
|
"sample_" + str(uuid.uuid4()),
|
|
1820
|
-
df=df_without_features,
|
|
1821
|
-
|
|
1822
|
-
|
|
1823
|
-
date_format=self.date_format, # type: ignore
|
|
1820
|
+
df=df_without_features,
|
|
1821
|
+
date_format=self.date_format,
|
|
1822
|
+
rest_client=self.rest_client,
|
|
1824
1823
|
logger=self.logger,
|
|
1825
1824
|
)
|
|
1826
1825
|
dataset.meaning_types = meaning_types
|
|
@@ -2150,11 +2149,10 @@ class FeaturesEnricher(TransformerMixin):
|
|
|
2150
2149
|
dataset = Dataset(
|
|
2151
2150
|
"tds_" + str(uuid.uuid4()),
|
|
2152
2151
|
df=df, # type: ignore
|
|
2153
|
-
model_task_type=model_task_type,
|
|
2154
|
-
|
|
2155
|
-
|
|
2156
|
-
|
|
2157
|
-
random_state=self.random_state, # type: ignore
|
|
2152
|
+
model_task_type=model_task_type,
|
|
2153
|
+
date_format=self.date_format,
|
|
2154
|
+
random_state=self.random_state,
|
|
2155
|
+
rest_client=self.rest_client,
|
|
2158
2156
|
logger=self.logger,
|
|
2159
2157
|
)
|
|
2160
2158
|
dataset.meaning_types = meaning_types
|
upgini/http.py
CHANGED
|
@@ -301,13 +301,14 @@ class _RestClient:
|
|
|
301
301
|
USER_AGENT_HEADER_VALUE = "pyupgini/" + __version__
|
|
302
302
|
SEARCH_KEYS_HEADER_NAME = "Search-Keys"
|
|
303
303
|
|
|
304
|
-
def __init__(self, service_endpoint, refresh_token,
|
|
304
|
+
def __init__(self, service_endpoint, refresh_token, client_ip=None, client_visitorid=None):
|
|
305
305
|
# debug_requests_on()
|
|
306
306
|
self._service_endpoint = service_endpoint
|
|
307
307
|
self._refresh_token = refresh_token
|
|
308
|
-
self.silent_mode = silent_mode
|
|
308
|
+
# self.silent_mode = silent_mode
|
|
309
309
|
self.client_ip = client_ip
|
|
310
310
|
self.client_visitorid = client_visitorid
|
|
311
|
+
print(f"Created RestClient with {client_ip} and {client_visitorid}")
|
|
311
312
|
self._access_token = self._refresh_access_token()
|
|
312
313
|
# self._access_token: Optional[str] = None # self._refresh_access_token()
|
|
313
314
|
self.last_refresh_time = time.time()
|
|
@@ -441,6 +442,10 @@ class _RestClient:
|
|
|
441
442
|
) -> SearchTaskResponse:
|
|
442
443
|
api_path = self.INITIAL_SEARCH_URI_FMT_V2
|
|
443
444
|
|
|
445
|
+
print(f"Start initial search with {self.client_ip} and {self.client_visitorid}")
|
|
446
|
+
track_metrics = get_track_metrics(self.client_ip, self.client_visitorid)
|
|
447
|
+
print(f"Sending track metrics: {track_metrics}")
|
|
448
|
+
|
|
444
449
|
def open_and_send():
|
|
445
450
|
md5_hash = hashlib.md5()
|
|
446
451
|
with open(file_path, "rb") as file:
|
|
@@ -463,7 +468,7 @@ class _RestClient:
|
|
|
463
468
|
),
|
|
464
469
|
"tracking": (
|
|
465
470
|
"tracking.json",
|
|
466
|
-
dumps(
|
|
471
|
+
dumps(track_metrics).encode(),
|
|
467
472
|
"application/json",
|
|
468
473
|
),
|
|
469
474
|
"metrics": ("metrics.json", metrics.json(exclude_none=True).encode(), "application/json"),
|
|
@@ -994,7 +999,7 @@ class LoggerFactory:
|
|
|
994
999
|
|
|
995
1000
|
upgini_logger = logging.getLogger(f"upgini.{hash(key)}")
|
|
996
1001
|
upgini_logger.handlers.clear()
|
|
997
|
-
rest_client = get_rest_client(backend_url, api_token)
|
|
1002
|
+
rest_client = get_rest_client(backend_url, api_token, client_ip, client_visitorid)
|
|
998
1003
|
datadog_handler = BackendLogHandler(rest_client, client_ip, client_visitorid)
|
|
999
1004
|
json_formatter = jsonlogger.JsonFormatter(
|
|
1000
1005
|
"%(asctime)s %(threadName)s %(name)s %(levelname)s %(message)s",
|
upgini/search_task.py
CHANGED
|
@@ -8,6 +8,7 @@ import pandas as pd
|
|
|
8
8
|
|
|
9
9
|
from upgini import dataset
|
|
10
10
|
from upgini.http import (
|
|
11
|
+
_RestClient,
|
|
11
12
|
ProviderTaskSummary,
|
|
12
13
|
SearchProgress,
|
|
13
14
|
SearchTaskSummary,
|
|
@@ -41,8 +42,7 @@ class SearchTask:
|
|
|
41
42
|
accurate_model: bool = False,
|
|
42
43
|
initial_search_task_id: Optional[str] = None,
|
|
43
44
|
task_type: Optional[ModelTaskType] = None,
|
|
44
|
-
|
|
45
|
-
api_key: Optional[str] = None,
|
|
45
|
+
rest_client: Optional[_RestClient] = None,
|
|
46
46
|
logger: Optional[logging.Logger] = None,
|
|
47
47
|
):
|
|
48
48
|
self.search_task_id = search_task_id
|
|
@@ -53,8 +53,7 @@ class SearchTask:
|
|
|
53
53
|
self.accurate_model = accurate_model
|
|
54
54
|
self.task_type = task_type
|
|
55
55
|
self.summary = None
|
|
56
|
-
self.
|
|
57
|
-
self.api_key = api_key
|
|
56
|
+
self.rest_client = rest_client
|
|
58
57
|
if logger is not None:
|
|
59
58
|
self.logger = logger
|
|
60
59
|
else:
|
|
@@ -64,7 +63,7 @@ class SearchTask:
|
|
|
64
63
|
self.unused_features_for_generation: Optional[List[str]] = None
|
|
65
64
|
|
|
66
65
|
def get_progress(self, trace_id: str) -> SearchProgress:
|
|
67
|
-
return
|
|
66
|
+
return self.rest_client.get_search_progress(trace_id, self.search_task_id)
|
|
68
67
|
|
|
69
68
|
def poll_result(self, trace_id: str, quiet: bool = False, check_fit: bool = False) -> "SearchTask":
|
|
70
69
|
completed_statuses = {"COMPLETED", "VALIDATION_COMPLETED"}
|
|
@@ -72,7 +71,7 @@ class SearchTask:
|
|
|
72
71
|
submitted_statuses = {"SUBMITTED", "VALIDATION_SUBMITTED"}
|
|
73
72
|
if not quiet:
|
|
74
73
|
print(bundle.get("polling_search_task").format(self.search_task_id))
|
|
75
|
-
if is_demo_api_key(self.
|
|
74
|
+
if is_demo_api_key(self.rest_client._refresh_token):
|
|
76
75
|
print(bundle.get("polling_unregister_information"))
|
|
77
76
|
search_task_id = self.initial_search_task_id if self.initial_search_task_id is not None else self.search_task_id
|
|
78
77
|
|
|
@@ -80,14 +79,14 @@ class SearchTask:
|
|
|
80
79
|
with Spinner():
|
|
81
80
|
if self.PROTECT_FROM_RATE_LIMIT:
|
|
82
81
|
time.sleep(1) # this is neccesary to avoid requests rate limit restrictions
|
|
83
|
-
self.summary =
|
|
82
|
+
self.summary = self.rest_client.search_task_summary_v2(
|
|
84
83
|
trace_id, search_task_id
|
|
85
84
|
)
|
|
86
85
|
while self.summary.status not in completed_statuses and (
|
|
87
86
|
not check_fit or "VALIDATION" not in self.summary.status
|
|
88
87
|
):
|
|
89
88
|
time.sleep(self.POLLING_DELAY_SECONDS)
|
|
90
|
-
self.summary =
|
|
89
|
+
self.summary = self.rest_client.search_task_summary_v2(
|
|
91
90
|
trace_id, search_task_id
|
|
92
91
|
)
|
|
93
92
|
if self.summary.status in failed_statuses:
|
|
@@ -103,7 +102,7 @@ class SearchTask:
|
|
|
103
102
|
except KeyboardInterrupt as e:
|
|
104
103
|
if not check_fit:
|
|
105
104
|
print(bundle.get("search_stopping"))
|
|
106
|
-
|
|
105
|
+
self.rest_client.stop_search_task_v2(trace_id, search_task_id)
|
|
107
106
|
self.logger.warning(f"Search {search_task_id} stopped by user")
|
|
108
107
|
print(bundle.get("search_stopped"))
|
|
109
108
|
raise e
|
|
@@ -131,7 +130,7 @@ class SearchTask:
|
|
|
131
130
|
for provider_summary in self.summary.initial_important_providers:
|
|
132
131
|
if provider_summary.status == "COMPLETED":
|
|
133
132
|
self.provider_metadata_v2.append(
|
|
134
|
-
|
|
133
|
+
self.rest_client.get_provider_search_metadata_v3(
|
|
135
134
|
provider_summary.ads_search_task_id, trace_id
|
|
136
135
|
)
|
|
137
136
|
)
|
|
@@ -257,8 +256,8 @@ class SearchTask:
|
|
|
257
256
|
if self.PROTECT_FROM_RATE_LIMIT:
|
|
258
257
|
time.sleep(1) # this is neccesary to avoid requests rate limit restrictions
|
|
259
258
|
return _get_all_initial_raw_features_cached(
|
|
260
|
-
self.
|
|
261
|
-
self.
|
|
259
|
+
self.rest_client._service_endpoint,
|
|
260
|
+
self.rest_client._refresh_token,
|
|
262
261
|
trace_id,
|
|
263
262
|
self.search_task_id,
|
|
264
263
|
metrics_calculation,
|
|
@@ -268,7 +267,11 @@ class SearchTask:
|
|
|
268
267
|
def get_target_outliers(self, trace_id: str) -> Optional[pd.DataFrame]:
|
|
269
268
|
self._check_finished_initial_search()
|
|
270
269
|
return _get_target_outliers_cached(
|
|
271
|
-
self.
|
|
270
|
+
self.rest_client._service_endpoint,
|
|
271
|
+
self.rest_client._refresh_token,
|
|
272
|
+
trace_id,
|
|
273
|
+
self.search_task_id,
|
|
274
|
+
self.PROTECT_FROM_RATE_LIMIT
|
|
272
275
|
)
|
|
273
276
|
|
|
274
277
|
def get_max_initial_eval_set_hit_rate_v2(self) -> Optional[Dict[int, float]]:
|
|
@@ -286,8 +289,8 @@ class SearchTask:
|
|
|
286
289
|
def get_all_validation_raw_features(self, trace_id: str, metrics_calculation=False) -> Optional[pd.DataFrame]:
|
|
287
290
|
self._check_finished_validation_search()
|
|
288
291
|
return _get_all_validation_raw_features_cached(
|
|
289
|
-
self.
|
|
290
|
-
self.
|
|
292
|
+
self.rest_client._service_endpoint,
|
|
293
|
+
self.rest_client._refresh_token,
|
|
291
294
|
trace_id,
|
|
292
295
|
self.search_task_id,
|
|
293
296
|
metrics_calculation,
|
|
@@ -295,7 +298,7 @@ class SearchTask:
|
|
|
295
298
|
)
|
|
296
299
|
|
|
297
300
|
def get_file_metadata(self, trace_id: str) -> FileMetadata:
|
|
298
|
-
return
|
|
301
|
+
return self.rest_client.get_search_file_metadata(self.search_task_id, trace_id)
|
|
299
302
|
|
|
300
303
|
|
|
301
304
|
@lru_cache()
|
|
@@ -1,13 +1,13 @@
|
|
|
1
1
|
upgini/__init__.py,sha256=asENHgEVHQBIkV-e_0IhE_ZWqkCG6398U3ZLrNzAH6k,407
|
|
2
2
|
upgini/ads.py,sha256=mre6xn44wcC_fg63iLT_kTh4mViZqR9AKRJZAtpQz8Y,2592
|
|
3
|
-
upgini/dataset.py,sha256=
|
|
3
|
+
upgini/dataset.py,sha256=y9rpNhdLU9QgfFZndrPGK-S6CL67q5ocmB9HMzwHtaA,49395
|
|
4
4
|
upgini/errors.py,sha256=BqpvfhW2jJW5fa5KXj0alhXatGl-WK4xTl309-QNLp8,959
|
|
5
|
-
upgini/features_enricher.py,sha256=
|
|
5
|
+
upgini/features_enricher.py,sha256=n2L9MWq4WoUQIzoDDECFyiuprwZslFPPhbLfpXsT3sQ,162975
|
|
6
6
|
upgini/fingerprint.js,sha256=VygVIQlN1v4NGZfjHqtRogOw8zjTnnMNJg_f7M5iGQU,33442
|
|
7
|
-
upgini/http.py,sha256=
|
|
7
|
+
upgini/http.py,sha256=xeSatYNnSBMQfGMXsER_ZvhR5zfDTY8_E1g3YpIOb38,42477
|
|
8
8
|
upgini/metadata.py,sha256=FZ5CQluLLWrfrBVThSIes1SW6wcs7n50aNZwzYnHiF0,9584
|
|
9
9
|
upgini/metrics.py,sha256=rteVPPjDFYlL5bBFVpu-YwwXQGNV1IzwT7V7L9JtjaE,23762
|
|
10
|
-
upgini/search_task.py,sha256=
|
|
10
|
+
upgini/search_task.py,sha256=nTVrb3CE4M1zfDkI-W_qVdUhsc90b98w3lo0XxegeKo,17200
|
|
11
11
|
upgini/spinner.py,sha256=yhakBaydMNS8E8TRAwTdCMdnWrHeWT0cR1M8c9hP6jA,1157
|
|
12
12
|
upgini/version_validator.py,sha256=rDIncP6BEko4J2F2hUcMOtKm_vZbI4ICWcNcw8hrwM4,1400
|
|
13
13
|
upgini/ads_management/__init__.py,sha256=qzyisOToVRP-tquAJD1PblZhNtMrOB8FiyF9JvfkvgE,50
|
|
@@ -53,8 +53,8 @@ upgini/utils/sklearn_ext.py,sha256=IMx2La70AXAggApVpT7sMEjWqVWon5AMZt4MARDsIMQ,4
|
|
|
53
53
|
upgini/utils/target_utils.py,sha256=_VjYUm4ECXbgNvxNupr982fuOK_jtkg-8Xw7-zJBz2w,1708
|
|
54
54
|
upgini/utils/track_info.py,sha256=EPcJ13Jqa17_T0JjM37Ac9kWDz5Zk0GVsIZKutOb8aU,5207
|
|
55
55
|
upgini/utils/warning_counter.py,sha256=vnmdFo5-7GBkU2bK9h_uC0K0Y_wtfcYstxOdeRfacO0,228
|
|
56
|
-
upgini-1.1.
|
|
57
|
-
upgini-1.1.
|
|
58
|
-
upgini-1.1.
|
|
59
|
-
upgini-1.1.
|
|
60
|
-
upgini-1.1.
|
|
56
|
+
upgini-1.1.242.dist-info/LICENSE,sha256=5RRzgvdJUu3BUDfv4bzVU6FqKgwHlIay63pPCSmSgzw,1514
|
|
57
|
+
upgini-1.1.242.dist-info/METADATA,sha256=FwVINjwPmABqlcahJ70lv1hjpyDTH7bt3CGKGZmBHE0,48262
|
|
58
|
+
upgini-1.1.242.dist-info/WHEEL,sha256=G16H4A3IeoQmnOrYV4ueZGKSjhipXx8zc8nu9FGlvMA,92
|
|
59
|
+
upgini-1.1.242.dist-info/top_level.txt,sha256=OFhTGiDIWKl5gFI49qvWq1R9IKflPaE2PekcbDXDtx4,7
|
|
60
|
+
upgini-1.1.242.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|