upgini 1.1.242a2__py3-none-any.whl → 1.1.243__py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of upgini might be problematic. Click here for more details.

upgini/dataset.py CHANGED
@@ -20,7 +20,7 @@ from pandas.api.types import (
20
20
  from pandas.core.dtypes.common import is_period_dtype
21
21
 
22
22
  from upgini.errors import ValidationError
23
- from upgini.http import ProgressStage, SearchProgress, get_rest_client
23
+ from upgini.http import ProgressStage, SearchProgress, _RestClient
24
24
  from upgini.metadata import (
25
25
  EVAL_SET_INDEX,
26
26
  SYSTEM_COLUMNS,
@@ -78,8 +78,7 @@ class Dataset: # (pd.DataFrame):
78
78
  search_keys: Optional[List[Tuple[str, ...]]] = None,
79
79
  model_task_type: Optional[ModelTaskType] = None,
80
80
  random_state: Optional[int] = None,
81
- endpoint: Optional[str] = None,
82
- api_key: Optional[str] = None,
81
+ rest_client: Optional[_RestClient] = None,
83
82
  logger: Optional[logging.Logger] = None,
84
83
  warning_counter: Optional[WarningCounter] = None,
85
84
  **kwargs,
@@ -114,8 +113,7 @@ class Dataset: # (pd.DataFrame):
114
113
  self.hierarchical_subgroup_keys = []
115
114
  self.file_upload_id: Optional[str] = None
116
115
  self.etalon_def: Optional[Dict[str, str]] = None
117
- self.endpoint = endpoint
118
- self.api_key = api_key
116
+ self.rest_client = rest_client
119
117
  self.random_state = random_state
120
118
  self.columns_renaming: Dict[str, str] = {}
121
119
  self.imbalanced: bool = False
@@ -983,10 +981,10 @@ class Dataset: # (pd.DataFrame):
983
981
  runtime_parameters=runtime_parameters,
984
982
  )
985
983
 
986
- if self.file_upload_id is not None and get_rest_client(self.endpoint, self.api_key).check_uploaded_file_v2(
984
+ if self.file_upload_id is not None and self.rest_client.check_uploaded_file_v2(
987
985
  trace_id, self.file_upload_id, file_metadata
988
986
  ):
989
- search_task_response = get_rest_client(self.endpoint, self.api_key).initial_search_without_upload_v2(
987
+ search_task_response = self.rest_client.initial_search_without_upload_v2(
990
988
  trace_id, self.file_upload_id, file_metadata, file_metrics, search_customization
991
989
  )
992
990
  else:
@@ -999,7 +997,7 @@ class Dataset: # (pd.DataFrame):
999
997
  progress_bar.progress = search_progress.to_progress_bar()
1000
998
  if progress_callback is not None:
1001
999
  progress_callback(search_progress)
1002
- search_task_response = get_rest_client(self.endpoint, self.api_key).initial_search_v2(
1000
+ search_task_response = self.rest_client.initial_search_v2(
1003
1001
  trace_id, parquet_file_path, file_metadata, file_metrics, search_customization
1004
1002
  )
1005
1003
  # if progress_bar is not None:
@@ -1015,8 +1013,7 @@ class Dataset: # (pd.DataFrame):
1015
1013
  extract_features,
1016
1014
  accurate_model,
1017
1015
  task_type=self.task_type,
1018
- endpoint=self.endpoint,
1019
- api_key=self.api_key,
1016
+ rest_client=self.rest_client,
1020
1017
  logger=self.logger,
1021
1018
  )
1022
1019
 
@@ -1053,10 +1050,10 @@ class Dataset: # (pd.DataFrame):
1053
1050
  progress_bar.progress = search_progress.to_progress_bar()
1054
1051
  if progress_callback is not None:
1055
1052
  progress_callback(search_progress)
1056
- if self.file_upload_id is not None and get_rest_client(self.endpoint, self.api_key).check_uploaded_file_v2(
1053
+ if self.file_upload_id is not None and self.rest_client.check_uploaded_file_v2(
1057
1054
  trace_id, self.file_upload_id, file_metadata
1058
1055
  ):
1059
- search_task_response = get_rest_client(self.endpoint, self.api_key).validation_search_without_upload_v2(
1056
+ search_task_response = self.rest_client.validation_search_without_upload_v2(
1060
1057
  trace_id, self.file_upload_id, initial_search_task_id, file_metadata, file_metrics, search_customization
1061
1058
  )
1062
1059
  else:
@@ -1065,7 +1062,7 @@ class Dataset: # (pd.DataFrame):
1065
1062
  # To avoid rate limit
1066
1063
  time.sleep(1)
1067
1064
 
1068
- search_task_response = get_rest_client(self.endpoint, self.api_key).validation_search_v2(
1065
+ search_task_response = self.rest_client.validation_search_v2(
1069
1066
  trace_id,
1070
1067
  parquet_file_path,
1071
1068
  initial_search_task_id,
@@ -1085,8 +1082,7 @@ class Dataset: # (pd.DataFrame):
1085
1082
  return_scores,
1086
1083
  extract_features,
1087
1084
  initial_search_task_id=initial_search_task_id,
1088
- endpoint=self.endpoint,
1089
- api_key=self.api_key,
1085
+ rest_client=self.rest_client,
1090
1086
  logger=self.logger,
1091
1087
  )
1092
1088
 
@@ -233,7 +233,7 @@ class FeaturesEnricher(TransformerMixin):
233
233
  self.feature_importances_ = []
234
234
  self.search_id = search_id
235
235
  if search_id:
236
- search_task = SearchTask(search_id, endpoint=self.endpoint, api_key=self._api_key, logger=self.logger)
236
+ search_task = SearchTask(search_id, rest_client=self.rest_client, logger=self.logger)
237
237
 
238
238
  print(bundle.get("search_by_task_id_start"))
239
239
  trace_id = str(uuid.uuid4())
@@ -1817,10 +1817,9 @@ class FeaturesEnricher(TransformerMixin):
1817
1817
 
1818
1818
  dataset = Dataset(
1819
1819
  "sample_" + str(uuid.uuid4()),
1820
- df=df_without_features, # type: ignore
1821
- endpoint=self.endpoint, # type: ignore
1822
- api_key=self.api_key, # type: ignore
1823
- date_format=self.date_format, # type: ignore
1820
+ df=df_without_features,
1821
+ date_format=self.date_format,
1822
+ rest_client=self.rest_client,
1824
1823
  logger=self.logger,
1825
1824
  )
1826
1825
  dataset.meaning_types = meaning_types
@@ -2150,11 +2149,10 @@ class FeaturesEnricher(TransformerMixin):
2150
2149
  dataset = Dataset(
2151
2150
  "tds_" + str(uuid.uuid4()),
2152
2151
  df=df, # type: ignore
2153
- model_task_type=model_task_type, # type: ignore
2154
- endpoint=self.endpoint, # type: ignore
2155
- api_key=self.api_key, # type: ignore
2156
- date_format=self.date_format, # type: ignore
2157
- random_state=self.random_state, # type: ignore
2152
+ model_task_type=model_task_type,
2153
+ date_format=self.date_format,
2154
+ random_state=self.random_state,
2155
+ rest_client=self.rest_client,
2158
2156
  logger=self.logger,
2159
2157
  )
2160
2158
  dataset.meaning_types = meaning_types
upgini/http.py CHANGED
@@ -308,7 +308,6 @@ class _RestClient:
308
308
  # self.silent_mode = silent_mode
309
309
  self.client_ip = client_ip
310
310
  self.client_visitorid = client_visitorid
311
- print(f"Created RestClient with {client_ip} and {client_visitorid}")
312
311
  self._access_token = self._refresh_access_token()
313
312
  # self._access_token: Optional[str] = None # self._refresh_access_token()
314
313
  self.last_refresh_time = time.time()
@@ -442,9 +441,7 @@ class _RestClient:
442
441
  ) -> SearchTaskResponse:
443
442
  api_path = self.INITIAL_SEARCH_URI_FMT_V2
444
443
 
445
- print(f"Start initial search with {self.client_ip} and {self.client_visitorid}")
446
444
  track_metrics = get_track_metrics(self.client_ip, self.client_visitorid)
447
- print(f"Sending track metrics: {track_metrics}")
448
445
 
449
446
  def open_and_send():
450
447
  md5_hash = hashlib.md5()
@@ -486,7 +483,7 @@ class _RestClient:
486
483
  api_path, files, trace_id=trace_id, additional_headers=additional_headers
487
484
  )
488
485
 
489
- response = self._with_unauth_retry(lambda: open_and_send())
486
+ response = self._with_unauth_retry(open_and_send)
490
487
  return SearchTaskResponse(response)
491
488
 
492
489
  def check_uploaded_file_v2(self, trace_id: str, file_upload_id: str, metadata: FileMetadata) -> bool:
@@ -571,7 +568,7 @@ class _RestClient:
571
568
  api_path, files, trace_id=trace_id, additional_headers=additional_headers
572
569
  )
573
570
 
574
- response = self._with_unauth_retry(lambda: open_and_send())
571
+ response = self._with_unauth_retry(open_and_send)
575
572
  return SearchTaskResponse(response)
576
573
 
577
574
  def validation_search_without_upload_v2(
upgini/search_task.py CHANGED
@@ -8,6 +8,7 @@ import pandas as pd
8
8
 
9
9
  from upgini import dataset
10
10
  from upgini.http import (
11
+ _RestClient,
11
12
  ProviderTaskSummary,
12
13
  SearchProgress,
13
14
  SearchTaskSummary,
@@ -41,8 +42,7 @@ class SearchTask:
41
42
  accurate_model: bool = False,
42
43
  initial_search_task_id: Optional[str] = None,
43
44
  task_type: Optional[ModelTaskType] = None,
44
- endpoint: Optional[str] = None,
45
- api_key: Optional[str] = None,
45
+ rest_client: Optional[_RestClient] = None,
46
46
  logger: Optional[logging.Logger] = None,
47
47
  ):
48
48
  self.search_task_id = search_task_id
@@ -53,8 +53,7 @@ class SearchTask:
53
53
  self.accurate_model = accurate_model
54
54
  self.task_type = task_type
55
55
  self.summary = None
56
- self.endpoint = endpoint
57
- self.api_key = api_key
56
+ self.rest_client = rest_client
58
57
  if logger is not None:
59
58
  self.logger = logger
60
59
  else:
@@ -64,7 +63,7 @@ class SearchTask:
64
63
  self.unused_features_for_generation: Optional[List[str]] = None
65
64
 
66
65
  def get_progress(self, trace_id: str) -> SearchProgress:
67
- return get_rest_client(self.endpoint, self.api_key).get_search_progress(trace_id, self.search_task_id)
66
+ return self.rest_client.get_search_progress(trace_id, self.search_task_id)
68
67
 
69
68
  def poll_result(self, trace_id: str, quiet: bool = False, check_fit: bool = False) -> "SearchTask":
70
69
  completed_statuses = {"COMPLETED", "VALIDATION_COMPLETED"}
@@ -72,7 +71,7 @@ class SearchTask:
72
71
  submitted_statuses = {"SUBMITTED", "VALIDATION_SUBMITTED"}
73
72
  if not quiet:
74
73
  print(bundle.get("polling_search_task").format(self.search_task_id))
75
- if is_demo_api_key(self.api_key):
74
+ if is_demo_api_key(self.rest_client._refresh_token):
76
75
  print(bundle.get("polling_unregister_information"))
77
76
  search_task_id = self.initial_search_task_id if self.initial_search_task_id is not None else self.search_task_id
78
77
 
@@ -80,14 +79,14 @@ class SearchTask:
80
79
  with Spinner():
81
80
  if self.PROTECT_FROM_RATE_LIMIT:
82
81
  time.sleep(1) # this is necessary to avoid requests rate limit restrictions
83
- self.summary = get_rest_client(self.endpoint, self.api_key).search_task_summary_v2(
82
+ self.summary = self.rest_client.search_task_summary_v2(
84
83
  trace_id, search_task_id
85
84
  )
86
85
  while self.summary.status not in completed_statuses and (
87
86
  not check_fit or "VALIDATION" not in self.summary.status
88
87
  ):
89
88
  time.sleep(self.POLLING_DELAY_SECONDS)
90
- self.summary = get_rest_client(self.endpoint, self.api_key).search_task_summary_v2(
89
+ self.summary = self.rest_client.search_task_summary_v2(
91
90
  trace_id, search_task_id
92
91
  )
93
92
  if self.summary.status in failed_statuses:
@@ -103,7 +102,7 @@ class SearchTask:
103
102
  except KeyboardInterrupt as e:
104
103
  if not check_fit:
105
104
  print(bundle.get("search_stopping"))
106
- get_rest_client(self.endpoint, self.api_key).stop_search_task_v2(trace_id, search_task_id)
105
+ self.rest_client.stop_search_task_v2(trace_id, search_task_id)
107
106
  self.logger.warning(f"Search {search_task_id} stopped by user")
108
107
  print(bundle.get("search_stopped"))
109
108
  raise e
@@ -131,7 +130,7 @@ class SearchTask:
131
130
  for provider_summary in self.summary.initial_important_providers:
132
131
  if provider_summary.status == "COMPLETED":
133
132
  self.provider_metadata_v2.append(
134
- get_rest_client(self.endpoint, self.api_key).get_provider_search_metadata_v3(
133
+ self.rest_client.get_provider_search_metadata_v3(
135
134
  provider_summary.ads_search_task_id, trace_id
136
135
  )
137
136
  )
@@ -257,8 +256,8 @@ class SearchTask:
257
256
  if self.PROTECT_FROM_RATE_LIMIT:
258
257
  time.sleep(1) # this is necessary to avoid requests rate limit restrictions
259
258
  return _get_all_initial_raw_features_cached(
260
- self.endpoint,
261
- self.api_key,
259
+ self.rest_client._service_endpoint,
260
+ self.rest_client._refresh_token,
262
261
  trace_id,
263
262
  self.search_task_id,
264
263
  metrics_calculation,
@@ -268,7 +267,11 @@ class SearchTask:
268
267
  def get_target_outliers(self, trace_id: str) -> Optional[pd.DataFrame]:
269
268
  self._check_finished_initial_search()
270
269
  return _get_target_outliers_cached(
271
- self.endpoint, self.api_key, trace_id, self.search_task_id, self.PROTECT_FROM_RATE_LIMIT
270
+ self.rest_client._service_endpoint,
271
+ self.rest_client._refresh_token,
272
+ trace_id,
273
+ self.search_task_id,
274
+ self.PROTECT_FROM_RATE_LIMIT
272
275
  )
273
276
 
274
277
  def get_max_initial_eval_set_hit_rate_v2(self) -> Optional[Dict[int, float]]:
@@ -286,8 +289,8 @@ class SearchTask:
286
289
  def get_all_validation_raw_features(self, trace_id: str, metrics_calculation=False) -> Optional[pd.DataFrame]:
287
290
  self._check_finished_validation_search()
288
291
  return _get_all_validation_raw_features_cached(
289
- self.endpoint,
290
- self.api_key,
292
+ self.rest_client._service_endpoint,
293
+ self.rest_client._refresh_token,
291
294
  trace_id,
292
295
  self.search_task_id,
293
296
  metrics_calculation,
@@ -295,7 +298,7 @@ class SearchTask:
295
298
  )
296
299
 
297
300
  def get_file_metadata(self, trace_id: str) -> FileMetadata:
298
- return get_rest_client(self.endpoint, self.api_key).get_search_file_metadata(self.search_task_id, trace_id)
301
+ return self.rest_client.get_search_file_metadata(self.search_task_id, trace_id)
299
302
 
300
303
 
301
304
  @lru_cache()
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: upgini
3
- Version: 1.1.242a2
3
+ Version: 1.1.243
4
4
  Summary: Intelligent data search & enrichment for Machine Learning
5
5
  Home-page: https://upgini.com/
6
6
  Author: Upgini Developers
@@ -1,13 +1,13 @@
1
1
  upgini/__init__.py,sha256=asENHgEVHQBIkV-e_0IhE_ZWqkCG6398U3ZLrNzAH6k,407
2
2
  upgini/ads.py,sha256=mre6xn44wcC_fg63iLT_kTh4mViZqR9AKRJZAtpQz8Y,2592
3
- upgini/dataset.py,sha256=qSjv09LKzCYayucb_JlhExw9uSRcscLWTaD8hqATE3s,49676
3
+ upgini/dataset.py,sha256=y9rpNhdLU9QgfFZndrPGK-S6CL67q5ocmB9HMzwHtaA,49395
4
4
  upgini/errors.py,sha256=BqpvfhW2jJW5fa5KXj0alhXatGl-WK4xTl309-QNLp8,959
5
- upgini/features_enricher.py,sha256=kAZqLVG7hxIROp0l_b30n7QG4EUhqt4zbx7wHPHCNug,163196
5
+ upgini/features_enricher.py,sha256=n2L9MWq4WoUQIzoDDECFyiuprwZslFPPhbLfpXsT3sQ,162975
6
6
  upgini/fingerprint.js,sha256=VygVIQlN1v4NGZfjHqtRogOw8zjTnnMNJg_f7M5iGQU,33442
7
- upgini/http.py,sha256=xeSatYNnSBMQfGMXsER_ZvhR5zfDTY8_E1g3YpIOb38,42477
7
+ upgini/http.py,sha256=jsAYvipo6jeN3qsZeSuFkIe4BigGiOM813G5PXWVvb0,42234
8
8
  upgini/metadata.py,sha256=FZ5CQluLLWrfrBVThSIes1SW6wcs7n50aNZwzYnHiF0,9584
9
9
  upgini/metrics.py,sha256=rteVPPjDFYlL5bBFVpu-YwwXQGNV1IzwT7V7L9JtjaE,23762
10
- upgini/search_task.py,sha256=C6nqi6eG8pHqrTByC_iNWF1ZCJXLfP0hXgj56V7oEd4,17217
10
+ upgini/search_task.py,sha256=nTVrb3CE4M1zfDkI-W_qVdUhsc90b98w3lo0XxegeKo,17200
11
11
  upgini/spinner.py,sha256=yhakBaydMNS8E8TRAwTdCMdnWrHeWT0cR1M8c9hP6jA,1157
12
12
  upgini/version_validator.py,sha256=rDIncP6BEko4J2F2hUcMOtKm_vZbI4ICWcNcw8hrwM4,1400
13
13
  upgini/ads_management/__init__.py,sha256=qzyisOToVRP-tquAJD1PblZhNtMrOB8FiyF9JvfkvgE,50
@@ -53,8 +53,8 @@ upgini/utils/sklearn_ext.py,sha256=IMx2La70AXAggApVpT7sMEjWqVWon5AMZt4MARDsIMQ,4
53
53
  upgini/utils/target_utils.py,sha256=_VjYUm4ECXbgNvxNupr982fuOK_jtkg-8Xw7-zJBz2w,1708
54
54
  upgini/utils/track_info.py,sha256=EPcJ13Jqa17_T0JjM37Ac9kWDz5Zk0GVsIZKutOb8aU,5207
55
55
  upgini/utils/warning_counter.py,sha256=vnmdFo5-7GBkU2bK9h_uC0K0Y_wtfcYstxOdeRfacO0,228
56
- upgini-1.1.242a2.dist-info/LICENSE,sha256=5RRzgvdJUu3BUDfv4bzVU6FqKgwHlIay63pPCSmSgzw,1514
57
- upgini-1.1.242a2.dist-info/METADATA,sha256=IfEZQIEGTjE4Fze8z-b_kw1RnWCGE1XABgsea-Fiz28,48264
58
- upgini-1.1.242a2.dist-info/WHEEL,sha256=G16H4A3IeoQmnOrYV4ueZGKSjhipXx8zc8nu9FGlvMA,92
59
- upgini-1.1.242a2.dist-info/top_level.txt,sha256=OFhTGiDIWKl5gFI49qvWq1R9IKflPaE2PekcbDXDtx4,7
60
- upgini-1.1.242a2.dist-info/RECORD,,
56
+ upgini-1.1.243.dist-info/LICENSE,sha256=5RRzgvdJUu3BUDfv4bzVU6FqKgwHlIay63pPCSmSgzw,1514
57
+ upgini-1.1.243.dist-info/METADATA,sha256=-4iFDXdoXZA9bcQArbodiC9TcvdKDT9Qc6GFZi7dNYc,48262
58
+ upgini-1.1.243.dist-info/WHEEL,sha256=G16H4A3IeoQmnOrYV4ueZGKSjhipXx8zc8nu9FGlvMA,92
59
+ upgini-1.1.243.dist-info/top_level.txt,sha256=OFhTGiDIWKl5gFI49qvWq1R9IKflPaE2PekcbDXDtx4,7
60
+ upgini-1.1.243.dist-info/RECORD,,