upgini 1.1.242a2__tar.gz → 1.1.242a3__tar.gz
This diff shows the changes between two publicly available versions of the package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
Potentially problematic release.
This version of upgini might be problematic. Click here for more details.
- {upgini-1.1.242a2/src/upgini.egg-info → upgini-1.1.242a3}/PKG-INFO +1 -1
- {upgini-1.1.242a2 → upgini-1.1.242a3}/setup.py +1 -1
- {upgini-1.1.242a2 → upgini-1.1.242a3}/src/upgini/dataset.py +11 -15
- {upgini-1.1.242a2 → upgini-1.1.242a3}/src/upgini/features_enricher.py +8 -10
- {upgini-1.1.242a2 → upgini-1.1.242a3}/src/upgini/search_task.py +19 -16
- {upgini-1.1.242a2 → upgini-1.1.242a3/src/upgini.egg-info}/PKG-INFO +1 -1
- {upgini-1.1.242a2 → upgini-1.1.242a3}/tests/test_etalon_validation.py +4 -4
- {upgini-1.1.242a2 → upgini-1.1.242a3}/tests/test_features_enricher.py +3 -3
- {upgini-1.1.242a2 → upgini-1.1.242a3}/LICENSE +0 -0
- {upgini-1.1.242a2 → upgini-1.1.242a3}/README.md +0 -0
- {upgini-1.1.242a2 → upgini-1.1.242a3}/pyproject.toml +0 -0
- {upgini-1.1.242a2 → upgini-1.1.242a3}/setup.cfg +0 -0
- {upgini-1.1.242a2 → upgini-1.1.242a3}/src/upgini/__init__.py +0 -0
- {upgini-1.1.242a2 → upgini-1.1.242a3}/src/upgini/ads.py +0 -0
- {upgini-1.1.242a2 → upgini-1.1.242a3}/src/upgini/ads_management/__init__.py +0 -0
- {upgini-1.1.242a2 → upgini-1.1.242a3}/src/upgini/ads_management/ads_manager.py +0 -0
- {upgini-1.1.242a2 → upgini-1.1.242a3}/src/upgini/autofe/__init__.py +0 -0
- {upgini-1.1.242a2 → upgini-1.1.242a3}/src/upgini/autofe/all_operands.py +0 -0
- {upgini-1.1.242a2 → upgini-1.1.242a3}/src/upgini/autofe/binary.py +0 -0
- {upgini-1.1.242a2 → upgini-1.1.242a3}/src/upgini/autofe/feature.py +0 -0
- {upgini-1.1.242a2 → upgini-1.1.242a3}/src/upgini/autofe/groupby.py +0 -0
- {upgini-1.1.242a2 → upgini-1.1.242a3}/src/upgini/autofe/operand.py +0 -0
- {upgini-1.1.242a2 → upgini-1.1.242a3}/src/upgini/autofe/unary.py +0 -0
- {upgini-1.1.242a2 → upgini-1.1.242a3}/src/upgini/autofe/vector.py +0 -0
- {upgini-1.1.242a2 → upgini-1.1.242a3}/src/upgini/data_source/__init__.py +0 -0
- {upgini-1.1.242a2 → upgini-1.1.242a3}/src/upgini/data_source/data_source_publisher.py +0 -0
- {upgini-1.1.242a2 → upgini-1.1.242a3}/src/upgini/errors.py +0 -0
- {upgini-1.1.242a2 → upgini-1.1.242a3}/src/upgini/fingerprint.js +0 -0
- {upgini-1.1.242a2 → upgini-1.1.242a3}/src/upgini/http.py +0 -0
- {upgini-1.1.242a2 → upgini-1.1.242a3}/src/upgini/mdc/__init__.py +0 -0
- {upgini-1.1.242a2 → upgini-1.1.242a3}/src/upgini/mdc/context.py +0 -0
- {upgini-1.1.242a2 → upgini-1.1.242a3}/src/upgini/metadata.py +0 -0
- {upgini-1.1.242a2 → upgini-1.1.242a3}/src/upgini/metrics.py +0 -0
- {upgini-1.1.242a2 → upgini-1.1.242a3}/src/upgini/normalizer/__init__.py +0 -0
- {upgini-1.1.242a2 → upgini-1.1.242a3}/src/upgini/normalizer/phone_normalizer.py +0 -0
- {upgini-1.1.242a2 → upgini-1.1.242a3}/src/upgini/resource_bundle/__init__.py +0 -0
- {upgini-1.1.242a2 → upgini-1.1.242a3}/src/upgini/resource_bundle/exceptions.py +0 -0
- {upgini-1.1.242a2 → upgini-1.1.242a3}/src/upgini/resource_bundle/strings.properties +0 -0
- {upgini-1.1.242a2 → upgini-1.1.242a3}/src/upgini/sampler/__init__.py +0 -0
- {upgini-1.1.242a2 → upgini-1.1.242a3}/src/upgini/sampler/base.py +0 -0
- {upgini-1.1.242a2 → upgini-1.1.242a3}/src/upgini/sampler/random_under_sampler.py +0 -0
- {upgini-1.1.242a2 → upgini-1.1.242a3}/src/upgini/sampler/utils.py +0 -0
- {upgini-1.1.242a2 → upgini-1.1.242a3}/src/upgini/spinner.py +0 -0
- {upgini-1.1.242a2 → upgini-1.1.242a3}/src/upgini/utils/__init__.py +0 -0
- {upgini-1.1.242a2 → upgini-1.1.242a3}/src/upgini/utils/base_search_key_detector.py +0 -0
- {upgini-1.1.242a2 → upgini-1.1.242a3}/src/upgini/utils/blocked_time_series.py +0 -0
- {upgini-1.1.242a2 → upgini-1.1.242a3}/src/upgini/utils/country_utils.py +0 -0
- {upgini-1.1.242a2 → upgini-1.1.242a3}/src/upgini/utils/custom_loss_utils.py +0 -0
- {upgini-1.1.242a2 → upgini-1.1.242a3}/src/upgini/utils/cv_utils.py +0 -0
- {upgini-1.1.242a2 → upgini-1.1.242a3}/src/upgini/utils/datetime_utils.py +0 -0
- {upgini-1.1.242a2 → upgini-1.1.242a3}/src/upgini/utils/display_utils.py +0 -0
- {upgini-1.1.242a2 → upgini-1.1.242a3}/src/upgini/utils/email_utils.py +0 -0
- {upgini-1.1.242a2 → upgini-1.1.242a3}/src/upgini/utils/fallback_progress_bar.py +0 -0
- {upgini-1.1.242a2 → upgini-1.1.242a3}/src/upgini/utils/features_validator.py +0 -0
- {upgini-1.1.242a2 → upgini-1.1.242a3}/src/upgini/utils/format.py +0 -0
- {upgini-1.1.242a2 → upgini-1.1.242a3}/src/upgini/utils/ip_utils.py +0 -0
- {upgini-1.1.242a2 → upgini-1.1.242a3}/src/upgini/utils/phone_utils.py +0 -0
- {upgini-1.1.242a2 → upgini-1.1.242a3}/src/upgini/utils/postal_code_utils.py +0 -0
- {upgini-1.1.242a2 → upgini-1.1.242a3}/src/upgini/utils/progress_bar.py +0 -0
- {upgini-1.1.242a2 → upgini-1.1.242a3}/src/upgini/utils/sklearn_ext.py +0 -0
- {upgini-1.1.242a2 → upgini-1.1.242a3}/src/upgini/utils/target_utils.py +0 -0
- {upgini-1.1.242a2 → upgini-1.1.242a3}/src/upgini/utils/track_info.py +0 -0
- {upgini-1.1.242a2 → upgini-1.1.242a3}/src/upgini/utils/warning_counter.py +0 -0
- {upgini-1.1.242a2 → upgini-1.1.242a3}/src/upgini/version_validator.py +0 -0
- {upgini-1.1.242a2 → upgini-1.1.242a3}/src/upgini.egg-info/SOURCES.txt +0 -0
- {upgini-1.1.242a2 → upgini-1.1.242a3}/src/upgini.egg-info/dependency_links.txt +0 -0
- {upgini-1.1.242a2 → upgini-1.1.242a3}/src/upgini.egg-info/requires.txt +0 -0
- {upgini-1.1.242a2 → upgini-1.1.242a3}/src/upgini.egg-info/top_level.txt +0 -0
- {upgini-1.1.242a2 → upgini-1.1.242a3}/tests/test_binary_dataset.py +0 -0
- {upgini-1.1.242a2 → upgini-1.1.242a3}/tests/test_blocked_time_series.py +0 -0
- {upgini-1.1.242a2 → upgini-1.1.242a3}/tests/test_categorical_dataset.py +0 -0
- {upgini-1.1.242a2 → upgini-1.1.242a3}/tests/test_continuous_dataset.py +0 -0
- {upgini-1.1.242a2 → upgini-1.1.242a3}/tests/test_country_utils.py +0 -0
- {upgini-1.1.242a2 → upgini-1.1.242a3}/tests/test_custom_loss_utils.py +0 -0
- {upgini-1.1.242a2 → upgini-1.1.242a3}/tests/test_datetime_utils.py +0 -0
- {upgini-1.1.242a2 → upgini-1.1.242a3}/tests/test_email_utils.py +0 -0
- {upgini-1.1.242a2 → upgini-1.1.242a3}/tests/test_metrics.py +0 -0
- {upgini-1.1.242a2 → upgini-1.1.242a3}/tests/test_phone_utils.py +0 -0
- {upgini-1.1.242a2 → upgini-1.1.242a3}/tests/test_postal_code_utils.py +0 -0
- {upgini-1.1.242a2 → upgini-1.1.242a3}/tests/test_widget.py +0 -0
|
@@ -20,7 +20,7 @@ from pandas.api.types import (
|
|
|
20
20
|
from pandas.core.dtypes.common import is_period_dtype
|
|
21
21
|
|
|
22
22
|
from upgini.errors import ValidationError
|
|
23
|
-
from upgini.http import ProgressStage, SearchProgress,
|
|
23
|
+
from upgini.http import ProgressStage, SearchProgress, _RestClient
|
|
24
24
|
from upgini.metadata import (
|
|
25
25
|
EVAL_SET_INDEX,
|
|
26
26
|
SYSTEM_COLUMNS,
|
|
@@ -78,8 +78,7 @@ class Dataset: # (pd.DataFrame):
|
|
|
78
78
|
search_keys: Optional[List[Tuple[str, ...]]] = None,
|
|
79
79
|
model_task_type: Optional[ModelTaskType] = None,
|
|
80
80
|
random_state: Optional[int] = None,
|
|
81
|
-
|
|
82
|
-
api_key: Optional[str] = None,
|
|
81
|
+
rest_client: Optional[_RestClient] = None,
|
|
83
82
|
logger: Optional[logging.Logger] = None,
|
|
84
83
|
warning_counter: Optional[WarningCounter] = None,
|
|
85
84
|
**kwargs,
|
|
@@ -114,8 +113,7 @@ class Dataset: # (pd.DataFrame):
|
|
|
114
113
|
self.hierarchical_subgroup_keys = []
|
|
115
114
|
self.file_upload_id: Optional[str] = None
|
|
116
115
|
self.etalon_def: Optional[Dict[str, str]] = None
|
|
117
|
-
self.
|
|
118
|
-
self.api_key = api_key
|
|
116
|
+
self.rest_client = rest_client
|
|
119
117
|
self.random_state = random_state
|
|
120
118
|
self.columns_renaming: Dict[str, str] = {}
|
|
121
119
|
self.imbalanced: bool = False
|
|
@@ -983,10 +981,10 @@ class Dataset: # (pd.DataFrame):
|
|
|
983
981
|
runtime_parameters=runtime_parameters,
|
|
984
982
|
)
|
|
985
983
|
|
|
986
|
-
if self.file_upload_id is not None and
|
|
984
|
+
if self.file_upload_id is not None and self.rest_client.check_uploaded_file_v2(
|
|
987
985
|
trace_id, self.file_upload_id, file_metadata
|
|
988
986
|
):
|
|
989
|
-
search_task_response =
|
|
987
|
+
search_task_response = self.rest_client.initial_search_without_upload_v2(
|
|
990
988
|
trace_id, self.file_upload_id, file_metadata, file_metrics, search_customization
|
|
991
989
|
)
|
|
992
990
|
else:
|
|
@@ -999,7 +997,7 @@ class Dataset: # (pd.DataFrame):
|
|
|
999
997
|
progress_bar.progress = search_progress.to_progress_bar()
|
|
1000
998
|
if progress_callback is not None:
|
|
1001
999
|
progress_callback(search_progress)
|
|
1002
|
-
search_task_response =
|
|
1000
|
+
search_task_response = self.rest_client.initial_search_v2(
|
|
1003
1001
|
trace_id, parquet_file_path, file_metadata, file_metrics, search_customization
|
|
1004
1002
|
)
|
|
1005
1003
|
# if progress_bar is not None:
|
|
@@ -1015,8 +1013,7 @@ class Dataset: # (pd.DataFrame):
|
|
|
1015
1013
|
extract_features,
|
|
1016
1014
|
accurate_model,
|
|
1017
1015
|
task_type=self.task_type,
|
|
1018
|
-
|
|
1019
|
-
api_key=self.api_key,
|
|
1016
|
+
rest_client=self.rest_client,
|
|
1020
1017
|
logger=self.logger,
|
|
1021
1018
|
)
|
|
1022
1019
|
|
|
@@ -1053,10 +1050,10 @@ class Dataset: # (pd.DataFrame):
|
|
|
1053
1050
|
progress_bar.progress = search_progress.to_progress_bar()
|
|
1054
1051
|
if progress_callback is not None:
|
|
1055
1052
|
progress_callback(search_progress)
|
|
1056
|
-
if self.file_upload_id is not None and
|
|
1053
|
+
if self.file_upload_id is not None and self.rest_client.check_uploaded_file_v2(
|
|
1057
1054
|
trace_id, self.file_upload_id, file_metadata
|
|
1058
1055
|
):
|
|
1059
|
-
search_task_response =
|
|
1056
|
+
search_task_response = self.rest_client.validation_search_without_upload_v2(
|
|
1060
1057
|
trace_id, self.file_upload_id, initial_search_task_id, file_metadata, file_metrics, search_customization
|
|
1061
1058
|
)
|
|
1062
1059
|
else:
|
|
@@ -1065,7 +1062,7 @@ class Dataset: # (pd.DataFrame):
|
|
|
1065
1062
|
# To avoid rate limit
|
|
1066
1063
|
time.sleep(1)
|
|
1067
1064
|
|
|
1068
|
-
search_task_response =
|
|
1065
|
+
search_task_response = self.rest_client.validation_search_v2(
|
|
1069
1066
|
trace_id,
|
|
1070
1067
|
parquet_file_path,
|
|
1071
1068
|
initial_search_task_id,
|
|
@@ -1085,8 +1082,7 @@ class Dataset: # (pd.DataFrame):
|
|
|
1085
1082
|
return_scores,
|
|
1086
1083
|
extract_features,
|
|
1087
1084
|
initial_search_task_id=initial_search_task_id,
|
|
1088
|
-
|
|
1089
|
-
api_key=self.api_key,
|
|
1085
|
+
rest_client=self.rest_client,
|
|
1090
1086
|
logger=self.logger,
|
|
1091
1087
|
)
|
|
1092
1088
|
|
|
@@ -233,7 +233,7 @@ class FeaturesEnricher(TransformerMixin):
|
|
|
233
233
|
self.feature_importances_ = []
|
|
234
234
|
self.search_id = search_id
|
|
235
235
|
if search_id:
|
|
236
|
-
search_task = SearchTask(search_id,
|
|
236
|
+
search_task = SearchTask(search_id, rest_client=self.rest_client, logger=self.logger)
|
|
237
237
|
|
|
238
238
|
print(bundle.get("search_by_task_id_start"))
|
|
239
239
|
trace_id = str(uuid.uuid4())
|
|
@@ -1817,10 +1817,9 @@ class FeaturesEnricher(TransformerMixin):
|
|
|
1817
1817
|
|
|
1818
1818
|
dataset = Dataset(
|
|
1819
1819
|
"sample_" + str(uuid.uuid4()),
|
|
1820
|
-
df=df_without_features,
|
|
1821
|
-
|
|
1822
|
-
|
|
1823
|
-
date_format=self.date_format, # type: ignore
|
|
1820
|
+
df=df_without_features,
|
|
1821
|
+
date_format=self.date_format,
|
|
1822
|
+
rest_client=self.rest_client,
|
|
1824
1823
|
logger=self.logger,
|
|
1825
1824
|
)
|
|
1826
1825
|
dataset.meaning_types = meaning_types
|
|
@@ -2150,11 +2149,10 @@ class FeaturesEnricher(TransformerMixin):
|
|
|
2150
2149
|
dataset = Dataset(
|
|
2151
2150
|
"tds_" + str(uuid.uuid4()),
|
|
2152
2151
|
df=df, # type: ignore
|
|
2153
|
-
model_task_type=model_task_type,
|
|
2154
|
-
|
|
2155
|
-
|
|
2156
|
-
|
|
2157
|
-
random_state=self.random_state, # type: ignore
|
|
2152
|
+
model_task_type=model_task_type,
|
|
2153
|
+
date_format=self.date_format,
|
|
2154
|
+
random_state=self.random_state,
|
|
2155
|
+
rest_client=self.rest_client,
|
|
2158
2156
|
logger=self.logger,
|
|
2159
2157
|
)
|
|
2160
2158
|
dataset.meaning_types = meaning_types
|
|
@@ -8,6 +8,7 @@ import pandas as pd
|
|
|
8
8
|
|
|
9
9
|
from upgini import dataset
|
|
10
10
|
from upgini.http import (
|
|
11
|
+
_RestClient,
|
|
11
12
|
ProviderTaskSummary,
|
|
12
13
|
SearchProgress,
|
|
13
14
|
SearchTaskSummary,
|
|
@@ -41,8 +42,7 @@ class SearchTask:
|
|
|
41
42
|
accurate_model: bool = False,
|
|
42
43
|
initial_search_task_id: Optional[str] = None,
|
|
43
44
|
task_type: Optional[ModelTaskType] = None,
|
|
44
|
-
|
|
45
|
-
api_key: Optional[str] = None,
|
|
45
|
+
rest_client: Optional[_RestClient] = None,
|
|
46
46
|
logger: Optional[logging.Logger] = None,
|
|
47
47
|
):
|
|
48
48
|
self.search_task_id = search_task_id
|
|
@@ -53,8 +53,7 @@ class SearchTask:
|
|
|
53
53
|
self.accurate_model = accurate_model
|
|
54
54
|
self.task_type = task_type
|
|
55
55
|
self.summary = None
|
|
56
|
-
self.
|
|
57
|
-
self.api_key = api_key
|
|
56
|
+
self.rest_client = rest_client
|
|
58
57
|
if logger is not None:
|
|
59
58
|
self.logger = logger
|
|
60
59
|
else:
|
|
@@ -64,7 +63,7 @@ class SearchTask:
|
|
|
64
63
|
self.unused_features_for_generation: Optional[List[str]] = None
|
|
65
64
|
|
|
66
65
|
def get_progress(self, trace_id: str) -> SearchProgress:
|
|
67
|
-
return
|
|
66
|
+
return self.rest_client.get_search_progress(trace_id, self.search_task_id)
|
|
68
67
|
|
|
69
68
|
def poll_result(self, trace_id: str, quiet: bool = False, check_fit: bool = False) -> "SearchTask":
|
|
70
69
|
completed_statuses = {"COMPLETED", "VALIDATION_COMPLETED"}
|
|
@@ -72,7 +71,7 @@ class SearchTask:
|
|
|
72
71
|
submitted_statuses = {"SUBMITTED", "VALIDATION_SUBMITTED"}
|
|
73
72
|
if not quiet:
|
|
74
73
|
print(bundle.get("polling_search_task").format(self.search_task_id))
|
|
75
|
-
if is_demo_api_key(self.
|
|
74
|
+
if is_demo_api_key(self.rest_client._refresh_token):
|
|
76
75
|
print(bundle.get("polling_unregister_information"))
|
|
77
76
|
search_task_id = self.initial_search_task_id if self.initial_search_task_id is not None else self.search_task_id
|
|
78
77
|
|
|
@@ -80,14 +79,14 @@ class SearchTask:
|
|
|
80
79
|
with Spinner():
|
|
81
80
|
if self.PROTECT_FROM_RATE_LIMIT:
|
|
82
81
|
time.sleep(1) # this is neccesary to avoid requests rate limit restrictions
|
|
83
|
-
self.summary =
|
|
82
|
+
self.summary = self.rest_client.search_task_summary_v2(
|
|
84
83
|
trace_id, search_task_id
|
|
85
84
|
)
|
|
86
85
|
while self.summary.status not in completed_statuses and (
|
|
87
86
|
not check_fit or "VALIDATION" not in self.summary.status
|
|
88
87
|
):
|
|
89
88
|
time.sleep(self.POLLING_DELAY_SECONDS)
|
|
90
|
-
self.summary =
|
|
89
|
+
self.summary = self.rest_client.search_task_summary_v2(
|
|
91
90
|
trace_id, search_task_id
|
|
92
91
|
)
|
|
93
92
|
if self.summary.status in failed_statuses:
|
|
@@ -103,7 +102,7 @@ class SearchTask:
|
|
|
103
102
|
except KeyboardInterrupt as e:
|
|
104
103
|
if not check_fit:
|
|
105
104
|
print(bundle.get("search_stopping"))
|
|
106
|
-
|
|
105
|
+
self.rest_client.stop_search_task_v2(trace_id, search_task_id)
|
|
107
106
|
self.logger.warning(f"Search {search_task_id} stopped by user")
|
|
108
107
|
print(bundle.get("search_stopped"))
|
|
109
108
|
raise e
|
|
@@ -131,7 +130,7 @@ class SearchTask:
|
|
|
131
130
|
for provider_summary in self.summary.initial_important_providers:
|
|
132
131
|
if provider_summary.status == "COMPLETED":
|
|
133
132
|
self.provider_metadata_v2.append(
|
|
134
|
-
|
|
133
|
+
self.rest_client.get_provider_search_metadata_v3(
|
|
135
134
|
provider_summary.ads_search_task_id, trace_id
|
|
136
135
|
)
|
|
137
136
|
)
|
|
@@ -257,8 +256,8 @@ class SearchTask:
|
|
|
257
256
|
if self.PROTECT_FROM_RATE_LIMIT:
|
|
258
257
|
time.sleep(1) # this is neccesary to avoid requests rate limit restrictions
|
|
259
258
|
return _get_all_initial_raw_features_cached(
|
|
260
|
-
self.
|
|
261
|
-
self.
|
|
259
|
+
self.rest_client._service_endpoint,
|
|
260
|
+
self.rest_client._refresh_token,
|
|
262
261
|
trace_id,
|
|
263
262
|
self.search_task_id,
|
|
264
263
|
metrics_calculation,
|
|
@@ -268,7 +267,11 @@ class SearchTask:
|
|
|
268
267
|
def get_target_outliers(self, trace_id: str) -> Optional[pd.DataFrame]:
|
|
269
268
|
self._check_finished_initial_search()
|
|
270
269
|
return _get_target_outliers_cached(
|
|
271
|
-
self.
|
|
270
|
+
self.rest_client._service_endpoint,
|
|
271
|
+
self.rest_client._refresh_token,
|
|
272
|
+
trace_id,
|
|
273
|
+
self.search_task_id,
|
|
274
|
+
self.PROTECT_FROM_RATE_LIMIT
|
|
272
275
|
)
|
|
273
276
|
|
|
274
277
|
def get_max_initial_eval_set_hit_rate_v2(self) -> Optional[Dict[int, float]]:
|
|
@@ -286,8 +289,8 @@ class SearchTask:
|
|
|
286
289
|
def get_all_validation_raw_features(self, trace_id: str, metrics_calculation=False) -> Optional[pd.DataFrame]:
|
|
287
290
|
self._check_finished_validation_search()
|
|
288
291
|
return _get_all_validation_raw_features_cached(
|
|
289
|
-
self.
|
|
290
|
-
self.
|
|
292
|
+
self.rest_client._service_endpoint,
|
|
293
|
+
self.rest_client._refresh_token,
|
|
291
294
|
trace_id,
|
|
292
295
|
self.search_task_id,
|
|
293
296
|
metrics_calculation,
|
|
@@ -295,7 +298,7 @@ class SearchTask:
|
|
|
295
298
|
)
|
|
296
299
|
|
|
297
300
|
def get_file_metadata(self, trace_id: str) -> FileMetadata:
|
|
298
|
-
return
|
|
301
|
+
return self.rest_client.get_search_file_metadata(self.search_task_id, trace_id)
|
|
299
302
|
|
|
300
303
|
|
|
301
304
|
@lru_cache()
|
|
@@ -616,7 +616,7 @@ def test_columns_renaming():
|
|
|
616
616
|
df = pd.concat([df1, df2], axis=1)
|
|
617
617
|
|
|
618
618
|
dataset = Dataset(
|
|
619
|
-
"tds", df=df, meaning_types={"date": FileColumnMeaningType.DATE}, search_keys=[("date",)]
|
|
619
|
+
"tds", df=df, meaning_types={"date": FileColumnMeaningType.DATE}, search_keys=[("date",)]
|
|
620
620
|
)
|
|
621
621
|
dataset._Dataset__rename_columns()
|
|
622
622
|
print(dataset)
|
|
@@ -633,7 +633,7 @@ def test_too_long_columns():
|
|
|
633
633
|
)
|
|
634
634
|
|
|
635
635
|
dataset = Dataset(
|
|
636
|
-
"tds", df=df, meaning_types={"date": FileColumnMeaningType.DATE}, search_keys=[("date",)]
|
|
636
|
+
"tds", df=df, meaning_types={"date": FileColumnMeaningType.DATE}, search_keys=[("date",)]
|
|
637
637
|
)
|
|
638
638
|
dataset._Dataset__rename_columns()
|
|
639
639
|
print(dataset)
|
|
@@ -665,7 +665,7 @@ def test_downsampling_binary():
|
|
|
665
665
|
"target": FileColumnMeaningType.TARGET,
|
|
666
666
|
"eval_set_index": FileColumnMeaningType.EVAL_SET_INDEX,
|
|
667
667
|
}
|
|
668
|
-
dataset = Dataset("tds", df=df, meaning_types=meaning_types, search_keys=[("date",)]
|
|
668
|
+
dataset = Dataset("tds", df=df, meaning_types=meaning_types, search_keys=[("date",)])
|
|
669
669
|
dataset.task_type = ModelTaskType.BINARY
|
|
670
670
|
|
|
671
671
|
old_min_sample_threshold = Dataset.MIN_SAMPLE_THRESHOLD
|
|
@@ -708,7 +708,7 @@ def test_downsampling_multiclass():
|
|
|
708
708
|
"target": FileColumnMeaningType.TARGET,
|
|
709
709
|
"eval_set_index": FileColumnMeaningType.EVAL_SET_INDEX,
|
|
710
710
|
}
|
|
711
|
-
dataset = Dataset("tds", df=df, meaning_types=meaning_types, search_keys=[("date",)]
|
|
711
|
+
dataset = Dataset("tds", df=df, meaning_types=meaning_types, search_keys=[("date",)])
|
|
712
712
|
dataset.task_type = ModelTaskType.MULTICLASS
|
|
713
713
|
|
|
714
714
|
old_min_sample_threshold = Dataset.MIN_SAMPLE_THRESHOLD
|
|
@@ -1616,7 +1616,7 @@ def test_validation_metrics_calculation(requests_mock: Mocker):
|
|
|
1616
1616
|
X = tds[["date"]]
|
|
1617
1617
|
y = tds.target
|
|
1618
1618
|
|
|
1619
|
-
search_task = SearchTask(""
|
|
1619
|
+
search_task = SearchTask("")
|
|
1620
1620
|
|
|
1621
1621
|
def initial_max_hit_rate() -> Optional[float]:
|
|
1622
1622
|
return 1.0
|
|
@@ -2363,7 +2363,7 @@ def test_search_keys_autodetection(requests_mock: Mocker):
|
|
|
2363
2363
|
"email_one_domain_3b0a68",
|
|
2364
2364
|
"date_0e8763",
|
|
2365
2365
|
} == {sk for sublist in self.search_keys for sk in sublist}
|
|
2366
|
-
search_task = SearchTask(search_task_id, self,
|
|
2366
|
+
search_task = SearchTask(search_task_id, self, rest_client=enricher.rest_client)
|
|
2367
2367
|
search_task.provider_metadata_v2 = [
|
|
2368
2368
|
ProviderTaskMetadataV2(
|
|
2369
2369
|
features=[
|
|
@@ -2499,7 +2499,7 @@ def test_diff_target_dups(requests_mock: Mocker):
|
|
|
2499
2499
|
assert self.data.loc[1, "date_0e8763"] == 1672531200000
|
|
2500
2500
|
assert self.data.loc[1, "feature_2ad562"] == 13
|
|
2501
2501
|
assert self.data.loc[1, "target"] == 1
|
|
2502
|
-
return SearchTask("123", self,
|
|
2502
|
+
return SearchTask("123", self, rest_client=enricher.rest_client)
|
|
2503
2503
|
|
|
2504
2504
|
Dataset.search = mock_search
|
|
2505
2505
|
Dataset.MIN_ROWS_COUNT = 1
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|