upgini 1.1.131a4__tar.gz → 1.1.132__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of upgini might be problematic. Click here for more details.
- {upgini-1.1.131a4/src/upgini.egg-info → upgini-1.1.132}/PKG-INFO +1 -1
- {upgini-1.1.131a4 → upgini-1.1.132}/setup.py +2 -2
- {upgini-1.1.131a4 → upgini-1.1.132}/src/upgini/features_enricher.py +33 -13
- {upgini-1.1.131a4 → upgini-1.1.132}/src/upgini/http.py +21 -15
- {upgini-1.1.131a4 → upgini-1.1.132}/src/upgini/resource_bundle/strings.properties +2 -1
- {upgini-1.1.131a4 → upgini-1.1.132}/src/upgini/search_task.py +2 -2
- {upgini-1.1.131a4 → upgini-1.1.132}/src/upgini/utils/track_info.py +12 -0
- {upgini-1.1.131a4 → upgini-1.1.132/src/upgini.egg-info}/PKG-INFO +1 -1
- {upgini-1.1.131a4 → upgini-1.1.132}/src/upgini.egg-info/requires.txt +1 -1
- {upgini-1.1.131a4 → upgini-1.1.132}/tests/test_features_enricher.py +8 -8
- {upgini-1.1.131a4 → upgini-1.1.132}/LICENSE +0 -0
- {upgini-1.1.131a4 → upgini-1.1.132}/README.md +0 -0
- {upgini-1.1.131a4 → upgini-1.1.132}/pyproject.toml +0 -0
- {upgini-1.1.131a4 → upgini-1.1.132}/setup.cfg +0 -0
- {upgini-1.1.131a4 → upgini-1.1.132}/src/upgini/__init__.py +0 -0
- {upgini-1.1.131a4 → upgini-1.1.132}/src/upgini/ads.py +0 -0
- {upgini-1.1.131a4 → upgini-1.1.132}/src/upgini/ads_management/__init__.py +0 -0
- {upgini-1.1.131a4 → upgini-1.1.132}/src/upgini/ads_management/ads_manager.py +0 -0
- {upgini-1.1.131a4 → upgini-1.1.132}/src/upgini/data_source/__init__.py +0 -0
- {upgini-1.1.131a4 → upgini-1.1.132}/src/upgini/data_source/data_source_publisher.py +0 -0
- {upgini-1.1.131a4 → upgini-1.1.132}/src/upgini/dataset.py +0 -0
- {upgini-1.1.131a4 → upgini-1.1.132}/src/upgini/errors.py +0 -0
- {upgini-1.1.131a4 → upgini-1.1.132}/src/upgini/mdc/__init__.py +0 -0
- {upgini-1.1.131a4 → upgini-1.1.132}/src/upgini/mdc/context.py +0 -0
- {upgini-1.1.131a4 → upgini-1.1.132}/src/upgini/metadata.py +0 -0
- {upgini-1.1.131a4 → upgini-1.1.132}/src/upgini/metrics.py +0 -0
- {upgini-1.1.131a4 → upgini-1.1.132}/src/upgini/normalizer/__init__.py +0 -0
- {upgini-1.1.131a4 → upgini-1.1.132}/src/upgini/normalizer/phone_normalizer.py +0 -0
- {upgini-1.1.131a4 → upgini-1.1.132}/src/upgini/resource_bundle/__init__.py +0 -0
- {upgini-1.1.131a4 → upgini-1.1.132}/src/upgini/resource_bundle/exceptions.py +0 -0
- {upgini-1.1.131a4 → upgini-1.1.132}/src/upgini/sampler/__init__.py +0 -0
- {upgini-1.1.131a4 → upgini-1.1.132}/src/upgini/sampler/base.py +0 -0
- {upgini-1.1.131a4 → upgini-1.1.132}/src/upgini/sampler/random_under_sampler.py +0 -0
- {upgini-1.1.131a4 → upgini-1.1.132}/src/upgini/sampler/utils.py +0 -0
- {upgini-1.1.131a4 → upgini-1.1.132}/src/upgini/spinner.py +0 -0
- {upgini-1.1.131a4 → upgini-1.1.132}/src/upgini/utils/__init__.py +0 -0
- {upgini-1.1.131a4 → upgini-1.1.132}/src/upgini/utils/base_search_key_detector.py +0 -0
- {upgini-1.1.131a4 → upgini-1.1.132}/src/upgini/utils/blocked_time_series.py +0 -0
- {upgini-1.1.131a4 → upgini-1.1.132}/src/upgini/utils/country_utils.py +0 -0
- {upgini-1.1.131a4 → upgini-1.1.132}/src/upgini/utils/cv_utils.py +0 -0
- {upgini-1.1.131a4 → upgini-1.1.132}/src/upgini/utils/datetime_utils.py +0 -0
- {upgini-1.1.131a4 → upgini-1.1.132}/src/upgini/utils/display_utils.py +0 -0
- {upgini-1.1.131a4 → upgini-1.1.132}/src/upgini/utils/email_utils.py +0 -0
- {upgini-1.1.131a4 → upgini-1.1.132}/src/upgini/utils/features_validator.py +0 -0
- {upgini-1.1.131a4 → upgini-1.1.132}/src/upgini/utils/format.py +0 -0
- {upgini-1.1.131a4 → upgini-1.1.132}/src/upgini/utils/phone_utils.py +0 -0
- {upgini-1.1.131a4 → upgini-1.1.132}/src/upgini/utils/postal_code_utils.py +0 -0
- {upgini-1.1.131a4 → upgini-1.1.132}/src/upgini/utils/target_utils.py +0 -0
- {upgini-1.1.131a4 → upgini-1.1.132}/src/upgini/utils/warning_counter.py +0 -0
- {upgini-1.1.131a4 → upgini-1.1.132}/src/upgini/version_validator.py +0 -0
- {upgini-1.1.131a4 → upgini-1.1.132}/src/upgini.egg-info/SOURCES.txt +0 -0
- {upgini-1.1.131a4 → upgini-1.1.132}/src/upgini.egg-info/dependency_links.txt +0 -0
- {upgini-1.1.131a4 → upgini-1.1.132}/src/upgini.egg-info/top_level.txt +0 -0
- {upgini-1.1.131a4 → upgini-1.1.132}/tests/test_binary_dataset.py +0 -0
- {upgini-1.1.131a4 → upgini-1.1.132}/tests/test_blocked_time_series.py +0 -0
- {upgini-1.1.131a4 → upgini-1.1.132}/tests/test_categorical_dataset.py +0 -0
- {upgini-1.1.131a4 → upgini-1.1.132}/tests/test_continuous_dataset.py +0 -0
- {upgini-1.1.131a4 → upgini-1.1.132}/tests/test_country_utils.py +0 -0
- {upgini-1.1.131a4 → upgini-1.1.132}/tests/test_datetime_utils.py +0 -0
- {upgini-1.1.131a4 → upgini-1.1.132}/tests/test_email_utils.py +0 -0
- {upgini-1.1.131a4 → upgini-1.1.132}/tests/test_etalon_validation.py +0 -0
- {upgini-1.1.131a4 → upgini-1.1.132}/tests/test_metrics.py +0 -0
- {upgini-1.1.131a4 → upgini-1.1.132}/tests/test_phone_utils.py +0 -0
- {upgini-1.1.131a4 → upgini-1.1.132}/tests/test_postal_code_utils.py +0 -0
|
@@ -35,7 +35,7 @@ def send_log(msg: str):
|
|
|
35
35
|
|
|
36
36
|
|
|
37
37
|
here = Path(__file__).parent.resolve()
|
|
38
|
-
version = "1.1.
|
|
38
|
+
version = "1.1.132"
|
|
39
39
|
try:
|
|
40
40
|
send_log(f"Start setup PyLib version {version}")
|
|
41
41
|
setup(
|
|
@@ -77,7 +77,7 @@ try:
|
|
|
77
77
|
"numpy>=1.19.0",
|
|
78
78
|
"scikit-learn>=1.0.1",
|
|
79
79
|
"pydantic>=1.8.2",
|
|
80
|
-
"fastparquet>=0.
|
|
80
|
+
"fastparquet>=0.8.1",
|
|
81
81
|
"python-json-logger>=2.0.2",
|
|
82
82
|
"catboost>=1.0.3",
|
|
83
83
|
"lightgbm>=3.3.2",
|
|
@@ -139,15 +139,16 @@ class FeaturesEnricher(TransformerMixin):
|
|
|
139
139
|
raise_validation_error: bool = False,
|
|
140
140
|
**kwargs,
|
|
141
141
|
):
|
|
142
|
-
self.
|
|
142
|
+
self._api_key = api_key or os.environ.get(UPGINI_API_KEY)
|
|
143
143
|
try:
|
|
144
|
-
self.rest_client = get_rest_client(endpoint, self.
|
|
144
|
+
self.rest_client = get_rest_client(endpoint, self._api_key)
|
|
145
145
|
except UpginiConnectionError as e:
|
|
146
146
|
print(e)
|
|
147
147
|
return
|
|
148
148
|
|
|
149
|
+
self.logs_enabled = logs_enabled
|
|
149
150
|
if logs_enabled:
|
|
150
|
-
self.logger = LoggerFactory().get_logger(endpoint, self.
|
|
151
|
+
self.logger = LoggerFactory().get_logger(endpoint, self._api_key)
|
|
151
152
|
else:
|
|
152
153
|
self.logger = logging.getLogger()
|
|
153
154
|
self.logger.setLevel("FATAL")
|
|
@@ -174,7 +175,7 @@ class FeaturesEnricher(TransformerMixin):
|
|
|
174
175
|
search_task = SearchTask(
|
|
175
176
|
search_id,
|
|
176
177
|
endpoint=self.endpoint,
|
|
177
|
-
api_key=self.
|
|
178
|
+
api_key=self._api_key,
|
|
178
179
|
)
|
|
179
180
|
|
|
180
181
|
print(bundle.get("search_by_task_id_start"))
|
|
@@ -235,6 +236,16 @@ class FeaturesEnricher(TransformerMixin):
|
|
|
235
236
|
self.__cached_sampled_datasets: Optional[Tuple[pd.DataFrame, pd.DataFrame, pd.Series, Dict, Dict]] = None
|
|
236
237
|
self.raise_validation_error = raise_validation_error
|
|
237
238
|
|
|
239
|
+
def _get_api_key(self):
|
|
240
|
+
return self._api_key
|
|
241
|
+
|
|
242
|
+
def _set_api_key(self, api_key: str):
|
|
243
|
+
self._api_key = api_key
|
|
244
|
+
if self.logs_enabled:
|
|
245
|
+
self.logger = LoggerFactory().get_logger(self.endpoint, self._api_key)
|
|
246
|
+
|
|
247
|
+
api_key = property(_get_api_key, _set_api_key)
|
|
248
|
+
|
|
238
249
|
def fit(
|
|
239
250
|
self,
|
|
240
251
|
X: Union[pd.DataFrame, pd.Series, np.ndarray],
|
|
@@ -579,13 +590,14 @@ class FeaturesEnricher(TransformerMixin):
|
|
|
579
590
|
finally:
|
|
580
591
|
self.logger.info(f"Transform elapsed time: {time.time() - start_time}")
|
|
581
592
|
|
|
582
|
-
if
|
|
583
|
-
|
|
593
|
+
if result is not None:
|
|
594
|
+
if self.country_added:
|
|
595
|
+
result = drop_existing_columns(result, COUNTRY)
|
|
584
596
|
|
|
585
|
-
|
|
586
|
-
|
|
587
|
-
|
|
588
|
-
|
|
597
|
+
if keep_input:
|
|
598
|
+
return result
|
|
599
|
+
else:
|
|
600
|
+
return drop_existing_columns(result, X.columns)
|
|
589
601
|
|
|
590
602
|
def calculate_metrics(
|
|
591
603
|
self,
|
|
@@ -912,13 +924,21 @@ class FeaturesEnricher(TransformerMixin):
|
|
|
912
924
|
def _has_features_with_commercial_schema(
|
|
913
925
|
self, commercial_schema: str, exclude_features_sources: Optional[List[str]]
|
|
914
926
|
) -> bool:
|
|
927
|
+
return len(self._get_features_with_commercial_schema(commercial_schema, exclude_features_sources)) > 0
|
|
928
|
+
|
|
929
|
+
def _get_features_with_commercial_schema(
|
|
930
|
+
self, commercial_schema: str, exclude_features_sources: Optional[List[str]]
|
|
931
|
+
) -> List[str]:
|
|
915
932
|
if exclude_features_sources:
|
|
916
933
|
filtered_features_info = self.features_info[
|
|
917
934
|
~self.features_info[bundle.get("features_info_name")].isin(exclude_features_sources)
|
|
918
935
|
]
|
|
919
936
|
else:
|
|
920
937
|
filtered_features_info = self.features_info
|
|
921
|
-
return (filtered_features_info[
|
|
938
|
+
return list(filtered_features_info.loc[
|
|
939
|
+
filtered_features_info[bundle.get("features_info_commercial_schema")] == commercial_schema,
|
|
940
|
+
bundle.get("features_info_name"),
|
|
941
|
+
].values)
|
|
922
942
|
|
|
923
943
|
def _has_trial_features(self, exclude_features_sources: Optional[List[str]]) -> bool:
|
|
924
944
|
return self._has_features_with_commercial_schema(CommercialSchema.TRIAL.value, exclude_features_sources)
|
|
@@ -1248,7 +1268,6 @@ class FeaturesEnricher(TransformerMixin):
|
|
|
1248
1268
|
msg = bundle.get("transform_with_trial_features")
|
|
1249
1269
|
self.logger.warn(msg)
|
|
1250
1270
|
print(msg)
|
|
1251
|
-
return None
|
|
1252
1271
|
|
|
1253
1272
|
columns_to_drop = [c for c in validated_X.columns if c in self.feature_names_]
|
|
1254
1273
|
if len(columns_to_drop) > 0:
|
|
@@ -1493,7 +1512,8 @@ class FeaturesEnricher(TransformerMixin):
|
|
|
1493
1512
|
if is_demo_dataset:
|
|
1494
1513
|
msg = bundle.get("demo_dataset_info")
|
|
1495
1514
|
self.logger.info(msg)
|
|
1496
|
-
|
|
1515
|
+
if not self.__is_registered:
|
|
1516
|
+
print(msg)
|
|
1497
1517
|
|
|
1498
1518
|
if self.generate_features is not None and len(self.generate_features) > 0:
|
|
1499
1519
|
x_columns = list(validated_X.columns)
|
|
@@ -11,8 +11,8 @@ from http.client import HTTPConnection
|
|
|
11
11
|
from json import dumps
|
|
12
12
|
from typing import Dict, List, Optional
|
|
13
13
|
from urllib.parse import urljoin
|
|
14
|
-
import pandas as pd
|
|
15
14
|
|
|
15
|
+
import pandas as pd
|
|
16
16
|
import requests
|
|
17
17
|
from pydantic import BaseModel
|
|
18
18
|
from pythonjsonlogger import jsonlogger
|
|
@@ -32,7 +32,7 @@ from upgini.metadata import (
|
|
|
32
32
|
SearchCustomization,
|
|
33
33
|
)
|
|
34
34
|
from upgini.resource_bundle import bundle
|
|
35
|
-
from upgini.utils.track_info import
|
|
35
|
+
from upgini.utils.track_info import get_track_metrics_with_timeout
|
|
36
36
|
|
|
37
37
|
try:
|
|
38
38
|
from importlib_metadata import version
|
|
@@ -49,6 +49,7 @@ except ImportError:
|
|
|
49
49
|
UPGINI_URL: str = "UPGINI_URL"
|
|
50
50
|
UPGINI_API_KEY: str = "UPGINI_API_KEY"
|
|
51
51
|
DEMO_API_KEY: str = "Aa4BPwGFbn1zNEXIkZ-NbhsRk0ricN6puKuga1-O5lM"
|
|
52
|
+
TRACK_METRICS_TIMEOUT_SECONDS: int = 10
|
|
52
53
|
|
|
53
54
|
refresh_token_lock = threading.Lock()
|
|
54
55
|
|
|
@@ -374,7 +375,11 @@ class _RestClient:
|
|
|
374
375
|
search_customization.json(exclude_none=True).encode(),
|
|
375
376
|
"application/json",
|
|
376
377
|
)
|
|
377
|
-
files["tracking"] = (
|
|
378
|
+
files["tracking"] = (
|
|
379
|
+
"tracking.json",
|
|
380
|
+
dumps(get_track_metrics_with_timeout(TRACK_METRICS_TIMEOUT_SECONDS)).encode(),
|
|
381
|
+
"application/json",
|
|
382
|
+
)
|
|
378
383
|
additional_headers = {self.SEARCH_KEYS_HEADER_NAME: ",".join(self.search_keys_meaning_types(metadata))}
|
|
379
384
|
|
|
380
385
|
return self._send_post_file_req_v2(
|
|
@@ -433,7 +438,6 @@ class _RestClient:
|
|
|
433
438
|
digest = md5_hash.hexdigest()
|
|
434
439
|
metadata_with_md5 = metadata.copy(update={"checksumMD5": digest})
|
|
435
440
|
|
|
436
|
-
import pandas as pd
|
|
437
441
|
digest_sha256 = hashlib.sha256(pd.util.hash_pandas_object(pd.read_parquet(file_path)).values).hexdigest()
|
|
438
442
|
metadata_with_md5 = metadata_with_md5.copy(update={"digest": digest_sha256})
|
|
439
443
|
|
|
@@ -453,7 +457,11 @@ class _RestClient:
|
|
|
453
457
|
search_customization.json(exclude_none=True).encode(),
|
|
454
458
|
"application/json",
|
|
455
459
|
)
|
|
456
|
-
files["tracking"] = (
|
|
460
|
+
files["tracking"] = (
|
|
461
|
+
"ide",
|
|
462
|
+
dumps(get_track_metrics_with_timeout(TRACK_METRICS_TIMEOUT_SECONDS)).encode(),
|
|
463
|
+
"application/json",
|
|
464
|
+
)
|
|
457
465
|
|
|
458
466
|
additional_headers = {self.SEARCH_KEYS_HEADER_NAME: ",".join(self.search_keys_meaning_types(metadata))}
|
|
459
467
|
|
|
@@ -787,17 +795,18 @@ class BackendLogHandler(logging.Handler):
|
|
|
787
795
|
def __init__(self, rest_client: _RestClient, *args, **kwargs) -> None:
|
|
788
796
|
super().__init__(*args, **kwargs)
|
|
789
797
|
self.rest_client = rest_client
|
|
790
|
-
|
|
791
|
-
self.
|
|
792
|
-
print("After track metrics")
|
|
793
|
-
if "ip" in self.track_metrics.keys():
|
|
794
|
-
self.hostname = self.track_metrics["ip"]
|
|
795
|
-
else:
|
|
796
|
-
self.hostname = "0.0.0.0"
|
|
798
|
+
self.track_metrics = None
|
|
799
|
+
self.hostname = None
|
|
797
800
|
|
|
798
801
|
def emit(self, record: logging.LogRecord) -> None:
|
|
799
802
|
def task():
|
|
800
803
|
try:
|
|
804
|
+
if self.track_metrics is None:
|
|
805
|
+
self.track_metrics = get_track_metrics_with_timeout(TRACK_METRICS_TIMEOUT_SECONDS)
|
|
806
|
+
if "ip" in self.track_metrics.keys():
|
|
807
|
+
self.hostname = self.track_metrics["ip"]
|
|
808
|
+
else:
|
|
809
|
+
self.hostname = "0.0.0.0"
|
|
801
810
|
text = self.format(record)
|
|
802
811
|
tags = self.track_metrics
|
|
803
812
|
tags["version"] = __version__
|
|
@@ -848,11 +857,8 @@ class LoggerFactory:
|
|
|
848
857
|
|
|
849
858
|
upgini_logger = logging.getLogger(f"upgini.{hash(key)}")
|
|
850
859
|
upgini_logger.handlers.clear()
|
|
851
|
-
print("Before rest client")
|
|
852
860
|
rest_client = get_rest_client(backend_url, api_token)
|
|
853
|
-
print("Before backend log handler")
|
|
854
861
|
datadog_handler = BackendLogHandler(rest_client)
|
|
855
|
-
print("After backend log handler")
|
|
856
862
|
json_formatter = jsonlogger.JsonFormatter(
|
|
857
863
|
"%(asctime)s %(threadName)s %(name)s %(levelname)s %(message)s",
|
|
858
864
|
timestamp=True,
|
|
@@ -25,7 +25,8 @@ metrics_no_important_free_features=WARNING: No important free features to calcul
|
|
|
25
25
|
metrics_no_important_features=WARNING: No important features to calculate metrics
|
|
26
26
|
metrics_negative_uplift_without_cv=Please re-check that your task is not a time series prediction. If so, restart search with cv=CVType.time_series param for correct search results. See docs https://github.com/upgini/upgini#-time-series-prediction-support
|
|
27
27
|
metrics_with_trial_features=The calculation of final accuracy metrics using Trial data is not available for unauthorized users.\nGet a free API key on https://upgini.com and repeat your request.
|
|
28
|
-
transform_with_trial_features=
|
|
28
|
+
transform_with_trial_features=WARNING: Your search results contain Trial data sources. To enrich your dataframe using transform or fit_transform with features from these Trial data sources, please register for a Free API key at https://upgini.com and resubmit your request.
|
|
29
|
+
# Enriching with Trial data is not available for unauthorized users.\nGet a free API key on https://upgini.com and repeat your request.
|
|
29
30
|
metrics_with_paid_features=The calculation of final accuracy metrics using Paid data is not available.\nContact Upgini support for the data access
|
|
30
31
|
transform_with_paid_features=Enriching with Paid data is not available.\nContact Upgini support for the data access
|
|
31
32
|
trial_quota_limit_riched=WARNING: You have reached the quota limit of trial data usage. Please contact Upgini support to remove restriction
|
|
@@ -59,8 +59,8 @@ class SearchTask:
|
|
|
59
59
|
submitted_statuses = {"SUBMITTED", "VALIDATION_SUBMITTED"}
|
|
60
60
|
if not quiet:
|
|
61
61
|
print(bundle.get("polling_search_task").format(self.search_task_id))
|
|
62
|
-
|
|
63
|
-
|
|
62
|
+
if is_demo_api_key(self.api_key):
|
|
63
|
+
print(bundle.get("polling_unregister_information"))
|
|
64
64
|
search_task_id = self.initial_search_task_id if self.initial_search_task_id is not None else self.search_task_id
|
|
65
65
|
|
|
66
66
|
try:
|
|
@@ -6,6 +6,7 @@ from functools import lru_cache
|
|
|
6
6
|
from getpass import getuser
|
|
7
7
|
from hashlib import sha256
|
|
8
8
|
from uuid import getnode
|
|
9
|
+
from concurrent import futures
|
|
9
10
|
|
|
10
11
|
from requests import get, post
|
|
11
12
|
|
|
@@ -45,6 +46,17 @@ def _get_execution_ide() -> str:
|
|
|
45
46
|
return "other"
|
|
46
47
|
|
|
47
48
|
|
|
49
|
+
def get_track_metrics_with_timeout(timeout_seconds: int = 10) -> dict:
|
|
50
|
+
with futures.ThreadPoolExecutor() as executor:
|
|
51
|
+
future = executor.submit(get_track_metrics)
|
|
52
|
+
try:
|
|
53
|
+
result = future.result(timeout_seconds)
|
|
54
|
+
return result
|
|
55
|
+
except futures.TimeoutError:
|
|
56
|
+
future.cancel()
|
|
57
|
+
return dict()
|
|
58
|
+
|
|
59
|
+
|
|
48
60
|
@lru_cache()
|
|
49
61
|
def get_track_metrics() -> dict:
|
|
50
62
|
# default values
|
|
@@ -397,7 +397,7 @@ def test_saved_features_enricher(requests_mock: Mocker):
|
|
|
397
397
|
{
|
|
398
398
|
"segment": [train_segment, eval_1_segment, eval_2_segment],
|
|
399
399
|
rows_header: [10000, 1000, 1000],
|
|
400
|
-
enriched_rocauc: [0.
|
|
400
|
+
enriched_rocauc: [0.500276, 0.499805, 0.497979],
|
|
401
401
|
}
|
|
402
402
|
)
|
|
403
403
|
.set_index("segment")
|
|
@@ -1774,7 +1774,7 @@ def test_correct_order_of_enriched_X(requests_mock: Mocker):
|
|
|
1774
1774
|
df_with_eval_set_index_with_date = converter.convert(df_with_eval_set_index)
|
|
1775
1775
|
mock_features["system_record_id"] = pd.util.hash_pandas_object(
|
|
1776
1776
|
df_with_eval_set_index_with_date[sorted(search_keys.keys())].reset_index(drop=True), index=False
|
|
1777
|
-
)
|
|
1777
|
+
).astype("Float64")
|
|
1778
1778
|
mock_validation_raw_features(requests_mock, url, validation_search_task_id, mock_features)
|
|
1779
1779
|
|
|
1780
1780
|
enriched_df_with_eval_set = enricher.transform(df_with_eval_set_index)
|
|
@@ -2415,12 +2415,12 @@ def test_diff_target_dups(requests_mock: Mocker):
|
|
|
2415
2415
|
self.validate()
|
|
2416
2416
|
assert len(self.data) == 2
|
|
2417
2417
|
print(self.data)
|
|
2418
|
-
assert self.data.loc[
|
|
2419
|
-
assert self.data.loc[
|
|
2420
|
-
assert self.data.loc[
|
|
2421
|
-
assert self.data.loc[
|
|
2422
|
-
assert self.data.loc[
|
|
2423
|
-
assert self.data.loc[
|
|
2418
|
+
assert self.data.loc[0, "date_fake_a"] == 1672531200000
|
|
2419
|
+
assert self.data.loc[0, "feature_fake_a"] == 12
|
|
2420
|
+
assert self.data.loc[0, "target"] == 0
|
|
2421
|
+
assert self.data.loc[1, "date_fake_a"] == 1672531200000
|
|
2422
|
+
assert self.data.loc[1, "feature_fake_a"] == 13
|
|
2423
|
+
assert self.data.loc[1, "target"] == 1
|
|
2424
2424
|
return SearchTask("123", self, endpoint=url, api_key="fake_api_key")
|
|
2425
2425
|
|
|
2426
2426
|
Dataset.search = mock_search
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|