upgini 1.2.85__py3-none-any.whl → 1.2.85a3857.dev1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of upgini might be problematic. Click here for more details.
- upgini/__about__.py +1 -1
- upgini/features_enricher.py +17 -34
- upgini/metrics.py +20 -5
- {upgini-1.2.85.dist-info → upgini-1.2.85a3857.dev1.dist-info}/METADATA +1 -1
- {upgini-1.2.85.dist-info → upgini-1.2.85a3857.dev1.dist-info}/RECORD +7 -7
- {upgini-1.2.85.dist-info → upgini-1.2.85a3857.dev1.dist-info}/WHEEL +1 -1
- {upgini-1.2.85.dist-info → upgini-1.2.85a3857.dev1.dist-info}/licenses/LICENSE +0 -0
upgini/__about__.py
CHANGED
@@ -1 +1 @@
|
|
1
|
-
__version__ = "1.2.85"
|
1
|
+
__version__ = "1.2.85a3857.dev1"
|
upgini/features_enricher.py
CHANGED
@@ -3,7 +3,6 @@ import datetime
|
|
3
3
|
import gc
|
4
4
|
import hashlib
|
5
5
|
import itertools
|
6
|
-
import json
|
7
6
|
import logging
|
8
7
|
import numbers
|
9
8
|
import os
|
@@ -60,7 +59,6 @@ from upgini.metadata import (
|
|
60
59
|
CVType,
|
61
60
|
FeaturesMetadataV2,
|
62
61
|
FileColumnMeaningType,
|
63
|
-
FileColumnMetadata,
|
64
62
|
ModelTaskType,
|
65
63
|
RuntimeParameters,
|
66
64
|
SearchKey,
|
@@ -2154,7 +2152,7 @@ class FeaturesEnricher(TransformerMixin):
|
|
2154
2152
|
trace_id = trace_id or uuid.uuid4()
|
2155
2153
|
return search_task.get_progress(trace_id)
|
2156
2154
|
|
2157
|
-
def
|
2155
|
+
def get_transactional_transform_api(self, only_online_sources=False):
|
2158
2156
|
if self.api_key is None:
|
2159
2157
|
raise ValidationError(self.bundle.get("transactional_transform_unregistered"))
|
2160
2158
|
if self._search_task is None:
|
@@ -2180,36 +2178,20 @@ class FeaturesEnricher(TransformerMixin):
|
|
2180
2178
|
return "test_value"
|
2181
2179
|
|
2182
2180
|
file_metadata = self._search_task.get_file_metadata(str(uuid.uuid4()))
|
2183
|
-
|
2184
|
-
def get_column_meta(column_name: str) -> FileColumnMetadata:
|
2185
|
-
for c in file_metadata.columns:
|
2186
|
-
if c.name == column_name:
|
2187
|
-
return c
|
2188
|
-
|
2189
2181
|
search_keys = file_metadata.search_types()
|
2190
2182
|
if SearchKey.IPV6_ADDRESS in search_keys:
|
2191
2183
|
search_keys.pop(SearchKey.IPV6_ADDRESS, None)
|
2192
|
-
|
2193
|
-
|
2194
|
-
|
2195
|
-
|
2196
|
-
|
2197
|
-
|
2198
|
-
|
2199
|
-
|
2200
|
-
|
2201
|
-
|
2202
|
-
|
2203
|
-
search_keys_with_values[sk_type.name] = [
|
2204
|
-
{"name": name, "value": key_example(sk_type)} for name in sk_meta.unnestKeyNames
|
2205
|
-
]
|
2206
|
-
else:
|
2207
|
-
search_keys_with_values[sk_type.name] = [{
|
2208
|
-
"name": sk_meta.originalName,
|
2209
|
-
"value": key_example(sk_type),
|
2210
|
-
}]
|
2211
|
-
|
2212
|
-
keys_section = json.dumps(search_keys_with_values)
|
2184
|
+
original_names = {c.name: c.originalName for c in file_metadata.columns}
|
2185
|
+
keys = (
|
2186
|
+
"{"
|
2187
|
+
+ ", ".join(
|
2188
|
+
[
|
2189
|
+
f'"{key.name}": {{"name": "{original_names.get(name, name)}", "value": "{key_example(key)}"}}'
|
2190
|
+
for key, name in search_keys.items()
|
2191
|
+
]
|
2192
|
+
)
|
2193
|
+
+ "}"
|
2194
|
+
)
|
2213
2195
|
features_for_transform = self._search_task.get_features_for_transform()
|
2214
2196
|
if features_for_transform:
|
2215
2197
|
original_features_for_transform = [
|
@@ -2230,7 +2212,7 @@ class FeaturesEnricher(TransformerMixin):
|
|
2230
2212
|
curl 'https://search.upgini.com/online/api/http_inference_trigger?search_id={search_id}' \\
|
2231
2213
|
-H 'Authorization: {self.api_key}' \\
|
2232
2214
|
-H 'Content-Type: application/json' \\
|
2233
|
-
-d '{{"search_keys": {
|
2215
|
+
-d '{{"search_keys": {keys}{features_section}, "only_online_sources": {str(only_online_sources).lower()}}}'
|
2234
2216
|
|
2235
2217
|
{Format.BOLD}Python{Format.END}:
|
2236
2218
|
|
@@ -2239,12 +2221,13 @@ import requests
|
|
2239
2221
|
response = requests.post(
|
2240
2222
|
url='https://search.upgini.com/online/api/http_inference_trigger?search_id={search_id}',
|
2241
2223
|
headers={{'Authorization': '{self.api_key}'}},
|
2242
|
-
json={{"search_keys": {
|
2224
|
+
json={{"search_keys": {keys}{features_section}, "only_online_sources": {only_online_sources}}}
|
2243
2225
|
)
|
2244
2226
|
if response.status_code == 200:
|
2245
2227
|
print(response.json())
|
2246
2228
|
"""
|
2247
|
-
|
2229
|
+
|
2230
|
+
return api_example
|
2248
2231
|
|
2249
2232
|
def _get_copy_of_runtime_parameters(self) -> RuntimeParameters:
|
2250
2233
|
return RuntimeParameters(properties=self.runtime_parameters.properties.copy())
|
@@ -2305,7 +2288,7 @@ if response.status_code == 200:
|
|
2305
2288
|
msg = self.bundle.get("online_api_features_transform").format(online_api_features)
|
2306
2289
|
self.logger.warning(msg)
|
2307
2290
|
print(msg)
|
2308
|
-
self.
|
2291
|
+
print(self.get_transactional_transform_api(only_online_sources=True))
|
2309
2292
|
|
2310
2293
|
if not metrics_calculation:
|
2311
2294
|
transform_usage = self.rest_client.get_current_transform_usage(trace_id)
|
upgini/metrics.py
CHANGED
@@ -391,9 +391,7 @@ class EstimatorWrapper:
|
|
391
391
|
self.converted_to_int.append(c)
|
392
392
|
self.cat_features.remove(c)
|
393
393
|
elif is_float_dtype(x[c]) or (x[c].dtype == "category" and is_float_dtype(x[c].cat.categories)):
|
394
|
-
self.logger.info(
|
395
|
-
f"Convert float cat feature {c} to string"
|
396
|
-
)
|
394
|
+
self.logger.info(f"Convert float cat feature {c} to string")
|
397
395
|
x[c] = x[c].astype(str)
|
398
396
|
self.converted_to_str.append(c)
|
399
397
|
elif x[c].dtype not in ["category", "int64"]:
|
@@ -694,7 +692,15 @@ class CatBoostWrapper(EstimatorWrapper):
|
|
694
692
|
x[c] = x[c].fillna(np.nan)
|
695
693
|
elif x[c].dtype != "category":
|
696
694
|
x[c] = x[c].fillna("NA")
|
697
|
-
|
695
|
+
if isinstance(self.cv, TimeSeriesSplit) or isinstance(self.cv, BlockedTimeSeriesSplit):
|
696
|
+
self.logger.info("Using time-aware encoder for CatBoost")
|
697
|
+
encoder = CatBoostEncoder(random_state=DEFAULT_RANDOM_STATE, cols=self.cat_features, return_df=True)
|
698
|
+
encoded = encoder.fit_transform(x[self.cat_features].astype("object"), y)
|
699
|
+
x[self.cat_features] = encoded
|
700
|
+
self.cat_encoder = encoder
|
701
|
+
else:
|
702
|
+
self.cat_encoder = None
|
703
|
+
params["cat_features"] = self.cat_features
|
698
704
|
|
699
705
|
return x, y, groups, params
|
700
706
|
|
@@ -738,7 +744,16 @@ class CatBoostWrapper(EstimatorWrapper):
|
|
738
744
|
x[c] = x[c].fillna(np.nan)
|
739
745
|
elif x[c].dtype != "category":
|
740
746
|
x[c] = x[c].fillna("NA")
|
741
|
-
|
747
|
+
if (
|
748
|
+
isinstance(self.cv, TimeSeriesSplit)
|
749
|
+
or isinstance(self.cv, BlockedTimeSeriesSplit)
|
750
|
+
and self.cat_encoder is not None
|
751
|
+
):
|
752
|
+
self.logger.info("Using time-aware encoder for CatBoost")
|
753
|
+
encoded = self.cat_encoder.transform(x[self.cat_features].astype("object"), y)
|
754
|
+
x[self.cat_features] = encoded
|
755
|
+
else:
|
756
|
+
params["cat_features"] = self.cat_features
|
742
757
|
|
743
758
|
return x, y, params
|
744
759
|
|
@@ -1,12 +1,12 @@
|
|
1
|
-
upgini/__about__.py,sha256=
|
1
|
+
upgini/__about__.py,sha256=fECI7PUZQG8IW2eHjUqgqHVtT40sMjfMgzLhuxKuQFA,33
|
2
2
|
upgini/__init__.py,sha256=LXSfTNU0HnlOkE69VCxkgIKDhWP-JFo_eBQ71OxTr5Y,261
|
3
3
|
upgini/ads.py,sha256=nvuRxRx5MHDMgPr9SiU-fsqRdFaBv8p4_v1oqiysKpc,2714
|
4
4
|
upgini/dataset.py,sha256=fRtqSkXNONLnPe6cCL967GMt349FTIpXzy_u8LUKncw,35354
|
5
5
|
upgini/errors.py,sha256=2b_Wbo0OYhLUbrZqdLIx5jBnAsiD1Mcenh-VjR4HCTw,950
|
6
|
-
upgini/features_enricher.py,sha256=
|
6
|
+
upgini/features_enricher.py,sha256=2ryADtOVEEebuUBhimusvnBzGxUkdTaqpEh2F1PqHSs,212719
|
7
7
|
upgini/http.py,sha256=AfaJ3c8z_tK2hZFEehNybDKE0mp1tYcyAP_l0_p8bLQ,43933
|
8
8
|
upgini/metadata.py,sha256=zt_9k0iQbWXuiRZcel4ORNPdQKt6Ou69ucZD_E1Q46o,12341
|
9
|
-
upgini/metrics.py,sha256=
|
9
|
+
upgini/metrics.py,sha256=zRrRpNqjSTubsyKPi0_jbHjE8QO_YqyHWtt1B5MfVH8,44086
|
10
10
|
upgini/search_task.py,sha256=Q5HjBpLIB3OCxAD1zNv5yQ3ZNJx696WCK_-H35_y7Rs,17912
|
11
11
|
upgini/spinner.py,sha256=4iMd-eIe_BnkqFEMIliULTbj6rNI2HkN_VJ4qYe0cUc,1118
|
12
12
|
upgini/version_validator.py,sha256=DvbaAvuYFoJqYt0fitpsk6Xcv-H1BYDJYHUMxaKSH_Y,1509
|
@@ -70,7 +70,7 @@ upgini/utils/target_utils.py,sha256=LRN840dzx78-wg7ftdxAkp2c1eu8-JDvkACiRThm4HE,
|
|
70
70
|
upgini/utils/track_info.py,sha256=G5Lu1xxakg2_TQjKZk4b5SvrHsATTXNVV3NbvWtT8k8,5663
|
71
71
|
upgini/utils/ts_utils.py,sha256=26vhC0pN7vLXK6R09EEkMK3Lwb9IVPH7LRdqFIQ3kPs,1383
|
72
72
|
upgini/utils/warning_counter.py,sha256=-GRY8EUggEBKODPSuXAkHn9KnEQwAORC0mmz_tim-PM,254
|
73
|
-
upgini-1.2.
|
74
|
-
upgini-1.2.
|
75
|
-
upgini-1.2.
|
76
|
-
upgini-1.2.
|
73
|
+
upgini-1.2.85a3857.dev1.dist-info/METADATA,sha256=XycmCsMeqC_7hsO0YzR0E8b4eGnIcD-MBzuFvB4T24s,49172
|
74
|
+
upgini-1.2.85a3857.dev1.dist-info/WHEEL,sha256=zEMcRr9Kr03x1ozGwg5v9NQBKn3kndp6LSoSlVg-jhU,87
|
75
|
+
upgini-1.2.85a3857.dev1.dist-info/licenses/LICENSE,sha256=5RRzgvdJUu3BUDfv4bzVU6FqKgwHlIay63pPCSmSgzw,1514
|
76
|
+
upgini-1.2.85a3857.dev1.dist-info/RECORD,,
|
File without changes
|