upgini 1.2.85__py3-none-any.whl → 1.2.85a3857.dev1__py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of upgini might be problematic. Click here for more details.

upgini/__about__.py CHANGED
@@ -1 +1 @@
1
- __version__ = "1.2.85"
1
+ __version__ = "1.2.85a3857.dev1"
@@ -3,7 +3,6 @@ import datetime
3
3
  import gc
4
4
  import hashlib
5
5
  import itertools
6
- import json
7
6
  import logging
8
7
  import numbers
9
8
  import os
@@ -60,7 +59,6 @@ from upgini.metadata import (
60
59
  CVType,
61
60
  FeaturesMetadataV2,
62
61
  FileColumnMeaningType,
63
- FileColumnMetadata,
64
62
  ModelTaskType,
65
63
  RuntimeParameters,
66
64
  SearchKey,
@@ -2154,7 +2152,7 @@ class FeaturesEnricher(TransformerMixin):
2154
2152
  trace_id = trace_id or uuid.uuid4()
2155
2153
  return search_task.get_progress(trace_id)
2156
2154
 
2157
- def display_transactional_transform_api(self, only_online_sources=False):
2155
+ def get_transactional_transform_api(self, only_online_sources=False):
2158
2156
  if self.api_key is None:
2159
2157
  raise ValidationError(self.bundle.get("transactional_transform_unregistered"))
2160
2158
  if self._search_task is None:
@@ -2180,36 +2178,20 @@ class FeaturesEnricher(TransformerMixin):
2180
2178
  return "test_value"
2181
2179
 
2182
2180
  file_metadata = self._search_task.get_file_metadata(str(uuid.uuid4()))
2183
-
2184
- def get_column_meta(column_name: str) -> FileColumnMetadata:
2185
- for c in file_metadata.columns:
2186
- if c.name == column_name:
2187
- return c
2188
-
2189
2181
  search_keys = file_metadata.search_types()
2190
2182
  if SearchKey.IPV6_ADDRESS in search_keys:
2191
2183
  search_keys.pop(SearchKey.IPV6_ADDRESS, None)
2192
-
2193
- search_keys_with_values = dict()
2194
- for sk_type, sk_name in search_keys.items():
2195
- if sk_type == SearchKey.IPV6_ADDRESS:
2196
- continue
2197
-
2198
- sk_meta = get_column_meta(sk_name)
2199
- if sk_meta is None:
2200
- search_keys_with_values[sk_type.name] = [{"name": sk_name, "value": key_example(sk_type)}]
2201
- else:
2202
- if sk_meta.isUnnest:
2203
- search_keys_with_values[sk_type.name] = [
2204
- {"name": name, "value": key_example(sk_type)} for name in sk_meta.unnestKeyNames
2205
- ]
2206
- else:
2207
- search_keys_with_values[sk_type.name] = [{
2208
- "name": sk_meta.originalName,
2209
- "value": key_example(sk_type),
2210
- }]
2211
-
2212
- keys_section = json.dumps(search_keys_with_values)
2184
+ original_names = {c.name: c.originalName for c in file_metadata.columns}
2185
+ keys = (
2186
+ "{"
2187
+ + ", ".join(
2188
+ [
2189
+ f'"{key.name}": {{"name": "{original_names.get(name, name)}", "value": "{key_example(key)}"}}'
2190
+ for key, name in search_keys.items()
2191
+ ]
2192
+ )
2193
+ + "}"
2194
+ )
2213
2195
  features_for_transform = self._search_task.get_features_for_transform()
2214
2196
  if features_for_transform:
2215
2197
  original_features_for_transform = [
@@ -2230,7 +2212,7 @@ class FeaturesEnricher(TransformerMixin):
2230
2212
  curl 'https://search.upgini.com/online/api/http_inference_trigger?search_id={search_id}' \\
2231
2213
  -H 'Authorization: {self.api_key}' \\
2232
2214
  -H 'Content-Type: application/json' \\
2233
- -d '{{"search_keys": {keys_section}{features_section}, "only_online_sources": {str(only_online_sources).lower()}}}'
2215
+ -d '{{"search_keys": {keys}{features_section}, "only_online_sources": {str(only_online_sources).lower()}}}'
2234
2216
 
2235
2217
  {Format.BOLD}Python{Format.END}:
2236
2218
 
@@ -2239,12 +2221,13 @@ import requests
2239
2221
  response = requests.post(
2240
2222
  url='https://search.upgini.com/online/api/http_inference_trigger?search_id={search_id}',
2241
2223
  headers={{'Authorization': '{self.api_key}'}},
2242
- json={{"search_keys": {keys_section}{features_section}, "only_online_sources": {only_online_sources}}}
2224
+ json={{"search_keys": {keys}{features_section}, "only_online_sources": {only_online_sources}}}
2243
2225
  )
2244
2226
  if response.status_code == 200:
2245
2227
  print(response.json())
2246
2228
  """
2247
- print(api_example)
2229
+
2230
+ return api_example
2248
2231
 
2249
2232
  def _get_copy_of_runtime_parameters(self) -> RuntimeParameters:
2250
2233
  return RuntimeParameters(properties=self.runtime_parameters.properties.copy())
@@ -2305,7 +2288,7 @@ if response.status_code == 200:
2305
2288
  msg = self.bundle.get("online_api_features_transform").format(online_api_features)
2306
2289
  self.logger.warning(msg)
2307
2290
  print(msg)
2308
- self.display_transactional_transform_api(only_online_sources=True)
2291
+ print(self.get_transactional_transform_api(only_online_sources=True))
2309
2292
 
2310
2293
  if not metrics_calculation:
2311
2294
  transform_usage = self.rest_client.get_current_transform_usage(trace_id)
upgini/metrics.py CHANGED
@@ -391,9 +391,7 @@ class EstimatorWrapper:
391
391
  self.converted_to_int.append(c)
392
392
  self.cat_features.remove(c)
393
393
  elif is_float_dtype(x[c]) or (x[c].dtype == "category" and is_float_dtype(x[c].cat.categories)):
394
- self.logger.info(
395
- f"Convert float cat feature {c} to string"
396
- )
394
+ self.logger.info(f"Convert float cat feature {c} to string")
397
395
  x[c] = x[c].astype(str)
398
396
  self.converted_to_str.append(c)
399
397
  elif x[c].dtype not in ["category", "int64"]:
@@ -694,7 +692,15 @@ class CatBoostWrapper(EstimatorWrapper):
694
692
  x[c] = x[c].fillna(np.nan)
695
693
  elif x[c].dtype != "category":
696
694
  x[c] = x[c].fillna("NA")
697
- params["cat_features"] = self.cat_features
695
+ if isinstance(self.cv, TimeSeriesSplit) or isinstance(self.cv, BlockedTimeSeriesSplit):
696
+ self.logger.info("Using time-aware encoder for CatBoost")
697
+ encoder = CatBoostEncoder(random_state=DEFAULT_RANDOM_STATE, cols=self.cat_features, return_df=True)
698
+ encoded = encoder.fit_transform(x[self.cat_features].astype("object"), y)
699
+ x[self.cat_features] = encoded
700
+ self.cat_encoder = encoder
701
+ else:
702
+ self.cat_encoder = None
703
+ params["cat_features"] = self.cat_features
698
704
 
699
705
  return x, y, groups, params
700
706
 
@@ -738,7 +744,16 @@ class CatBoostWrapper(EstimatorWrapper):
738
744
  x[c] = x[c].fillna(np.nan)
739
745
  elif x[c].dtype != "category":
740
746
  x[c] = x[c].fillna("NA")
741
- params["cat_features"] = self.cat_features
747
+ if (
748
+ isinstance(self.cv, TimeSeriesSplit)
749
+ or isinstance(self.cv, BlockedTimeSeriesSplit)
750
+ and self.cat_encoder is not None
751
+ ):
752
+ self.logger.info("Using time-aware encoder for CatBoost")
753
+ encoded = self.cat_encoder.transform(x[self.cat_features].astype("object"), y)
754
+ x[self.cat_features] = encoded
755
+ else:
756
+ params["cat_features"] = self.cat_features
742
757
 
743
758
  return x, y, params
744
759
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: upgini
3
- Version: 1.2.85
3
+ Version: 1.2.85a3857.dev1
4
4
  Summary: Intelligent data search & enrichment for Machine Learning
5
5
  Project-URL: Bug Reports, https://github.com/upgini/upgini/issues
6
6
  Project-URL: Homepage, https://upgini.com/
@@ -1,12 +1,12 @@
1
- upgini/__about__.py,sha256=GQcOPY8MByuUtzBlWgL-5Ml2KS1SWi19jFLxrEPAY_Q,23
1
+ upgini/__about__.py,sha256=fECI7PUZQG8IW2eHjUqgqHVtT40sMjfMgzLhuxKuQFA,33
2
2
  upgini/__init__.py,sha256=LXSfTNU0HnlOkE69VCxkgIKDhWP-JFo_eBQ71OxTr5Y,261
3
3
  upgini/ads.py,sha256=nvuRxRx5MHDMgPr9SiU-fsqRdFaBv8p4_v1oqiysKpc,2714
4
4
  upgini/dataset.py,sha256=fRtqSkXNONLnPe6cCL967GMt349FTIpXzy_u8LUKncw,35354
5
5
  upgini/errors.py,sha256=2b_Wbo0OYhLUbrZqdLIx5jBnAsiD1Mcenh-VjR4HCTw,950
6
- upgini/features_enricher.py,sha256=G0qbRPdlWe9p6cwYF3khP99-0kgAO8N0A2sfQxSLgmM,213446
6
+ upgini/features_enricher.py,sha256=2ryADtOVEEebuUBhimusvnBzGxUkdTaqpEh2F1PqHSs,212719
7
7
  upgini/http.py,sha256=AfaJ3c8z_tK2hZFEehNybDKE0mp1tYcyAP_l0_p8bLQ,43933
8
8
  upgini/metadata.py,sha256=zt_9k0iQbWXuiRZcel4ORNPdQKt6Ou69ucZD_E1Q46o,12341
9
- upgini/metrics.py,sha256=3cip0_L6-OFew74KsRwzxJDU6UFq05h2v7IsyHLcMRc,43164
9
+ upgini/metrics.py,sha256=zRrRpNqjSTubsyKPi0_jbHjE8QO_YqyHWtt1B5MfVH8,44086
10
10
  upgini/search_task.py,sha256=Q5HjBpLIB3OCxAD1zNv5yQ3ZNJx696WCK_-H35_y7Rs,17912
11
11
  upgini/spinner.py,sha256=4iMd-eIe_BnkqFEMIliULTbj6rNI2HkN_VJ4qYe0cUc,1118
12
12
  upgini/version_validator.py,sha256=DvbaAvuYFoJqYt0fitpsk6Xcv-H1BYDJYHUMxaKSH_Y,1509
@@ -70,7 +70,7 @@ upgini/utils/target_utils.py,sha256=LRN840dzx78-wg7ftdxAkp2c1eu8-JDvkACiRThm4HE,
70
70
  upgini/utils/track_info.py,sha256=G5Lu1xxakg2_TQjKZk4b5SvrHsATTXNVV3NbvWtT8k8,5663
71
71
  upgini/utils/ts_utils.py,sha256=26vhC0pN7vLXK6R09EEkMK3Lwb9IVPH7LRdqFIQ3kPs,1383
72
72
  upgini/utils/warning_counter.py,sha256=-GRY8EUggEBKODPSuXAkHn9KnEQwAORC0mmz_tim-PM,254
73
- upgini-1.2.85.dist-info/METADATA,sha256=cEtUjRx8eUntASmye2LUmZX78RCWtrMm43z2ZCWyhW8,49162
74
- upgini-1.2.85.dist-info/WHEEL,sha256=1yFddiXMmvYK7QYTqtRNtX66WJ0Mz8PYEiEUoOUUxRY,87
75
- upgini-1.2.85.dist-info/licenses/LICENSE,sha256=5RRzgvdJUu3BUDfv4bzVU6FqKgwHlIay63pPCSmSgzw,1514
76
- upgini-1.2.85.dist-info/RECORD,,
73
+ upgini-1.2.85a3857.dev1.dist-info/METADATA,sha256=XycmCsMeqC_7hsO0YzR0E8b4eGnIcD-MBzuFvB4T24s,49172
74
+ upgini-1.2.85a3857.dev1.dist-info/WHEEL,sha256=zEMcRr9Kr03x1ozGwg5v9NQBKn3kndp6LSoSlVg-jhU,87
75
+ upgini-1.2.85a3857.dev1.dist-info/licenses/LICENSE,sha256=5RRzgvdJUu3BUDfv4bzVU6FqKgwHlIay63pPCSmSgzw,1514
76
+ upgini-1.2.85a3857.dev1.dist-info/RECORD,,
@@ -1,4 +1,4 @@
1
1
  Wheel-Version: 1.0
2
- Generator: hatchling 1.25.0
2
+ Generator: hatchling 1.24.2
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any