upgini-1.2.107-py3-none-any.whl → upgini-1.2.109-py3-none-any.whl

This diff compares the contents of two publicly released versions of the package, as they appear in their respective public registries. It is provided for informational purposes only.
upgini/__about__.py CHANGED
@@ -1 +1 @@
-__version__ = "1.2.107"
+__version__ = "1.2.109"
upgini/features_enricher.py CHANGED
@@ -234,6 +234,7 @@ class FeaturesEnricher(TransformerMixin):
         id_columns: Optional[List[str]] = None,
         generate_search_key_features: bool = True,
         sample_config: Optional[SampleConfig] = None,
+        print_trace_id: bool = False,
         **kwargs,
     ):
         self.bundle = get_custom_bundle(custom_bundle_config)
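This hunk adds an opt-in `print_trace_id` flag (default `False`) to the enricher's constructor. A minimal usage sketch; `FeaturesEnricher` and `SearchKey` are the library's documented entry points, while the data file and column names below are invented for illustration:

```python
import pandas as pd
from upgini import FeaturesEnricher, SearchKey

# Hypothetical training frame; "rep_date" and "target" are illustrative names.
df = pd.read_csv("train.csv")

enricher = FeaturesEnricher(
    search_keys={"rep_date": SearchKey.DATE},
    print_trace_id=True,  # new in 1.2.109: echo a Datadog log-search link per operation
)

# With the flag set, fit() prints
# https://app.datadoghq.eu/logs?query=%40trace_id%3A<trace_id>
# before starting the search, so a support engineer can jump straight to the logs.
enricher.fit(df.drop(columns=["target"]), df["target"])
```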
@@ -305,6 +306,8 @@ class FeaturesEnricher(TransformerMixin):
 
         print(self.bundle.get("search_by_task_id_start"))
         trace_id = str(uuid.uuid4())
+        if self.print_trace_id:
+            print(f"https://app.datadoghq.eu/logs?query=%40trace_id%3A{trace_id}")
         with MDC(trace_id=trace_id):
             try:
                 self.logger.debug(f"FeaturesEnricher created from existing search: {search_id}")
@@ -368,6 +371,7 @@ class FeaturesEnricher(TransformerMixin):
         self.data_sources_display_handle = None
         self.autofe_features_display_handle = None
         self.report_button_handle = None
+        self.print_trace_id = print_trace_id
 
     def _get_sample_config(self, sample_config: Optional[SampleConfig] = None):
         sample_config = sample_config or SampleConfig(force_sample_size=Dataset.FORCE_SAMPLE_SIZE)
@@ -463,6 +467,8 @@ class FeaturesEnricher(TransformerMixin):
        Otherwise, return all features from input and only selected features from data sources.
        """
        trace_id = str(uuid.uuid4())
+       if self.print_trace_id:
+           print(f"https://app.datadoghq.eu/logs?query=%40trace_id%3A{trace_id}")
        start_time = time.time()
        auto_fe_parameters = AutoFEParameters() if auto_fe_parameters is None else auto_fe_parameters
        search_progress = SearchProgress(0.0, ProgressStage.START_FIT)
@@ -621,6 +627,8 @@ class FeaturesEnricher(TransformerMixin):
         self.warning_counter.reset()
         auto_fe_parameters = AutoFEParameters() if auto_fe_parameters is None else auto_fe_parameters
         trace_id = str(uuid.uuid4())
+        if self.print_trace_id:
+            print(f"https://app.datadoghq.eu/logs?query=%40trace_id%3A{trace_id}")
         start_time = time.time()
         with MDC(trace_id=trace_id):
             if len(args) > 0:
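All three trace-id hunks print the same kind of link: a Datadog Logs search for the query `@trace_id:<uuid>`, where `%40` and `%3A` are the percent-encodings of `@` and `:`. A quick sketch reproducing that encoding with the standard library instead of a hard-coded f-string:

```python
import uuid
from urllib.parse import quote

trace_id = str(uuid.uuid4())

# quote("@trace_id:<uuid>") yields "%40trace_id%3A<uuid>", matching the
# literal encoding used in the diff above.
url = "https://app.datadoghq.eu/logs?query=" + quote(f"@trace_id:{trace_id}", safe="")
print(url)
```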
@@ -4635,65 +4643,78 @@ if response.status_code == 200:
         eval_set: Union[Tuple, None] = None,
     ):
         def dump_task(X_, y_, eval_set_):
-            try:
-                if isinstance(X_, pd.Series):
-                    X_ = X_.to_frame()
-
-                with tempfile.TemporaryDirectory() as tmp_dir:
-                    X_.to_parquet(f"{tmp_dir}/x.parquet", compression="zstd")
-                    x_digest_sha256 = self.rest_client.compute_file_digest(f"{tmp_dir}/x.parquet")
-                    if self.rest_client.is_file_uploaded(trace_id, x_digest_sha256):
-                        self.logger.info(f"File x.parquet was already uploaded with digest {x_digest_sha256}, skipping")
-                    else:
-                        self.rest_client.dump_input_file(trace_id, f"{tmp_dir}/x.parquet", "x.parquet")
-
-                    if y_ is not None:
-                        if isinstance(y_, pd.Series):
-                            y_ = y_.to_frame()
-                        y_.to_parquet(f"{tmp_dir}/y.parquet", compression="zstd")
-                        y_digest_sha256 = self.rest_client.compute_file_digest(f"{tmp_dir}/y.parquet")
-                        if self.rest_client.is_file_uploaded(trace_id, y_digest_sha256):
+            with MDC(trace_id=trace_id):
+                try:
+                    if isinstance(X_, pd.Series):
+                        X_ = X_.to_frame()
+
+                    with tempfile.TemporaryDirectory() as tmp_dir:
+                        X_.to_parquet(f"{tmp_dir}/x.parquet", compression="zstd")
+                        x_digest_sha256 = self.rest_client.compute_file_digest(f"{tmp_dir}/x.parquet")
+                        if self.rest_client.is_file_uploaded(trace_id, x_digest_sha256):
                             self.logger.info(
-                                f"File y.parquet was already uploaded with digest {y_digest_sha256}, skipping"
+                                f"File x.parquet was already uploaded with digest {x_digest_sha256}, skipping"
                             )
                         else:
-                            self.rest_client.dump_input_file(trace_id, f"{tmp_dir}/y.parquet", "y.parquet")
-
-                    if eval_set_ is not None and len(eval_set_) > 0:
-                        for idx, (eval_x_, eval_y_) in enumerate(eval_set_):
-                            if isinstance(eval_x_, pd.Series):
-                                eval_x_ = eval_x_.to_frame()
-                            eval_x_.to_parquet(f"{tmp_dir}/eval_x_{idx}.parquet", compression="zstd")
-                            eval_x_digest_sha256 = self.rest_client.compute_file_digest(
-                                f"{tmp_dir}/eval_x_{idx}.parquet"
-                            )
-                            if self.rest_client.is_file_uploaded(trace_id, eval_x_digest_sha256):
-                                self.logger.info(
-                                    f"File eval_x_{idx}.parquet was already uploaded with"
-                                    f" digest {eval_x_digest_sha256}, skipping"
-                                )
-                            else:
-                                self.rest_client.dump_input_file(
-                                    trace_id, f"{tmp_dir}/eval_x_{idx}.parquet", f"eval_x_{idx}.parquet"
-                                )
+                            self.rest_client.dump_input_file(
+                                trace_id, f"{tmp_dir}/x.parquet", "x.parquet", x_digest_sha256
+                            )
 
-                            if isinstance(eval_y_, pd.Series):
-                                eval_y_ = eval_y_.to_frame()
-                            eval_y_.to_parquet(f"{tmp_dir}/eval_y_{idx}.parquet", compression="zstd")
-                            eval_y_digest_sha256 = self.rest_client.compute_file_digest(
-                                f"{tmp_dir}/eval_y_{idx}.parquet"
+                        if y_ is not None:
+                            if isinstance(y_, pd.Series):
+                                y_ = y_.to_frame()
+                            y_.to_parquet(f"{tmp_dir}/y.parquet", compression="zstd")
+                            y_digest_sha256 = self.rest_client.compute_file_digest(f"{tmp_dir}/y.parquet")
+                            if self.rest_client.is_file_uploaded(trace_id, y_digest_sha256):
+                                self.logger.info(
+                                    f"File y.parquet was already uploaded with digest {y_digest_sha256}, skipping"
                                 )
-                            if self.rest_client.is_file_uploaded(trace_id, eval_y_digest_sha256):
-                                self.logger.info(
-                                    f"File eval_y_{idx}.parquet was already uploaded"
-                                    f" with digest {eval_y_digest_sha256}, skipping"
+                            else:
+                                self.rest_client.dump_input_file(
+                                    trace_id, f"{tmp_dir}/y.parquet", "y.parquet", y_digest_sha256
+                                )
+
+                        if eval_set_ is not None and len(eval_set_) > 0:
+                            for idx, (eval_x_, eval_y_) in enumerate(eval_set_):
+                                if isinstance(eval_x_, pd.Series):
+                                    eval_x_ = eval_x_.to_frame()
+                                eval_x_.to_parquet(f"{tmp_dir}/eval_x_{idx}.parquet", compression="zstd")
+                                eval_x_digest_sha256 = self.rest_client.compute_file_digest(
+                                    f"{tmp_dir}/eval_x_{idx}.parquet"
                                 )
-                            else:
-                                self.rest_client.dump_input_file(
-                                    trace_id, f"{tmp_dir}/eval_y_{idx}.parquet", f"eval_y_{idx}.parquet"
+                                if self.rest_client.is_file_uploaded(trace_id, eval_x_digest_sha256):
+                                    self.logger.info(
+                                        f"File eval_x_{idx}.parquet was already uploaded with"
+                                        f" digest {eval_x_digest_sha256}, skipping"
+                                    )
+                                else:
+                                    self.rest_client.dump_input_file(
+                                        trace_id,
+                                        f"{tmp_dir}/eval_x_{idx}.parquet",
+                                        f"eval_x_{idx}.parquet",
+                                        eval_x_digest_sha256,
+                                    )
+
+                                if isinstance(eval_y_, pd.Series):
+                                    eval_y_ = eval_y_.to_frame()
+                                eval_y_.to_parquet(f"{tmp_dir}/eval_y_{idx}.parquet", compression="zstd")
+                                eval_y_digest_sha256 = self.rest_client.compute_file_digest(
+                                    f"{tmp_dir}/eval_y_{idx}.parquet"
                                 )
-            except Exception:
-                self.logger.warning("Failed to dump input files", exc_info=True)
+                                if self.rest_client.is_file_uploaded(trace_id, eval_y_digest_sha256):
+                                    self.logger.info(
+                                        f"File eval_y_{idx}.parquet was already uploaded"
+                                        f" with digest {eval_y_digest_sha256}, skipping"
+                                    )
+                                else:
+                                    self.rest_client.dump_input_file(
+                                        trace_id,
+                                        f"{tmp_dir}/eval_y_{idx}.parquet",
+                                        f"eval_y_{idx}.parquet",
+                                        eval_y_digest_sha256,
+                                    )
+                except Exception:
+                    self.logger.warning("Failed to dump input files", exc_info=True)
 
         try:
             Thread(target=dump_task, args=(X, y, eval_set), daemon=True).start()
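Besides passing each file's digest through to `dump_input_file`, the rewritten `dump_task` now opens its own `MDC(trace_id=trace_id)` block. Since `dump_task` runs on a daemon `Thread`, and MDC-style logging contexts are typically stored thread-locally, the caller's trace id would otherwise not be visible to log lines emitted from the worker. A sketch of that behavior, using a hypothetical thread-local stand-in rather than upgini's actual `MDC` implementation:

```python
import threading

_ctx = threading.local()  # hypothetical stand-in for the MDC's storage

class MDC:
    """Minimal mapped-diagnostic-context: stashes key/values thread-locally."""
    def __init__(self, **kwargs):
        self.kwargs = kwargs
    def __enter__(self):
        _ctx.fields = dict(self.kwargs)
    def __exit__(self, *exc):
        _ctx.fields = {}

def log(msg):
    # A real setup would inject _ctx.fields into log records via a logging.Filter.
    print(getattr(_ctx, "fields", {}), msg)

with MDC(trace_id="abc-123"):
    log("caller thread")                 # {'trace_id': 'abc-123'} caller thread

    def worker():
        log("worker, no MDC")            # {} -- the context did not propagate
        with MDC(trace_id="abc-123"):    # re-enter, as the new dump_task does
            log("worker, with MDC")      # {'trace_id': 'abc-123'} worker, with MDC

    t = threading.Thread(target=worker, daemon=True)
    t.start()
    t.join()
```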
upgini/http.py CHANGED
@@ -274,7 +274,7 @@ class _RestClient:
     SEARCH_FILE_METADATA_URI_FMT_V2 = SERVICE_ROOT_V2 + "search/{0}/metadata"
     SEARCH_TASK_METADATA_FMT_V3 = SERVICE_ROOT_V2 + "search/metadata-v2/{0}"
     SEARCH_DUMP_INPUT_FMT_V2 = SERVICE_ROOT_V2 + "search/dump-input"
-    SEARCH_DUMP_INPUT_FILE_FMT = SERVICE_ROOT_V2 + "search/dump-input-file"
+    SEARCH_DUMP_INPUT_FILE_FMT = SERVICE_ROOT_V2 + "search/dump-input-file?digest={0}"
     TRANSFORM_USAGE_FMT = SERVICE_ROOT_V2 + "user/transform-usage"
 
     UPLOAD_USER_ADS_URI = SERVICE_ROOT + "ads/upload"
@@ -406,8 +406,8 @@ class _RestClient:
        meaning_types = [_RestClient.meaning_type_by_name(name, metadata) for name in search_key_names]
        return [meaning_type.value for meaning_type in meaning_types if meaning_type is not None]
 
-    def dump_input_file(self, trace_id: str, path: str, file_name: str):
-        api_path = self.SEARCH_DUMP_INPUT_FILE_FMT
+    def dump_input_file(self, trace_id: str, path: str, file_name: str, digest: str):
+        api_path = self.SEARCH_DUMP_INPUT_FILE_FMT.format(digest)
         with open(path, "rb") as file:
             files = {"file": (file_name, file, "application/octet-stream")}
             self._with_unauth_retry(
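With the format string gaining a `?digest={0}` query parameter, `dump_input_file` now tells the server each file's digest up front instead of leaving the server to re-hash the upload. A sketch of how the URL comes together, assuming `compute_file_digest` is a plain streaming SHA-256 over the file bytes (the service root below is illustrative, not the real endpoint):

```python
import hashlib
import os
import tempfile

SERVICE_ROOT_V2 = "https://example.upgini.invalid/api/v2/"  # illustrative only
SEARCH_DUMP_INPUT_FILE_FMT = SERVICE_ROOT_V2 + "search/dump-input-file?digest={0}"

def compute_file_digest(path: str) -> str:
    # Assumption: a straightforward streaming SHA-256 of the file contents.
    h = hashlib.sha256()
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(1 << 20), b""):
            h.update(chunk)
    return h.hexdigest()

# Stand-in file, so the sketch is self-contained.
with tempfile.NamedTemporaryFile(delete=False) as f:
    f.write(b"example parquet bytes")
    path = f.name

print(SEARCH_DUMP_INPUT_FILE_FMT.format(compute_file_digest(path)))
# -> .../search/dump-input-file?digest=<64 hex chars>
os.unlink(path)
```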
{upgini-1.2.107.dist-info → upgini-1.2.109.dist-info}/METADATA RENAMED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.3
 Name: upgini
-Version: 1.2.107
+Version: 1.2.109
 Summary: Intelligent data search & enrichment for Machine Learning
 Project-URL: Bug Reports, https://github.com/upgini/upgini/issues
 Project-URL: Homepage, https://upgini.com/
{upgini-1.2.107.dist-info → upgini-1.2.109.dist-info}/RECORD RENAMED
@@ -1,10 +1,10 @@
-upgini/__about__.py,sha256=E6dOhnEHkanXoxanNLOMoP_tHzJfqbVNEEIpbqjineU,24
+upgini/__about__.py,sha256=7vuOa8DdGmQwFnLZgeu6LWmt3WSDwR-t1m-qM17qEfc,24
 upgini/__init__.py,sha256=LXSfTNU0HnlOkE69VCxkgIKDhWP-JFo_eBQ71OxTr5Y,261
 upgini/ads.py,sha256=nvuRxRx5MHDMgPr9SiU-fsqRdFaBv8p4_v1oqiysKpc,2714
 upgini/dataset.py,sha256=e6JDYTZ2AwC5aF-dqclKZKkiKrHo2f6cFmMQO2ZZmjM,32724
 upgini/errors.py,sha256=2b_Wbo0OYhLUbrZqdLIx5jBnAsiD1Mcenh-VjR4HCTw,950
-upgini/features_enricher.py,sha256=EVDTob2gmtQSpo1wYdExmd0sXDqeZtmg01zr5QHBXZc,220494
-upgini/http.py,sha256=DNcoS7qdxG0mOJn6I8r6O5I6XdIJTdzDzW3hkz3NgG4,45443
+upgini/features_enricher.py,sha256=Nn5lDPOoKmfJV9gkzOx_E0AjF_8_le0qzdjw583hN8U,221729
+upgini/http.py,sha256=zeAZvT6IAzOs9jQ3WG8mJBANLajgvv2LZePFzKz004w,45482
 upgini/metadata.py,sha256=9_0lFEWPpIHRBW-xWYSEcwPzICTC6_bQ6dUUlE75Xns,12773
 upgini/metrics.py,sha256=V2SP6NS5bfFHzRqufeKVsCXME1yG4t_8Dmk2E3zKdYk,45715
 upgini/search_task.py,sha256=Q5HjBpLIB3OCxAD1zNv5yQ3ZNJx696WCK_-H35_y7Rs,17912
@@ -71,7 +71,7 @@ upgini/utils/target_utils.py,sha256=i3Xt5l9ybB2_nF_ma5cfPuL3OeFTs2dY2xDI0p4Azpg,
 upgini/utils/track_info.py,sha256=G5Lu1xxakg2_TQjKZk4b5SvrHsATTXNVV3NbvWtT8k8,5663
 upgini/utils/ts_utils.py,sha256=26vhC0pN7vLXK6R09EEkMK3Lwb9IVPH7LRdqFIQ3kPs,1383
 upgini/utils/warning_counter.py,sha256=-GRY8EUggEBKODPSuXAkHn9KnEQwAORC0mmz_tim-PM,254
-upgini-1.2.107.dist-info/METADATA,sha256=P8FTzwQgOC9b_KOsV32-zHtNmsdXDnT3xyAO2Ggin74,49529
-upgini-1.2.107.dist-info/WHEEL,sha256=1yFddiXMmvYK7QYTqtRNtX66WJ0Mz8PYEiEUoOUUxRY,87
-upgini-1.2.107.dist-info/licenses/LICENSE,sha256=5RRzgvdJUu3BUDfv4bzVU6FqKgwHlIay63pPCSmSgzw,1514
-upgini-1.2.107.dist-info/RECORD,,
+upgini-1.2.109.dist-info/METADATA,sha256=cnzCNjHafb1Lpr-_U2w1EUNRHJKI2CYDSqj-LlohDic,49529
+upgini-1.2.109.dist-info/WHEEL,sha256=1yFddiXMmvYK7QYTqtRNtX66WJ0Mz8PYEiEUoOUUxRY,87
+upgini-1.2.109.dist-info/licenses/LICENSE,sha256=5RRzgvdJUu3BUDfv4bzVU6FqKgwHlIay63pPCSmSgzw,1514
+upgini-1.2.109.dist-info/RECORD,,