upgini 1.2.46__py3-none-any.whl → 1.2.48__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of upgini might be problematic. Click here for more details.

upgini/__about__.py CHANGED
@@ -1 +1 @@
1
- __version__ = "1.2.46"
1
+ __version__ = "1.2.48"
upgini/features_enricher.py CHANGED
@@ -1486,8 +1486,8 @@ class FeaturesEnricher(TransformerMixin):
1486
1486
  for c in X_sampled.columns.to_list()
1487
1487
  if (
1488
1488
  not self.fit_select_features
1489
- or c in self.feature_names_
1490
- or (self.fit_columns_renaming is not None and self.fit_columns_renaming.get(c) in self.feature_names_)
1489
+ or c in set(self.feature_names_).union(self.id_columns)
1490
+ or (self.fit_columns_renaming or {}).get(c, c) in set(self.feature_names_).union(self.id_columns)
1491
1491
  )
1492
1492
  and c
1493
1493
  not in (
@@ -2191,7 +2191,9 @@ if response.status_code == 200:
2191
2191
 
2192
2192
  search_keys = self.search_keys.copy()
2193
2193
  if self.id_columns is not None and self.cv is not None and self.cv.is_time_series():
2194
- self.search_keys.update({col: SearchKey.CUSTOM_KEY for col in self.id_columns})
2194
+ self.search_keys.update(
2195
+ {col: SearchKey.CUSTOM_KEY for col in self.id_columns if col not in self.search_keys}
2196
+ )
2195
2197
 
2196
2198
  search_keys = self.__prepare_search_keys(
2197
2199
  validated_X, search_keys, is_demo_dataset, is_transform=True, silent_mode=silent_mode
@@ -2716,8 +2718,12 @@ if response.status_code == 200:
2716
2718
  if self.id_columns is not None and self.cv is not None and self.cv.is_time_series():
2717
2719
  id_columns = self.__get_renamed_id_columns()
2718
2720
  if id_columns:
2719
- self.fit_search_keys.update({col: SearchKey.CUSTOM_KEY for col in id_columns})
2720
- self.search_keys.update({col: SearchKey.CUSTOM_KEY for col in self.id_columns})
2721
+ self.fit_search_keys.update(
2722
+ {col: SearchKey.CUSTOM_KEY for col in id_columns if col not in self.fit_search_keys}
2723
+ )
2724
+ self.search_keys.update(
2725
+ {col: SearchKey.CUSTOM_KEY for col in self.id_columns if col not in self.search_keys}
2726
+ )
2721
2727
  self.runtime_parameters.properties["id_columns"] = ",".join(id_columns)
2722
2728
 
2723
2729
  df, fintech_warnings = remove_fintech_duplicates(
upgini/http.py CHANGED
@@ -16,7 +16,7 @@ from typing import Any, Dict, List, Optional, Tuple
16
16
  from urllib.parse import urljoin
17
17
 
18
18
  import jwt
19
- import pandas as pd
19
+ # import pandas as pd
20
20
  import requests
21
21
  from pydantic import BaseModel
22
22
  from pythonjsonlogger import jsonlogger
@@ -422,6 +422,16 @@ class _RestClient:
422
422
  lambda: self._send_post_file_req_v2(api_path, files, trace_id=trace_id, need_json_response=False)
423
423
  )
424
424
 
425
+ @staticmethod
426
+ def compute_file_digest(filepath: str, algorithm="sha256", chunk_size=4096) -> str:
427
+ hash_func = getattr(hashlib, algorithm)()
428
+
429
+ with open(filepath, "rb") as f:
430
+ for chunk in iter(lambda: f.read(chunk_size), b""):
431
+ hash_func.update(chunk)
432
+
433
+ return hash_func.hexdigest()
434
+
425
435
  def initial_search_v2(
426
436
  self,
427
437
  trace_id: str,
@@ -442,9 +452,10 @@ class _RestClient:
442
452
  digest = md5_hash.hexdigest()
443
453
  metadata_with_md5 = metadata.copy(update={"checksumMD5": digest})
444
454
 
445
- digest_sha256 = hashlib.sha256(
446
- pd.util.hash_pandas_object(pd.read_parquet(file_path, engine="fastparquet")).values
447
- ).hexdigest()
455
+ # digest_sha256 = hashlib.sha256(
456
+ # pd.util.hash_pandas_object(pd.read_parquet(file_path, engine="fastparquet")).values
457
+ # ).hexdigest()
458
+ digest_sha256 = self.compute_file_digest(file_path)
448
459
  metadata_with_md5 = metadata_with_md5.copy(update={"digest": digest_sha256})
449
460
 
450
461
  with open(file_path, "rb") as file:
@@ -530,9 +541,10 @@ class _RestClient:
530
541
  digest = md5_hash.hexdigest()
531
542
  metadata_with_md5 = metadata.copy(update={"checksumMD5": digest})
532
543
 
533
- digest_sha256 = hashlib.sha256(
534
- pd.util.hash_pandas_object(pd.read_parquet(file_path, engine="fastparquet")).values
535
- ).hexdigest()
544
+ # digest_sha256 = hashlib.sha256(
545
+ # pd.util.hash_pandas_object(pd.read_parquet(file_path, engine="fastparquet")).values
546
+ # ).hexdigest()
547
+ digest_sha256 = self.compute_file_digest(file_path)
536
548
  metadata_with_md5 = metadata_with_md5.copy(update={"digest": digest_sha256})
537
549
 
538
550
  with open(file_path, "rb") as file:
upgini-1.2.46.dist-info/METADATA → upgini-1.2.48.dist-info/METADATA CHANGED
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: upgini
3
- Version: 1.2.46
3
+ Version: 1.2.48
4
4
  Summary: Intelligent data search & enrichment for Machine Learning
5
5
  Project-URL: Bug Reports, https://github.com/upgini/upgini/issues
6
6
  Project-URL: Homepage, https://upgini.com/
upgini-1.2.46.dist-info/RECORD → upgini-1.2.48.dist-info/RECORD CHANGED
@@ -1,10 +1,10 @@
1
- upgini/__about__.py,sha256=s7kBVARFz8lSmh7ulygN5xbxPrps18XAib1Arlvg6cw,23
1
+ upgini/__about__.py,sha256=Zm4hDkoG9GX2KfPn6Wt1kdPl888Wv-de5OuQhdWNP9E,23
2
2
  upgini/__init__.py,sha256=LXSfTNU0HnlOkE69VCxkgIKDhWP-JFo_eBQ71OxTr5Y,261
3
3
  upgini/ads.py,sha256=nvuRxRx5MHDMgPr9SiU-fsqRdFaBv8p4_v1oqiysKpc,2714
4
4
  upgini/dataset.py,sha256=QC3jncWS3wHe4CY7pWWDMO_3HKxGbi0EyPHXMdBtoQM,33456
5
5
  upgini/errors.py,sha256=2b_Wbo0OYhLUbrZqdLIx5jBnAsiD1Mcenh-VjR4HCTw,950
6
- upgini/features_enricher.py,sha256=NWYNZtSgAR05zOZp_Wq1ltVGThCttTbVN_TP2RaWFSI,200008
7
- upgini/http.py,sha256=plZGTGoi1h2edd8Cnjt4eYB8t4NbBGnZz7DtPTByiNc,42885
6
+ upgini/features_enricher.py,sha256=wQmmDAI2F7E2805iX-Cpc5v44QRVrCJV8x8j_Ujh38w,200242
7
+ upgini/http.py,sha256=danPeX7nTMa_70S-pk-4UUm5yOvXYlR84jgyjoHYBkU,43367
8
8
  upgini/lazy_import.py,sha256=74gQ8JuA48BGRLxAo7lNHNKY2D2emMxrUxKGdxVGhuY,1012
9
9
  upgini/metadata.py,sha256=-ibqiNjD7dTagqg53FoEJNEqvAYbwgfyn9PGTRQ_YKU,12054
10
10
  upgini/metrics.py,sha256=hr7UwLphbZ_FEglLuO2lzr_pFgxOJ4c3WBeg7H-fNqY,35521
@@ -59,7 +59,7 @@ upgini/utils/sklearn_ext.py,sha256=13jQS_k7v0aUtudXV6nGUEWjttPQzAW9AFYL5wgEz9k,4
59
59
  upgini/utils/target_utils.py,sha256=RlpKGss9kMibVSlA8iZuO_qxmyeplqzn7X8g6hiGGGs,14341
60
60
  upgini/utils/track_info.py,sha256=G5Lu1xxakg2_TQjKZk4b5SvrHsATTXNVV3NbvWtT8k8,5663
61
61
  upgini/utils/warning_counter.py,sha256=-GRY8EUggEBKODPSuXAkHn9KnEQwAORC0mmz_tim-PM,254
62
- upgini-1.2.46.dist-info/METADATA,sha256=79LcIjwGCdKUlidUqvLOy4YnlBIJEyJwLE5tAPxlKo8,49055
63
- upgini-1.2.46.dist-info/WHEEL,sha256=zEMcRr9Kr03x1ozGwg5v9NQBKn3kndp6LSoSlVg-jhU,87
64
- upgini-1.2.46.dist-info/licenses/LICENSE,sha256=5RRzgvdJUu3BUDfv4bzVU6FqKgwHlIay63pPCSmSgzw,1514
65
- upgini-1.2.46.dist-info/RECORD,,
62
+ upgini-1.2.48.dist-info/METADATA,sha256=kGKDKYGXPVY0vOhgpsz1bq3UiMJEy8szoDRToAvVMuA,49055
63
+ upgini-1.2.48.dist-info/WHEEL,sha256=zEMcRr9Kr03x1ozGwg5v9NQBKn3kndp6LSoSlVg-jhU,87
64
+ upgini-1.2.48.dist-info/licenses/LICENSE,sha256=5RRzgvdJUu3BUDfv4bzVU6FqKgwHlIay63pPCSmSgzw,1514
65
+ upgini-1.2.48.dist-info/RECORD,,