upgini 1.2.89a1__py3-none-any.whl → 1.2.90__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of upgini might be problematic. Click here for more details.

upgini/__about__.py CHANGED
@@ -1 +1 @@
1
- __version__ = "1.2.89a1"
1
+ __version__ = "1.2.90"
upgini/http.py CHANGED
@@ -252,6 +252,7 @@ class _RestClient:
252
252
 
253
253
  # V2
254
254
  CHECK_UPLOADED_FILE_URL_FMT_V2 = SERVICE_ROOT_V2 + "search/check-file?fileUploadId={0}"
255
+ IS_FILE_UPLOADED_URL_FMT_V2 = SERVICE_ROOT_V2 + "search/files/exists?digest={0}"
255
256
  INITIAL_SEARCH_URI_FMT_V2 = SERVICE_ROOT_V2 + "search/initial"
256
257
  INITIAL_SEARCH_WITHOUT_UPLOAD_URI_FMT_V2 = SERVICE_ROOT_V2 + "search/initial-without-upload?fileUploadId={0}"
257
258
  VALIDATION_SEARCH_URI_FMT_V2 = SERVICE_ROOT_V2 + "search/validation?initialSearchTaskId={0}"
@@ -272,6 +273,7 @@ class _RestClient:
272
273
  SEARCH_FILE_METADATA_URI_FMT_V2 = SERVICE_ROOT_V2 + "search/{0}/metadata"
273
274
  SEARCH_TASK_METADATA_FMT_V3 = SERVICE_ROOT_V2 + "search/metadata-v2/{0}"
274
275
  SEARCH_DUMP_INPUT_FMT_V2 = SERVICE_ROOT_V2 + "search/dump-input"
276
+ SEARCH_DUMP_INPUT_FILE_FMT = SERVICE_ROOT_V2 + "search/dump-input-file"
275
277
  TRANSFORM_USAGE_FMT = SERVICE_ROOT_V2 + "user/transform-usage"
276
278
 
277
279
  UPLOAD_USER_ADS_URI = SERVICE_ROOT + "ads/upload"
@@ -410,32 +412,29 @@ class _RestClient:
410
412
  eval_x_path: Optional[str] = None,
411
413
  eval_y_path: Optional[str] = None,
412
414
  ):
413
- api_path = self.SEARCH_DUMP_INPUT_FMT_V2
414
- files = {}
415
- with open(x_path, "rb") as x_file:
416
- files["x"] = ("x.pickle", x_file, "application/octet-stream")
417
- if y_path:
418
- with open(y_path, "rb") as y_file:
419
- files["y"] = ("y.pickle", y_file, "application/octet-stream")
420
- if eval_x_path and eval_y_path:
421
- with open(eval_x_path, "rb") as eval_x_file, open(eval_y_path, "rb") as eval_y_file:
422
- files["eval_x"] = ("eval_x.pickle", eval_x_file, "application/octet-stream")
423
- files["eval_y"] = ("eval_y.pickle", eval_y_file, "application/octet-stream")
424
- self._with_unauth_retry(
425
- lambda: self._send_post_file_req_v2(
426
- api_path, files, trace_id=trace_id, need_json_response=False
427
- )
428
- )
429
- else:
430
- self._with_unauth_retry(
431
- lambda: self._send_post_file_req_v2(
432
- api_path, files, trace_id=trace_id, need_json_response=False
433
- )
434
- )
415
+ api_path = self.SEARCH_DUMP_INPUT_FILE_FMT
416
+
417
+ def upload_with_check(path: str, file_name: str):
418
+ digest_sha256 = self.compute_file_digest(path)
419
+ if self.is_file_uploaded(trace_id, digest_sha256):
420
+ # print(f"File {path} was already uploaded with digest {digest_sha256}, skipping")
421
+ return
435
422
  else:
436
- self._with_unauth_retry(
437
- lambda: self._send_post_file_req_v2(api_path, files, trace_id=trace_id, need_json_response=False)
438
- )
423
+ with open(path, "rb") as file:
424
+ files = {"file": (file_name, file, "application/octet-stream")}
425
+ self._with_unauth_retry(
426
+ lambda: self._send_post_file_req_v2(
427
+ api_path, files, trace_id=trace_id, need_json_response=False
428
+ )
429
+ )
430
+
431
+ upload_with_check(x_path, "x.parquet")
432
+ if y_path:
433
+ upload_with_check(y_path, "y.parquet")
434
+ if eval_x_path:
435
+ upload_with_check(eval_x_path, "eval_x.parquet")
436
+ if eval_y_path:
437
+ upload_with_check(eval_y_path, "eval_y.parquet")
439
438
 
440
439
  @staticmethod
441
440
  def compute_file_digest(filepath: str, algorithm="sha256", chunk_size=4096) -> str:
@@ -514,6 +513,11 @@ class _RestClient:
514
513
  )
515
514
  return bool(response)
516
515
 
516
+ def is_file_uploaded(self, trace_id: str, digest: str) -> bool:
517
+ api_path = self.IS_FILE_UPLOADED_URL_FMT_V2.format(digest)
518
+ response = self._with_unauth_retry(lambda: self._send_get_req(api_path, trace_id))
519
+ return bool(response)
520
+
517
521
  def initial_search_without_upload_v2(
518
522
  self,
519
523
  trace_id: str,
upgini/utils/target_utils.py CHANGED
@@ -416,6 +416,7 @@ def calculate_psi(expected: pd.Series, actual: pd.Series) -> Union[float, Except
416
416
  test_distribution = actual.value_counts(bins=bins, normalize=True).sort_index().values
417
417
 
418
418
  # Calculate the PSI
419
- return np.sum((train_distribution - test_distribution) * np.log(train_distribution / test_distribution))
419
+ ratio = np.where(test_distribution > 0, train_distribution / test_distribution, 1)
420
+ return np.sum((train_distribution - test_distribution) * np.log(ratio))
420
421
  except Exception as e:
421
422
  return e
upgini-1.2.90.dist-info/METADATA CHANGED
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: upgini
3
- Version: 1.2.89a1
3
+ Version: 1.2.90
4
4
  Summary: Intelligent data search & enrichment for Machine Learning
5
5
  Project-URL: Bug Reports, https://github.com/upgini/upgini/issues
6
6
  Project-URL: Homepage, https://upgini.com/
upgini-1.2.90.dist-info/RECORD CHANGED
@@ -1,10 +1,10 @@
1
- upgini/__about__.py,sha256=zOnCJXwGq7WXQ73_SrUBkMqGfV00s4WlXFUPNyejNQ8,25
1
+ upgini/__about__.py,sha256=GHc4XyRcf-LRcunv2-fpap4slj_PhG6QeOQqttDwIno,23
2
2
  upgini/__init__.py,sha256=LXSfTNU0HnlOkE69VCxkgIKDhWP-JFo_eBQ71OxTr5Y,261
3
3
  upgini/ads.py,sha256=nvuRxRx5MHDMgPr9SiU-fsqRdFaBv8p4_v1oqiysKpc,2714
4
4
  upgini/dataset.py,sha256=fRtqSkXNONLnPe6cCL967GMt349FTIpXzy_u8LUKncw,35354
5
5
  upgini/errors.py,sha256=2b_Wbo0OYhLUbrZqdLIx5jBnAsiD1Mcenh-VjR4HCTw,950
6
6
  upgini/features_enricher.py,sha256=rieH8wjC1c_q2LYZoju8KZyshokNzFpwVtrCtG88w3s,215940
7
- upgini/http.py,sha256=6Qcepv0tDC72mBBJxYHnA2xqw6QwFaKrXN8o4vju8Es,44372
7
+ upgini/http.py,sha256=4i7fQwrwU3WzDUOWzrgR-4C8eJwj_5dBwRAR-UjUtlc,44345
8
8
  upgini/metadata.py,sha256=zt_9k0iQbWXuiRZcel4ORNPdQKt6Ou69ucZD_E1Q46o,12341
9
9
  upgini/metrics.py,sha256=zIOaiyfQLedU9Fk4877drnlWh-KiImSkZpPeiq6Xr1E,45295
10
10
  upgini/search_task.py,sha256=Q5HjBpLIB3OCxAD1zNv5yQ3ZNJx696WCK_-H35_y7Rs,17912
@@ -66,11 +66,11 @@ upgini/utils/postal_code_utils.py,sha256=5M0sUqH2DAr33kARWCTXR-ACyzWbjDq_-0mmEml
66
66
  upgini/utils/progress_bar.py,sha256=N-Sfdah2Hg8lXP_fV9EfUTXz_PyRt4lo9fAHoUDOoLc,1550
67
67
  upgini/utils/sklearn_ext.py,sha256=jLJWAKkqQinV15Z4y1ZnsN3c-fKFwXTsprs00COnyVU,49315
68
68
  upgini/utils/sort.py,sha256=8uuHs2nfSMVnz8GgvbOmgMB1PgEIZP1uhmeRFxcwnYw,7039
69
- upgini/utils/target_utils.py,sha256=LRN840dzx78-wg7ftdxAkp2c1eu8-JDvkACiRThm4HE,16832
69
+ upgini/utils/target_utils.py,sha256=mVZ8wrkBb-tzEnVZwZw0m-Y0Sojb5t-wIsACRH05nIw,16890
70
70
  upgini/utils/track_info.py,sha256=G5Lu1xxakg2_TQjKZk4b5SvrHsATTXNVV3NbvWtT8k8,5663
71
71
  upgini/utils/ts_utils.py,sha256=26vhC0pN7vLXK6R09EEkMK3Lwb9IVPH7LRdqFIQ3kPs,1383
72
72
  upgini/utils/warning_counter.py,sha256=-GRY8EUggEBKODPSuXAkHn9KnEQwAORC0mmz_tim-PM,254
73
- upgini-1.2.89a1.dist-info/METADATA,sha256=d9XvUcHoqSr2RzIpqLR42x1bffkKnr7PyT6iB6kZGYQ,49164
74
- upgini-1.2.89a1.dist-info/WHEEL,sha256=1yFddiXMmvYK7QYTqtRNtX66WJ0Mz8PYEiEUoOUUxRY,87
75
- upgini-1.2.89a1.dist-info/licenses/LICENSE,sha256=5RRzgvdJUu3BUDfv4bzVU6FqKgwHlIay63pPCSmSgzw,1514
76
- upgini-1.2.89a1.dist-info/RECORD,,
73
+ upgini-1.2.90.dist-info/METADATA,sha256=QWKn1q4NNZEH8k41xW03uvPmUSjwb-2uFH_Asecnr44,49162
74
+ upgini-1.2.90.dist-info/WHEEL,sha256=1yFddiXMmvYK7QYTqtRNtX66WJ0Mz8PYEiEUoOUUxRY,87
75
+ upgini-1.2.90.dist-info/licenses/LICENSE,sha256=5RRzgvdJUu3BUDfv4bzVU6FqKgwHlIay63pPCSmSgzw,1514
76
+ upgini-1.2.90.dist-info/RECORD,,