upgini-1.2.80-py3-none-any.whl → upgini-1.2.81-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- upgini/__about__.py +1 -1
- upgini/autofe/binary.py +2 -2
- upgini/autofe/timeseries/volatility.py +6 -4
- upgini/features_enricher.py +155 -91
- upgini/http.py +21 -21
- upgini/mdc/__init__.py +1 -1
- upgini/metadata.py +1 -1
- upgini/metrics.py +289 -228
- upgini/resource_bundle/strings.properties +1 -1
- upgini/search_task.py +1 -0
- upgini/utils/display_utils.py +12 -7
- upgini/utils/target_utils.py +9 -6
- {upgini-1.2.80.dist-info → upgini-1.2.81.dist-info}/METADATA +3 -1
- {upgini-1.2.80.dist-info → upgini-1.2.81.dist-info}/RECORD +16 -16
- {upgini-1.2.80.dist-info → upgini-1.2.81.dist-info}/WHEEL +0 -0
- {upgini-1.2.80.dist-info → upgini-1.2.81.dist-info}/licenses/LICENSE +0 -0
upgini/http.py
CHANGED
@@ -20,7 +20,7 @@ import jwt
|
|
20
20
|
# import pandas as pd
|
21
21
|
import requests
|
22
22
|
from pydantic import BaseModel
|
23
|
-
from pythonjsonlogger import jsonlogger
|
23
|
+
from pythonjsonlogger import json as jsonlogger
|
24
24
|
from requests.exceptions import RequestException
|
25
25
|
|
26
26
|
from upgini.__about__ import __version__
|
@@ -459,19 +459,19 @@ class _RestClient:
|
|
459
459
|
content = file.read()
|
460
460
|
md5_hash.update(content)
|
461
461
|
digest = md5_hash.hexdigest()
|
462
|
-
metadata_with_md5 = metadata.
|
462
|
+
metadata_with_md5 = metadata.model_copy(update={"checksumMD5": digest})
|
463
463
|
|
464
464
|
# digest_sha256 = hashlib.sha256(
|
465
465
|
# pd.util.hash_pandas_object(pd.read_parquet(file_path, engine="fastparquet")).values
|
466
466
|
# ).hexdigest()
|
467
467
|
digest_sha256 = self.compute_file_digest(file_path)
|
468
|
-
metadata_with_md5 = metadata_with_md5.
|
468
|
+
metadata_with_md5 = metadata_with_md5.model_copy(update={"digest": digest_sha256})
|
469
469
|
|
470
470
|
with open(file_path, "rb") as file:
|
471
471
|
files = {
|
472
472
|
"metadata": (
|
473
473
|
"metadata.json",
|
474
|
-
metadata_with_md5.
|
474
|
+
metadata_with_md5.model_dump_json(exclude_none=True).encode(),
|
475
475
|
"application/json",
|
476
476
|
),
|
477
477
|
"tracking": (
|
@@ -481,7 +481,7 @@ class _RestClient:
|
|
481
481
|
),
|
482
482
|
"metrics": (
|
483
483
|
"metrics.json",
|
484
|
-
metrics.
|
484
|
+
metrics.model_dump_json(exclude_none=True).encode(),
|
485
485
|
"application/json",
|
486
486
|
),
|
487
487
|
"file": (metadata_with_md5.name, file, "application/octet-stream"),
|
@@ -489,7 +489,7 @@ class _RestClient:
|
|
489
489
|
if search_customization is not None:
|
490
490
|
files["customization"] = (
|
491
491
|
"customization.json",
|
492
|
-
search_customization.
|
492
|
+
search_customization.model_dump_json(exclude_none=True).encode(),
|
493
493
|
"application/json",
|
494
494
|
)
|
495
495
|
additional_headers = {self.SEARCH_KEYS_HEADER_NAME: ",".join(self.search_keys_meaning_types(metadata))}
|
@@ -504,7 +504,7 @@ class _RestClient:
|
|
504
504
|
def check_uploaded_file_v2(self, trace_id: str, file_upload_id: str, metadata: FileMetadata) -> bool:
|
505
505
|
api_path = self.CHECK_UPLOADED_FILE_URL_FMT_V2.format(file_upload_id)
|
506
506
|
response = self._with_unauth_retry(
|
507
|
-
lambda: self._send_post_req(api_path, trace_id, metadata.
|
507
|
+
lambda: self._send_post_req(api_path, trace_id, metadata.model_dump_json(exclude_none=True))
|
508
508
|
)
|
509
509
|
return bool(response)
|
510
510
|
|
@@ -518,11 +518,11 @@ class _RestClient:
|
|
518
518
|
) -> SearchTaskResponse:
|
519
519
|
api_path = self.INITIAL_SEARCH_WITHOUT_UPLOAD_URI_FMT_V2.format(file_upload_id)
|
520
520
|
files = {
|
521
|
-
"metadata": ("metadata.json", metadata.
|
522
|
-
"metrics": ("metrics.json", metrics.
|
521
|
+
"metadata": ("metadata.json", metadata.model_dump_json(exclude_none=True).encode(), "application/json"),
|
522
|
+
"metrics": ("metrics.json", metrics.model_dump_json(exclude_none=True).encode(), "application/json"),
|
523
523
|
}
|
524
524
|
if search_customization is not None:
|
525
|
-
files["customization"] = search_customization.
|
525
|
+
files["customization"] = search_customization.model_dump_json(exclude_none=True).encode()
|
526
526
|
additional_headers = {self.SEARCH_KEYS_HEADER_NAME: ",".join(self.search_keys_meaning_types(metadata))}
|
527
527
|
response = self._with_unauth_retry(
|
528
528
|
lambda: self._send_post_file_req_v2(
|
@@ -548,19 +548,19 @@ class _RestClient:
|
|
548
548
|
content = file.read()
|
549
549
|
md5_hash.update(content)
|
550
550
|
digest = md5_hash.hexdigest()
|
551
|
-
metadata_with_md5 = metadata.
|
551
|
+
metadata_with_md5 = metadata.model_copy(update={"checksumMD5": digest})
|
552
552
|
|
553
553
|
# digest_sha256 = hashlib.sha256(
|
554
554
|
# pd.util.hash_pandas_object(pd.read_parquet(file_path, engine="fastparquet")).values
|
555
555
|
# ).hexdigest()
|
556
556
|
digest_sha256 = self.compute_file_digest(file_path)
|
557
|
-
metadata_with_md5 = metadata_with_md5.
|
557
|
+
metadata_with_md5 = metadata_with_md5.model_copy(update={"digest": digest_sha256})
|
558
558
|
|
559
559
|
with open(file_path, "rb") as file:
|
560
560
|
files = {
|
561
561
|
"metadata": (
|
562
562
|
"metadata.json",
|
563
|
-
metadata_with_md5.
|
563
|
+
metadata_with_md5.model_dump_json(exclude_none=True).encode(),
|
564
564
|
"application/json",
|
565
565
|
),
|
566
566
|
"tracking": (
|
@@ -570,7 +570,7 @@ class _RestClient:
|
|
570
570
|
),
|
571
571
|
"metrics": (
|
572
572
|
"metrics.json",
|
573
|
-
metrics.
|
573
|
+
metrics.model_dump_json(exclude_none=True).encode(),
|
574
574
|
"application/json",
|
575
575
|
),
|
576
576
|
"file": (metadata_with_md5.name, file, "application/octet-stream"),
|
@@ -578,7 +578,7 @@ class _RestClient:
|
|
578
578
|
if search_customization is not None:
|
579
579
|
files["customization"] = (
|
580
580
|
"customization.json",
|
581
|
-
search_customization.
|
581
|
+
search_customization.model_dump_json(exclude_none=True).encode(),
|
582
582
|
"application/json",
|
583
583
|
)
|
584
584
|
|
@@ -602,11 +602,11 @@ class _RestClient:
|
|
602
602
|
) -> SearchTaskResponse:
|
603
603
|
api_path = self.VALIDATION_SEARCH_WITHOUT_UPLOAD_URI_FMT_V2.format(file_upload_id, initial_search_task_id)
|
604
604
|
files = {
|
605
|
-
"metadata": ("metadata.json", metadata.
|
606
|
-
"metrics": ("metrics.json", metrics.
|
605
|
+
"metadata": ("metadata.json", metadata.model_dump_json(exclude_none=True).encode(), "application/json"),
|
606
|
+
"metrics": ("metrics.json", metrics.model_dump_json(exclude_none=True).encode(), "application/json"),
|
607
607
|
}
|
608
608
|
if search_customization is not None:
|
609
|
-
files["customization"] = search_customization.
|
609
|
+
files["customization"] = search_customization.model_dump_json(exclude_none=True).encode()
|
610
610
|
additional_headers = {self.SEARCH_KEYS_HEADER_NAME: ",".join(self.search_keys_meaning_types(metadata))}
|
611
611
|
response = self._with_unauth_retry(
|
612
612
|
lambda: self._send_post_file_req_v2(
|
@@ -670,7 +670,7 @@ class _RestClient:
|
|
670
670
|
"file": (metadata.name, file, "application/octet-stream"),
|
671
671
|
"metadata": (
|
672
672
|
"metadata.json",
|
673
|
-
metadata.
|
673
|
+
metadata.model_dump_json(exclude_none=True).encode(),
|
674
674
|
"application/json",
|
675
675
|
),
|
676
676
|
}
|
@@ -682,12 +682,12 @@ class _RestClient:
|
|
682
682
|
def get_search_file_metadata(self, search_task_id: str, trace_id: str) -> FileMetadata:
|
683
683
|
api_path = self.SEARCH_FILE_METADATA_URI_FMT_V2.format(search_task_id)
|
684
684
|
response = self._with_unauth_retry(lambda: self._send_get_req(api_path, trace_id))
|
685
|
-
return FileMetadata.
|
685
|
+
return FileMetadata.model_validate(response)
|
686
686
|
|
687
687
|
def get_provider_search_metadata_v3(self, provider_search_task_id: str, trace_id: str) -> ProviderTaskMetadataV2:
|
688
688
|
api_path = self.SEARCH_TASK_METADATA_FMT_V3.format(provider_search_task_id)
|
689
689
|
response = self._with_unauth_retry(lambda: self._send_get_req(api_path, trace_id))
|
690
|
-
return ProviderTaskMetadataV2.
|
690
|
+
return ProviderTaskMetadataV2.model_validate(response)
|
691
691
|
|
692
692
|
def get_current_transform_usage(self, trace_id) -> TransformUsage:
|
693
693
|
track_metrics = get_track_metrics(self.client_ip, self.client_visitorid)
|
upgini/mdc/__init__.py
CHANGED