warpzone-sdk 15.0.0.dev14__py3-none-any.whl → 15.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- warpzone/db/client.py +71 -16
- warpzone/deltastorage/store.py +14 -4
- warpzone/deltastorage/table.py +17 -4
- warpzone/function/integrations.py +1 -1
- warpzone/monitor/logs.py +1 -0
- warpzone/monitor/traces.py +1 -0
- warpzone/tablestorage/db/client.py +0 -3
- {warpzone_sdk-15.0.0.dev14.dist-info → warpzone_sdk-15.1.0.dist-info}/METADATA +1 -1
- {warpzone_sdk-15.0.0.dev14.dist-info → warpzone_sdk-15.1.0.dist-info}/RECORD +10 -10
- {warpzone_sdk-15.0.0.dev14.dist-info → warpzone_sdk-15.1.0.dist-info}/WHEEL +0 -0
warpzone/db/client.py
CHANGED
|
@@ -32,29 +32,51 @@ class WarpzoneDatabaseClient:
|
|
|
32
32
|
self,
|
|
33
33
|
path: str,
|
|
34
34
|
storage_options: dict[str, str] | None = None,
|
|
35
|
+
table_prefix: str = "",
|
|
35
36
|
):
|
|
36
37
|
self.store = Store(
|
|
37
38
|
path=path,
|
|
38
39
|
storage_options=storage_options,
|
|
39
40
|
)
|
|
41
|
+
self.table_prefix = table_prefix
|
|
40
42
|
|
|
41
43
|
@classmethod
|
|
42
|
-
def
|
|
44
|
+
def from_resource_name(
|
|
43
45
|
cls,
|
|
44
46
|
storage_account: str,
|
|
45
47
|
container_name: str = "datasets",
|
|
48
|
+
sub_path: str = "",
|
|
49
|
+
table_prefix: str = "",
|
|
46
50
|
credential: (
|
|
47
51
|
AzureNamedKeyCredential | AzureSasCredential | TokenCredential
|
|
48
52
|
) = DefaultAzureCredential(),
|
|
49
53
|
):
|
|
54
|
+
"""Create a WarpzoneDatabaseClient from resource name (storage account).
|
|
55
|
+
This assumes the path of the delta lake is of the form:
|
|
56
|
+
abfss://{container_name}@{storage_account}.dfs.core.windows.net/{sub_path}
|
|
57
|
+
|
|
58
|
+
Args:
|
|
59
|
+
storage_account (str): Storage account name.
|
|
60
|
+
container_name (str, optional): Container name. Defaults to "datasets".
|
|
61
|
+
sub_path (str, optional): Sub-path within the container. Defaults to "".
|
|
62
|
+
table_prefix (str, optional): Table prefix to use (e.g. `mz_` for archive).
|
|
63
|
+
Defaults to "".
|
|
64
|
+
credential (optional): Azure credential to use.
|
|
65
|
+
Defaults to DefaultAzureCredential().
|
|
66
|
+
"""
|
|
50
67
|
path = f"abfss://{container_name}@{storage_account}.dfs.core.windows.net"
|
|
68
|
+
if sub_path:
|
|
69
|
+
path += f"/{sub_path}"
|
|
70
|
+
|
|
51
71
|
token = credential.get_token("https://storage.azure.com/.default")
|
|
52
72
|
storage_options = {
|
|
53
73
|
"account_name": storage_account,
|
|
54
74
|
"token": token.token,
|
|
55
75
|
}
|
|
56
76
|
|
|
57
|
-
return cls(
|
|
77
|
+
return cls(
|
|
78
|
+
path=path, storage_options=storage_options, table_prefix=table_prefix
|
|
79
|
+
)
|
|
58
80
|
|
|
59
81
|
def get_unit_and_multiple(self, timedelta: pd.Timedelta) -> tuple[str | None, int]:
|
|
60
82
|
"""
|
|
@@ -148,7 +170,41 @@ class WarpzoneDatabaseClient:
|
|
|
148
170
|
time_interval: Optional[pdz.TimeInterval] = None,
|
|
149
171
|
time_travel: Optional[pdz.TimeTravel] = None,
|
|
150
172
|
filters: Optional[dict[str, object]] = None,
|
|
173
|
+
columns: Optional[list[str]] = None,
|
|
174
|
+
include_validity_period_columns: bool = False,
|
|
175
|
+
include_generated_columns: bool = False,
|
|
151
176
|
) -> pd.DataFrame:
|
|
177
|
+
"""Query table.
|
|
178
|
+
Query defaults are set to match old Table Storage client behavior.
|
|
179
|
+
Time travel defaults to "as of now"
|
|
180
|
+
Validity period columns are dropped by default.
|
|
181
|
+
Generated columns are dropped by default.
|
|
182
|
+
|
|
183
|
+
Args:
|
|
184
|
+
table_name (str): Name of the table
|
|
185
|
+
time_interval (Optional[pdz.TimeInterval], optional): Time interval for the
|
|
186
|
+
query. Defaults to None.
|
|
187
|
+
time_travel (Optional[pdz.TimeTravel], optional): Time travel information.
|
|
188
|
+
Defaults to None.
|
|
189
|
+
filters (Optional[dict[str, object]], optional): Filters to apply to the
|
|
190
|
+
query.
|
|
191
|
+
Defaults to None.
|
|
192
|
+
columns (Optional[list[str]], optional): Columns to return.
|
|
193
|
+
Selecting columns can significantly improve query performance.
|
|
194
|
+
Defaults to None, meaning all columns will be returned.
|
|
195
|
+
include_validity_period_columns (bool, optional): Whether to include
|
|
196
|
+
validity period columns in the result;
|
|
197
|
+
(`valid_from_time_utc`, `valid_to_time_utc`).
|
|
198
|
+
Defaults to False. If set to True while using `columns`-argument, make sure
|
|
199
|
+
to include these columns in the `columns`-list.
|
|
200
|
+
include_generated_columns (bool, optional): Whether to include generated
|
|
201
|
+
columns in the result; (e.g. `valid_from_time_utc`, `valid_to_time_utc`).
|
|
202
|
+
Defaults to False. If set to True while using `columns`-argument, make sure
|
|
203
|
+
to include these columns in the `columns`-list.
|
|
204
|
+
|
|
205
|
+
Returns:
|
|
206
|
+
pd.DataFrame: The result of the query.
|
|
207
|
+
"""
|
|
152
208
|
# We do 'camelCaseToSnake_case' conversion here because the old
|
|
153
209
|
# naming convention used in WarpZone was CamelCase, while the new
|
|
154
210
|
# naming convention is snake_case. The goal is to remove this
|
|
@@ -164,7 +220,6 @@ class WarpzoneDatabaseClient:
|
|
|
164
220
|
hyper_slice.append((key, "in", value))
|
|
165
221
|
else:
|
|
166
222
|
hyper_slice.append((key, "=", value))
|
|
167
|
-
|
|
168
223
|
if time_interval:
|
|
169
224
|
hyper_slice.append(("time_utc", ">=", time_interval.left))
|
|
170
225
|
hyper_slice.append(("time_utc", "<=", time_interval.right))
|
|
@@ -180,9 +235,9 @@ class WarpzoneDatabaseClient:
|
|
|
180
235
|
valid_from_column="valid_from_time_utc",
|
|
181
236
|
valid_to_column="valid_to_time_utc",
|
|
182
237
|
)
|
|
183
|
-
hyper_slice.extend(tt_filter)
|
|
184
238
|
|
|
185
|
-
|
|
239
|
+
hyper_slice.extend(tt_filter)
|
|
240
|
+
pl_df = table.read(hyper_slice=HyperSlice(hyper_slice), columns=columns)
|
|
186
241
|
|
|
187
242
|
pd_df = pl_df.to_pandas()
|
|
188
243
|
|
|
@@ -191,16 +246,16 @@ class WarpzoneDatabaseClient:
|
|
|
191
246
|
for col in pd_df.select_dtypes(include=["datetime", "datetimetz"]).columns:
|
|
192
247
|
pd_df[col] = pd_df[col].dt.floor("s").dt.as_unit("ns")
|
|
193
248
|
|
|
194
|
-
#
|
|
195
|
-
|
|
196
|
-
|
|
197
|
-
|
|
198
|
-
|
|
199
|
-
|
|
200
|
-
|
|
201
|
-
|
|
202
|
-
|
|
203
|
-
|
|
204
|
-
|
|
249
|
+
# Drop generated columns
|
|
250
|
+
if not include_generated_columns:
|
|
251
|
+
generated_cols = []
|
|
252
|
+
for field in table.schema().fields:
|
|
253
|
+
if field.generated_as is not None:
|
|
254
|
+
generated_cols.append(field.column_name)
|
|
255
|
+
pd_df = pd_df.drop(columns=generated_cols)
|
|
256
|
+
|
|
257
|
+
# Drop valid-from/to columns
|
|
258
|
+
if not include_validity_period_columns:
|
|
259
|
+
pd_df = pd_df.drop(columns=["valid_from_time_utc", "valid_to_time_utc"])
|
|
205
260
|
|
|
206
261
|
return pd_df
|
warpzone/deltastorage/store.py
CHANGED
|
@@ -2,6 +2,7 @@ import os
|
|
|
2
2
|
|
|
3
3
|
import deltalake as dl
|
|
4
4
|
import obstore as obs
|
|
5
|
+
from deltalake.exceptions import TableNotFoundError as DeltaTableNotFoundError
|
|
5
6
|
|
|
6
7
|
from .schema import Schema
|
|
7
8
|
from .table import Table
|
|
@@ -76,10 +77,19 @@ class Store:
|
|
|
76
77
|
Args:
|
|
77
78
|
table_name (str): Table name
|
|
78
79
|
"""
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
80
|
+
# For some reason `deltalake.DeltaTable.is_deltatable()` can be very slow.
|
|
81
|
+
# deltalake has an issue open about this:
|
|
82
|
+
# https://github.com/delta-io/delta-rs/issues/3942
|
|
83
|
+
# For now we catch the exception when trying to load the table
|
|
84
|
+
try:
|
|
85
|
+
_ = dl.DeltaTable(
|
|
86
|
+
table_uri=self._get_table_uri(table_name),
|
|
87
|
+
storage_options=self.storage_options,
|
|
88
|
+
without_files=True,
|
|
89
|
+
)
|
|
90
|
+
except DeltaTableNotFoundError:
|
|
91
|
+
return False
|
|
92
|
+
return True
|
|
83
93
|
|
|
84
94
|
def create_table(
|
|
85
95
|
self,
|
warpzone/deltastorage/table.py
CHANGED
|
@@ -42,6 +42,7 @@ class Table:
|
|
|
42
42
|
self.storage_options = storage_options
|
|
43
43
|
|
|
44
44
|
self.table_name = self.table_uri.split("/")[-1]
|
|
45
|
+
self._delta_table = None
|
|
45
46
|
|
|
46
47
|
def __repr__(self):
|
|
47
48
|
return f"Table('{self.table_name}')"
|
|
@@ -49,12 +50,21 @@ class Table:
|
|
|
49
50
|
@property
|
|
50
51
|
def delta_table(self) -> dl.DeltaTable:
|
|
51
52
|
"""Get the Delta table object.
|
|
52
|
-
|
|
53
|
-
|
|
53
|
+
As the `Table`-class is lazily initialized,
|
|
54
|
+
the `delta_table`-property is initialized on the first access
|
|
55
|
+
and saved for future use to minimize overhead.
|
|
56
|
+
It is *important* that this property is only initialized within
|
|
57
|
+
a lock when doing concurrent reads/writes
|
|
58
|
+
and not initialized when creating the `Table`-object.
|
|
59
|
+
This is important because using the same instance can lead to transaction
|
|
54
60
|
issues in delta as DeltaTable uses metadata (transaction id) from
|
|
55
61
|
the first time the object is instantiated.
|
|
56
62
|
"""
|
|
57
|
-
|
|
63
|
+
if self._delta_table is None:
|
|
64
|
+
self._delta_table = dl.DeltaTable(
|
|
65
|
+
self.table_uri, storage_options=self.storage_options
|
|
66
|
+
)
|
|
67
|
+
return self._delta_table
|
|
58
68
|
|
|
59
69
|
def partition_cols(self) -> list[str]:
|
|
60
70
|
"""Get the partition columns of the table"""
|
|
@@ -65,7 +75,9 @@ class Table:
|
|
|
65
75
|
pa_schema = pa.schema(self.delta_table.schema())
|
|
66
76
|
return Schema.from_arrow(pa_schema)
|
|
67
77
|
|
|
68
|
-
def read(
|
|
78
|
+
def read(
|
|
79
|
+
self, hyper_slice: Optional[HyperSlice] = None, columns=None
|
|
80
|
+
) -> pl.DataFrame:
|
|
69
81
|
"""Read from Delta table
|
|
70
82
|
|
|
71
83
|
Args:
|
|
@@ -88,6 +100,7 @@ class Table:
|
|
|
88
100
|
partition_filters = [f for f in hyper_slice if f[0] in partition_cols]
|
|
89
101
|
|
|
90
102
|
pyarrow_table_existing_data = delta_table.to_pyarrow_table(
|
|
103
|
+
columns=columns,
|
|
91
104
|
partitions=partition_filters,
|
|
92
105
|
filters=file_filters,
|
|
93
106
|
)
|
|
@@ -62,7 +62,7 @@ def get_db_client() -> WarpzoneDatabaseClient:
|
|
|
62
62
|
|
|
63
63
|
|
|
64
64
|
def get_delta_db_client() -> WarpzoneDeltaDatabaseClient:
|
|
65
|
-
db_client = WarpzoneDeltaDatabaseClient.
|
|
65
|
+
db_client = WarpzoneDeltaDatabaseClient.from_resource_name(
|
|
66
66
|
os.environ["OPERATIONAL_DATA_STORAGE_ACCOUNT"],
|
|
67
67
|
credential=_credential,
|
|
68
68
|
)
|
warpzone/monitor/logs.py
CHANGED
warpzone/monitor/traces.py
CHANGED
|
@@ -12,13 +12,10 @@ from azure.identity import DefaultAzureCredential
|
|
|
12
12
|
|
|
13
13
|
from warpzone.blobstorage.client import WarpzoneBlobClient
|
|
14
14
|
from warpzone.healthchecks import HealthCheckResult, check_health_of
|
|
15
|
-
from warpzone.monitor import traces
|
|
16
15
|
from warpzone.tablestorage.db import base_client
|
|
17
16
|
from warpzone.tablestorage.db.table_config import DataType, TableMetadata
|
|
18
17
|
from warpzone.tablestorage.tables.client import WarpzoneTableClient
|
|
19
18
|
|
|
20
|
-
tracer = traces.get_tracer(__name__)
|
|
21
|
-
|
|
22
19
|
|
|
23
20
|
class WarpzoneDatabaseClient:
|
|
24
21
|
"""Class to interact with Azure Table Storage for database queries
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: warpzone-sdk
|
|
3
|
-
Version: 15.
|
|
3
|
+
Version: 15.1.0
|
|
4
4
|
Summary: The main objective of this package is to centralize logic used to interact with Azure Functions, Azure Service Bus and Azure Table Storage
|
|
5
5
|
Author: Team Enigma
|
|
6
6
|
Author-email: enigma@energinet.dk
|
|
@@ -2,21 +2,21 @@ warpzone/__init__.py,sha256=Ay7znIadokCdHHqsPlWAqhdg-zdWhXHNtxVinViYm7o,1533
|
|
|
2
2
|
warpzone/blobstorage/__init__.py,sha256=lnc0uiaGLF0qMi_rWhCpRSFvaj0CJEiMCAl6Yqn1ZiA,21
|
|
3
3
|
warpzone/blobstorage/client.py,sha256=YwDV83acoCeHS_D_ydsTYwnf56rSTy9CKpsxqeoXmBs,4638
|
|
4
4
|
warpzone/db/__init__.py,sha256=lnc0uiaGLF0qMi_rWhCpRSFvaj0CJEiMCAl6Yqn1ZiA,21
|
|
5
|
-
warpzone/db/client.py,sha256=
|
|
5
|
+
warpzone/db/client.py,sha256=tqY_ANThnO72rBaB4sydigLhlSFwCmadg_6rVZwjO8M,9688
|
|
6
6
|
warpzone/deltastorage/__init__.py,sha256=cV8sGT2N_N5Z-E179NMW5O7q3FUDrb3j5f-yVNlNPv0,152
|
|
7
7
|
warpzone/deltastorage/data_types.py,sha256=tWjLO_0ig7-tYxSHShvrd0znA7FFDnS-wBuFClUQG2U,2059
|
|
8
8
|
warpzone/deltastorage/generated_columns.py,sha256=Dr_bihM7v9JKCgBXxc3JQC2P2mUGobXsReRKOl6jDO4,4765
|
|
9
9
|
warpzone/deltastorage/lock_client.py,sha256=z19ub2ahsUHdcriRmgLOxO6zGOGadxF6NmNAhcGp9Lg,9024
|
|
10
10
|
warpzone/deltastorage/schema.py,sha256=AhkJnsB9oX3HVwoCUWcYSdD1zwouT2wj5XydXgU5WEA,4483
|
|
11
11
|
warpzone/deltastorage/slicing.py,sha256=r4Fbg8HHBkZlrYEC847yq86caCTTE02JzpnY_TqI_MA,638
|
|
12
|
-
warpzone/deltastorage/store.py,sha256=
|
|
13
|
-
warpzone/deltastorage/table.py,sha256=
|
|
12
|
+
warpzone/deltastorage/store.py,sha256=LwoDXk0xtQuzfMbqv_ldIRh-9RzA-Hx9NRhtBmlbWB8,4812
|
|
13
|
+
warpzone/deltastorage/table.py,sha256=_5tBQEKnimNWsAWyVhGjuX_3pllqzakkMrvALw6-2Qw,5713
|
|
14
14
|
warpzone/enums/__init__.py,sha256=Gvd-EeZc5jWu5WdusYe3i3Zyt1v85CljBK_EJu7EwZk,24
|
|
15
15
|
warpzone/enums/topicenum.py,sha256=cj9F7kYzwvDeHN5jJXwgnHXTh6E64VwRNpFepNthIyk,213
|
|
16
16
|
warpzone/function/__init__.py,sha256=rJOZBpWsUgjMc7YtXMJ1rLGm45KB1AhDJ_Y2ISiSISc,35
|
|
17
17
|
warpzone/function/checks.py,sha256=B9YqThymf16ac_fVAYKilv20ru5v9nwXgHlbxYIaG98,1018
|
|
18
18
|
warpzone/function/functionize.py,sha256=bSV0QvwKbD9Vo3a_8cc1rgV2rzTdMMvidinyXItBfvs,2128
|
|
19
|
-
warpzone/function/integrations.py,sha256=
|
|
19
|
+
warpzone/function/integrations.py,sha256=Law-0TI_tbm8rq5XXuilDH69_0LhoqaZhDbTL498Qik,4016
|
|
20
20
|
warpzone/function/monitor.py,sha256=xD13d4795a9qgGphOywFl4sOXRQjKypiL2ozQFOseqQ,1862
|
|
21
21
|
warpzone/function/process.py,sha256=nbUVywM8ChfUwuaqFisgaD98aNRgeZkK4g5sbtuBdRs,2339
|
|
22
22
|
warpzone/function/processors/__init__.py,sha256=DhIdSWLBcIeSO8IJdxPqGIhgwwnkDN6_Xqwy93BCLeA,46
|
|
@@ -28,8 +28,8 @@ warpzone/function/types.py,sha256=5m2hRrnLC3eqIlAH5-MM9_wKjMZ6lYawZtCOVStyFuY,72
|
|
|
28
28
|
warpzone/healthchecks/__init__.py,sha256=9gc_Mt2szs8sDSwy0V4l3JZ6d9hX41xTpZCkDP2qsY4,2108
|
|
29
29
|
warpzone/healthchecks/model.py,sha256=mM7DnrirLbUpBPPfi82MUPP654D0eOR2_F65TmzsPD0,1187
|
|
30
30
|
warpzone/monitor/__init__.py,sha256=gXT2cxR4tlZER54zd7D49ZQBVyitLaqj13_cUoILuyM,109
|
|
31
|
-
warpzone/monitor/logs.py,sha256=
|
|
32
|
-
warpzone/monitor/traces.py,sha256=
|
|
31
|
+
warpzone/monitor/logs.py,sha256=q3SUQCtG1ii0B9GkVs2l8kgZ5b5bI2qI6L0SoOW2QTY,3147
|
|
32
|
+
warpzone/monitor/traces.py,sha256=Xc_po1LxJFy5jtNWxIVphIInl_d89Zw3Rb21PsdQhQA,4170
|
|
33
33
|
warpzone/servicebus/data/__init__.py,sha256=lnc0uiaGLF0qMi_rWhCpRSFvaj0CJEiMCAl6Yqn1ZiA,21
|
|
34
34
|
warpzone/servicebus/data/client.py,sha256=zECS3JwedhYnDk8PntYgIYpBF_uu9YN38KzpPFK7CKs,6511
|
|
35
35
|
warpzone/servicebus/events/__init__.py,sha256=lnc0uiaGLF0qMi_rWhCpRSFvaj0CJEiMCAl6Yqn1ZiA,21
|
|
@@ -37,7 +37,7 @@ warpzone/servicebus/events/client.py,sha256=8v8XsF-2RwzKIi_93IzR_eR-BZTGXXHSuV4P
|
|
|
37
37
|
warpzone/servicebus/events/triggers.py,sha256=_QuPTBbje7LrBoz0qhhgrtDZOcE6x1S9GNu-WJUQ8bY,2626
|
|
38
38
|
warpzone/tablestorage/db/__init__.py,sha256=lnc0uiaGLF0qMi_rWhCpRSFvaj0CJEiMCAl6Yqn1ZiA,21
|
|
39
39
|
warpzone/tablestorage/db/base_client.py,sha256=ropKO6z0UXqBl38NuGYV4VZ_ZFm4w1d84ReOLYoBKLY,2376
|
|
40
|
-
warpzone/tablestorage/db/client.py,sha256
|
|
40
|
+
warpzone/tablestorage/db/client.py,sha256=-OchZI7x--Z9msNp912ggZ_sNGwNDXO-i7VGV6INSrg,6587
|
|
41
41
|
warpzone/tablestorage/db/table_config.py,sha256=PC45dnr3vVMVr8ktu3GWoH8u3JKJaCRAvMCroUaC1NE,1336
|
|
42
42
|
warpzone/tablestorage/tables/__init__.py,sha256=l_8wElG1oam39fmXqIXDAnCYqbKXEyX1G0cD6JCCv4s,78
|
|
43
43
|
warpzone/tablestorage/tables/client.py,sha256=chvmTz5S8M1-IWYgfJHyTCwLZVsVzlchMuCYwHzQhCE,3636
|
|
@@ -52,6 +52,6 @@ warpzone/tools/copy.py,sha256=5fddotMZkXZO8avzUbGOhvs0cp8mce95pNpy0oPVjnQ,2596
|
|
|
52
52
|
warpzone/transform/__init__.py,sha256=ruGa7tl-v4ndlWpULE1jSGU_a4_iRc3V6eyNr5xKP9E,27
|
|
53
53
|
warpzone/transform/data.py,sha256=Abb8PcrgMbbNCJkkIUdtrTHdlY0OfXid387qw1nDpFY,2362
|
|
54
54
|
warpzone/transform/schema.py,sha256=nbSQtDMvXkyqGKuwhuFCF0WsEDsaNyoPYpMKvbsKlv8,2423
|
|
55
|
-
warpzone_sdk-15.
|
|
56
|
-
warpzone_sdk-15.
|
|
57
|
-
warpzone_sdk-15.
|
|
55
|
+
warpzone_sdk-15.1.0.dist-info/METADATA,sha256=q0pdO2gAFw7uT3fTYGgBfLeFDOyQstIvEQR-3qfKZb8,7279
|
|
56
|
+
warpzone_sdk-15.1.0.dist-info/WHEEL,sha256=3ny-bZhpXrU6vSQ1UPG34FoxZBp3lVcvK0LkgUz6VLk,88
|
|
57
|
+
warpzone_sdk-15.1.0.dist-info/RECORD,,
|
|
File without changes
|