warpzone-sdk 15.0.0.dev14__py3-none-any.whl → 15.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
warpzone/db/client.py CHANGED
@@ -32,29 +32,51 @@ class WarpzoneDatabaseClient:
32
32
  self,
33
33
  path: str,
34
34
  storage_options: dict[str, str] | None = None,
35
+ table_prefix: str = "",
35
36
  ):
36
37
  self.store = Store(
37
38
  path=path,
38
39
  storage_options=storage_options,
39
40
  )
41
+ self.table_prefix = table_prefix
40
42
 
41
43
  @classmethod
42
- def from_storage_account(
44
+ def from_resource_name(
43
45
  cls,
44
46
  storage_account: str,
45
47
  container_name: str = "datasets",
48
+ sub_path: str = "",
49
+ table_prefix: str = "",
46
50
  credential: (
47
51
  AzureNamedKeyCredential | AzureSasCredential | TokenCredential
48
52
  ) = DefaultAzureCredential(),
49
53
  ):
54
+ """Create a WarpzoneDatabaseClient from resource name (storage account).
55
+ This assumes the path of the delta lake is of the form:
56
+ abfss://{container_name}@{storage_account}.dfs.core.windows.net/{sub_path}
57
+
58
+ Args:
59
+ storage_account (str): Storage account name.
60
+ container_name (str, optional): Container name. Defaults to "datasets".
61
+ sub_path (str, optional): Sub-path within the container. Defaults to "".
62
+ table_prefix (str, optional): Table prefix to use (e.g. `mz_` for archive).
63
+ Defaults to "".
64
+ credential (optional): Azure credential to use.
65
+ Defaults to DefaultAzureCredential().
66
+ """
50
67
  path = f"abfss://{container_name}@{storage_account}.dfs.core.windows.net"
68
+ if sub_path:
69
+ path += f"/{sub_path}"
70
+
51
71
  token = credential.get_token("https://storage.azure.com/.default")
52
72
  storage_options = {
53
73
  "account_name": storage_account,
54
74
  "token": token.token,
55
75
  }
56
76
 
57
- return cls(path=path, storage_options=storage_options)
77
+ return cls(
78
+ path=path, storage_options=storage_options, table_prefix=table_prefix
79
+ )
58
80
 
59
81
  def get_unit_and_multiple(self, timedelta: pd.Timedelta) -> tuple[str | None, int]:
60
82
  """
@@ -148,7 +170,41 @@ class WarpzoneDatabaseClient:
148
170
  time_interval: Optional[pdz.TimeInterval] = None,
149
171
  time_travel: Optional[pdz.TimeTravel] = None,
150
172
  filters: Optional[dict[str, object]] = None,
173
+ columns: Optional[list[str]] = None,
174
+ include_validity_period_columns: bool = False,
175
+ include_generated_columns: bool = False,
151
176
  ) -> pd.DataFrame:
177
+ """Query table.
178
+ Query defaults are set to match old Table Storage client behavior.
179
+ Time travel defaults to "as of now"
180
+ Validity period columns are dropped by default.
181
+ Generated columns are dropped by default.
182
+
183
+ Args:
184
+ table_name (str): Name of the table
185
+ time_interval (Optional[pdz.TimeInterval], optional): Time interval for the
186
+ query. Defaults to None.
187
+ time_travel (Optional[pdz.TimeTravel], optional): Time travel information.
188
+ Defaults to None.
189
+ filters (Optional[dict[str, object]], optional): Filters to apply to the
190
+ query.
191
+ Defaults to None.
192
+ columns (Optional[list[str]], optional): Columns to return.
193
+ Selecting columns can significantly improve query performance.
194
+ Defaults to None, meaning all columns will be returned.
195
+ include_validity_period_columns (bool, optional): Whether to include
196
+ validity period columns in the result;
197
+ (`valid_from_time_utc`, `valid_to_time_utc`).
198
+ Defaults to False. If set to True while using `columns`-argument, make sure
199
+ to include these columns in the `columns`-list.
200
+ include_generated_columns (bool, optional): Whether to include generated
201
+ columns in the result, i.e. columns whose schema field has `generated_as` set.
202
+ Defaults to False. If set to True while using `columns`-argument, make sure
203
+ to include these columns in the `columns`-list.
204
+
205
+ Returns:
206
+ pd.DataFrame: The result of the query.
207
+ """
152
208
  # We do 'camelCaseToSnake_case' conversion here because the old
153
209
  # naming convention used in WarpZone was CamelCase, while the new
154
210
  # naming convention is snake_case. The goal is to remove this
@@ -164,7 +220,6 @@ class WarpzoneDatabaseClient:
164
220
  hyper_slice.append((key, "in", value))
165
221
  else:
166
222
  hyper_slice.append((key, "=", value))
167
-
168
223
  if time_interval:
169
224
  hyper_slice.append(("time_utc", ">=", time_interval.left))
170
225
  hyper_slice.append(("time_utc", "<=", time_interval.right))
@@ -180,9 +235,9 @@ class WarpzoneDatabaseClient:
180
235
  valid_from_column="valid_from_time_utc",
181
236
  valid_to_column="valid_to_time_utc",
182
237
  )
183
- hyper_slice.extend(tt_filter)
184
238
 
185
- pl_df = table.read(hyper_slice=HyperSlice(hyper_slice))
239
+ hyper_slice.extend(tt_filter)
240
+ pl_df = table.read(hyper_slice=HyperSlice(hyper_slice), columns=columns)
186
241
 
187
242
  pd_df = pl_df.to_pandas()
188
243
 
@@ -191,16 +246,16 @@ class WarpzoneDatabaseClient:
191
246
  for col in pd_df.select_dtypes(include=["datetime", "datetimetz"]).columns:
192
247
  pd_df[col] = pd_df[col].dt.floor("s").dt.as_unit("ns")
193
248
 
194
- # We remove the valid-from and valid-to columns, as well
195
- # as any generated columns, as this was not present
196
- in the old solution (Azure Table Storage)
197
- generated_cols = []
198
- for field in table.schema().fields:
199
- if field.generated_as is not None:
200
- generated_cols.append(field.column_name)
201
-
202
- pd_df = pd_df.drop(
203
- columns=["valid_from_time_utc", "valid_to_time_utc"] + generated_cols
204
- )
249
+ # Drop generated columns
250
+ if not include_generated_columns:
251
+ generated_cols = []
252
+ for field in table.schema().fields:
253
+ if field.generated_as is not None:
254
+ generated_cols.append(field.column_name)
255
+ pd_df = pd_df.drop(columns=generated_cols)
256
+
257
+ # Drop valid-from/to columns
258
+ if not include_validity_period_columns:
259
+ pd_df = pd_df.drop(columns=["valid_from_time_utc", "valid_to_time_utc"])
205
260
 
206
261
  return pd_df
@@ -2,6 +2,7 @@ import os
2
2
 
3
3
  import deltalake as dl
4
4
  import obstore as obs
5
+ from deltalake.exceptions import TableNotFoundError as DeltaTableNotFoundError
5
6
 
6
7
  from .schema import Schema
7
8
  from .table import Table
@@ -76,10 +77,19 @@ class Store:
76
77
  Args:
77
78
  table_name (str): Table name
78
79
  """
79
- return dl.DeltaTable.is_deltatable(
80
- table_uri=self._get_table_uri(table_name),
81
- storage_options=self.storage_options,
82
- )
80
+ # For some reason `deltalake.DeltaTable.is_deltatable()` can be very slow.
81
+ # deltalake has an issue open about this:
82
+ # https://github.com/delta-io/delta-rs/issues/3942
83
+ # For now we catch the exception when trying to load the table
84
+ try:
85
+ _ = dl.DeltaTable(
86
+ table_uri=self._get_table_uri(table_name),
87
+ storage_options=self.storage_options,
88
+ without_files=True,
89
+ )
90
+ except DeltaTableNotFoundError:
91
+ return False
92
+ return True
83
93
 
84
94
  def create_table(
85
95
  self,
@@ -42,6 +42,7 @@ class Table:
42
42
  self.storage_options = storage_options
43
43
 
44
44
  self.table_name = self.table_uri.split("/")[-1]
45
+ self._delta_table = None
45
46
 
46
47
  def __repr__(self):
47
48
  return f"Table('{self.table_name}')"
@@ -49,12 +50,21 @@ class Table:
49
50
  @property
50
51
  def delta_table(self) -> dl.DeltaTable:
51
52
  """Get the Delta table object.
52
- This is lazily computed to avoid using the same instance.
53
- This is *important* because using the same instance can lead to transaction
53
+ As the `Table`-class is lazily initialized,
54
+ the `delta_table`-property is initialized on the first access
55
+ and saved for future use to minimize overhead.
56
+ It is *important* that this property is only initialized within
57
+ a lock when doing concurrent reads/writes
58
+ and not initialized when creating the `Table`-object.
59
+ This is important because an instance created before the lock is held can lead to transaction
54
60
  issues in delta as DeltaTable uses metadata (transaction id) from
55
61
  the first time the object is instantiated.
56
62
  """
57
- return dl.DeltaTable(self.table_uri, storage_options=self.storage_options)
63
+ if self._delta_table is None:
64
+ self._delta_table = dl.DeltaTable(
65
+ self.table_uri, storage_options=self.storage_options
66
+ )
67
+ return self._delta_table
58
68
 
59
69
  def partition_cols(self) -> list[str]:
60
70
  """Get the partition columns of the table"""
@@ -65,7 +75,9 @@ class Table:
65
75
  pa_schema = pa.schema(self.delta_table.schema())
66
76
  return Schema.from_arrow(pa_schema)
67
77
 
68
- def read(self, hyper_slice: Optional[HyperSlice] = None) -> pl.DataFrame:
78
+ def read(
79
+ self, hyper_slice: Optional[HyperSlice] = None, columns=None
80
+ ) -> pl.DataFrame:
69
81
  """Read from Delta table
70
82
 
71
83
  Args:
@@ -88,6 +100,7 @@ class Table:
88
100
  partition_filters = [f for f in hyper_slice if f[0] in partition_cols]
89
101
 
90
102
  pyarrow_table_existing_data = delta_table.to_pyarrow_table(
103
+ columns=columns,
91
104
  partitions=partition_filters,
92
105
  filters=file_filters,
93
106
  )
@@ -62,7 +62,7 @@ def get_db_client() -> WarpzoneDatabaseClient:
62
62
 
63
63
 
64
64
  def get_delta_db_client() -> WarpzoneDeltaDatabaseClient:
65
- db_client = WarpzoneDeltaDatabaseClient.from_storage_account(
65
+ db_client = WarpzoneDeltaDatabaseClient.from_resource_name(
66
66
  os.environ["OPERATIONAL_DATA_STORAGE_ACCOUNT"],
67
67
  credential=_credential,
68
68
  )
warpzone/monitor/logs.py CHANGED
@@ -34,6 +34,7 @@ LOGGING_IS_CONFIGURED = False
34
34
 
35
35
  def configure_logging():
36
36
  global LOGGING_IS_CONFIGURED
37
+ # Add thread locking to avoid race conditions during setup
37
38
  with _LOGGING_LOCK:
38
39
  if LOGGING_IS_CONFIGURED:
39
40
  # logging should only be set up once
@@ -23,6 +23,7 @@ TRACING_IS_CONFIGURED = False
23
23
 
24
24
  def configure_tracing():
25
25
  global TRACING_IS_CONFIGURED
26
+ # Add thread locking to avoid race conditions during setup
26
27
  with _TRACING_LOCK:
27
28
  if TRACING_IS_CONFIGURED:
28
29
  # tracing should only be set up once
@@ -12,13 +12,10 @@ from azure.identity import DefaultAzureCredential
12
12
 
13
13
  from warpzone.blobstorage.client import WarpzoneBlobClient
14
14
  from warpzone.healthchecks import HealthCheckResult, check_health_of
15
- from warpzone.monitor import traces
16
15
  from warpzone.tablestorage.db import base_client
17
16
  from warpzone.tablestorage.db.table_config import DataType, TableMetadata
18
17
  from warpzone.tablestorage.tables.client import WarpzoneTableClient
19
18
 
20
- tracer = traces.get_tracer(__name__)
21
-
22
19
 
23
20
  class WarpzoneDatabaseClient:
24
21
  """Class to interact with Azure Table Storage for database queries
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: warpzone-sdk
3
- Version: 15.0.0.dev14
3
+ Version: 15.1.0
4
4
  Summary: The main objective of this package is to centralize logic used to interact with Azure Functions, Azure Service Bus and Azure Table Storage
5
5
  Author: Team Enigma
6
6
  Author-email: enigma@energinet.dk
@@ -2,21 +2,21 @@ warpzone/__init__.py,sha256=Ay7znIadokCdHHqsPlWAqhdg-zdWhXHNtxVinViYm7o,1533
2
2
  warpzone/blobstorage/__init__.py,sha256=lnc0uiaGLF0qMi_rWhCpRSFvaj0CJEiMCAl6Yqn1ZiA,21
3
3
  warpzone/blobstorage/client.py,sha256=YwDV83acoCeHS_D_ydsTYwnf56rSTy9CKpsxqeoXmBs,4638
4
4
  warpzone/db/__init__.py,sha256=lnc0uiaGLF0qMi_rWhCpRSFvaj0CJEiMCAl6Yqn1ZiA,21
5
- warpzone/db/client.py,sha256=_LYIZIYwaii9dyTwpqoOCYbqzzcbJNgflzSvUqeTto8,6919
5
+ warpzone/db/client.py,sha256=tqY_ANThnO72rBaB4sydigLhlSFwCmadg_6rVZwjO8M,9688
6
6
  warpzone/deltastorage/__init__.py,sha256=cV8sGT2N_N5Z-E179NMW5O7q3FUDrb3j5f-yVNlNPv0,152
7
7
  warpzone/deltastorage/data_types.py,sha256=tWjLO_0ig7-tYxSHShvrd0znA7FFDnS-wBuFClUQG2U,2059
8
8
  warpzone/deltastorage/generated_columns.py,sha256=Dr_bihM7v9JKCgBXxc3JQC2P2mUGobXsReRKOl6jDO4,4765
9
9
  warpzone/deltastorage/lock_client.py,sha256=z19ub2ahsUHdcriRmgLOxO6zGOGadxF6NmNAhcGp9Lg,9024
10
10
  warpzone/deltastorage/schema.py,sha256=AhkJnsB9oX3HVwoCUWcYSdD1zwouT2wj5XydXgU5WEA,4483
11
11
  warpzone/deltastorage/slicing.py,sha256=r4Fbg8HHBkZlrYEC847yq86caCTTE02JzpnY_TqI_MA,638
12
- warpzone/deltastorage/store.py,sha256=bmWfIy5TXQU1G89-IuBrK4xZ5s7UtDz18QVuJ-ZECBw,4337
13
- warpzone/deltastorage/table.py,sha256=JE5rviTlmKICRG6u0dFxnLBzkCfT7y0I77pOkxXIXO0,5212
12
+ warpzone/deltastorage/store.py,sha256=LwoDXk0xtQuzfMbqv_ldIRh-9RzA-Hx9NRhtBmlbWB8,4812
13
+ warpzone/deltastorage/table.py,sha256=_5tBQEKnimNWsAWyVhGjuX_3pllqzakkMrvALw6-2Qw,5713
14
14
  warpzone/enums/__init__.py,sha256=Gvd-EeZc5jWu5WdusYe3i3Zyt1v85CljBK_EJu7EwZk,24
15
15
  warpzone/enums/topicenum.py,sha256=cj9F7kYzwvDeHN5jJXwgnHXTh6E64VwRNpFepNthIyk,213
16
16
  warpzone/function/__init__.py,sha256=rJOZBpWsUgjMc7YtXMJ1rLGm45KB1AhDJ_Y2ISiSISc,35
17
17
  warpzone/function/checks.py,sha256=B9YqThymf16ac_fVAYKilv20ru5v9nwXgHlbxYIaG98,1018
18
18
  warpzone/function/functionize.py,sha256=bSV0QvwKbD9Vo3a_8cc1rgV2rzTdMMvidinyXItBfvs,2128
19
- warpzone/function/integrations.py,sha256=sDt2BTx6a4mVc-33wTITP9XQVPustwj7rkX4urTyOqo,4018
19
+ warpzone/function/integrations.py,sha256=Law-0TI_tbm8rq5XXuilDH69_0LhoqaZhDbTL498Qik,4016
20
20
  warpzone/function/monitor.py,sha256=xD13d4795a9qgGphOywFl4sOXRQjKypiL2ozQFOseqQ,1862
21
21
  warpzone/function/process.py,sha256=nbUVywM8ChfUwuaqFisgaD98aNRgeZkK4g5sbtuBdRs,2339
22
22
  warpzone/function/processors/__init__.py,sha256=DhIdSWLBcIeSO8IJdxPqGIhgwwnkDN6_Xqwy93BCLeA,46
@@ -28,8 +28,8 @@ warpzone/function/types.py,sha256=5m2hRrnLC3eqIlAH5-MM9_wKjMZ6lYawZtCOVStyFuY,72
28
28
  warpzone/healthchecks/__init__.py,sha256=9gc_Mt2szs8sDSwy0V4l3JZ6d9hX41xTpZCkDP2qsY4,2108
29
29
  warpzone/healthchecks/model.py,sha256=mM7DnrirLbUpBPPfi82MUPP654D0eOR2_F65TmzsPD0,1187
30
30
  warpzone/monitor/__init__.py,sha256=gXT2cxR4tlZER54zd7D49ZQBVyitLaqj13_cUoILuyM,109
31
- warpzone/monitor/logs.py,sha256=AO853uEpD9ZZldQsBic5IR9k3GuQPyWafk4pC5fAGio,3084
32
- warpzone/monitor/traces.py,sha256=2ANxKmO1yca3JFBi68P9HeXQo8WVOOVeT1ev3GLURSY,4107
31
+ warpzone/monitor/logs.py,sha256=q3SUQCtG1ii0B9GkVs2l8kgZ5b5bI2qI6L0SoOW2QTY,3147
32
+ warpzone/monitor/traces.py,sha256=Xc_po1LxJFy5jtNWxIVphIInl_d89Zw3Rb21PsdQhQA,4170
33
33
  warpzone/servicebus/data/__init__.py,sha256=lnc0uiaGLF0qMi_rWhCpRSFvaj0CJEiMCAl6Yqn1ZiA,21
34
34
  warpzone/servicebus/data/client.py,sha256=zECS3JwedhYnDk8PntYgIYpBF_uu9YN38KzpPFK7CKs,6511
35
35
  warpzone/servicebus/events/__init__.py,sha256=lnc0uiaGLF0qMi_rWhCpRSFvaj0CJEiMCAl6Yqn1ZiA,21
@@ -37,7 +37,7 @@ warpzone/servicebus/events/client.py,sha256=8v8XsF-2RwzKIi_93IzR_eR-BZTGXXHSuV4P
37
37
  warpzone/servicebus/events/triggers.py,sha256=_QuPTBbje7LrBoz0qhhgrtDZOcE6x1S9GNu-WJUQ8bY,2626
38
38
  warpzone/tablestorage/db/__init__.py,sha256=lnc0uiaGLF0qMi_rWhCpRSFvaj0CJEiMCAl6Yqn1ZiA,21
39
39
  warpzone/tablestorage/db/base_client.py,sha256=ropKO6z0UXqBl38NuGYV4VZ_ZFm4w1d84ReOLYoBKLY,2376
40
- warpzone/tablestorage/db/client.py,sha256=5I_5pUzB9ZVYyj8cf2g9PIuMdXIfaytWsAnDm6EZnyg,6661
40
+ warpzone/tablestorage/db/client.py,sha256=-OchZI7x--Z9msNp912ggZ_sNGwNDXO-i7VGV6INSrg,6587
41
41
  warpzone/tablestorage/db/table_config.py,sha256=PC45dnr3vVMVr8ktu3GWoH8u3JKJaCRAvMCroUaC1NE,1336
42
42
  warpzone/tablestorage/tables/__init__.py,sha256=l_8wElG1oam39fmXqIXDAnCYqbKXEyX1G0cD6JCCv4s,78
43
43
  warpzone/tablestorage/tables/client.py,sha256=chvmTz5S8M1-IWYgfJHyTCwLZVsVzlchMuCYwHzQhCE,3636
@@ -52,6 +52,6 @@ warpzone/tools/copy.py,sha256=5fddotMZkXZO8avzUbGOhvs0cp8mce95pNpy0oPVjnQ,2596
52
52
  warpzone/transform/__init__.py,sha256=ruGa7tl-v4ndlWpULE1jSGU_a4_iRc3V6eyNr5xKP9E,27
53
53
  warpzone/transform/data.py,sha256=Abb8PcrgMbbNCJkkIUdtrTHdlY0OfXid387qw1nDpFY,2362
54
54
  warpzone/transform/schema.py,sha256=nbSQtDMvXkyqGKuwhuFCF0WsEDsaNyoPYpMKvbsKlv8,2423
55
- warpzone_sdk-15.0.0.dev14.dist-info/METADATA,sha256=C7hvF9-5FlHq-EwoLv65npYpGLZTIEXAsRXo_0Sp_GE,7285
56
- warpzone_sdk-15.0.0.dev14.dist-info/WHEEL,sha256=3ny-bZhpXrU6vSQ1UPG34FoxZBp3lVcvK0LkgUz6VLk,88
57
- warpzone_sdk-15.0.0.dev14.dist-info/RECORD,,
55
+ warpzone_sdk-15.1.0.dist-info/METADATA,sha256=q0pdO2gAFw7uT3fTYGgBfLeFDOyQstIvEQR-3qfKZb8,7279
56
+ warpzone_sdk-15.1.0.dist-info/WHEEL,sha256=3ny-bZhpXrU6vSQ1UPG34FoxZBp3lVcvK0LkgUz6VLk,88
57
+ warpzone_sdk-15.1.0.dist-info/RECORD,,