warpzone-sdk 15.0.1.dev1__tar.gz → 15.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (57)
  1. {warpzone_sdk-15.0.1.dev1 → warpzone_sdk-15.1.0}/PKG-INFO +1 -1
  2. {warpzone_sdk-15.0.1.dev1 → warpzone_sdk-15.1.0}/pyproject.toml +1 -1
  3. {warpzone_sdk-15.0.1.dev1 → warpzone_sdk-15.1.0}/warpzone/db/client.py +11 -6
  4. {warpzone_sdk-15.0.1.dev1 → warpzone_sdk-15.1.0}/warpzone/deltastorage/store.py +14 -4
  5. {warpzone_sdk-15.0.1.dev1 → warpzone_sdk-15.1.0}/warpzone/deltastorage/table.py +17 -4
  6. {warpzone_sdk-15.0.1.dev1 → warpzone_sdk-15.1.0}/warpzone/monitor/traces.py +0 -7
  7. {warpzone_sdk-15.0.1.dev1 → warpzone_sdk-15.1.0}/README.md +0 -0
  8. {warpzone_sdk-15.0.1.dev1 → warpzone_sdk-15.1.0}/warpzone/__init__.py +0 -0
  9. {warpzone_sdk-15.0.1.dev1 → warpzone_sdk-15.1.0}/warpzone/blobstorage/__init__.py +0 -0
  10. {warpzone_sdk-15.0.1.dev1 → warpzone_sdk-15.1.0}/warpzone/blobstorage/client.py +0 -0
  11. {warpzone_sdk-15.0.1.dev1 → warpzone_sdk-15.1.0}/warpzone/db/__init__.py +0 -0
  12. {warpzone_sdk-15.0.1.dev1 → warpzone_sdk-15.1.0}/warpzone/deltastorage/__init__.py +0 -0
  13. {warpzone_sdk-15.0.1.dev1 → warpzone_sdk-15.1.0}/warpzone/deltastorage/data_types.py +0 -0
  14. {warpzone_sdk-15.0.1.dev1 → warpzone_sdk-15.1.0}/warpzone/deltastorage/generated_columns.py +0 -0
  15. {warpzone_sdk-15.0.1.dev1 → warpzone_sdk-15.1.0}/warpzone/deltastorage/lock_client.py +0 -0
  16. {warpzone_sdk-15.0.1.dev1 → warpzone_sdk-15.1.0}/warpzone/deltastorage/schema.py +0 -0
  17. {warpzone_sdk-15.0.1.dev1 → warpzone_sdk-15.1.0}/warpzone/deltastorage/slicing.py +0 -0
  18. {warpzone_sdk-15.0.1.dev1 → warpzone_sdk-15.1.0}/warpzone/enums/__init__.py +0 -0
  19. {warpzone_sdk-15.0.1.dev1 → warpzone_sdk-15.1.0}/warpzone/enums/topicenum.py +0 -0
  20. {warpzone_sdk-15.0.1.dev1 → warpzone_sdk-15.1.0}/warpzone/function/__init__.py +0 -0
  21. {warpzone_sdk-15.0.1.dev1 → warpzone_sdk-15.1.0}/warpzone/function/checks.py +0 -0
  22. {warpzone_sdk-15.0.1.dev1 → warpzone_sdk-15.1.0}/warpzone/function/functionize.py +0 -0
  23. {warpzone_sdk-15.0.1.dev1 → warpzone_sdk-15.1.0}/warpzone/function/integrations.py +0 -0
  24. {warpzone_sdk-15.0.1.dev1 → warpzone_sdk-15.1.0}/warpzone/function/monitor.py +0 -0
  25. {warpzone_sdk-15.0.1.dev1 → warpzone_sdk-15.1.0}/warpzone/function/process.py +0 -0
  26. {warpzone_sdk-15.0.1.dev1 → warpzone_sdk-15.1.0}/warpzone/function/processors/__init__.py +0 -0
  27. {warpzone_sdk-15.0.1.dev1 → warpzone_sdk-15.1.0}/warpzone/function/processors/dependencies.py +0 -0
  28. {warpzone_sdk-15.0.1.dev1 → warpzone_sdk-15.1.0}/warpzone/function/processors/outputs.py +0 -0
  29. {warpzone_sdk-15.0.1.dev1 → warpzone_sdk-15.1.0}/warpzone/function/processors/triggers.py +0 -0
  30. {warpzone_sdk-15.0.1.dev1 → warpzone_sdk-15.1.0}/warpzone/function/signature.py +0 -0
  31. {warpzone_sdk-15.0.1.dev1 → warpzone_sdk-15.1.0}/warpzone/function/types.py +0 -0
  32. {warpzone_sdk-15.0.1.dev1 → warpzone_sdk-15.1.0}/warpzone/healthchecks/__init__.py +0 -0
  33. {warpzone_sdk-15.0.1.dev1 → warpzone_sdk-15.1.0}/warpzone/healthchecks/model.py +0 -0
  34. {warpzone_sdk-15.0.1.dev1 → warpzone_sdk-15.1.0}/warpzone/monitor/__init__.py +0 -0
  35. {warpzone_sdk-15.0.1.dev1 → warpzone_sdk-15.1.0}/warpzone/monitor/logs.py +0 -0
  36. {warpzone_sdk-15.0.1.dev1 → warpzone_sdk-15.1.0}/warpzone/servicebus/data/__init__.py +0 -0
  37. {warpzone_sdk-15.0.1.dev1 → warpzone_sdk-15.1.0}/warpzone/servicebus/data/client.py +0 -0
  38. {warpzone_sdk-15.0.1.dev1 → warpzone_sdk-15.1.0}/warpzone/servicebus/events/__init__.py +0 -0
  39. {warpzone_sdk-15.0.1.dev1 → warpzone_sdk-15.1.0}/warpzone/servicebus/events/client.py +0 -0
  40. {warpzone_sdk-15.0.1.dev1 → warpzone_sdk-15.1.0}/warpzone/servicebus/events/triggers.py +0 -0
  41. {warpzone_sdk-15.0.1.dev1 → warpzone_sdk-15.1.0}/warpzone/tablestorage/db/__init__.py +0 -0
  42. {warpzone_sdk-15.0.1.dev1 → warpzone_sdk-15.1.0}/warpzone/tablestorage/db/base_client.py +0 -0
  43. {warpzone_sdk-15.0.1.dev1 → warpzone_sdk-15.1.0}/warpzone/tablestorage/db/client.py +0 -0
  44. {warpzone_sdk-15.0.1.dev1 → warpzone_sdk-15.1.0}/warpzone/tablestorage/db/table_config.py +0 -0
  45. {warpzone_sdk-15.0.1.dev1 → warpzone_sdk-15.1.0}/warpzone/tablestorage/tables/__init__.py +0 -0
  46. {warpzone_sdk-15.0.1.dev1 → warpzone_sdk-15.1.0}/warpzone/tablestorage/tables/client.py +0 -0
  47. {warpzone_sdk-15.0.1.dev1 → warpzone_sdk-15.1.0}/warpzone/tablestorage/tables/entities.py +0 -0
  48. {warpzone_sdk-15.0.1.dev1 → warpzone_sdk-15.1.0}/warpzone/tablestorage/tables/helpers.py +0 -0
  49. {warpzone_sdk-15.0.1.dev1 → warpzone_sdk-15.1.0}/warpzone/testing/__init__.py +0 -0
  50. {warpzone_sdk-15.0.1.dev1 → warpzone_sdk-15.1.0}/warpzone/testing/assertions.py +0 -0
  51. {warpzone_sdk-15.0.1.dev1 → warpzone_sdk-15.1.0}/warpzone/testing/data.py +0 -0
  52. {warpzone_sdk-15.0.1.dev1 → warpzone_sdk-15.1.0}/warpzone/testing/matchers.py +0 -0
  53. {warpzone_sdk-15.0.1.dev1 → warpzone_sdk-15.1.0}/warpzone/tools/__init__.py +0 -0
  54. {warpzone_sdk-15.0.1.dev1 → warpzone_sdk-15.1.0}/warpzone/tools/copy.py +0 -0
  55. {warpzone_sdk-15.0.1.dev1 → warpzone_sdk-15.1.0}/warpzone/transform/__init__.py +0 -0
  56. {warpzone_sdk-15.0.1.dev1 → warpzone_sdk-15.1.0}/warpzone/transform/data.py +0 -0
  57. {warpzone_sdk-15.0.1.dev1 → warpzone_sdk-15.1.0}/warpzone/transform/schema.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: warpzone-sdk
3
- Version: 15.0.1.dev1
3
+ Version: 15.1.0
4
4
  Summary: The main objective of this package is to centralize logic used to interact with Azure Functions, Azure Service Bus and Azure Table Storage
5
5
  Author: Team Enigma
6
6
  Author-email: enigma@energinet.dk
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "warpzone-sdk"
3
- version = "15.0.1.dev1"
3
+ version = "15.1.0"
4
4
  description = "The main objective of this package is to centralize logic used to interact with Azure Functions, Azure Service Bus and Azure Table Storage"
5
5
  authors = [{ name = "Team Enigma", email = "enigma@energinet.dk" }]
6
6
  requires-python = ">=3.10"
@@ -170,6 +170,7 @@ class WarpzoneDatabaseClient:
170
170
  time_interval: Optional[pdz.TimeInterval] = None,
171
171
  time_travel: Optional[pdz.TimeTravel] = None,
172
172
  filters: Optional[dict[str, object]] = None,
173
+ columns: Optional[list[str]] = None,
173
174
  include_validity_period_columns: bool = False,
174
175
  include_generated_columns: bool = False,
175
176
  ) -> pd.DataFrame:
@@ -188,13 +189,18 @@ class WarpzoneDatabaseClient:
188
189
  filters (Optional[dict[str, object]], optional): Filters to apply to the
189
190
  query.
190
191
  Defaults to None.
192
+ columns (Optional[list[str]], optional): Columns to return.
193
+ Selecting columns can significantly improve query performance.
194
+ Defaults to None, meaning all columns will be returned.
191
195
  include_validity_period_columns (bool, optional): Whether to include
192
196
  validity period columns in the result;
193
- (e.g. `valid_from_time_utc`, `valid_to_time_utc`).
194
- Defaults to False.
197
+ (`valid_from_time_utc`, `valid_to_time_utc`).
198
+ Defaults to False. If set to True while using `columns`-argument, make sure
199
+ to include these columns in the `columns`-list.
195
200
  include_generated_columns (bool, optional): Whether to include generated
196
201
  columns in the result; (e.g. `valid_from_time_utc`, `valid_to_time_utc`).
197
- Defaults to False.
202
+ Defaults to False. If set to True while using `columns`-argument, make sure
203
+ to include these columns in the `columns`-list.
198
204
 
199
205
  Returns:
200
206
  pd.DataFrame: The result of the query.
@@ -214,7 +220,6 @@ class WarpzoneDatabaseClient:
214
220
  hyper_slice.append((key, "in", value))
215
221
  else:
216
222
  hyper_slice.append((key, "=", value))
217
-
218
223
  if time_interval:
219
224
  hyper_slice.append(("time_utc", ">=", time_interval.left))
220
225
  hyper_slice.append(("time_utc", "<=", time_interval.right))
@@ -230,9 +235,9 @@ class WarpzoneDatabaseClient:
230
235
  valid_from_column="valid_from_time_utc",
231
236
  valid_to_column="valid_to_time_utc",
232
237
  )
233
- hyper_slice.extend(tt_filter)
234
238
 
235
- pl_df = table.read(hyper_slice=HyperSlice(hyper_slice))
239
+ hyper_slice.extend(tt_filter)
240
+ pl_df = table.read(hyper_slice=HyperSlice(hyper_slice), columns=columns)
236
241
 
237
242
  pd_df = pl_df.to_pandas()
238
243
 
@@ -2,6 +2,7 @@ import os
2
2
 
3
3
  import deltalake as dl
4
4
  import obstore as obs
5
+ from deltalake.exceptions import TableNotFoundError as DeltaTableNotFoundError
5
6
 
6
7
  from .schema import Schema
7
8
  from .table import Table
@@ -76,10 +77,19 @@ class Store:
76
77
  Args:
77
78
  table_name (str): Table name
78
79
  """
79
- return dl.DeltaTable.is_deltatable(
80
- table_uri=self._get_table_uri(table_name),
81
- storage_options=self.storage_options,
82
- )
80
+ # For some reason `deltalake.DeltaTable.is_deltatable()` can be very slow.
81
+ # deltalake has an issue open about this:
82
+ # https://github.com/delta-io/delta-rs/issues/3942
83
+ # For now we catch the exception when trying to load the table
84
+ try:
85
+ _ = dl.DeltaTable(
86
+ table_uri=self._get_table_uri(table_name),
87
+ storage_options=self.storage_options,
88
+ without_files=True,
89
+ )
90
+ except DeltaTableNotFoundError:
91
+ return False
92
+ return True
83
93
 
84
94
  def create_table(
85
95
  self,
@@ -42,6 +42,7 @@ class Table:
42
42
  self.storage_options = storage_options
43
43
 
44
44
  self.table_name = self.table_uri.split("/")[-1]
45
+ self._delta_table = None
45
46
 
46
47
  def __repr__(self):
47
48
  return f"Table('{self.table_name}')"
@@ -49,12 +50,21 @@ class Table:
49
50
  @property
50
51
  def delta_table(self) -> dl.DeltaTable:
51
52
  """Get the Delta table object.
52
- This is lazily computed to avoid using the same instance.
53
- This is *important* because using the same instance can lead to transaction
53
+ As the `Table`-class is lazily initialized,
54
+ the `delta_table`-property is initialized on the first access
55
+ and saved for future use to minimize overhead.
56
+ It is *important* that this property is only initialized within
57
+ a lock when doing concurrent reads/writes
58
+ and not initialized when creating the `Table`-object.
59
+ This is important because using the same instance can lead to transaction
54
60
  issues in delta as DeltaTable uses metadata (transaction id) from
55
61
  the first time the object is instantiated.
56
62
  """
57
- return dl.DeltaTable(self.table_uri, storage_options=self.storage_options)
63
+ if self._delta_table is None:
64
+ self._delta_table = dl.DeltaTable(
65
+ self.table_uri, storage_options=self.storage_options
66
+ )
67
+ return self._delta_table
58
68
 
59
69
  def partition_cols(self) -> list[str]:
60
70
  """Get the partition columns of the table"""
@@ -65,7 +75,9 @@ class Table:
65
75
  pa_schema = pa.schema(self.delta_table.schema())
66
76
  return Schema.from_arrow(pa_schema)
67
77
 
68
- def read(self, hyper_slice: Optional[HyperSlice] = None) -> pl.DataFrame:
78
+ def read(
79
+ self, hyper_slice: Optional[HyperSlice] = None, columns=None
80
+ ) -> pl.DataFrame:
69
81
  """Read from Delta table
70
82
 
71
83
  Args:
@@ -88,6 +100,7 @@ class Table:
88
100
  partition_filters = [f for f in hyper_slice if f[0] in partition_cols]
89
101
 
90
102
  pyarrow_table_existing_data = delta_table.to_pyarrow_table(
103
+ columns=columns,
91
104
  partitions=partition_filters,
92
105
  filters=file_filters,
93
106
  )
@@ -23,13 +23,6 @@ TRACING_IS_CONFIGURED = False
23
23
 
24
24
  def configure_tracing():
25
25
  global TRACING_IS_CONFIGURED
26
- if TRACING_IS_CONFIGURED:
27
- # tracing should only be set up once
28
- # to avoid duplicated trace handling.
29
- # Global variables is the pattern used
30
- # by opentelemetry, so we use the same
31
- return
32
-
33
26
  # Add thread locking to avoid race conditions during setup
34
27
  with _TRACING_LOCK:
35
28
  if TRACING_IS_CONFIGURED: