wherobots-python-dbapi 0.23.2__tar.gz → 0.25.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (17)
  1. {wherobots_python_dbapi-0.23.2 → wherobots_python_dbapi-0.25.0}/PKG-INFO +54 -1
  2. {wherobots_python_dbapi-0.23.2 → wherobots_python_dbapi-0.25.0}/README.md +53 -0
  3. {wherobots_python_dbapi-0.23.2 → wherobots_python_dbapi-0.25.0}/pyproject.toml +1 -1
  4. {wherobots_python_dbapi-0.23.2 → wherobots_python_dbapi-0.25.0}/wherobots/db/__init__.py +2 -1
  5. {wherobots_python_dbapi-0.23.2 → wherobots_python_dbapi-0.25.0}/wherobots/db/connection.py +44 -6
  6. {wherobots_python_dbapi-0.23.2 → wherobots_python_dbapi-0.25.0}/wherobots/db/models.py +47 -1
  7. {wherobots_python_dbapi-0.23.2 → wherobots_python_dbapi-0.25.0}/wherobots/db/types.py +1 -0
  8. {wherobots_python_dbapi-0.23.2 → wherobots_python_dbapi-0.25.0}/.gitignore +0 -0
  9. {wherobots_python_dbapi-0.23.2 → wherobots_python_dbapi-0.25.0}/LICENSE +0 -0
  10. {wherobots_python_dbapi-0.23.2 → wherobots_python_dbapi-0.25.0}/wherobots/__init__.py +0 -0
  11. {wherobots_python_dbapi-0.23.2 → wherobots_python_dbapi-0.25.0}/wherobots/db/constants.py +0 -0
  12. {wherobots_python_dbapi-0.23.2 → wherobots_python_dbapi-0.25.0}/wherobots/db/cursor.py +0 -0
  13. {wherobots_python_dbapi-0.23.2 → wherobots_python_dbapi-0.25.0}/wherobots/db/driver.py +0 -0
  14. {wherobots_python_dbapi-0.23.2 → wherobots_python_dbapi-0.25.0}/wherobots/db/errors.py +0 -0
  15. {wherobots_python_dbapi-0.23.2 → wherobots_python_dbapi-0.25.0}/wherobots/db/region.py +0 -0
  16. {wherobots_python_dbapi-0.23.2 → wherobots_python_dbapi-0.25.0}/wherobots/db/runtime.py +0 -0
  17. {wherobots_python_dbapi-0.23.2 → wherobots_python_dbapi-0.25.0}/wherobots/db/session_type.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: wherobots-python-dbapi
3
- Version: 0.23.2
3
+ Version: 0.25.0
4
4
  Summary: Python DB-API driver for Wherobots DB
5
5
  Project-URL: Homepage, https://github.com/wherobots/wherobots-python-dbapi-driver
6
6
  Project-URL: Tracker, https://github.com/wherobots/wherobots-python-dbapi-driver/issues
@@ -123,6 +123,59 @@ The `Store` class supports the following options:
123
123
  Use `Store.for_download()` as a convenient shorthand for storing results
124
124
  as a single Parquet file with a presigned URL.
125
125
 
126
+ #### Store options
127
+
128
+ You can pass format-specific Spark write options through the `options`
129
+ parameter. These correspond to the options available in Spark's
130
+ `DataFrameWriter` and are applied after the server's default options,
131
+ allowing you to override them.
132
+
133
+ ```python
134
+ # CSV without headers and a custom delimiter
135
+ store = Store.for_download(
136
+ format=StorageFormat.CSV,
137
+ options={"header": "false", "delimiter": "|"},
138
+ )
139
+
140
+ # GeoJSON preserving null fields
141
+ store = Store.for_download(
142
+ format=StorageFormat.GEOJSON,
143
+ options={"ignoreNullFields": "false"},
144
+ )
145
+ ```
146
+
147
+ ### Execution progress
148
+
149
+ You can monitor the progress of running queries by registering a
150
+ progress handler on the connection.
151
+
152
+ ```python
153
+ from wherobots.db import connect, ProgressInfo
154
+ from wherobots.db.region import Region
155
+ from wherobots.db.runtime import Runtime
156
+
157
+ def on_progress(info: ProgressInfo) -> None:
158
+ print(f"{info.tasks_completed}/{info.tasks_total} tasks "
159
+ f"({info.tasks_active} active)")
160
+
161
+ with connect(
162
+ api_key='...',
163
+ runtime=Runtime.TINY,
164
+ region=Region.AWS_US_WEST_2) as conn:
165
+ conn.set_progress_handler(on_progress)
166
+ curr = conn.cursor()
167
+ curr.execute("SELECT ...")
168
+ results = curr.fetchall()
169
+ ```
170
+
171
+ The handler receives a `ProgressInfo` object with `execution_id`,
172
+ `tasks_total`, `tasks_completed`, and `tasks_active` fields. Pass
173
+ `None` to `set_progress_handler()` to disable progress reporting.
174
+
175
+ Progress events are best-effort and may not be available for all query
176
+ types or server versions. The handler is simply not invoked when no
177
+ progress information is available.
178
+
126
179
  ### Runtime and region selection
127
180
 
128
181
  You can choose the Wherobots runtime you want to use via the `runtime`
@@ -99,6 +99,59 @@ The `Store` class supports the following options:
99
99
  Use `Store.for_download()` as a convenient shorthand for storing results
100
100
  as a single Parquet file with a presigned URL.
101
101
 
102
+ #### Store options
103
+
104
+ You can pass format-specific Spark write options through the `options`
105
+ parameter. These correspond to the options available in Spark's
106
+ `DataFrameWriter` and are applied after the server's default options,
107
+ allowing you to override them.
108
+
109
+ ```python
110
+ # CSV without headers and a custom delimiter
111
+ store = Store.for_download(
112
+ format=StorageFormat.CSV,
113
+ options={"header": "false", "delimiter": "|"},
114
+ )
115
+
116
+ # GeoJSON preserving null fields
117
+ store = Store.for_download(
118
+ format=StorageFormat.GEOJSON,
119
+ options={"ignoreNullFields": "false"},
120
+ )
121
+ ```
122
+
123
+ ### Execution progress
124
+
125
+ You can monitor the progress of running queries by registering a
126
+ progress handler on the connection.
127
+
128
+ ```python
129
+ from wherobots.db import connect, ProgressInfo
130
+ from wherobots.db.region import Region
131
+ from wherobots.db.runtime import Runtime
132
+
133
+ def on_progress(info: ProgressInfo) -> None:
134
+ print(f"{info.tasks_completed}/{info.tasks_total} tasks "
135
+ f"({info.tasks_active} active)")
136
+
137
+ with connect(
138
+ api_key='...',
139
+ runtime=Runtime.TINY,
140
+ region=Region.AWS_US_WEST_2) as conn:
141
+ conn.set_progress_handler(on_progress)
142
+ curr = conn.cursor()
143
+ curr.execute("SELECT ...")
144
+ results = curr.fetchall()
145
+ ```
146
+
147
+ The handler receives a `ProgressInfo` object with `execution_id`,
148
+ `tasks_total`, `tasks_completed`, and `tasks_active` fields. Pass
149
+ `None` to `set_progress_handler()` to disable progress reporting.
150
+
151
+ Progress events are best-effort and may not be available for all query
152
+ types or server versions. The handler is simply not invoked when no
153
+ progress information is available.
154
+
102
155
  ### Runtime and region selection
103
156
 
104
157
  You can choose the Wherobots runtime you want to use via the `runtime`
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "wherobots-python-dbapi"
3
- version = "0.23.2"
3
+ version = "0.25.0"
4
4
  description = "Python DB-API driver for Wherobots DB"
5
5
  authors = [{ name = "Maxime Petazzoni", email = "max@wherobots.com" }]
6
6
  requires-python = ">=3.10, <4"
@@ -10,7 +10,7 @@ from .errors import (
10
10
  ProgrammingError,
11
11
  NotSupportedError,
12
12
  )
13
- from .models import Store, StoreResult
13
+ from .models import ProgressInfo, Store, StoreResult
14
14
  from .region import Region
15
15
  from .runtime import Runtime
16
16
  from .types import StorageFormat
@@ -18,6 +18,7 @@ from .types import StorageFormat
18
18
  __all__ = [
19
19
  "Connection",
20
20
  "Cursor",
21
+ "ProgressInfo",
21
22
  "connect",
22
23
  "connect_direct",
23
24
  "Error",
@@ -16,7 +16,7 @@ import websockets.sync.client
16
16
  from .constants import DEFAULT_READ_TIMEOUT_SECONDS
17
17
  from .cursor import Cursor
18
18
  from .errors import NotSupportedError, OperationalError
19
- from .models import ExecutionResult, Store, StoreResult
19
+ from .models import ExecutionResult, ProgressInfo, Store, StoreResult
20
20
  from .types import (
21
21
  RequestKind,
22
22
  EventKind,
@@ -27,6 +27,10 @@ from .types import (
27
27
  )
28
28
 
29
29
 
30
+ ProgressHandler = Callable[[ProgressInfo], None]
31
+ """A callable invoked with a :class:`ProgressInfo` on every progress event."""
32
+
33
+
30
34
  @dataclass
31
35
  class Query:
32
36
  sql: str
@@ -64,6 +68,7 @@ class Connection:
64
68
  self.__results_format = results_format
65
69
  self.__data_compression = data_compression
66
70
  self.__geometry_representation = geometry_representation
71
+ self.__progress_handler: ProgressHandler | None = None
67
72
 
68
73
  self.__queries: dict[str, Query] = {}
69
74
  self.__thread = threading.Thread(
@@ -89,6 +94,21 @@ class Connection:
89
94
  def cursor(self) -> Cursor:
90
95
  return Cursor(self.__execute_sql, self.__cancel_query)
91
96
 
97
+ def set_progress_handler(self, handler: ProgressHandler | None) -> None:
98
+ """Register a callback invoked for execution progress events.
99
+
100
+ When a handler is set, every ``execute_sql`` request automatically
101
+ includes ``enable_progress_events: true`` so the SQL session streams
102
+ progress updates for running queries.
103
+
104
+ Pass ``None`` to disable progress reporting.
105
+
106
+ This follows the `sqlite3 Connection.set_progress_handler()
107
+ <https://docs.python.org/3/library/sqlite3.html#sqlite3.Connection.set_progress_handler>`_
108
+ pattern (PEP 249 vendor extension).
109
+ """
110
+ self.__progress_handler = handler
111
+
92
112
  def __main_loop(self) -> None:
93
113
  """Main background loop listening for messages from the SQL session."""
94
114
  logging.info("Starting background connection handling loop...")
@@ -116,6 +136,25 @@ class Connection:
116
136
  # Invalid event.
117
137
  return
118
138
 
139
+ # Progress events are independent of the query state machine and don't
140
+ # require a tracked query — the handler is connection-level.
141
+ if kind == EventKind.EXECUTION_PROGRESS:
142
+ handler = self.__progress_handler
143
+ if handler is None:
144
+ return
145
+ try:
146
+ handler(
147
+ ProgressInfo(
148
+ execution_id=execution_id,
149
+ tasks_total=message.get("tasks_total", 0),
150
+ tasks_completed=message.get("tasks_completed", 0),
151
+ tasks_active=message.get("tasks_active", 0),
152
+ )
153
+ )
154
+ except Exception:
155
+ logging.exception("Progress handler raised an exception")
156
+ return
157
+
119
158
  query = self.__queries.get(execution_id)
120
159
  if not query:
121
160
  logging.warning(
@@ -236,12 +275,11 @@ class Connection:
236
275
  "statement": sql,
237
276
  }
238
277
 
278
+ if self.__progress_handler is not None:
279
+ request["enable_progress_events"] = True
280
+
239
281
  if store:
240
- request["store"] = {
241
- "format": store.format.value,
242
- "single": str(store.single).lower(),
243
- "generate_presigned_url": str(store.generate_presigned_url).lower(),
244
- }
282
+ request["store"] = store.to_dict()
245
283
 
246
284
  self.__queries[execution_id] = Query(
247
285
  sql=sql,
@@ -1,4 +1,5 @@
1
1
  from dataclasses import dataclass
2
+ from typing import Any, Dict
2
3
 
3
4
  import pandas
4
5
 
@@ -31,18 +32,32 @@ class Store:
31
32
  single: If True, store as a single file. If False, store as multiple files.
32
33
  generate_presigned_url: If True, generate a presigned URL for the result.
33
34
  Requires single=True.
35
+ options: Optional dict of format-specific Spark DataFrameWriter options
36
+ (e.g. ``{"header": "false", "delimiter": "|"}`` for CSV). These are
37
+ applied after the server's default options, so they can override them.
38
+ An empty dict is normalized to None.
34
39
  """
35
40
 
36
41
  format: StorageFormat
37
42
  single: bool = False
38
43
  generate_presigned_url: bool = False
44
+ options: dict[str, str] | None = None
39
45
 
40
46
  def __post_init__(self) -> None:
41
47
  if self.generate_presigned_url and not self.single:
42
48
  raise ValueError("Presigned URL can only be generated when single=True")
49
+ # Normalize empty options to None and defensively copy.
50
+ if self.options:
51
+ self.options = dict(self.options)
52
+ else:
53
+ self.options = None
43
54
 
44
55
  @classmethod
45
- def for_download(cls, format: StorageFormat | None = None) -> "Store":
56
+ def for_download(
57
+ cls,
58
+ format: StorageFormat | None = None,
59
+ options: dict[str, str] | None = None,
60
+ ) -> "Store":
46
61
  """Create a configuration for downloading results via a presigned URL.
47
62
 
48
63
  This is a convenience method that creates a configuration with
@@ -50,6 +65,7 @@ class Store:
50
65
 
51
66
  Args:
52
67
  format: The storage format.
68
+ options: Optional format-specific Spark DataFrameWriter options.
53
69
 
54
70
  Returns:
55
71
  A Store configured for single-file download with presigned URL.
@@ -58,8 +74,25 @@ class Store:
58
74
  format=format or DEFAULT_STORAGE_FORMAT,
59
75
  single=True,
60
76
  generate_presigned_url=True,
77
+ options=options,
61
78
  )
62
79
 
80
+ def to_dict(self) -> Dict[str, Any]:
81
+ """Serialize this Store to a dict for the WebSocket request.
82
+
83
+ Returns a dict suitable for inclusion as the ``"store"`` field in an
84
+ ``execute_sql`` request. The ``options`` key is omitted when there
85
+ are no user-supplied options (backward compatible).
86
+ """
87
+ d: Dict[str, Any] = {
88
+ "format": self.format.value,
89
+ "single": str(self.single).lower(),
90
+ "generate_presigned_url": str(self.generate_presigned_url).lower(),
91
+ }
92
+ if self.options:
93
+ d["options"] = self.options
94
+ return d
95
+
63
96
 
64
97
  @dataclass
65
98
  class ExecutionResult:
@@ -78,3 +111,16 @@ class ExecutionResult:
78
111
  results: pandas.DataFrame | None = None
79
112
  error: Exception | None = None
80
113
  store_result: StoreResult | None = None
114
+
115
+
116
+ @dataclass(frozen=True)
117
+ class ProgressInfo:
118
+ """Progress information for a running query.
119
+
120
+ Mirrors the ``execution_progress`` event sent by the SQL session.
121
+ """
122
+
123
+ execution_id: str
124
+ tasks_total: int
125
+ tasks_completed: int
126
+ tasks_active: int
@@ -45,6 +45,7 @@ class EventKind(LowercaseStrEnum):
45
45
  STATE_UPDATED = auto()
46
46
  EXECUTION_RESULT = auto()
47
47
  ERROR = auto()
48
+ EXECUTION_PROGRESS = auto()
48
49
 
49
50
 
50
51
  class ResultsFormat(LowercaseStrEnum):