wherobots-python-sdk 0.2.0__tar.gz → 0.3.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (42)
  1. {wherobots_python_sdk-0.2.0/wherobots_python_sdk.egg-info → wherobots_python_sdk-0.3.0}/PKG-INFO +113 -3
  2. {wherobots_python_sdk-0.2.0 → wherobots_python_sdk-0.3.0}/README.md +111 -2
  3. {wherobots_python_sdk-0.2.0 → wherobots_python_sdk-0.3.0}/pyproject.toml +8 -0
  4. wherobots_python_sdk-0.3.0/tests/test_client.py +823 -0
  5. wherobots_python_sdk-0.3.0/tests/test_files_api.py +735 -0
  6. {wherobots_python_sdk-0.2.0 → wherobots_python_sdk-0.3.0}/tests/test_init_exports.py +8 -0
  7. {wherobots_python_sdk-0.2.0 → wherobots_python_sdk-0.3.0}/tests/test_models.py +238 -0
  8. {wherobots_python_sdk-0.2.0 → wherobots_python_sdk-0.3.0}/tests/test_security.py +48 -3
  9. {wherobots_python_sdk-0.2.0 → wherobots_python_sdk-0.3.0}/wherobots/__init__.py +6 -0
  10. {wherobots_python_sdk-0.2.0 → wherobots_python_sdk-0.3.0}/wherobots/__version__.py +1 -1
  11. wherobots_python_sdk-0.3.0/wherobots/api/files.py +579 -0
  12. {wherobots_python_sdk-0.2.0 → wherobots_python_sdk-0.3.0}/wherobots/client.py +334 -31
  13. {wherobots_python_sdk-0.2.0 → wherobots_python_sdk-0.3.0}/wherobots/models.py +272 -3
  14. {wherobots_python_sdk-0.2.0 → wherobots_python_sdk-0.3.0/wherobots_python_sdk.egg-info}/PKG-INFO +113 -3
  15. {wherobots_python_sdk-0.2.0 → wherobots_python_sdk-0.3.0}/wherobots_python_sdk.egg-info/requires.txt +1 -0
  16. wherobots_python_sdk-0.2.0/tests/test_client.py +0 -449
  17. wherobots_python_sdk-0.2.0/tests/test_files_api.py +0 -622
  18. wherobots_python_sdk-0.2.0/wherobots/api/files.py +0 -386
  19. {wherobots_python_sdk-0.2.0 → wherobots_python_sdk-0.3.0}/LICENSE +0 -0
  20. {wherobots_python_sdk-0.2.0 → wherobots_python_sdk-0.3.0}/setup.cfg +0 -0
  21. {wherobots_python_sdk-0.2.0 → wherobots_python_sdk-0.3.0}/tests/test_api.py +0 -0
  22. {wherobots_python_sdk-0.2.0 → wherobots_python_sdk-0.3.0}/tests/test_base_client.py +0 -0
  23. {wherobots_python_sdk-0.2.0 → wherobots_python_sdk-0.3.0}/tests/test_config.py +0 -0
  24. {wherobots_python_sdk-0.2.0 → wherobots_python_sdk-0.3.0}/tests/test_enums.py +0 -0
  25. {wherobots_python_sdk-0.2.0 → wherobots_python_sdk-0.3.0}/tests/test_exceptions.py +0 -0
  26. {wherobots_python_sdk-0.2.0 → wherobots_python_sdk-0.3.0}/tests/test_integration.py +0 -0
  27. {wherobots_python_sdk-0.2.0 → wherobots_python_sdk-0.3.0}/tests/test_logger.py +0 -0
  28. {wherobots_python_sdk-0.2.0 → wherobots_python_sdk-0.3.0}/tests/test_regressions.py +0 -0
  29. {wherobots_python_sdk-0.2.0 → wherobots_python_sdk-0.3.0}/tests/test_utils.py +0 -0
  30. {wherobots_python_sdk-0.2.0 → wherobots_python_sdk-0.3.0}/wherobots/api/__init__.py +0 -0
  31. {wherobots_python_sdk-0.2.0 → wherobots_python_sdk-0.3.0}/wherobots/api/base.py +0 -0
  32. {wherobots_python_sdk-0.2.0 → wherobots_python_sdk-0.3.0}/wherobots/api/runs.py +0 -0
  33. {wherobots_python_sdk-0.2.0 → wherobots_python_sdk-0.3.0}/wherobots/config.py +0 -0
  34. {wherobots_python_sdk-0.2.0 → wherobots_python_sdk-0.3.0}/wherobots/enums.py +0 -0
  35. {wherobots_python_sdk-0.2.0 → wherobots_python_sdk-0.3.0}/wherobots/exceptions.py +0 -0
  36. {wherobots_python_sdk-0.2.0 → wherobots_python_sdk-0.3.0}/wherobots/py.typed +0 -0
  37. {wherobots_python_sdk-0.2.0 → wherobots_python_sdk-0.3.0}/wherobots/utils/__init__.py +0 -0
  38. {wherobots_python_sdk-0.2.0 → wherobots_python_sdk-0.3.0}/wherobots/utils/logger.py +0 -0
  39. {wherobots_python_sdk-0.2.0 → wherobots_python_sdk-0.3.0}/wherobots/utils/validation.py +0 -0
  40. {wherobots_python_sdk-0.2.0 → wherobots_python_sdk-0.3.0}/wherobots_python_sdk.egg-info/SOURCES.txt +0 -0
  41. {wherobots_python_sdk-0.2.0 → wherobots_python_sdk-0.3.0}/wherobots_python_sdk.egg-info/dependency_links.txt +0 -0
  42. {wherobots_python_sdk-0.2.0 → wherobots_python_sdk-0.3.0}/wherobots_python_sdk.egg-info/top_level.txt +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: wherobots-python-sdk
3
- Version: 0.2.0
3
+ Version: 0.3.0
4
4
  Summary: Python SDK for Wherobots (currently covers the Jobs REST API)
5
5
  Author-email: Wherobots <support@wherobots.com>
6
6
  License-Expression: Apache-2.0
@@ -22,6 +22,7 @@ Requires-Python: >=3.10
22
22
  Description-Content-Type: text/markdown
23
23
  License-File: LICENSE
24
24
  Requires-Dist: requests>=2.33.0
25
+ Requires-Dist: boto3>=1.34.0
25
26
  Provides-Extra: dev
26
27
  Requires-Dist: pytest>=7.4.0; extra == "dev"
27
28
  Requires-Dist: pytest-cov>=4.1.0; extra == "dev"
@@ -187,7 +188,7 @@ WherobotsJob(
187
188
  spark_driver_disk_gb: int | None = None, # Driver disk size (GB)
188
189
  spark_executor_disk_gb: int | None = None, # Executor disk size (GB)
189
190
  jar_main_class: str | None = None, # Main class (required for JARs)
190
- auto_upload: bool = True, # Upload local files to S3
191
+ auto_upload: bool = True, # Upload local files to S3 (managed storage)
191
192
  base_url: str | None = None, # Override API URL
192
193
  request_timeout_seconds: int | None = None, # HTTP timeout
193
194
  config: WherobotsConfig | None = None, # Full config object
@@ -202,6 +203,11 @@ WherobotsJob(
202
203
  | `get_status()` | `RunView` | Get current job status and full details. |
203
204
  | `get_logs(cursor=0, size=100)` | `LogsResponse` | Fetch a page of log entries. |
204
205
  | `get_metrics()` | `RunMetricsResponse` | Fetch CPU/memory metrics for the run. |
206
+ | `get_cpu_utilization()` | `UtilizationStats` | Aggregated CPU utilization (`latest`, `max`, `avg`, `series`). |
207
+ | `get_mem_utilization()` | `UtilizationStats` | Aggregated memory utilization (`latest`, `max`, `avg`, `series`). |
208
+ | `get_cost()` | `float \| None` | Total run cost in USD, or `None` if not yet billed. |
209
+ | `get_consumed_spatial_units()` | `float \| None` | Spatial Units (SUs) consumed by the run. |
210
+ | `refresh()` | `RunView` | Re-fetch from the API and update `status`/`name`/`runtime`/`region`/`version` in place. |
205
211
  | `iter_logs(cursor=0, size=100)` | `Iterator[dict]` | Iterate over all log entries, handling pagination automatically. |
206
212
  | `poll_for_logs(follow=True, interval=2.0, log_handler=None, max_errors=10)` | `None` | Poll and print logs. If `follow=True`, continues until job completes. `max_errors` sets the max consecutive transient errors before giving up. |
207
213
  | `cancel()` | `bool` | Request cancellation. Returns `True` on success. |
@@ -227,9 +233,39 @@ with WherobotsJob(script="s3://bucket/script.py", name="my-job") as job:
227
233
 
228
234
  | Method | Returns | Description |
229
235
  |--------|---------|-------------|
236
+ | `from_run_id(run_id, api_key=None, ...)` | `WherobotsJob` | Attach to an existing run for read-only log/metric access. No script required. `submit()` is disabled on the returned instance. |
230
237
  | `list_runs(...)` | `RunListPage` | List runs with optional filters. No instance required. |
231
238
  | `add_pypi_dependency(name, version)` | `dict` | Create a PyPI dependency dict for the `dependencies` parameter. |
232
- | `add_file_dependency(file_path)` | `dict` | Create a file dependency dict (`.jar`, `.whl`, `.zip`, `.json`). |
239
+ | `add_file_dependency(file_path)` | `dict` | Create a file dependency dict (`.jar`, `.whl`, `.zip`, `.json`). Accepts an `s3://` URI or a local path — local paths are auto-uploaded to **managed storage** at `submit()` time. For non-managed destinations, upload via `FilesAPI` first and pass the resulting `s3://` URI. |
240
+
241
+ #### Attaching to an Existing Run
242
+
243
+ If you already have a `run_id` (from the CLI, the Wherobots UI, or a prior SDK
244
+ session) you can attach without a script:
245
+
246
+ ```python
247
+ from wherobots import WherobotsJob
248
+
249
+ job = WherobotsJob.from_run_id("run-abc-123")
250
+ print(job.status, job.name)
251
+
252
+ # Stream remaining logs
253
+ job.poll_for_logs(follow=False)
254
+
255
+ # Or paginate
256
+ for entry in job.iter_logs(size=200):
257
+ print(entry["raw"])
258
+
259
+ # Aggregated utilization for a completed run
260
+ cpu = job.get_cpu_utilization()
261
+ print(f"CPU peak {cpu.max}, avg {cpu.avg}, samples {len(cpu.series)}")
262
+
263
+ # Billing — get_cost() returns None until the run is billed; check for None before applying the :.2f format
264
+ print(f"cost ${job.get_cost():.2f}, SUs {job.get_consumed_spatial_units()}")
265
+ ```
266
+
267
+ Calling `job.submit()` on an attached instance raises
268
+ `WherobotsValidationError` — these instances are read-only.
233
269
 
234
270
  #### Listing Runs
235
271
 
@@ -272,6 +308,80 @@ job = WherobotsJob(
272
308
  )
273
309
  ```
274
310
 
311
+ ##### Local Dependency Files
312
+
313
+ `add_file_dependency` also accepts a **local path** to a `.whl`, `.jar`, `.zip`,
314
+ or `.json`. The SDK uploads the file just before the job is submitted, fetching
315
+ short-lived AWS STS credentials from the Wherobots API (`POST /storage/credentials`)
316
+ and using boto3 multipart under the hood — no AWS credentials need to leave your
317
+ environment.
318
+
319
+ ```python
320
+ job = WherobotsJob(
321
+ script="my_job.py", # local script auto-upload still works
322
+ name="job-with-local-wheel",
323
+ dependencies=[
324
+ # Local wheel — uploaded to managed storage at submit() time
325
+ WherobotsJob.add_file_dependency("dist/mypkg-0.1.0-py3-none-any.whl"),
326
+ # Already-uploaded artifact — used as-is
327
+ WherobotsJob.add_file_dependency("s3://bucket/shared-libs/lib-2.0.jar"),
328
+ WherobotsJob.add_pypi_dependency("pandas", "2.0.3"),
329
+ ],
330
+ )
331
+ job.submit()
332
+ ```
333
+
334
+ Auto-upload always targets the org's **managed storage**:
335
+
336
+ | Artifact | Destination |
337
+ |---|---|
338
+ | Script (when `script` is a local path) | `s3://<managed.path>/scripts/<filename>` |
339
+ | Dependency (when path is a local file) | `s3://<managed.path>/dependencies/<filename>` |
340
+
341
+ Local dependency uploads honor the same `auto_upload` flag as the script —
342
+ if `auto_upload=False`, attempting to submit a job with a local dependency
343
+ path raises `WherobotsValidationError`. The 5 GiB cap on a single upload
344
+ applies; for larger artifacts, pre-upload and pass an `s3://` URI.
345
+
346
+ ### Uploading to a Storage Integration
347
+
348
+ `WherobotsJob` stays focused on job orchestration — it doesn't take any
349
+ "where files go" kwargs. To upload to a [Storage Integration](https://docs.wherobots.com/latest/develop/storage-management/storage/)
350
+ or a custom subdirectory, use `FilesAPI` directly and pass the resulting
351
+ `s3://` URI to `WherobotsJob`:
352
+
353
+ ```python
354
+ from wherobots import FilesAPI, WherobotsJob
355
+ from wherobots.config import WherobotsConfig
356
+
357
+ with FilesAPI.from_config(WherobotsConfig.from_env()) as files:
358
+ si = files.resolve_integration("customer-bucket")
359
+ # Build explicit destinations; upload_file is the "upload this file
360
+ # to this exact s3 path" primitive.
361
+ script_uri = files.upload_file(
362
+ "my_job.py",
363
+ files.dest_uri_for(si, "my_job.py", "job-scripts/sdk-e2e"),
364
+ )
365
+ wheel_uri = files.upload_file(
366
+ "dist/internal-lib-1.0.whl",
367
+ files.dest_uri_for(si, "internal-lib-1.0.whl", "job-scripts/sdk-e2e"),
368
+ )
369
+
370
+ job = WherobotsJob(
371
+ script=script_uri, # already-uploaded URI
372
+ name="job-in-integration",
373
+ dependencies=[WherobotsJob.add_file_dependency(wheel_uri)],
374
+ )
375
+ job.submit()
376
+ ```
377
+
378
+ Both artifacts land under `s3://<integration.path>/job-scripts/sdk-e2e/`.
379
+
380
+ `FilesAPI` is also where to look for low-level controls (custom managed
381
+ subdirs, raw `s3://` destinations, STS credential fetch for BYO upload
382
+ tooling). See `FilesAPI.upload_file`, `upload_managed_file`,
383
+ `upload_dependency`, `dest_uri_for`, and `get_storage_credentials`.
384
+
275
385
  #### JAR Jobs
276
386
 
277
387
  ```python
@@ -154,7 +154,7 @@ WherobotsJob(
154
154
  spark_driver_disk_gb: int | None = None, # Driver disk size (GB)
155
155
  spark_executor_disk_gb: int | None = None, # Executor disk size (GB)
156
156
  jar_main_class: str | None = None, # Main class (required for JARs)
157
- auto_upload: bool = True, # Upload local files to S3
157
+ auto_upload: bool = True, # Upload local files to S3 (managed storage)
158
158
  base_url: str | None = None, # Override API URL
159
159
  request_timeout_seconds: int | None = None, # HTTP timeout
160
160
  config: WherobotsConfig | None = None, # Full config object
@@ -169,6 +169,11 @@ WherobotsJob(
169
169
  | `get_status()` | `RunView` | Get current job status and full details. |
170
170
  | `get_logs(cursor=0, size=100)` | `LogsResponse` | Fetch a page of log entries. |
171
171
  | `get_metrics()` | `RunMetricsResponse` | Fetch CPU/memory metrics for the run. |
172
+ | `get_cpu_utilization()` | `UtilizationStats` | Aggregated CPU utilization (`latest`, `max`, `avg`, `series`). |
173
+ | `get_mem_utilization()` | `UtilizationStats` | Aggregated memory utilization (`latest`, `max`, `avg`, `series`). |
174
+ | `get_cost()` | `float \| None` | Total run cost in USD, or `None` if not yet billed. |
175
+ | `get_consumed_spatial_units()` | `float \| None` | Spatial Units (SUs) consumed by the run. |
176
+ | `refresh()` | `RunView` | Re-fetch from the API and update `status`/`name`/`runtime`/`region`/`version` in place. |
172
177
  | `iter_logs(cursor=0, size=100)` | `Iterator[dict]` | Iterate over all log entries, handling pagination automatically. |
173
178
  | `poll_for_logs(follow=True, interval=2.0, log_handler=None, max_errors=10)` | `None` | Poll and print logs. If `follow=True`, continues until job completes. `max_errors` sets the max consecutive transient errors before giving up. |
174
179
  | `cancel()` | `bool` | Request cancellation. Returns `True` on success. |
@@ -194,9 +199,39 @@ with WherobotsJob(script="s3://bucket/script.py", name="my-job") as job:
194
199
 
195
200
  | Method | Returns | Description |
196
201
  |--------|---------|-------------|
202
+ | `from_run_id(run_id, api_key=None, ...)` | `WherobotsJob` | Attach to an existing run for read-only log/metric access. No script required. `submit()` is disabled on the returned instance. |
197
203
  | `list_runs(...)` | `RunListPage` | List runs with optional filters. No instance required. |
198
204
  | `add_pypi_dependency(name, version)` | `dict` | Create a PyPI dependency dict for the `dependencies` parameter. |
199
- | `add_file_dependency(file_path)` | `dict` | Create a file dependency dict (`.jar`, `.whl`, `.zip`, `.json`). |
205
+ | `add_file_dependency(file_path)` | `dict` | Create a file dependency dict (`.jar`, `.whl`, `.zip`, `.json`). Accepts an `s3://` URI or a local path — local paths are auto-uploaded to **managed storage** at `submit()` time. For non-managed destinations, upload via `FilesAPI` first and pass the resulting `s3://` URI. |
206
+
207
+ #### Attaching to an Existing Run
208
+
209
+ If you already have a `run_id` (from the CLI, the Wherobots UI, or a prior SDK
210
+ session) you can attach without a script:
211
+
212
+ ```python
213
+ from wherobots import WherobotsJob
214
+
215
+ job = WherobotsJob.from_run_id("run-abc-123")
216
+ print(job.status, job.name)
217
+
218
+ # Stream remaining logs
219
+ job.poll_for_logs(follow=False)
220
+
221
+ # Or paginate
222
+ for entry in job.iter_logs(size=200):
223
+ print(entry["raw"])
224
+
225
+ # Aggregated utilization for a completed run
226
+ cpu = job.get_cpu_utilization()
227
+ print(f"CPU peak {cpu.max}, avg {cpu.avg}, samples {len(cpu.series)}")
228
+
229
+ # Billing — get_cost() returns None until the run is billed; check for None before applying the :.2f format
230
+ print(f"cost ${job.get_cost():.2f}, SUs {job.get_consumed_spatial_units()}")
231
+ ```
232
+
233
+ Calling `job.submit()` on an attached instance raises
234
+ `WherobotsValidationError` — these instances are read-only.
200
235
 
201
236
  #### Listing Runs
202
237
 
@@ -239,6 +274,80 @@ job = WherobotsJob(
239
274
  )
240
275
  ```
241
276
 
277
+ ##### Local Dependency Files
278
+
279
+ `add_file_dependency` also accepts a **local path** to a `.whl`, `.jar`, `.zip`,
280
+ or `.json`. The SDK uploads the file just before the job is submitted, fetching
281
+ short-lived AWS STS credentials from the Wherobots API (`POST /storage/credentials`)
282
+ and using boto3 multipart under the hood — no AWS credentials need to leave your
283
+ environment.
284
+
285
+ ```python
286
+ job = WherobotsJob(
287
+ script="my_job.py", # local script auto-upload still works
288
+ name="job-with-local-wheel",
289
+ dependencies=[
290
+ # Local wheel — uploaded to managed storage at submit() time
291
+ WherobotsJob.add_file_dependency("dist/mypkg-0.1.0-py3-none-any.whl"),
292
+ # Already-uploaded artifact — used as-is
293
+ WherobotsJob.add_file_dependency("s3://bucket/shared-libs/lib-2.0.jar"),
294
+ WherobotsJob.add_pypi_dependency("pandas", "2.0.3"),
295
+ ],
296
+ )
297
+ job.submit()
298
+ ```
299
+
300
+ Auto-upload always targets the org's **managed storage**:
301
+
302
+ | Artifact | Destination |
303
+ |---|---|
304
+ | Script (when `script` is a local path) | `s3://<managed.path>/scripts/<filename>` |
305
+ | Dependency (when path is a local file) | `s3://<managed.path>/dependencies/<filename>` |
306
+
307
+ Local dependency uploads honor the same `auto_upload` flag as the script —
308
+ if `auto_upload=False`, attempting to submit a job with a local dependency
309
+ path raises `WherobotsValidationError`. The 5 GiB cap on a single upload
310
+ applies; for larger artifacts, pre-upload and pass an `s3://` URI.
311
+
312
+ ### Uploading to a Storage Integration
313
+
314
+ `WherobotsJob` stays focused on job orchestration — it doesn't take any
315
+ "where files go" kwargs. To upload to a [Storage Integration](https://docs.wherobots.com/latest/develop/storage-management/storage/)
316
+ or a custom subdirectory, use `FilesAPI` directly and pass the resulting
317
+ `s3://` URI to `WherobotsJob`:
318
+
319
+ ```python
320
+ from wherobots import FilesAPI, WherobotsJob
321
+ from wherobots.config import WherobotsConfig
322
+
323
+ with FilesAPI.from_config(WherobotsConfig.from_env()) as files:
324
+ si = files.resolve_integration("customer-bucket")
325
+ # Build explicit destinations; upload_file is the "upload this file
326
+ # to this exact s3 path" primitive.
327
+ script_uri = files.upload_file(
328
+ "my_job.py",
329
+ files.dest_uri_for(si, "my_job.py", "job-scripts/sdk-e2e"),
330
+ )
331
+ wheel_uri = files.upload_file(
332
+ "dist/internal-lib-1.0.whl",
333
+ files.dest_uri_for(si, "internal-lib-1.0.whl", "job-scripts/sdk-e2e"),
334
+ )
335
+
336
+ job = WherobotsJob(
337
+ script=script_uri, # already-uploaded URI
338
+ name="job-in-integration",
339
+ dependencies=[WherobotsJob.add_file_dependency(wheel_uri)],
340
+ )
341
+ job.submit()
342
+ ```
343
+
344
+ Both artifacts land under `s3://<integration.path>/job-scripts/sdk-e2e/`.
345
+
346
+ `FilesAPI` is also where to look for low-level controls (custom managed
347
+ subdirs, raw `s3://` destinations, STS credential fetch for BYO upload
348
+ tooling). See `FilesAPI.upload_file`, `upload_managed_file`,
349
+ `upload_dependency`, `dest_uri_for`, and `get_storage_credentials`.
350
+
242
351
  #### JAR Jobs
243
352
 
244
353
  ```python
@@ -26,6 +26,7 @@ classifiers = [
26
26
  keywords = ["wherobots", "spark", "sedona", "geospatial", "jobs", "sdk", "rest-api"]
27
27
  dependencies = [
28
28
  "requests>=2.33.0",
29
+ "boto3>=1.34.0",
29
30
  ]
30
31
 
31
32
  [project.urls]
@@ -85,6 +86,13 @@ show_error_codes = true
85
86
  module = "tests.*"
86
87
  ignore_errors = true
87
88
 
89
+ [[tool.mypy.overrides]]
90
+ # boto3 / botocore ship runtime types in some versions but the
91
+ # pre-commit mypy env doesn't install them. Declare the policy once
92
+ # here rather than scattering per-import ``# type: ignore`` comments.
93
+ module = ["boto3", "boto3.*", "botocore", "botocore.*"]
94
+ ignore_missing_imports = true
95
+
88
96
  [tool.pytest.ini_options]
89
97
  testpaths = ["tests"]
90
98
  python_files = ["test_*.py"]