temporal-workdir 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,33 @@
1
+ name: CI
2
+
3
+ on:
4
+ push:
5
+ branches: [main]
6
+ pull_request:
7
+ branches: [main]
8
+
9
+ jobs:
10
+ test:
11
+ runs-on: ubuntu-latest
12
+ strategy:
13
+ matrix:
14
+ python-version: ["3.10", "3.11", "3.12", "3.13"]
15
+ steps:
16
+ - uses: actions/checkout@v4
17
+ - uses: actions/setup-python@v5
18
+ with:
19
+ python-version: ${{ matrix.python-version }}
20
+ - run: pip install -e ".[dev]"
21
+ - run: ruff check src/ tests/
22
+ - run: ruff format --check src/ tests/
23
+ - run: pytest -v
24
+
25
+ type-check:
26
+ runs-on: ubuntu-latest
27
+ steps:
28
+ - uses: actions/checkout@v4
29
+ - uses: actions/setup-python@v5
30
+ with:
31
+ python-version: "3.13"
32
+ - run: pip install -e ".[dev]"
33
+ - run: pyright src/
@@ -0,0 +1,30 @@
1
+ name: Release
2
+
3
+ on:
4
+ push:
5
+ tags:
6
+ - "v*"
7
+
8
+ permissions:
9
+ contents: write
10
+
11
+ jobs:
12
+ release:
13
+ runs-on: ubuntu-latest
14
+ environment: release
15
+ permissions:
16
+ id-token: write
17
+ contents: write
18
+ steps:
19
+ - uses: actions/checkout@v4
20
+ - uses: actions/setup-python@v5
21
+ with:
22
+ python-version: "3.13"
23
+ - run: pip install build
24
+ - run: python -m build
25
+ - name: Publish to PyPI
26
+ uses: pypa/gh-action-pypi-publish@release/v1
27
+ - name: Create GitHub Release
28
+ uses: softprops/action-gh-release@v2
29
+ with:
30
+ generate_release_notes: true
@@ -0,0 +1,5 @@
1
+ __pycache__/
2
+ *.egg-info/
3
+ dist/
4
+ build/
5
+ .pytest_cache/
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Saeed Seyfi
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,113 @@
1
+ Metadata-Version: 2.4
2
+ Name: temporal-workdir
3
+ Version: 0.1.0
4
+ Summary: Remote-backed workspace sync for Temporal activities
5
+ Project-URL: Homepage, https://github.com/saeedseyfi/temporal-workdir
6
+ Project-URL: Repository, https://github.com/saeedseyfi/temporal-workdir
7
+ Author-email: Saeed Seyfi <me@saeedseyfi.com>
8
+ License-Expression: MIT
9
+ License-File: LICENSE
10
+ Keywords: distributed,fsspec,temporal,workflow,workspace
11
+ Classifier: License :: OSI Approved :: MIT License
12
+ Classifier: Programming Language :: Python :: 3.10
13
+ Classifier: Programming Language :: Python :: 3.11
14
+ Classifier: Programming Language :: Python :: 3.12
15
+ Classifier: Programming Language :: Python :: 3.13
16
+ Requires-Python: >=3.10
17
+ Requires-Dist: fsspec>=2024.1.0
18
+ Requires-Dist: temporalio>=1.0.0
19
+ Provides-Extra: dev
20
+ Requires-Dist: pyright>=1.1; extra == 'dev'
21
+ Requires-Dist: pytest-asyncio>=0.21; extra == 'dev'
22
+ Requires-Dist: pytest>=7.0; extra == 'dev'
23
+ Requires-Dist: ruff>=0.5.0; extra == 'dev'
24
+ Description-Content-Type: text/markdown
25
+
26
+ # Workspace Sync for Temporal Activities
27
+
28
+ Sync a local directory with remote storage before and after a Temporal activity. Enables file-based activities to work across distributed workers where disk is not shared.
29
+
30
+ ## Problem
31
+
32
+ Temporal activities that read/write files on local disk break when you scale to multiple worker instances. Each worker has its own disk. This module syncs a remote storage location to a local temp directory before the activity runs, and pushes changes back after.
33
+
34
+ ## Install
35
+
36
+ ```bash
37
+ pip install temporal-workdir
38
+
39
+ # With a specific cloud backend:
40
+ pip install temporal-workdir gcsfs # Google Cloud Storage
41
+ pip install temporal-workdir s3fs # Amazon S3
42
+ pip install temporal-workdir adlfs # Azure Blob Storage
43
+ ```
44
+
45
+ ## Usage
46
+
47
+ ### As a context manager (generic, works anywhere)
48
+
49
+ ```python
50
+ from temporal_workdir import Workspace
51
+
52
+ async with Workspace("gs://my-bucket/pipeline/component-x") as ws:
53
+ # ws.path is a local Path — read and write files normally
54
+ data = json.loads((ws.path / "component.json").read_text())
55
+ (ws.path / "result.csv").write_text("col1,col2\nval1,val2")
56
+ # On clean exit: local dir is archived and uploaded
57
+ # On exception: no upload (remote state unchanged)
58
+ ```
59
+
60
+ ### As a Temporal activity decorator
61
+
62
+ ```python
63
+ from temporalio import activity
64
+ from temporal_workdir import workspace, get_workspace_path
65
+
66
+ @workspace("gs://my-bucket/{workflow_id}/{activity_type}")
67
+ @activity.defn
68
+ async def extract(input: ExtractInput) -> ExtractOutput:
69
+ ws = get_workspace_path()
70
+ # Template vars resolved from activity.info()
71
+ source = (ws / "source.json").read_text()
72
+ (ws / "output.csv").write_text(process(source))
73
+ return ExtractOutput(success=True)
74
+ ```
75
+
76
+ ### Custom template variables
77
+
78
+ ```python
79
+ @workspace(
80
+ "gs://my-bucket/{workflow_id}/components/{component}",
81
+ key_fn=lambda input: {"component": input.component_name},
82
+ )
83
+ @activity.defn
84
+ async def register(input: RegisterInput) -> RegisterOutput:
85
+ ws = get_workspace_path()
86
+ ...
87
+ ```
88
+
89
+ ## How It Works
90
+
91
+ 1. **Pull**: On entry, downloads `{remote_url}.tar.gz` and unpacks to a temp directory
92
+ 2. **Execute**: Your activity reads/writes files in the local directory
93
+ 3. **Push**: On clean exit, packs the directory into `tar.gz` and uploads
94
+
95
+ If the archive doesn't exist yet (first run), the local directory starts empty. If the activity raises an exception, no push happens. Remote state is untouched.
96
+
97
+ ## Storage Backends
98
+
99
+ Any backend supported by [fsspec](https://filesystem-spec.readthedocs.io/):
100
+
101
+ | Scheme | Backend | Extra package |
102
+ |--------|---------|--------------|
103
+ | `gs://` | Google Cloud Storage | `gcsfs` |
104
+ | `s3://` | Amazon S3 | `s3fs` |
105
+ | `az://` | Azure Blob Storage | `adlfs` |
106
+ | `file://` | Local filesystem | (none) |
107
+ | `memory://` | In-memory (testing) | (none) |
108
+
109
+ Pass backend-specific options as keyword arguments:
110
+
111
+ ```python
112
+ Workspace("gs://bucket/key", project="my-gcp-project", token="cloud")
113
+ ```
@@ -0,0 +1,88 @@
1
+ # Workspace Sync for Temporal Activities
2
+
3
+ Sync a local directory with remote storage before and after a Temporal activity. Enables file-based activities to work across distributed workers where disk is not shared.
4
+
5
+ ## Problem
6
+
7
+ Temporal activities that read/write files on local disk break when you scale to multiple worker instances. Each worker has its own disk. This module syncs a remote storage location to a local temp directory before the activity runs, and pushes changes back after.
8
+
9
+ ## Install
10
+
11
+ ```bash
12
+ pip install temporal-workdir
13
+
14
+ # With a specific cloud backend:
15
+ pip install temporal-workdir gcsfs # Google Cloud Storage
16
+ pip install temporal-workdir s3fs # Amazon S3
17
+ pip install temporal-workdir adlfs # Azure Blob Storage
18
+ ```
19
+
20
+ ## Usage
21
+
22
+ ### As a context manager (generic, works anywhere)
23
+
24
+ ```python
25
+ from temporal_workdir import Workspace
26
+
27
+ async with Workspace("gs://my-bucket/pipeline/component-x") as ws:
28
+ # ws.path is a local Path — read and write files normally
29
+ data = json.loads((ws.path / "component.json").read_text())
30
+ (ws.path / "result.csv").write_text("col1,col2\nval1,val2")
31
+ # On clean exit: local dir is archived and uploaded
32
+ # On exception: no upload (remote state unchanged)
33
+ ```
34
+
35
+ ### As a Temporal activity decorator
36
+
37
+ ```python
38
+ from temporalio import activity
39
+ from temporal_workdir import workspace, get_workspace_path
40
+
41
+ @workspace("gs://my-bucket/{workflow_id}/{activity_type}")
42
+ @activity.defn
43
+ async def extract(input: ExtractInput) -> ExtractOutput:
44
+ ws = get_workspace_path()
45
+ # Template vars resolved from activity.info()
46
+ source = (ws / "source.json").read_text()
47
+ (ws / "output.csv").write_text(process(source))
48
+ return ExtractOutput(success=True)
49
+ ```
50
+
51
+ ### Custom template variables
52
+
53
+ ```python
54
+ @workspace(
55
+ "gs://my-bucket/{workflow_id}/components/{component}",
56
+ key_fn=lambda input: {"component": input.component_name},
57
+ )
58
+ @activity.defn
59
+ async def register(input: RegisterInput) -> RegisterOutput:
60
+ ws = get_workspace_path()
61
+ ...
62
+ ```
63
+
64
+ ## How It Works
65
+
66
+ 1. **Pull**: On entry, downloads `{remote_url}.tar.gz` and unpacks to a temp directory
67
+ 2. **Execute**: Your activity reads/writes files in the local directory
68
+ 3. **Push**: On clean exit, packs the directory into `tar.gz` and uploads
69
+
70
+ If the archive doesn't exist yet (first run), the local directory starts empty. If the activity raises an exception, no push happens. Remote state is untouched.
71
+
72
+ ## Storage Backends
73
+
74
+ Any backend supported by [fsspec](https://filesystem-spec.readthedocs.io/):
75
+
76
+ | Scheme | Backend | Extra package |
77
+ |--------|---------|--------------|
78
+ | `gs://` | Google Cloud Storage | `gcsfs` |
79
+ | `s3://` | Amazon S3 | `s3fs` |
80
+ | `az://` | Azure Blob Storage | `adlfs` |
81
+ | `file://` | Local filesystem | (none) |
82
+ | `memory://` | In-memory (testing) | (none) |
83
+
84
+ Pass backend-specific options as keyword arguments:
85
+
86
+ ```python
87
+ Workspace("gs://bucket/key", project="my-gcp-project", token="cloud")
88
+ ```
@@ -0,0 +1,46 @@
1
+ [project]
2
+ name = "temporal-workdir"
3
+ dynamic = ["version"]
4
+ description = "Remote-backed workspace sync for Temporal activities"
5
+ authors = [{ name = "Saeed Seyfi", email = "me@saeedseyfi.com" }]
6
+ requires-python = ">=3.10"
7
+ readme = "README.md"
8
+ license = "MIT"
9
+ keywords = ["temporal", "workflow", "distributed", "fsspec", "workspace"]
10
+ dependencies = [
11
+ "temporalio>=1.0.0",
12
+ "fsspec>=2024.1.0",
13
+ ]
14
+ classifiers = [
15
+ "License :: OSI Approved :: MIT License",
16
+ "Programming Language :: Python :: 3.10",
17
+ "Programming Language :: Python :: 3.11",
18
+ "Programming Language :: Python :: 3.12",
19
+ "Programming Language :: Python :: 3.13",
20
+ ]
21
+
22
+ [project.optional-dependencies]
23
+ dev = [
24
+ "pytest>=7.0",
25
+ "pytest-asyncio>=0.21",
26
+ "ruff>=0.5.0",
27
+ "pyright>=1.1",
28
+ ]
29
+
30
+ [project.urls]
31
+ Homepage = "https://github.com/saeedseyfi/temporal-workdir"
32
+ Repository = "https://github.com/saeedseyfi/temporal-workdir"
33
+
34
+ [build-system]
35
+ requires = ["hatchling", "hatch-vcs"]
36
+ build-backend = "hatchling.build"
37
+
38
+ [tool.hatch.version]
39
+ source = "vcs"
40
+
41
+ [tool.hatch.build.targets.wheel]
42
+ packages = ["src/temporal_workdir"]
43
+
44
+ [tool.pytest.ini_options]
45
+ asyncio_mode = "auto"
46
+ testpaths = ["tests"]
@@ -0,0 +1,20 @@
1
+ """Remote workspace sync for Temporal activities.
2
+
3
+ This package provides a :class:`Workspace` that syncs a local directory with
4
+ remote storage (GCS, S3, Azure, local, etc.) before and after a Temporal
5
+ activity executes. This enables file-based activities to work correctly across
6
+ distributed workers where disk state is not shared.
7
+
8
+ The storage backend is auto-detected from the URL scheme via `fsspec`_.
9
+
10
+ .. _fsspec: https://filesystem-spec.readthedocs.io/
11
+ """
12
+
13
+ from temporal_workdir._temporal import get_workspace_path, workspace
14
+ from temporal_workdir._workspace import Workspace
15
+
16
+ __all__ = [
17
+ "Workspace",
18
+ "get_workspace_path",
19
+ "workspace",
20
+ ]
@@ -0,0 +1,43 @@
1
+ """Archive utilities for packing/unpacking workspace directories."""
2
+
3
+ import io
4
+ import tarfile
5
+ from pathlib import Path
6
+
7
+
8
def pack(directory: Path) -> bytes:
    """Serialize a directory tree into a gzipped tar archive.

    Only regular files are archived (directories are implied by member
    paths); entries are added in sorted path order so that identical trees
    produce identically ordered archives.

    Args:
        directory: Local directory to archive. Must exist.

    Returns:
        The tar.gz archive as bytes.
    """
    sink = io.BytesIO()
    with tarfile.open(fileobj=sink, mode="w:gz") as archive:
        regular_files = [p for p in sorted(directory.rglob("*")) if p.is_file()]
        for path in regular_files:
            archive.add(str(path), arcname=str(path.relative_to(directory)))
    return sink.getvalue()
24
+
25
+
26
+ def unpack(data: bytes, directory: Path) -> None:
27
+ """Unpack a gzipped tar archive into a directory.
28
+
29
+ Args:
30
+ data: The tar.gz archive bytes.
31
+ directory: Target directory. Created if it doesn't exist.
32
+ """
33
+ directory.mkdir(parents=True, exist_ok=True)
34
+ buf = io.BytesIO(data)
35
+ with tarfile.open(fileobj=buf, mode="r:gz") as tar:
36
+ # Security: prevent path traversal
37
+ for member in tar.getmembers():
38
+ member_path = Path(directory / member.name).resolve()
39
+ if not str(member_path).startswith(str(directory.resolve())):
40
+ raise ValueError(
41
+ f"Archive member {member.name!r} would escape target directory"
42
+ )
43
+ tar.extractall(path=str(directory), filter="data")
@@ -0,0 +1,112 @@
1
+ """Temporal-specific integration for Workspace."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import contextvars
6
+ import functools
7
+ from collections.abc import Callable
8
+ from pathlib import Path
9
+ from typing import Any, TypeVar
10
+
11
+ import temporalio.activity
12
+
13
+ from temporal_workdir._workspace import Workspace
14
+
15
# Type variable preserving the decorated callable's signature through @workspace.
F = TypeVar("F", bound=Callable[..., Any])

# Holds the active workspace path for the current activity execution.
# None whenever no workspace-decorated activity is running in this context;
# contextvars keeps concurrent activity executions isolated from each other.
_current_workspace_path: contextvars.ContextVar[Path | None] = contextvars.ContextVar(
    "_current_workspace_path", default=None
)
20
+
21
+
22
def workspace(
    remote_url_template: str,
    key_fn: Callable[..., dict[str, str]] | None = None,
    **workspace_kwargs: Any,
) -> Callable[[F], F]:
    """Decorator that provides a :class:`Workspace` to a Temporal activity.

    The workspace is pulled from remote storage before the activity body
    runs and pushed back after it completes successfully. Inside the body,
    the local directory is available via :func:`get_workspace_path`.

    Placeholders in ``remote_url_template`` are filled from
    :func:`temporalio.activity.info` and, optionally, from ``key_fn``.

    Built-in template variables (from ``activity.info()``):

    - ``{workflow_id}`` — stable across retries
    - ``{activity_id}`` — unique per scheduling within a workflow
    - ``{activity_type}`` — the activity name
    - ``{task_queue}``

    Example::

        @workspace("gs://bucket/{workflow_id}/{activity_type}")
        @activity.defn
        async def process(input: ProcessInput) -> Output:
            ws = get_workspace_path()
            data = json.loads((ws / "config.json").read_text())
            ...

        # With key_fn for custom template vars:
        @workspace(
            "gs://bucket/{workflow_id}/{component}",
            key_fn=lambda input: {"component": input.component_name},
        )
        @activity.defn
        async def process(input: ProcessInput) -> Output:
            ...

    Args:
        remote_url_template: URL template with ``{var}`` placeholders.
        key_fn: Optional function that receives the activity's positional
            arguments and returns a dict of additional template variables.
        **workspace_kwargs: Extra keyword arguments forwarded to
            :class:`Workspace` (e.g., ``cleanup="keep"``).
    """

    def decorator(fn: F) -> F:
        @functools.wraps(fn)
        async def wrapper(*args: Any, **kwargs: Any) -> Any:
            info = temporalio.activity.info()
            # key_fn's variables are merged last so they can shadow built-ins.
            extra = key_fn(*args) if key_fn is not None else {}
            variables: dict[str, str] = {
                "workflow_id": info.workflow_id or "",
                "activity_id": info.activity_id,
                "activity_type": info.activity_type,
                "task_queue": info.task_queue,
            } | extra

            resolved_url = remote_url_template.format(**variables)

            async with Workspace(resolved_url, **workspace_kwargs) as ws:
                # Expose the path via a contextvar so the activity body can
                # reach it without a signature change; always restore on exit.
                token = _current_workspace_path.set(ws.path)
                try:
                    return await fn(*args, **kwargs)
                finally:
                    _current_workspace_path.reset(token)

        return wrapper  # type: ignore[return-value]

    return decorator
94
+
95
+
96
def get_workspace_path() -> Path:
    """Return the workspace path of the activity currently executing.

    Only valid inside an activity decorated with :func:`workspace`.

    Returns:
        The local workspace :class:`~pathlib.Path`.

    Raises:
        RuntimeError: If called outside a workspace-decorated activity.
    """
    current = _current_workspace_path.get(None)
    if current is not None:
        return current
    raise RuntimeError(
        "get_workspace_path() called outside a workspace-decorated activity"
    )
@@ -0,0 +1,130 @@
1
+ """Core Workspace class for syncing file trees with remote storage."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import shutil
6
+ import tempfile
7
+ from pathlib import Path
8
+ from typing import Literal
9
+ from urllib.parse import urlparse
10
+
11
+ import fsspec
12
+
13
+ from temporal_workdir._archive import pack, unpack
14
+
15
+
16
class Workspace:
    """Sync a local directory with a remote storage location.

    A Workspace maps a remote URL (the "key") to a local directory. On entry,
    the remote archive is downloaded and unpacked; on clean exit, the local
    directory is packed and uploaded back.

    Works with any storage backend supported by fsspec (GCS, S3, Azure, local
    filesystem, etc.). The backend is auto-detected from the URL scheme.

    Usage::

        async with Workspace("gs://bucket/state/component-x") as ws:
            data = json.loads((ws.path / "component.json").read_text())
            (ws.path / "output.csv").write_text("a,b\\n1,2")
        # On clean exit: local dir archived and uploaded to remote

    Args:
        remote_url: Remote storage URL. The scheme determines the fsspec
            backend (``gs://`` for GCS, ``s3://`` for S3, ``file://`` for
            local, etc.). A ``.tar.gz`` suffix is appended automatically
            for the archive file.
        local_path: Local directory to use as the working copy. If ``None``,
            a temporary directory is created.
        cleanup: What to do with the local directory after push.
            ``"auto"`` deletes it, ``"keep"`` leaves it in place.
        storage_options: Extra keyword arguments passed to
            ``fsspec.filesystem()``. Use for authentication, project IDs, etc.
    """

    def __init__(
        self,
        remote_url: str,
        local_path: Path | None = None,
        cleanup: Literal["auto", "keep"] = "auto",
        **storage_options: object,
    ) -> None:
        self._remote_url = remote_url.rstrip("/")
        self._archive_url = self._remote_url + ".tar.gz"
        self._cleanup = cleanup
        self._storage_options = storage_options

        split_url = urlparse(self._archive_url)
        self._protocol = split_url.scheme or "file"
        # Most fsspec backends expect the path with the scheme stripped but
        # the netloc (bucket/host) kept as the leading path component.
        if split_url.netloc:
            self._remote_path = split_url.netloc + split_url.path
        else:
            self._remote_path = split_url.path

        self._fs = fsspec.filesystem(self._protocol, **storage_options)

        # Only directories we create ourselves are eligible for auto-cleanup.
        if local_path is None:
            self._local_path = Path(tempfile.mkdtemp(prefix="temporal-workdir-"))
            self._owns_tempdir = True
        else:
            self._local_path = local_path
            self._owns_tempdir = False

    @property
    def path(self) -> Path:
        """The local working directory.

        Read and write files here freely. Changes are pushed to remote storage
        when the context manager exits cleanly.
        """
        return self._local_path

    async def pull(self) -> None:
        """Download and unpack the remote archive into the local directory.

        If no archive exists at the remote URL (first run), the local
        directory is simply created empty. Any existing local files are
        removed before unpacking so no stale state survives.
        """
        if not self._fs.exists(self._remote_path):
            self._local_path.mkdir(parents=True, exist_ok=True)
            return

        archive_bytes = self._fs.cat_file(self._remote_path)
        # Wipe the local copy first so deleted remote files don't linger.
        if self._local_path.exists():
            shutil.rmtree(self._local_path)
        unpack(archive_bytes, self._local_path)

    async def push(self) -> None:
        """Pack the local directory and upload it to remote storage.

        An empty local directory instead deletes the remote archive (if
        present) so storage stays clean.
        """
        has_files = any(p.is_file() for p in self._local_path.rglob("*"))
        if has_files:
            self._fs.pipe_file(self._remote_path, pack(self._local_path))
            return
        # Empty workspace — drop the remote archive if one exists.
        if self._fs.exists(self._remote_path):
            self._fs.rm(self._remote_path)

    async def __aenter__(self) -> Workspace:
        """Pull remote state and return the workspace."""
        await self.pull()
        return self

    async def __aexit__(
        self,
        exc_type: type[BaseException] | None,
        exc_val: BaseException | None,
        exc_tb: object,
    ) -> None:
        """Push local state on clean exit, then optionally clean up."""
        # An exception inside the block skips the push entirely, leaving
        # remote state untouched.
        if exc_type is None:
            await self.push()
        if self._cleanup == "auto" and self._owns_tempdir:
            shutil.rmtree(self._local_path, ignore_errors=True)
File without changes
@@ -0,0 +1,96 @@
1
+ """Tests for the @workspace Temporal decorator."""
2
+
3
+ import json
4
+ import uuid
5
+ from dataclasses import dataclass
6
+ from datetime import timedelta
7
+
8
+ import pytest
9
+
10
+ from temporalio import activity, workflow
11
+ from temporal_workdir import get_workspace_path, workspace
12
+ from temporalio.testing import WorkflowEnvironment
13
+ from temporalio.worker import Worker
14
+
15
+
16
@dataclass
class ComponentInput:
    """Input for test activities."""

    # Component name; used by key_fn to fill the {component} URL placeholder.
    component_name: str
    # Payload written to (and later read back from) the workspace.
    data: str
22
+
23
+
24
@workspace(
    "memory://temporal-test/{workflow_id}/{component}",
    key_fn=lambda input: {"component": input.component_name},
)
@activity.defn
async def write_component(input: ComponentInput) -> str:
    """Persist the input payload into this component's workspace."""
    payload = json.dumps({"name": input.component_name, "data": input.data})
    target = get_workspace_path() / "component.json"
    target.write_text(payload)
    return f"wrote {input.component_name}"
36
+
37
+
38
@workspace(
    "memory://temporal-test/{workflow_id}/{component}",
    key_fn=lambda input: {"component": input.component_name},
)
@activity.defn
async def read_component(input: ComponentInput) -> str:
    """Load this component's workspace file and return its payload."""
    raw = (get_workspace_path() / "component.json").read_text()
    return json.loads(raw)["data"]
48
+
49
+
50
@workflow.defn
class WriteReadWorkflow:
    """Workflow that writes then reads from a workspace."""

    @workflow.run
    async def run(self, component_name: str, data: str) -> str:
        payload = ComponentInput(component_name=component_name, data=data)
        timeout = timedelta(seconds=30)

        # First activity persists the payload into the shared workspace...
        await workflow.execute_activity(
            write_component,
            payload,
            start_to_close_timeout=timeout,
        )
        # ...second activity reads it back, proving cross-activity sync.
        return await workflow.execute_activity(
            read_component,
            payload,
            start_to_close_timeout=timeout,
        )
69
+
70
+
71
class TestWorkspaceDecorator:
    """Tests for the @workspace decorator with real Temporal workers."""

    @pytest.fixture
    async def env(self):
        """Start a local Temporal test environment and tear it down after.

        The previous version returned the environment without ever shutting
        it down, leaking the local Temporal server process for every test.
        A yield-fixture guarantees shutdown even if the test fails.
        """
        env = await WorkflowEnvironment.start_local()
        try:
            yield env
        finally:
            await env.shutdown()

    async def test_write_then_read(self, env: WorkflowEnvironment) -> None:
        """Data written in one activity is readable in another."""
        task_queue = str(uuid.uuid4())

        async with Worker(
            env.client,
            task_queue=task_queue,
            workflows=[WriteReadWorkflow],
            activities=[write_component, read_component],
        ):
            result = await env.client.execute_workflow(
                WriteReadWorkflow.run,
                args=["my-component", "hello-world"],
                id=str(uuid.uuid4()),
                task_queue=task_queue,
            )

        assert result == "hello-world"
@@ -0,0 +1,162 @@
1
+ """Tests for the Workspace class."""
2
+
3
+ import json
4
+ from pathlib import Path
5
+
6
+ import pytest
7
+
8
+ from temporal_workdir import Workspace
9
+
10
+
11
@pytest.fixture
def memory_url() -> str:
    """Produce a fresh memory:// URL so tests never share remote state."""
    import uuid

    unique_key = uuid.uuid4()
    return "memory://workdir-test/" + str(unique_key)
17
+
18
+
19
class TestWorkspace:
    """Pull/push lifecycle behavior of Workspace."""

    async def test_empty_remote_starts_empty_local(self, memory_url: str) -> None:
        """First run with no remote archive creates an empty local dir."""
        async with Workspace(memory_url) as ws:
            assert ws.path.exists()
            assert list(ws.path.iterdir()) == []

    async def test_roundtrip_single_file(self, memory_url: str) -> None:
        """A file written in one workspace session is visible in the next."""
        async with Workspace(memory_url) as ws:
            (ws.path / "hello.txt").write_text("world")

        async with Workspace(memory_url) as ws:
            assert (ws.path / "hello.txt").read_text() == "world"

    async def test_roundtrip_nested_directories(self, memory_url: str) -> None:
        """Nested directory structures survive the archive round-trip."""
        async with Workspace(memory_url) as ws:
            nested = ws.path / "a" / "b"
            nested.mkdir(parents=True)
            (nested / "deep.json").write_text('{"nested": true}')
            (ws.path / "top.txt").write_text("top")

        async with Workspace(memory_url) as ws:
            deep = json.loads((ws.path / "a" / "b" / "deep.json").read_text())
            assert deep == {"nested": True}
            assert (ws.path / "top.txt").read_text() == "top"

    async def test_overwrite_replaces_previous_state(self, memory_url: str) -> None:
        """A later push fully replaces earlier remote state — no stale files."""
        # Run 1: write file_a.
        async with Workspace(memory_url) as ws:
            (ws.path / "file_a.txt").write_text("a")

        # Run 2: drop file_a, add file_b.
        async with Workspace(memory_url) as ws:
            assert (ws.path / "file_a.txt").exists()  # pulled from run 1
            (ws.path / "file_a.txt").unlink()
            (ws.path / "file_b.txt").write_text("b")

        # Run 3: only file_b should remain.
        async with Workspace(memory_url) as ws:
            assert not (ws.path / "file_a.txt").exists()
            assert (ws.path / "file_b.txt").read_text() == "b"

    async def test_exception_skips_push(self, memory_url: str) -> None:
        """An exception inside the context leaves remote state untouched."""
        async with Workspace(memory_url) as ws:
            (ws.path / "original.txt").write_text("safe")

        with pytest.raises(RuntimeError, match="boom"):
            async with Workspace(memory_url) as ws:
                (ws.path / "original.txt").write_text("corrupted")
                (ws.path / "new_file.txt").write_text("should not persist")
                raise RuntimeError("boom")

        async with Workspace(memory_url) as ws:
            assert (ws.path / "original.txt").read_text() == "safe"
            assert not (ws.path / "new_file.txt").exists()

    async def test_cleanup_auto_removes_tempdir(self, memory_url: str) -> None:
        """cleanup='auto' deletes the temp directory after exit."""
        async with Workspace(memory_url, cleanup="auto") as ws:
            local_dir = ws.path
            (local_dir / "file.txt").write_text("data")

        assert not local_dir.exists()

    async def test_cleanup_keep_preserves_dir(self, memory_url: str) -> None:
        """cleanup='keep' leaves the local directory in place."""
        async with Workspace(memory_url, cleanup="keep") as ws:
            local_dir = ws.path
            (local_dir / "file.txt").write_text("data")

        assert local_dir.exists()
        assert (local_dir / "file.txt").read_text() == "data"
        # Manual cleanup since "keep" never deletes.
        import shutil

        shutil.rmtree(local_dir)

    async def test_explicit_local_path(self, memory_url: str, tmp_path: Path) -> None:
        """A user-supplied local_path is used instead of a temp directory."""
        local = tmp_path / "my_workspace"
        async with Workspace(memory_url, local_path=local) as ws:
            assert ws.path == local
            (ws.path / "data.txt").write_text("hello")

        # Auto-cleanup only applies to tempdirs the workspace itself created.
        assert local.exists()

    async def test_empty_push_removes_remote_archive(self, memory_url: str) -> None:
        """Pushing an empty directory removes the remote archive."""
        import fsspec

        fs = fsspec.filesystem("memory")
        archive_path = memory_url.replace("memory://", "") + ".tar.gz"

        # Seed remote state.
        async with Workspace(memory_url) as ws:
            (ws.path / "data.txt").write_text("hello")

        assert fs.exists(archive_path)

        # Empty the workspace; the push should delete the archive.
        async with Workspace(memory_url) as ws:
            (ws.path / "data.txt").unlink()

        assert not fs.exists(archive_path)

    async def test_binary_files(self, memory_url: str) -> None:
        """Binary content survives the archive round-trip."""
        binary_data = bytes(range(256))

        async with Workspace(memory_url) as ws:
            (ws.path / "data.bin").write_bytes(binary_data)

        async with Workspace(memory_url) as ws:
            assert (ws.path / "data.bin").read_bytes() == binary_data
148
+
149
+
150
class TestWorkspaceExplicitPullPush:
    """Pull/push driven manually, without the context manager."""

    async def test_manual_pull_push(self, memory_url: str) -> None:
        """Explicit pull and push round-trip a file between workspaces."""
        writer = Workspace(memory_url)
        await writer.pull()
        (writer.path / "manual.txt").write_text("works")
        await writer.push()

        reader = Workspace(memory_url)
        await reader.pull()
        assert (reader.path / "manual.txt").read_text() == "works"