PyPI - streamrelay - Versions diffs - 0.1.0__py3-none-any.whl - Mend

streamrelay 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (11) hide show

streamrelay/__init__.py +45 -0
streamrelay/consumer.py +249 -0
streamrelay/crypto.py +170 -0
streamrelay/executor.py +146 -0
streamrelay/producer.py +267 -0
streamrelay/server.py +533 -0
streamrelay-0.1.0.dist-info/METADATA +788 -0
streamrelay-0.1.0.dist-info/RECORD +11 -0
streamrelay-0.1.0.dist-info/WHEEL +4 -0
streamrelay-0.1.0.dist-info/entry_points.txt +2 -0
streamrelay-0.1.0.dist-info/licenses/LICENSE +153 -0

streamrelay/__init__.py ADDED Viewed

@@ -0,0 +1,45 @@
+"""
+streamrelay — Real-time token streaming from batch HPC executors via WebSocket relay.
+Solves a fundamental gap: HPC job schedulers (Globus Compute, SLURM, PBS) execute
+functions to completion and return a single result. This library adds a lightweight
+bidirectional channel so tokens stream out of the compute node in real time, with
+both ends connecting *outbound* to the relay (no inbound ports needed, no VPN).
+Basic usage:
+    # On the HPC compute node (producer)
+    from streamrelay import RelayProducer
+    with RelayProducer(relay_url, channel_id) as relay:
+        for token in your_model_stream(prompt):
+            relay.send_token(token)
+    # "done" signal sent automatically on exit
+    # On your client/middleware (consumer)
+    from streamrelay import RelayConsumer
+    for token in RelayConsumer(relay_url, channel_id).stream():
+        print(token, end="", flush=True)
+    # High-level: submit a Globus Compute function and stream its output
+    from streamrelay import StreamingExecutor
+    async with StreamingExecutor(endpoint_id, relay_url) as executor:
+        async for token in executor.stream(my_fn, prompt="Hello"):
+            print(token, end="", flush=True)
+"""
+from streamrelay.consumer import RelayConsumer
+from streamrelay.producer import RelayProducer
+from streamrelay.server import start_relay
+from streamrelay.crypto import encrypt_message, decrypt_message, generate_key
+from streamrelay.executor import StreamingExecutor
+# Package version string; matches the 0.1.0 wheel this module ships in.
+__version__ = "0.1.0"
+# Public API of the package: exactly the names imported above, i.e. what
+# ``from streamrelay import *`` exposes.
+__all__ = [
+    "RelayProducer",
+    "RelayConsumer",
+    "StreamingExecutor",
+    "start_relay",
+    "generate_key",
+    "encrypt_message",
+    "decrypt_message",
+]

streamrelay/consumer.py ADDED Viewed

@@ -0,0 +1,249 @@
+"""
+streamrelay.consumer — Receive tokens from the relay on the client side.
+WHAT THIS FILE DOES
+===================
+This module runs on YOUR MACHINE (or your web server) — the application side
+that wants to display or process tokens as they arrive from the HPC compute node.
+RelayConsumer connects to the relay as a consumer and yields token strings one
+by one as the producer sends them. From the caller's perspective, it looks just
+like iterating over any Python generator:
+  for token in consumer.stream():
+      print(token, end="", flush=True)
+All the WebSocket complexity — connecting, receiving, decrypting, parsing JSON,
+detecting the end of the stream — is handled internally.
+TWO ITERATION STYLES
+====================
+  # Synchronous — use in plain scripts, notebooks, CLI tools
+  consumer = RelayConsumer(relay_url, channel_id)
+  for token in consumer.stream():
+      print(token, end="", flush=True)
+  # Asynchronous — use in FastAPI, aiohttp, or any asyncio application
+  async for token in RelayConsumer(relay_url, channel_id):
+      yield f"data: {token}\\n\\n"   # forward as Server-Sent Events to browser
+The async version also supports ``await consumer.acollect()`` to get the full
+response as a single string.
+TIMING
+======
+Connecting the consumer BEFORE submitting the HPC job is the safest ordering,
+but it is not required: the relay buffers any tokens that arrive before you
+connect, so you won't miss the beginning of the response even if the job
+starts faster than expected.
+  channel_id = str(uuid.uuid4())
+  # Submit job first, consumer second — or consumer first, doesn't matter.
+  # The relay handles both orderings via buffering.
+  submit_slurm_job(relay_url, channel_id)
+  for token in RelayConsumer(relay_url, channel_id).stream():
+      ...
+"""
+import json
+import logging
+from typing import AsyncIterator, Iterator
+logger = logging.getLogger(__name__)
+class RelayConsumer:
+    """
+    WebSocket client that receives tokens from the relay.
+    Args:
+        relay_url: WebSocket URL of the relay server.
+            Example: ``"wss://relay.example.com"`` (production)
+                  or ``"ws://localhost:8765"`` (local development)
+        channel_id: UUID string that pairs this consumer with its producer.
+            Must be the same value that was passed to RelayProducer.
+        encryption_key: Optional base64-encoded AES-256 key for decryption.
+            Must match the key used by the producer. When set, each received
+            message is decrypted before being parsed and yielded.
+        relay_secret: Optional shared secret for relay authentication.
+            Must match the relay server's ``--secret`` flag.
+    """
+    def __init__(
+        self,
+        relay_url: str,
+        channel_id: str,
+        encryption_key: str = "",
+        relay_secret: str = "",
+    ):
+        self.relay_url = relay_url.rstrip("/")
+        self.channel_id = channel_id
+        self.encryption_key = encryption_key
+        self.relay_secret = relay_secret
+    # -----------------------------------------------------------------------
+    # Internal helpers
+    # -----------------------------------------------------------------------
+    def _consume_url(self) -> str:
+        """Build the /consume/{channel_id} URL, appending ?secret= if needed."""
+        url = f"{self.relay_url}/consume/{self.channel_id}"
+        if self.relay_secret:
+            url += f"?secret={self.relay_secret}"
+        return url
+    def _decrypt(self, msg_str: str) -> str:
+        """
+        Decrypt a message if encryption is configured; otherwise pass through.
+        The relay forwards messages as-is. If the producer encrypted them with
+        AES-256-GCM (wrapping in {"type":"enc","d":"<base64blob>"}), this
+        function unwraps and decrypts them back to the original JSON string.
+        """
+        if self.encryption_key:
+            from streamrelay.crypto import decrypt_message
+            return decrypt_message(self.encryption_key, msg_str)
+        return msg_str
+    def _parse_and_yield(self, raw: str):
+        """
+        Parse a raw WebSocket message and return the appropriate action.
+        Returns:
+          ("token", content)   — yield this token string to the caller
+          ("done", None)       — stop iteration
+          ("error", message)   — raise RuntimeError
+          ("skip", None)       — ignore this message (unknown type)
+        """
+        msg_str = self._decrypt(raw)
+        msg = json.loads(msg_str)
+        msg_type = msg.get("type")
+        if msg_type == "token":
+            return ("token", msg["content"])
+        elif msg_type == "done":
+            return ("done", None)
+        elif msg_type == "error":
+            return ("error", msg.get("message", "unknown error from producer"))
+        else:
+            return ("skip", None)
+    # -----------------------------------------------------------------------
+    # Synchronous iterator
+    # -----------------------------------------------------------------------
+    def stream(self) -> Iterator[str]:
+        """
+        Connect to the relay and yield token strings synchronously.
+        Blocks until each token arrives. Returns (stops iteration) when the
+        producer sends a "done" message. The WebSocket connection is closed
+        automatically when the generator exits.
+        Yields:
+            str: Each token string in arrival order.
+        Raises:
+            RuntimeError: If the producer sent an "error" message.
+        Example::
+            for token in RelayConsumer(relay_url, channel_id).stream():
+                print(token, end="", flush=True)
+        """
+        from websockets.sync.client import connect as ws_connect
+        url = self._consume_url()
+        logger.debug(f"[streamrelay] consumer connecting: channel={self.channel_id[:8]}")
+        with ws_connect(url) as ws:
+            for raw in ws:
+                # Each raw message from the relay is one JSON string.
+                action, value = self._parse_and_yield(raw)
+                if action == "token":
+                    yield value
+                elif action == "done":
+                    return  # clean end of stream
+                elif action == "error":
+                    raise RuntimeError(f"Producer error: {value}")
+                # "skip": unknown message type, ignore and continue
+    # -----------------------------------------------------------------------
+    # Asynchronous iterator
+    # -----------------------------------------------------------------------
+    def __aiter__(self):
+        """
+        Enable ``async for token in RelayConsumer(...)`` syntax.
+        Returns the async generator from astream(). This lets you use a
+        RelayConsumer directly in an ``async for`` loop without calling
+        .astream() explicitly.
+        """
+        return self.astream()
+    async def astream(self) -> AsyncIterator[str]:
+        """
+        Connect to the relay and yield token strings asynchronously.
+        Non-blocking: yields control to the event loop while waiting for
+        each token. Suitable for FastAPI route handlers, aiohttp servers,
+        or any asyncio application.
+        Yields:
+            str: Each token string in arrival order.
+        Raises:
+            RuntimeError: If the producer sent an "error" message.
+        Example (FastAPI SSE endpoint)::
+            @app.get("/stream")
+            async def stream():
+                async def generate():
+                    async for token in RelayConsumer(relay_url, channel_id):
+                        yield f"data: {token}\\n\\n"
+                return StreamingResponse(generate(), media_type="text/event-stream")
+        """
+        from websockets.asyncio.client import connect as ws_connect
+        url = self._consume_url()
+        logger.debug(f"[streamrelay] async consumer connecting: channel={self.channel_id[:8]}")
+        async with ws_connect(url) as ws:
+            async for raw in ws:
+                action, value = self._parse_and_yield(raw)
+                if action == "token":
+                    yield value
+                elif action == "done":
+                    return
+                elif action == "error":
+                    raise RuntimeError(f"Producer error: {value}")
+    # -----------------------------------------------------------------------
+    # Convenience: collect the full response as a single string
+    # -----------------------------------------------------------------------
+    def collect(self) -> str:
+        """
+        Stream all tokens and join them into a single string (blocking).
+        Useful when you want the complete response but don't need to display
+        it incrementally.
+        Returns:
+            str: The complete generated text.
+        """
+        return "".join(self.stream())
+    async def acollect(self) -> str:
+        """
+        Async version of collect().
+        Returns:
+            str: The complete generated text.
+        """
+        parts = []
+        async for token in self.astream():
+            parts.append(token)
+        return "".join(parts)

streamrelay/crypto.py ADDED Viewed

@@ -0,0 +1,170 @@
+"""
+streamrelay.crypto — AES-256-GCM end-to-end encryption for relay messages.
+WHAT THIS FILE DOES
+===================
+The relay server is a public intermediary: it sees every message that flows
+between the producer (HPC node) and the consumer (your application). By default
+that means the relay operator can read token payloads.
+This module adds optional end-to-end encryption so that the relay only ever
+sees opaque ciphertext. The producer encrypts before sending; the consumer
+decrypts after receiving. The relay cannot read anything.
+WHY AES-256-GCM
+===============
+AES-256-GCM is the standard choice for this use case:
+  - AES-256: 256-bit key — computationally unbreakable with current hardware.
+  - GCM (Galois/Counter Mode): "authenticated encryption" — provides both
+    confidentiality (nobody can read the message) AND integrity (any tampering
+    at the relay is detected and raises an exception at decrypt time).
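+  - Fresh nonce per message: GCM requires a unique nonce (number-used-once)
+    for every encryption under the same key. We use os.urandom(12) — 12
+    cryptographically random bytes — so even if you send the same token
+    twice, the ciphertexts are different. This prevents pattern analysis of
+    repeated tokens. (A random nonce does not by itself stop a relay from
+    replaying or reordering valid ciphertexts; that would need sequence
+    numbers or a similar mechanism.)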
+WIRE FORMAT
+===========
+An encrypted message is a JSON string wrapping a single base64-encoded blob:
+  {"type": "enc", "d": "<base64(nonce[12 bytes] + ciphertext + tag[16 bytes])>"}
+The nonce (12 bytes) and GCM authentication tag (16 bytes) are packed together
+with the ciphertext into a single base64 blob. This makes it easy to pass over
+JSON without any binary escaping.
+The relay forwards this JSON string unchanged. It doesn't know or care that
+it contains encrypted data.
+BACKWARD COMPATIBILITY
+======================
+If decrypt_message() receives a message that is NOT of type "enc" (i.e. an
+unencrypted message), it passes it through unchanged. This means you can
+enable encryption on a running system without breaking existing unencrypted
+connections.
+SETUP
+=====
+Generate a key once and share it between the producer and consumer via
+environment variables or a secrets manager:
+  python -c "from streamrelay import generate_key; print(generate_key())"
+  # Outputs something like: xK3mP9vQ...  (44 characters, base64-encoded)
+Store it in your .env file:
+  RELAY_ENCRYPTION_KEY=xK3mP9vQ...
+Then pass it to both sides:
+  RelayProducer(relay_url, channel_id, encryption_key=os.getenv("RELAY_ENCRYPTION_KEY"))
+  RelayConsumer(relay_url, channel_id, encryption_key=os.getenv("RELAY_ENCRYPTION_KEY"))
+"""
+import base64
+import json
+import os
+from cryptography.hazmat.primitives.ciphers.aead import AESGCM
+# Standard sizes for AES-GCM (defined by NIST SP 800-38D)
+# The nonce is stored as the first _NONCE_SIZE bytes of every encrypted blob.
+_NONCE_SIZE = 12  # 96 bits — the recommended nonce length for GCM
+# NOTE(review): _TAG_SIZE is not referenced by the functions shown in this
+# module; it appears to document the wire-format layout only — confirm.
+_TAG_SIZE = 16    # 128 bits — GCM appends this authentication tag automatically
+def generate_key() -> str:
+    """
+    Generate a random AES-256 encryption key.
+    Returns a base64-encoded string suitable for storing in a .env file or
+    passing as an environment variable. Run this once per deployment and
+    keep the key secret.
+    Returns:
+        str: Base64-encoded 32-byte (256-bit) key, e.g. ``"xK3mP9vQ..."``
+    Example::
+        from streamrelay import generate_key
+        key = generate_key()
+        print(key)   # store this in your .env as RELAY_ENCRYPTION_KEY
+    """
+    return base64.b64encode(os.urandom(32)).decode()
+    # os.urandom(32): 32 cryptographically random bytes from the OS entropy pool
+    # base64.b64encode: converts raw bytes to a printable ASCII string
+def encrypt_message(key_b64: str, plaintext_json: str) -> str:
+    """
+    Encrypt a JSON string and return the relay wire format.
+    Takes a plaintext JSON message (e.g. ``'{"type":"token","content":"Hello"}'``)
+    and returns an encrypted JSON string in the relay wire format:
+    ``'{"type": "enc", "d": "<base64blob>"}'``
+    The relay forwards this opaque blob. The consumer calls decrypt_message()
+    to recover the original plaintext.
+    Args:
+        key_b64: Base64-encoded 32-byte AES-256 key (from generate_key()).
+        plaintext_json: Any JSON string to encrypt.
+    Returns:
+        str: JSON string ``{"type": "enc", "d": "<base64(nonce+ciphertext+tag)>"}``
+    """
+    key = base64.b64decode(key_b64)         # decode base64 key → 32 raw bytes
+    nonce = os.urandom(_NONCE_SIZE)          # fresh random nonce for every message
+    aesgcm = AESGCM(key)
+    # aesgcm.encrypt() returns ciphertext + authentication tag (tag is appended
+    # automatically by the GCM implementation — we don't need to handle it separately)
+    ciphertext_with_tag = aesgcm.encrypt(nonce, plaintext_json.encode(), None)
+    # Pack nonce + ciphertext+tag into one base64 string.
+    # The recipient needs the nonce to decrypt, so it must travel with the message.
+    blob = base64.b64encode(nonce + ciphertext_with_tag).decode()
+    return json.dumps({"type": "enc", "d": blob})
+def decrypt_message(key_b64: str, msg_str: str) -> str:
+    """
+    Decrypt a relay message, or pass through if it is not encrypted.
+    If the message has ``"type": "enc"``, decrypt it and return the original
+    plaintext JSON string. If the message has any other type, return it unchanged
+    (backward-compatible passthrough for unencrypted messages).
+    Args:
+        key_b64: Base64-encoded 32-byte AES-256 key (must match the producer's key).
+        msg_str: JSON string received from the relay.
+    Returns:
+        str: Decrypted inner JSON string, or original ``msg_str`` if not encrypted.
+    Raises:
+        cryptography.exceptions.InvalidTag: If the ciphertext was tampered with.
+            This means the relay (or someone in between) modified the message.
+            Treat this as a security event.
+    """
+    msg = json.loads(msg_str)
+    if msg.get("type") != "enc":
+        # Not an encrypted message — pass through unchanged.
+        # This allows the consumer to handle both encrypted and unencrypted
+        # messages on the same channel (useful during a rolling migration).
+        return msg_str
+    # Decode the blob back into raw bytes
+    blob = base64.b64decode(msg["d"])
+    # Unpack: first 12 bytes are the nonce, the rest is ciphertext+tag
+    nonce = blob[:_NONCE_SIZE]
+    ciphertext_with_tag = blob[_NONCE_SIZE:]
+    key = base64.b64decode(key_b64)
+    aesgcm = AESGCM(key)
+    # aesgcm.decrypt() verifies the GCM authentication tag before decrypting.
+    # If the ciphertext was modified in any way, it raises InvalidTag instead
+    # of returning corrupted plaintext — this is the integrity guarantee.
+    plaintext = aesgcm.decrypt(nonce, ciphertext_with_tag, None)
+    return plaintext.decode()

streamrelay/executor.py ADDED Viewed

@@ -0,0 +1,146 @@
+"""
+streamrelay.executor — High-level API for streaming from Globus Compute.
+This is the primary user-facing class. It wraps channel ID management, Globus
+job submission, and relay consumption into a single ``async for`` loop::
+    from streamrelay import StreamingExecutor
+    async with StreamingExecutor(endpoint_id, relay_url, secret, key) as executor:
+        async for token in executor.stream(my_vllm_function, prompt="Hello"):
+            print(token, end="", flush=True)
+The ``stream()`` method:
+  1. Generates a random channel ID.
+  2. Submits ``fn`` to the Globus Compute endpoint with the channel ID and relay
+     URL passed as extra keyword arguments.
+  3. Immediately connects to the relay as a consumer and yields tokens as they
+     arrive — without waiting for Globus to complete.
+``fn`` must accept two extra kwargs automatically injected by the executor:
+  ``relay_url`` (str) and ``channel_id`` (str).
+  Optionally also ``relay_secret`` and ``encryption_key`` if you set those.
+If ``streamrelay`` is installed on the HPC endpoint workers, ``fn`` can use
+``RelayProducer`` directly. If not, embed the inline pattern from
+``remote_vllm_streaming`` in STREAM's ``globus_compute_client.py``.
+"""
+import uuid
+from collections.abc import AsyncIterator
+from typing import Callable
+class StreamingExecutor:
+    """Submit a Globus Compute function and receive its output via relay.
+    Args:
+        endpoint_id: Globus Compute endpoint UUID.
+        relay_url: WebSocket URL of the relay server.
+        relay_secret: Optional shared secret (must match relay's ``--secret``).
+        encryption_key: Optional base64 AES-256 key for E2E encryption.
+        consumer_timeout: Seconds to wait for the first token before timing out.
+    """
+    def __init__(
+        self,
+        endpoint_id: str,
+        relay_url: str,
+        relay_secret: str = "",
+        encryption_key: str = "",
+        consumer_timeout: float = 300.0,
+    ):
+        self.endpoint_id = endpoint_id
+        self.relay_url = relay_url
+        self.relay_secret = relay_secret
+        self.encryption_key = encryption_key
+        self.consumer_timeout = consumer_timeout
+        self._executor = None
+    # ------------------------------------------------------------------
+    # Context manager
+    # ------------------------------------------------------------------
+    async def __aenter__(self):
+        return self
+    async def __aexit__(self, *args):
+        self.close()
+    # ------------------------------------------------------------------
+    # Lazy Globus executor
+    # ------------------------------------------------------------------
+    def _get_gc_executor(self):
+        if self._executor is None:
+            from globus_compute_sdk import Executor
+            self._executor = Executor(endpoint_id=self.endpoint_id)
+        return self._executor
+    def close(self):
+        """Shut down the underlying Globus Compute executor."""
+        if self._executor is not None:
+            try:
+                self._executor.shutdown(wait=False)
+            except Exception:
+                pass
+            self._executor = None
+    # ------------------------------------------------------------------
+    # Main API
+    # ------------------------------------------------------------------
+    async def stream(self, fn: Callable, *args, **kwargs) -> AsyncIterator[str]:
+        """Submit ``fn`` to the endpoint and stream its output token by token.
+        ``fn`` will be called on the HPC node with ``*args, **kwargs`` PLUS
+        these additional keyword arguments injected automatically:
+        - ``relay_url`` — where to send tokens
+        - ``channel_id`` — this request's unique channel
+        - ``relay_secret`` — auth secret (if configured)
+        - ``encryption_key`` — E2E encryption key (if configured)
+        Args:
+            fn: Callable to submit. Must send tokens to the relay
+                (e.g., use :class:`~streamrelay.producer.RelayProducer`).
+            *args: Positional arguments forwarded to ``fn``.
+            **kwargs: Keyword arguments forwarded to ``fn``.
+        Yields:
+            str: Token strings in arrival order.
+        """
+        channel_id = str(uuid.uuid4())
+        # Inject relay coordinates into the function's kwargs
+        kwargs["relay_url"] = self.relay_url
+        kwargs["channel_id"] = channel_id
+        if self.relay_secret:
+            kwargs["relay_secret"] = self.relay_secret
+        if self.encryption_key:
+            kwargs["encryption_key"] = self.encryption_key
+        # Submit to Globus Compute (non-blocking — returns a Future immediately)
+        gc = self._get_gc_executor()
+        future = gc.submit(fn, *args, **kwargs)
+        # Connect as consumer and yield tokens in real time.
+        # The relay buffers any tokens that arrive before we connect.
+        from streamrelay.consumer import RelayConsumer
+        consumer = RelayConsumer(
+            relay_url=self.relay_url,
+            channel_id=channel_id,
+            encryption_key=self.encryption_key,
+            relay_secret=self.relay_secret,
+        )
+        async for token in consumer.astream():
+            yield token
+        # After streaming, check for Globus-level errors (infrastructure faults).
+        # By this point the HPC function has already completed.
+        try:
+            future.result(timeout=10)
+        except Exception as e:
+            raise RuntimeError(f"Globus Compute reported an error: {e}") from e