PyPI - tensorzero - Versions diffs - 0.0.1__py3-none-any.whl - Mend

tensorzero 0.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (6) hide show

tensorzero/__init__.py +29 -0
tensorzero/client.py +194 -0
tensorzero/types.py +175 -0
tensorzero-0.0.1.dist-info/METADATA +56 -0
tensorzero-0.0.1.dist-info/RECORD +6 -0
tensorzero-0.0.1.dist-info/WHEEL +4 -0

tensorzero/__init__.py ADDED Viewed

@@ -0,0 +1,29 @@
+from .client import AsyncTensorZero
+from .types import (
+    ChatChunk,
+    ChatInferenceResponse,
+    ContentBlock,
+    ContentBlockChunk,
+    FeedbackResponse,
+    InferenceChunk,
+    InferenceResponse,
+    JsonChunk,
+    JsonInferenceOutput,
+    JsonInferenceResponse,
+    Text,
+    TextChunk,
+    ToolCall,
+    ToolCallChunk,
+    Usage,
+)
+# Public API of the package. The concrete members of the `InferenceResponse`
+# and `InferenceChunk` unions (`JsonInferenceResponse`, `ChatChunk`,
+# `JsonChunk`) and the `ContentBlockChunk` alias are exported too, so callers
+# can narrow results with `isinstance` without reaching into `tensorzero.types`.
+__all__ = [
+    "AsyncTensorZero",
+    "ChatChunk",
+    "ChatInferenceResponse",
+    "ContentBlock",
+    "ContentBlockChunk",
+    "FeedbackResponse",
+    "InferenceChunk",
+    "InferenceResponse",
+    "JsonChunk",
+    "JsonInferenceOutput",
+    "JsonInferenceResponse",
+    "Text",
+    "TextChunk",
+    "ToolCall",
+    "ToolCallChunk",
+    "Usage",
+]

tensorzero/client.py ADDED Viewed

@@ -0,0 +1,194 @@
+"""
+TensorZero Client
+This module provides an asynchronous client for interacting with the TensorZero gateway.
+It includes functionality for making inference requests and sending feedback.
+The main class, AsyncTensorZero, offers methods for:
+- Initializing the client with a base URL
+- Making inference requests (with optional streaming)
+- Sending feedback on episodes or inferences
+- Managing the client session using async context managers
+Usage:
+    async with AsyncTensorZero(base_url) as client:
+        response = await client.inference(...)
+        feedback = await client.feedback(...)
+"""
+import json
+import logging
+from typing import Any, AsyncGenerator, Dict, List, Literal, Optional, Union
+from urllib.parse import urljoin
+from uuid import UUID
+import httpx
+from .types import (
+    FeedbackResponse,
+    InferenceChunk,
+    InferenceResponse,
+    parse_inference_chunk,
+    parse_inference_response,
+)
+class AsyncTensorZero:
+    """
+    Asynchronous client for the TensorZero gateway.
+    Wraps an `httpx.AsyncClient` and exposes the gateway's `inference` and
+    `feedback` endpoints. Use it as an async context manager, or call
+    `close()` explicitly, so that the underlying HTTP client is released.
+    """
+    def __init__(self, base_url: str):
+        """
+        Initialize the TensorZero client.
+        :param base_url: The base URL of the TensorZero gateway. Example: "http://localhost:3000"
+                         Endpoint URLs are built with `urljoin(base_url, <endpoint>)`, so a base URL
+                         that includes a path prefix must end with a trailing slash.
+        """
+        self.base_url = base_url
+        self.client = httpx.AsyncClient()
+        self.logger = logging.getLogger(__name__)
+    async def inference(
+        self,
+        function_name: str,
+        input: Dict[str, Any],
+        episode_id: Optional[UUID] = None,
+        stream: Optional[bool] = None,
+        params: Optional[Dict[str, Any]] = None,
+        variant_name: Optional[str] = None,
+        dryrun: Optional[bool] = None,
+        allowed_tools: Optional[List[str]] = None,
+        additional_tools: Optional[List[Dict[str, Any]]] = None,
+        tool_choice: Optional[
+            Union[Literal["auto", "required", "off"], Dict[Literal["specific"], str]]
+        ] = None,
+        parallel_tool_calls: Optional[bool] = None,
+    ) -> Union[InferenceResponse, AsyncGenerator[InferenceChunk, None]]:
+        """
+        Make a POST request to the /inference endpoint.
+        :param function_name: The name of the function to call
+        :param input: The input to the function
+                      Structure: {"system": Optional[str], "messages": List[{"role": "user" | "assistant", "content": Any}]}
+                      The input will be validated server side against the input schema of the function being called.
+        :param episode_id: The episode ID to use for the inference.
+                           If this is the first inference in an episode, leave this field blank. The TensorZero gateway will generate and return a new episode ID.
+                           Note: Only use episode IDs generated by the TensorZero gateway. Don't generate them yourself.
+        :param stream: If set, the TensorZero gateway will stream partial message deltas (e.g. generated tokens) as it receives them from model providers.
+        :param params: Override inference-time parameters for a particular variant type. Currently, we support:
+                        {"chat_completion": {"temperature": float, "max_tokens": int, "seed": int}}
+        :param variant_name: If set, pins the inference request to a particular variant.
+                             Note: You should generally not do this, and instead let the TensorZero gateway assign a
+                             particular variant. This field is primarily used for testing or debugging purposes.
+        :param dryrun: If true, the request will be executed but won't be stored to the database.
+        :param allowed_tools: If set, restricts the tools available during this inference request.
+                              The list of names should be a subset of the tools configured for the function.
+                              Tools provided at inference time in `additional_tools` (if any) are always available.
+        :param additional_tools: A list of additional tools to use for the request. Each element should look like {"name": str, "parameters": valid JSON Schema, "description": str}
+        :param tool_choice: If set, overrides the tool choice strategy for the request.
+                            It should be one of: "auto", "required", "off", or {"specific": str}. The last option pins the request to a specific tool name.
+        :param parallel_tool_calls: If true, the request will allow for multiple tool calls in a single inference request.
+        :return: If stream is false, returns an InferenceResponse.
+                 If stream is true, returns an async generator that yields InferenceChunks as they come in.
+                 The HTTP response stays open until the generator finishes or is closed, so iterate it
+                 to completion (or call its `aclose()`).
+        :raises httpx.HTTPStatusError: If the gateway answers with a 4xx/5xx status code.
+        """
+        url = urljoin(self.base_url, "inference")
+        data: Dict[str, Any] = {
+            "function_name": function_name,
+            "input": input,
+        }
+        # Only forward the optional fields the caller actually set, so the
+        # gateway applies its own defaults for everything else.
+        if episode_id is not None:
+            data["episode_id"] = str(episode_id)
+        if stream is not None:
+            data["stream"] = stream
+        if params is not None:
+            data["params"] = params
+        if variant_name is not None:
+            data["variant_name"] = variant_name
+        if dryrun is not None:
+            data["dryrun"] = dryrun
+        if allowed_tools is not None:
+            data["allowed_tools"] = allowed_tools
+        if additional_tools is not None:
+            data["additional_tools"] = additional_tools
+        if tool_choice is not None:
+            data["tool_choice"] = tool_choice
+        if parallel_tool_calls is not None:
+            data["parallel_tool_calls"] = parallel_tool_calls
+        if not stream:
+            response = await self.client.post(url, json=data)
+            response.raise_for_status()
+            return parse_inference_response(response.json())
+        # Streaming: `post()` would read the whole body before returning, which
+        # defeats streaming. Send the request with `stream=True` so chunks can be
+        # yielded as they arrive; `_stream_sse` owns closing the response.
+        request = self.client.build_request("POST", url, json=data)
+        response = await self.client.send(request, stream=True)
+        try:
+            response.raise_for_status()
+        except httpx.HTTPStatusError:
+            # Release the connection before propagating the error.
+            await response.aclose()
+            raise
+        return self._stream_sse(response)
+    async def feedback(
+        self,
+        metric_name: str,
+        value: Any,
+        inference_id: Optional[UUID] = None,
+        episode_id: Optional[UUID] = None,
+        dryrun: Optional[bool] = None,
+    ) -> FeedbackResponse:
+        """
+        Make a POST request to the /feedback endpoint.
+        :param metric_name: The name of the metric to provide feedback for
+        :param value: The value of the feedback. It should correspond to the metric type.
+        :param inference_id: The inference ID to assign the feedback to.
+                             Only use inference IDs that were returned by the TensorZero gateway.
+                             Note: You can assign feedback to either an episode or an inference, but not both.
+        :param episode_id: The episode ID to use for the request
+                           Only use episode IDs that were returned by the TensorZero gateway.
+                           Note: You can assign feedback to either an episode or an inference, but not both.
+        :param dryrun: If true, the feedback request will be executed but won't be stored to the database (i.e. no-op).
+        :return: A FeedbackResponse built from the JSON body returned by the gateway (it carries `feedback_id`).
+        :raises ValueError: If neither or both of `episode_id` and `inference_id` are provided.
+        :raises httpx.HTTPStatusError: If the gateway answers with a 4xx/5xx status code.
+        """
+        if episode_id is None and inference_id is None:
+            raise ValueError("Either episode_id or inference_id must be provided")
+        if episode_id is not None and inference_id is not None:
+            raise ValueError(
+                "Only one of episode_id or inference_id can be provided, not both"
+            )
+        data: Dict[str, Any] = {
+            "metric_name": metric_name,
+            "value": value,
+        }
+        if dryrun is not None:
+            data["dryrun"] = dryrun
+        if episode_id is not None:
+            data["episode_id"] = str(episode_id)
+        if inference_id is not None:
+            data["inference_id"] = str(inference_id)
+        url = urljoin(self.base_url, "feedback")
+        response = await self.client.post(url, json=data)
+        response.raise_for_status()
+        return FeedbackResponse(**response.json())
+    async def close(self):
+        """
+        Close the connection to the TensorZero gateway.
+        """
+        await self.client.aclose()
+    async def __aenter__(self):
+        return self
+    async def __aexit__(self, exc_type, exc_val, exc_tb):
+        await self.close()
+    async def _stream_sse(
+        self, response: httpx.Response
+    ) -> AsyncGenerator[InferenceChunk, None]:
+        """
+        Parse the SSE stream from the response.
+        Only lines starting with "data: " are considered. A "[DONE]" payload ends
+        the stream; payloads that are not valid JSON are logged and skipped.
+        The response is closed when the generator finishes or is closed early.
+        :param response: The httpx.Response object
+        :yield: InferenceChunk objects parsed from the SSE events
+        """
+        try:
+            async for line in response.aiter_lines():
+                if not line.startswith("data: "):
+                    continue
+                payload = line[6:].strip()
+                if payload == "[DONE]":
+                    break
+                # Keep the `try` limited to JSON decoding so that errors raised
+                # while building the chunk are not mistaken for bad SSE data.
+                try:
+                    event = json.loads(payload)
+                except json.JSONDecodeError:
+                    self.logger.error("Failed to parse SSE data: %s", payload)
+                    continue
+                yield parse_inference_chunk(event)
+        finally:
+            # Always hand the connection back to the pool, including when the
+            # consumer stops iterating before the stream ends.
+            await response.aclose()

tensorzero/types.py ADDED Viewed

@@ -0,0 +1,175 @@
+from dataclasses import dataclass
+from typing import Any, Dict, List, Optional, Union
+from uuid import UUID
+# Types for non-streaming inference responses
+@dataclass
+class Usage:
+    # Built with `Usage(**payload["usage"])` by the parse functions in this
+    # module, so the payload keys must match these field names exactly.
+    input_tokens: int
+    output_tokens: int
+@dataclass
+class Text:
+    # Content block produced by `parse_content_block` for blocks whose
+    # "type" is "text".
+    text: str
+@dataclass
+class ToolCall:
+    # Content block produced by `parse_content_block` for blocks whose
+    # "type" is "tool_call".
+    name: str
+    # Passed through unchanged from the block's "arguments" value.
+    arguments: Dict[str, Any]
+    id: str
+    # Both `parsed_*` fields are None when the block does not carry the key.
+    parsed_name: Optional[str]
+    parsed_arguments: Optional[Dict[str, Any]]
+# A single element of a chat response's `output` list.
+ContentBlock = Union[Text, ToolCall]
+@dataclass
+class JsonInferenceOutput:
+    # Built with `JsonInferenceOutput(**data["output"])`, so the response's
+    # "output" object must contain exactly the keys "raw" and "parsed".
+    raw: str
+    parsed: Optional[Dict[str, Any]]
+@dataclass
+class ChatInferenceResponse:
+    # Returned by `parse_inference_response` when the response's "output"
+    # is a list of content blocks.
+    inference_id: UUID
+    episode_id: UUID
+    variant_name: str
+    output: List[ContentBlock]
+    usage: Usage
+@dataclass
+class JsonInferenceResponse:
+    # Returned by `parse_inference_response` when the response's "output"
+    # is a JSON object (dict) rather than a list.
+    inference_id: UUID
+    episode_id: UUID
+    variant_name: str
+    output: JsonInferenceOutput
+    usage: Usage
+# Result type of a non-streaming inference: either a chat or a JSON response.
+InferenceResponse = Union[ChatInferenceResponse, JsonInferenceResponse]
+def parse_inference_response(data: Dict[str, Any]) -> InferenceResponse:
+    """
+    Convert a decoded non-streaming inference payload into a typed response.
+    A list-valued "output" yields a ChatInferenceResponse and a dict-valued
+    "output" yields a JsonInferenceResponse.
+    :param data: The decoded JSON body of the inference response
+    :return: The typed inference response
+    :raises ValueError: If "output" is missing or is neither a list nor a dict
+    """
+    output = data.get("output")
+    if not isinstance(output, (list, dict)):
+        raise ValueError("Unable to determine response type")
+    # Fields shared by both response flavours.
+    common = {
+        "inference_id": UUID(data["inference_id"]),
+        "episode_id": UUID(data["episode_id"]),
+        "variant_name": data["variant_name"],
+    }
+    if isinstance(output, list):
+        blocks = [parse_content_block(item) for item in output]
+        return ChatInferenceResponse(
+            **common, output=blocks, usage=Usage(**data["usage"])
+        )
+    json_output = JsonInferenceOutput(**output)
+    return JsonInferenceResponse(
+        **common, output=json_output, usage=Usage(**data["usage"])
+    )
+def parse_content_block(block: Dict[str, Any]) -> ContentBlock:
+    """
+    Convert one element of a chat response's "output" list into a typed block.
+    :param block: The decoded content block; its "type" key selects the result type
+    :return: A Text for "text" blocks, a ToolCall for "tool_call" blocks
+    :raises ValueError: If the block's "type" is neither "text" nor "tool_call"
+    """
+    kind = block["type"]
+    if kind == "text":
+        return Text(text=block["text"])
+    if kind != "tool_call":
+        raise ValueError(f"Unknown content block type: {block}")
+    # The parsed_* keys are optional in the payload and default to None.
+    return ToolCall(
+        name=block["name"],
+        arguments=block["arguments"],
+        id=block["id"],
+        parsed_name=block.get("parsed_name"),
+        parsed_arguments=block.get("parsed_arguments"),
+    )
+# Types for streaming inference responses
+@dataclass
+class TextChunk:
+    # Streaming content block produced by `parse_content_block_chunk` for
+    # blocks whose "type" is "text".
+    # In the possibility that multiple text messages are sent in a single streaming response,
+    # this `id` will be used to disambiguate them
+    id: str
+    text: str
+@dataclass
+class ToolCallChunk:
+    # Streaming content block produced by `parse_content_block_chunk` for
+    # blocks whose "type" is "tool_call".
+    name: str
+    # This is the tool call ID that many LLM APIs use to associate tool calls with tool responses
+    id: str
+    # `arguments` will come as partial JSON
+    arguments: str
+# A single element of a streamed chat chunk's `content` list.
+ContentBlockChunk = Union[TextChunk, ToolCallChunk]
+@dataclass
+class ChatChunk:
+    # Returned by `parse_inference_chunk` for streamed events that carry a
+    # "content" key.
+    inference_id: UUID
+    episode_id: UUID
+    variant_name: str
+    content: List[ContentBlockChunk]
+    # None when the event carries no usage information.
+    usage: Optional[Usage]
+@dataclass
+class JsonChunk:
+    # Returned by `parse_inference_chunk` for streamed events that carry a
+    # "raw" key (and no "content" key).
+    inference_id: UUID
+    episode_id: UUID
+    variant_name: str
+    raw: str
+    # None when the event carries no usage information.
+    usage: Optional[Usage]
+# Item type yielded by a streaming inference: either a chat or a JSON chunk.
+InferenceChunk = Union[ChatChunk, JsonChunk]
+def parse_inference_chunk(chunk: Dict[str, Any]) -> InferenceChunk:
+    """
+    Convert one decoded streaming event into a typed chunk.
+    Events with a "content" key become a ChatChunk; otherwise events with a
+    "raw" key become a JsonChunk.
+    :param chunk: The decoded JSON payload of a single SSE event
+    :return: The typed chunk
+    :raises ValueError: If the event has neither a "content" nor a "raw" key
+    """
+    # Treat an explicit null the same as a missing key: `Usage(**None)` would
+    # raise a TypeError for events that send `"usage": null`.
+    usage_data = chunk.get("usage")
+    if "content" in chunk:
+        return ChatChunk(
+            inference_id=UUID(chunk["inference_id"]),
+            episode_id=UUID(chunk["episode_id"]),
+            variant_name=chunk["variant_name"],
+            content=[parse_content_block_chunk(block) for block in chunk["content"]],
+            usage=Usage(**usage_data) if usage_data is not None else None,
+        )
+    elif "raw" in chunk:
+        return JsonChunk(
+            inference_id=UUID(chunk["inference_id"]),
+            episode_id=UUID(chunk["episode_id"]),
+            variant_name=chunk["variant_name"],
+            raw=chunk["raw"],
+            usage=Usage(**usage_data) if usage_data is not None else None,
+        )
+    else:
+        raise ValueError(f"Unable to determine response type: {chunk}")
+def parse_content_block_chunk(block: Dict[str, Any]) -> ContentBlockChunk:
+    """
+    Convert one element of a streamed chunk's "content" list into a typed block.
+    :param block: The decoded content block; its "type" key selects the result type
+    :return: A TextChunk for "text" blocks, a ToolCallChunk for "tool_call" blocks
+    :raises ValueError: If the block's "type" is neither "text" nor "tool_call"
+    """
+    kind = block["type"]
+    if kind == "text":
+        return TextChunk(id=block["id"], text=block["text"])
+    if kind != "tool_call":
+        raise ValueError(f"Unknown content block type: {block}")
+    return ToolCallChunk(
+        name=block["name"],
+        id=block["id"],
+        arguments=block["arguments"],
+    )
+# Types for feedback
+@dataclass
+class FeedbackResponse:
+    # NOTE(review): the client builds this with `FeedbackResponse(**response.json())`
+    # and does no conversion, so at runtime this holds whatever JSON type the
+    # gateway sent (presumably a str) rather than a UUID instance — confirm.
+    feedback_id: UUID

tensorzero-0.0.1.dist-info/METADATA ADDED Viewed

@@ -0,0 +1,56 @@
+Metadata-Version: 2.3
+Name: tensorzero
+Version: 0.0.1
+Summary: The Python client for TensorZero
+Author-email: Viraj Mehta <viraj@tensorzero.com>, Gabriel Bianconi <gabriel@tensorzero.com>
+Requires-Python: >=3.10
+Requires-Dist: httpx>=0.27.0
+Description-Content-Type: text/markdown
+# TensorZero Python Client
+This is an async Python client for the TensorZero gateway. Check out the [docs](https://tensorzero.com/docs/) for more information. This client allows you to easily make inference requests and assign feedback to them via the TensorZero gateway.
+## Installation
+```bash
+pip install tensorzero
+```
+## Basic Usage
+### Non-Streaming Inference
+```python
+from tensorzero import AsyncTensorZero
+async with AsyncTensorZero("http://localhost:3000") as client:
+    result = await client.inference(
+        function_name="basic_test",
+        input={
+            "system": {"assistant_name": "Alfred Pennyworth"},
+            "messages": [{"role": "user", "content": "Hello"}],
+        },
+    )
+episode_id = result.episode_id
+output = result.output
+print(output[0].text)  # Prints the text of the first content block returned by TensorZero
+```
+### Streaming Inference
+```python
+from tensorzero import AsyncTensorZero
+async with AsyncTensorZero("http://localhost:3000") as client:
+    stream = await client.inference(
+        function_name="basic_test",
+        input={
+            "system": {"assistant_name": "Alfred Pennyworth"},
+            "messages": [{"role": "user", "content": "Hello"}],
+        },
+        stream=True,
+    )
+    async for chunk in stream:
+        print(chunk.content[0].text)  # Prints the text in each chunk returned by TensorZero
+```

tensorzero-0.0.1.dist-info/RECORD ADDED Viewed

@@ -0,0 +1,6 @@
+tensorzero/__init__.py,sha256=X7ombaI3m4Slt1qgOCCQqsqujxGz27v-Ux7Q0M-j9dA,527
+tensorzero/client.py,sha256=qWpgC5yPUuxmsYzNvXl3Oiv3c9VRPPjHaWeJA-njMT0,8801
+tensorzero/types.py,sha256=owmbIa1HpdpcyeC8uZJJhGwqr_LDat0rHjo4reJS9m4,4564
+tensorzero-0.0.1.dist-info/METADATA,sha256=1gCyEXjz2eUGJxZmD729NXCQytCLMUIzeNTI-1mubcU,1655
+tensorzero-0.0.1.dist-info/WHEEL,sha256=1yFddiXMmvYK7QYTqtRNtX66WJ0Mz8PYEiEUoOUUxRY,87
+tensorzero-0.0.1.dist-info/RECORD,,

tensorzero-0.0.1.dist-info/WHEEL ADDED Viewed

@@ -0,0 +1,4 @@
+Wheel-Version: 1.0
+Generator: hatchling 1.25.0
+Root-Is-Purelib: true
+Tag: py3-none-any