PyPI - stores - Versions diffs - 0.0.0__py3-none-any.whl → 0.1.1__py3-none-any.whl - Mend

stores 0.0.0__py3-none-any.whl → 0.1.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (18) hide show

stores/__init__.py +9 -0
stores/constants.py +2 -0
stores/format.py +214 -0
stores/indexes/__init__.py +11 -0
stores/indexes/base_index.py +283 -0
stores/indexes/index.py +56 -0
stores/indexes/local_index.py +84 -0
stores/indexes/remote_index.py +76 -0
stores/indexes/venv_utils.py +376 -0
stores/parse.py +144 -0
stores/utils.py +8 -0
stores-0.1.1.dist-info/METADATA +85 -0
stores-0.1.1.dist-info/RECORD +15 -0
{stores-0.0.0.dist-info → stores-0.1.1.dist-info}/WHEEL +1 -2
stores-0.1.1.dist-info/licenses/LICENSE +21 -0
stores-0.0.0.dist-info/METADATA +0 -19
stores-0.0.0.dist-info/RECORD +0 -4
stores-0.0.0.dist-info/top_level.txt +0 -1

stores/indexes/remote_index.py ADDED Viewed

@@ -0,0 +1,76 @@
+import json
+import logging
+import venv
+from pathlib import Path
+import requests
+from git import Repo
+from stores.constants import VENV_NAME
+from stores.indexes.base_index import BaseIndex
+from stores.indexes.venv_utils import init_venv_tools, install_venv_deps
+# Apply the default logging configuration and give this module its own
+# INFO-level logger
+logging.basicConfig()
+logger = logging.getLogger("stores.indexes.remote_index")
+logger.setLevel(logging.INFO)
+# TODO: CACHE_DIR is a relative path, so it resolves against the current
+# working directory and may point to a different folder from run to run
+CACHE_DIR = Path(".tools")
+# Endpoint that lookup_index() POSTs to in order to resolve an index id
+INDEX_LOOKUP_URL = (
+    "https://mnryl5tkkol3yitc3w2rupqbae0ovnej.lambda-url.us-east-1.on.aws/"
+)
+def lookup_index(
+    index_id: str, index_version: str | None = None, timeout: float = 30
+):
+    """
+    Look up an index in the Stores database
+    Sends ``index_id`` and ``index_version`` as a JSON body in a POST request
+    to ``INDEX_LOOKUP_URL``.
+    Args:
+        index_id: Identifier of the index to look up.
+        index_version: Optional version of the index; sent as ``null`` when
+            omitted.
+        timeout: Seconds to wait for the lookup service before giving up.
+    Returns:
+        The decoded JSON response when the request succeeds, otherwise ``None``.
+    Raises:
+        requests.exceptions.RequestException: If the request cannot be
+            completed, including when ``timeout`` elapses.
+    """
+    response = requests.post(
+        INDEX_LOOKUP_URL,
+        headers={
+            "content-type": "application/json",
+        },
+        data=json.dumps(
+            {
+                "index_id": index_id,
+                "index_version": index_version,
+            }
+        ),
+        # Without a timeout, requests waits indefinitely on a silent server
+        timeout=timeout,
+    )
+    if not response.ok:
+        return None
+    return response.json()
+class RemoteIndex(BaseIndex):
+    """
+    Index whose tools live in a git repository cloned into ``CACHE_DIR``
+    The repository is resolved through ``lookup_index`` and, failing that, is
+    assumed to be a GitHub repository named by ``index_id``.
+    """
+    def __init__(self, index_id: str, env_var: dict | None = None):
+        """
+        Clone the index if it is not cached, prepare its venv and load its tools
+        Args:
+            index_id: Index identifier, optionally suffixed with
+                ``:<commit-like>`` to pin a version.
+            env_var: Environment variables handed to the tools when they run.
+        """
+        import shutil
+        self.index_id = index_id
+        self.index_folder = CACHE_DIR / self.index_id
+        self.env_var = env_var or {}
+        if not self.index_folder.exists():
+            commit_like = None
+            if ":" in index_id:
+                # Split on the first ":" only, so a commit-like that itself
+                # contains ":" does not break the unpacking
+                index_id, commit_like = index_id.split(":", 1)
+            # Lookup Stores DB
+            repo_url = None
+            try:
+                index_metadata = lookup_index(index_id, commit_like)
+                if index_metadata:
+                    repo_url = index_metadata["clone_url"]
+                    commit_like = index_metadata["commit"]
+            except Exception:
+                # Best-effort lookup: any failure falls back to GitHub below
+                logger.warning(
+                    f"Could not find {index_id} in stores, assuming index references a GitHub repo..."
+                )
+            if not repo_url:
+                # Otherwise, assume index references a GitHub repo
+                repo_url = f"https://github.com/{index_id}.git"
+            try:
+                repo = Repo.clone_from(repo_url, self.index_folder)
+                if commit_like:
+                    repo.git.checkout(commit_like)
+            except Exception:
+                # A partial clone, or a clone left on the wrong commit, would
+                # be mistaken for a valid cache entry on the next run
+                shutil.rmtree(self.index_folder, ignore_errors=True)
+                raise
+        # Create venv and install deps
+        self.venv = self.index_folder / VENV_NAME
+        if not self.venv.exists():
+            venv.create(self.venv, symlinks=True, with_pip=True, upgrade_deps=True)
+        install_venv_deps(self.index_folder)
+        # Initialize tools
+        tools = init_venv_tools(self.index_folder, self.env_var)
+        super().__init__(tools)

stores/indexes/venv_utils.py ADDED Viewed

@@ -0,0 +1,376 @@
+import hashlib
+import inspect
+import json
+import logging
+import os
+import pickle
+import socket
+import subprocess
+import sys
+import threading
+from enum import Enum
+from pathlib import Path
+from typing import Dict, Literal, Tuple, TypedDict, Union
+from makefun import create_function
+from stores.constants import TOOLS_CONFIG_FILENAME, VENV_NAME
+# tomllib is imported from the standard library on Python 3.11 and newer;
+# older interpreters use the tomli package under the same name
+if sys.version_info >= (3, 11):
+    import tomllib
+else:
+    import tomli as tomllib
+# Apply the default logging configuration and give this module its own
+# INFO-level logger
+logging.basicConfig()
+logger = logging.getLogger("stores.indexes.venv_utils")
+logger.setLevel(logging.INFO)
+# Name of the file, stored next to a dependency config, that records the
+# SHA-256 hash of the config that was last installed
+HASH_FILE = ".deps_hash"
+# Maps each supported dependency config file to the pip command that installs
+# it; install_venv_deps runs the command with the index folder as cwd
+SUPPORTED_DEP_CONFIGS = {
+    "pyproject.toml": f"{VENV_NAME}/bin/pip install .",
+    "setup.py": f"{VENV_NAME}/bin/pip install .",
+    "requirements.txt": f"{VENV_NAME}/bin/pip install -r requirements.txt",
+}
+def has_installed(config_path: os.PathLike):
+    """
+    Read hash file to check if dependencies have been installed
+    Args:
+        config_path: Path to the dependency config file; any path-like value,
+            including a plain ``str``.
+    Returns:
+        ``True`` when the hash file next to the config exists and equals the
+        SHA-256 of the config's current contents, otherwise ``False``.
+    """
+    # Coerce first: the annotation allows str / os.PathLike, but ".parent"
+    # below only exists on pathlib.Path
+    config_path = Path(config_path)
+    config_hash = hashlib.sha256(config_path.read_bytes()).hexdigest()
+    hash_path = config_path.parent / HASH_FILE
+    if not hash_path.exists():
+        return False
+    with open(hash_path) as f:
+        return config_hash == f.read().strip()
+def write_hash(config_path: os.PathLike):
+    """
+    Write hash file once dependencies have been installed
+    Args:
+        config_path: Path to the dependency config file; any path-like value,
+            including a plain ``str``. The SHA-256 of its contents is written
+            to ``HASH_FILE`` in the same folder.
+    """
+    # Coerce first: the annotation allows str / os.PathLike, but ".parent"
+    # below only exists on pathlib.Path
+    config_path = Path(config_path)
+    config_hash = hashlib.sha256(config_path.read_bytes()).hexdigest()
+    hash_path = config_path.parent / HASH_FILE
+    with open(hash_path, "w") as f:
+        f.write(config_hash)
+def install_venv_deps(index_folder: os.PathLike):
+    """
+    Install the dependencies of an index into its virtual environment
+    Only the first file from ``SUPPORTED_DEP_CONFIGS`` that exists in
+    ``index_folder`` is considered.
+    Returns:
+        ``"Already installed"`` when the recorded hash matches the config, a
+        message describing the install command after a fresh install, or
+        ``None`` when no supported config file exists.
+    Raises:
+        subprocess.CalledProcessError: If the install command fails.
+    """
+    root = Path(index_folder)
+    for filename, command in SUPPORTED_DEP_CONFIGS.items():
+        dep_config = root / filename
+        if not dep_config.exists():
+            continue
+        # Nothing to do when the config is unchanged since the last install
+        if has_installed(dep_config):
+            return "Already installed"
+        subprocess.check_call(command.split(), cwd=root)
+        write_hash(dep_config)
+        outcome = f"Installed with {root}/{command}"
+        logger.info(outcome)
+        return outcome
+def init_venv_tools(index_folder: os.PathLike, env_var: dict | None = None):
+    """
+    Build a local wrapper function for every tool listed in an index
+    Reads the ``tools`` list from the ``index`` table of the manifest named
+    ``TOOLS_CONFIG_FILENAME`` inside ``index_folder``.
+    Args:
+        index_folder: Folder containing the index and its virtual environment.
+        env_var: Environment variables handed to the tools when they run.
+    Returns:
+        A list of wrapper functions, in manifest order.
+    """
+    root = Path(index_folder)
+    tool_env = env_var or {}
+    with open(root / TOOLS_CONFIG_FILENAME, "rb") as manifest_file:
+        index_config = tomllib.load(manifest_file)["index"]
+    def build_tool(tool_id):
+        # Fetch the signature from the venv, then wrap it locally
+        signature = get_tool_signature(
+            tool_id=tool_id,
+            index_folder=root,
+            venv=VENV_NAME,
+        )
+        return parse_tool_signature(
+            signature_dict=signature,
+            index_folder=root,
+            venv=VENV_NAME,
+            env_var=tool_env,
+        )
+    return [build_tool(tool_id) for tool_id in index_config.get("tools", [])]
+# TODO: Sanitize tool_id, args, and kwargs
+def get_tool_signature(tool_id: str, index_folder: os.PathLike, venv: str = VENV_NAME):
+    """
+    Introspect a tool inside the index's virtual environment
+    Runs a generated script with the venv's interpreter that imports the tool,
+    reads its signature, type hints, docstring and whether it is a coroutine
+    function, and pickles that description to stdout.
+    Args:
+        tool_id: Dotted path of the tool; the last component is the function
+            name and the rest is the module to import it from.
+        index_folder: Folder of the index, used as the working directory of
+            the subprocess.
+        venv: Name of the virtual environment folder holding ``bin/python``.
+    Returns:
+        A dict with the keys ``tool_id``, ``params``, ``return``, ``is_async``
+        and ``doc``.
+    Raises:
+        RuntimeError: If the subprocess reports a failure, or its output
+            cannot be unpickled because a module is missing locally.
+    """
+    module_name = ".".join(tool_id.split(".")[:-1])
+    tool_name = tool_id.split(".")[-1]
+    # The script below is an f-string: tool_id, module_name and tool_name are
+    # substituted into the source text, and doubled braces are literal braces
+    runner = f"""
+import pickle, sys, traceback, inspect, enum
+from typing import Any, Dict, List, Literal, Tuple, Union, get_args, get_origin, get_type_hints
+import types as T
+def extract_type_info(typ):
+    origin = get_origin(typ)
+    args = list(get_args(typ))
+    if origin is Literal:
+        return {{"type": "Literal", "values": args}}
+    elif inspect.isclass(typ) and issubclass(typ, enum.Enum):
+        return {{
+            "type": "Enum",
+            "type_name": typ.__name__,
+            "values": {{v.name: v.value for v in typ}},
+        }}
+    elif isinstance(typ, type) and typ.__class__.__name__ == "_TypedDictMeta":
+        hints = get_type_hints(typ)
+        return {{
+            "type": "TypedDict",
+            "type_name": typ.__name__,
+            "fields": {{k: extract_type_info(v) for k, v in hints.items()}}
+        }}
+    elif origin in (list, List) or typ is list:
+        return {{
+            "type": "List",
+            "item_type": extract_type_info(args[0]) if args else {{"type": Any}}
+        }}
+    elif origin in (dict, Dict) or typ is dict:
+        return {{
+            "type": "Dict",
+            "key_type": extract_type_info(args[0]) if args else {{"type": Any}},
+            "value_type": extract_type_info(args[1]) if len(args) > 1 else {{"type": Any}}
+        }}
+    elif origin in (tuple, Tuple) or typ is tuple:
+        return {{
+            "type": "Tuple",
+            "item_types": [extract_type_info(arg) for arg in args] if args else [{{"type": Any}}]
+        }}
+    elif origin is Union or origin is T.UnionType:
+        return {{
+            "type": "Union",
+            "options": [extract_type_info(arg) for arg in args]
+        }}
+    else:
+        return {{"type": typ}}
+try:
+    from {module_name} import {tool_name}
+    sig = inspect.signature({tool_name})
+    hints = get_type_hints({tool_name})
+    params = {{}}
+    for name, param in sig.parameters.items():
+        hint = hints.get(name, param.annotation)
+        param_info = extract_type_info(hint)
+        param_info["kind"] = param.kind
+        param_info["default"] = param.default
+        params[name] = param_info
+    return_type = hints.get('return', sig.return_annotation)
+    return_info = extract_type_info(return_type)
+    pickle.dump(
+        {{
+            "ok": True,
+            "result": {{
+                "tool_id": "{tool_id}",
+                "params": params,
+                "return": return_info,
+                "is_async": inspect.iscoroutinefunction({tool_name}),
+                "doc": inspect.getdoc({tool_name}),
+            }},
+        }},
+        sys.stdout.buffer,
+    )
+except Exception as e:
+    err = traceback.format_exc()
+    pickle.dump({{"ok": False, "error": err}}, sys.stdout.buffer)
+"""
+    # The interpreter path is relative and the subprocess runs with
+    # cwd=index_folder
+    result = subprocess.run(
+        [f"{venv}/bin/python", "-c", runner],
+        capture_output=True,
+        cwd=index_folder,
+    )
+    # NOTE(review): this unpickles bytes written by a process that imports
+    # code from the index repository; pickle.loads is unsafe on untrusted data
+    # NOTE(review): if the child writes nothing to stdout, pickle.loads
+    # presumably raises EOFError, which is not handled here - confirm
+    try:
+        response = pickle.loads(result.stdout)
+    except ModuleNotFoundError as e:
+        raise RuntimeError(
+            f"Error loading tool {tool_id}:\nThe tool most likely has a parameter of a custom type that cannot be exported"
+        ) from e
+    if response.get("ok"):
+        return response["result"]
+    else:
+        raise RuntimeError(f"Error loading tool {tool_id}:\n{response['error']}")
+def parse_param_type(param_info: dict):
+    """
+    Rebuild a type annotation from its serialized description
+    Args:
+        param_info: Dict whose ``"type"`` entry is either an actual type
+            object or one of the tags ``"Literal"``, ``"Enum"``,
+            ``"TypedDict"``, ``"List"``, ``"Dict"``, ``"Tuple"`` or
+            ``"Union"``, accompanied by the fields that tag needs.
+    Returns:
+        The reconstructed type annotation.
+    Raises:
+        TypeError: If ``"type"`` is a string that is not a known tag.
+    """
+    param_type = param_info["type"]
+    # Anything that is not a tag is already a usable type object
+    if not isinstance(param_type, str):
+        return param_type
+    # Subscripting with a tuple is the same as subscripting with several
+    # arguments, so the documented X[...] syntax replaces direct calls to
+    # X.__getitem__
+    if param_type == "Literal":
+        return Literal[tuple(param_info["values"])]
+    if param_type == "Enum":
+        return Enum(param_info["type_name"], param_info["values"])
+    if param_type == "TypedDict":
+        fields = {
+            name: parse_param_type(info)
+            for name, info in param_info["fields"].items()
+        }
+        return TypedDict(param_info["type_name"], fields)
+    if param_type == "List":
+        return list[parse_param_type(param_info["item_type"])]
+    if param_type == "Dict":
+        return Dict[
+            parse_param_type(param_info["key_type"]),
+            parse_param_type(param_info["value_type"]),
+        ]
+    if param_type == "Tuple":
+        return Tuple[tuple(parse_param_type(i) for i in param_info["item_types"])]
+    if param_type == "Union":
+        return Union[tuple(parse_param_type(i) for i in param_info["options"])]
+    raise TypeError(f"Invalid param type {param_type} in param info {param_info}")
+def parse_tool_signature(
+    signature_dict: dict,
+    index_folder: os.PathLike,
+    venv: str = VENV_NAME,
+    env_var: dict | None = None,
+):
+    """
+    Build a local stand-in for a remote tool from its signature description
+    The returned function carries the tool's parameters, return annotation and
+    docstring, and forwards every call to ``run_remote_tool``. It is a
+    coroutine function when the description marks the tool as async.
+    """
+    env_var = env_var or {}
+    def _dispatch(args, kwargs):
+        # Single place where a call is forwarded to the tool's environment
+        return run_remote_tool(
+            tool_id=signature_dict["tool_id"],
+            index_folder=index_folder,
+            args=args,
+            kwargs=kwargs,
+            venv=venv,
+            env_var=env_var,
+        )
+    def func_handler(*args, **kwargs):
+        return _dispatch(args, kwargs)
+    async def async_func_handler(*args, **kwargs):
+        return _dispatch(args, kwargs)
+    # Rebuild the parameter list, then the full signature with its return type
+    parameters = [
+        inspect.Parameter(
+            name=param_name,
+            kind=param_info["kind"],
+            default=param_info["default"],
+            annotation=parse_param_type(param_info),
+        )
+        for param_name, param_info in signature_dict["params"].items()
+    ]
+    return_annotation = parse_param_type(signature_dict["return"])
+    signature = inspect.Signature(parameters, return_annotation=return_annotation)
+    if signature_dict.get("is_async"):
+        handler = async_func_handler
+    else:
+        handler = func_handler
+    func = create_function(
+        signature,
+        handler,
+        qualname=signature_dict["tool_id"],
+        doc=signature_dict.get("doc"),
+    )
+    func.__name__ = signature_dict["tool_id"]
+    return func
+# TODO: Sanitize tool_id, args, and kwargs
+def run_remote_tool(
+    tool_id: str,
+    index_folder: os.PathLike,
+    args: list | None = None,
+    kwargs: dict | None = None,
+    venv: str = VENV_NAME,
+    env_var: dict | None = None,
+):
+    """
+    Call a tool inside the index's virtual environment and return its result
+    The arguments are sent to the subprocess as JSON on stdin, and the result
+    comes back as JSON over a localhost socket.
+    Args:
+        tool_id: Dotted path of the tool; the last component is the function
+            name and the rest is the module to import it from.
+        index_folder: Folder of the index; added to the child's ``sys.path``.
+        args: Positional arguments for the tool (must be JSON-serializable).
+        kwargs: Keyword arguments for the tool (must be JSON-serializable).
+        venv: Name of the virtual environment folder holding ``bin/python``.
+        env_var: The complete environment of the subprocess.
+    Returns:
+        The JSON-decoded return value of the tool.
+    Raises:
+        RuntimeError: If the tool raises, or the subprocess ends without
+            sending a response.
+    """
+    args = args or []
+    kwargs = kwargs or {}
+    env_var = env_var or {}
+    module_name = ".".join(tool_id.split(".")[:-1])
+    tool_name = tool_id.split(".")[-1]
+    payload = json.dumps(
+        {
+            "args": args,
+            "kwargs": kwargs,
+        }
+    ).encode("utf-8")
+    # We use sockets to pass function output
+    listener = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
+    listener.bind(("localhost", 0))
+    listener.listen(1)
+    # accept() is polled so the reader can stop once the child has exited
+    # without ever connecting, instead of blocking forever
+    listener.settimeout(0.2)
+    _, port = listener.getsockname()
+    child_done = threading.Event()
+    result_data = {}
+    def handle_connection():
+        while True:
+            # Sample the flag BEFORE accept(): if the child had already
+            # exited, any connection it made is pending and accept() returns
+            # it at once, so a timeout then really means "nothing was sent"
+            finished = child_done.is_set()
+            try:
+                conn, _ = listener.accept()
+            except socket.timeout:
+                if finished:
+                    return
+                continue
+            with conn:
+                # Read the whole response without a deadline
+                conn.settimeout(None)
+                chunks = []
+                while True:
+                    chunk = conn.recv(4096)
+                    if not chunk:
+                        break
+                    chunks.append(chunk)
+            result_data["data"] = b"".join(chunks)
+            return
+    t = threading.Thread(target=handle_connection)
+    t.start()
+    runner = f"""
+import asyncio, inspect, json, socket, sys, traceback
+sys.path.insert(0, "{index_folder}")
+try:
+    from {module_name} import {tool_name}
+    params = json.load(sys.stdin)
+    args = params.get("args", [])
+    kwargs = params.get("kwargs", {{}})
+    if inspect.iscoroutinefunction({tool_name}):
+        loop = asyncio.new_event_loop()
+        asyncio.set_event_loop(loop)
+        result = loop.run_until_complete({tool_name}(*args, **kwargs))
+    else:
+        result = {tool_name}(*args, **kwargs)
+    response = json.dumps({{"ok": True, "result": result}})
+except Exception as e:
+    err = traceback.format_exc()
+    response = json.dumps({{"ok": False, "error": err}})
+sock = socket.create_connection(("localhost", {port}))
+sock.sendall(response.encode("utf-8"))
+sock.close()
+"""
+    try:
+        completed = subprocess.run(
+            [f"{index_folder}/{venv}/bin/python", "-c", runner],
+            input=payload,
+            capture_output=True,
+            env=env_var,
+        )
+    finally:
+        # Runs even when the interpreter could not be started, so the reader
+        # thread and the listening socket are always released
+        child_done.set()
+        t.join()
+        listener.close()
+    data = result_data.get("data")
+    if not data:
+        # The child ended without reporting back; surface its stderr instead
+        stderr = completed.stderr.decode("utf-8", errors="replace")
+        raise RuntimeError(f"Subprocess failed with error:\n{stderr}")
+    response = json.loads(data.decode("utf-8"))
+    if response.get("ok"):
+        return response["result"]
+    else:
+        raise RuntimeError(f"Subprocess failed with error:\n{response['error']}")

stores/parse.py ADDED Viewed

@@ -0,0 +1,144 @@
+import json
+import logging
+import re
+from itertools import combinations
+from typing import Any
+import dirtyjson
+from dirtyjson.attributed_containers import AttributedDict, AttributedList
+from fuzzywuzzy import process
+# Apply the default logging configuration and give this module its own
+# INFO-level logger
+logging.basicConfig()
+logger = logging.getLogger("stores.parse")
+logger.setLevel(logging.INFO)
+def find_json(rgx: str, text: str):
+    """
+    Return the ``json`` named group of the first match of ``rgx`` in ``text``
+    When the pattern does not match, ``text`` is returned unchanged. When it
+    matches but defines no ``json`` group, ``None`` is returned.
+    """
+    found = re.search(rgx, text)
+    return text if found is None else found.groupdict().get("json")
+def convert_attributed_container(
+    container: Any | AttributedDict | AttributedList | float | int,
+):
+    """
+    Recursively turn dirtyjson containers into plain ``dict`` / ``list`` values
+    Any value that is neither an ``AttributedList`` nor an ``AttributedDict``
+    is returned as is.
+    """
+    if isinstance(container, AttributedList):
+        return [convert_attributed_container(item) for item in container]
+    if isinstance(container, AttributedDict):
+        plain = {**container}
+        return {
+            key: convert_attributed_container(value) for key, value in plain.items()
+        }
+    return container
+def llm_parse_json(text: str, keys: list[str] | None = None, autoescape=True):
+    """
+    Read LLM output and extract JSON data from it.
+    Candidates are tried in order: the contents of a ```json fence, then the
+    longer of the outermost ``[...]`` and ``{...}`` spans, then the shorter.
+    Args:
+        text: Raw LLM output.
+        keys: Keys expected in the result; used when repairing unescaped
+            quotes and when fuzzy-matching the keys of a parsed object.
+        autoescape: Whether to attempt repairing unescaped quotes when
+            parsing fails with a missing-delimiter error.
+    Returns:
+        The parsed data as plain Python containers.
+    Raises:
+        ValueError: If no candidate can be parsed.
+    """
+    keys = keys or []
+    # First check for ```json
+    # A lazy ".*?" under DOTALL matches the same text as "(.|\s|\n)*?" but
+    # has no overlapping alternatives, so an unterminated fence no longer
+    # triggers exponential backtracking
+    code_snippet_pattern = re.compile(r"```json(?P<json>.*?)```", re.DOTALL)
+    code_snippet_result = find_json(code_snippet_pattern, text)
+    # Then try to find the longer match between [.*?] and {.*?}
+    array_pattern = re.compile("(?P<json>\\[.*\\])", re.DOTALL)
+    array_result = find_json(array_pattern, text)
+    dict_pattern = re.compile("(?P<json>{.*})", re.DOTALL)
+    dict_result = find_json(dict_pattern, text)
+    if array_result and dict_result and len(dict_result) > len(array_result):
+        results = [
+            code_snippet_result,
+            dict_result,
+            array_result,
+        ]
+    else:
+        results = [
+            code_snippet_result,
+            array_result,
+            dict_result,
+        ]
+    # Try each result in order
+    result_json = None
+    for result in results:
+        if result is None:
+            continue
+        try:
+            result_json = dirtyjson.loads(result)
+            break
+        except dirtyjson.error.Error as e:
+            if autoescape and e.msg.startswith("Expecting ',' delimiter"):
+                # Possibly due to non-escaped quotes
+                corrected_json_str = escape_quotes(result, keys)
+                if corrected_json_str:
+                    result_json = dirtyjson.loads(corrected_json_str)
+                    break
+        # Last resort: translate Python literals into their JSON spellings
+        try:
+            result = (
+                result.replace("None", "null")
+                .replace("True", "true")
+                .replace("False", "false")
+            )
+            result_json = dirtyjson.loads(result)
+            break
+        except dirtyjson.error.Error:
+            continue
+    # An empty object or array is a valid parse, not a failure
+    is_mapping = isinstance(result_json, (dict, AttributedDict))
+    is_sequence = isinstance(result_json, (list, AttributedList))
+    if result_json or is_mapping or is_sequence:
+        # Only objects have keys to reconcile with the expected ones
+        if is_mapping:
+            result_json = fuzzy_match_keys(result_json, keys)
+        return convert_attributed_container(result_json)
+    error_message = f"Failed to parse JSON from text {text}"
+    raise ValueError(error_message)
+# Brute force escape chars
+def escape_quotes(json_str: str, keys: list[str] = None):
+    """
+    Brute-force search for quotes that must be escaped to make the JSON parse
+    Every combination of quote positions, up to the escape budget, gets a
+    backslash inserted before each chosen quote. Among the candidates that
+    parse and contain all ``keys``, the one with the longest re-serialized
+    form is kept.
+    Returns:
+        The re-serialized JSON string of the best candidate, or ``None`` when
+        no candidate parses.
+    """
+    keys = keys or []
+    quote_positions = [idx for idx, char in enumerate(json_str) if char in "\"'"]
+    # Each key needs two quotes of its own, and an escaped quote needs at
+    # least two unescaped quotes around it; only the quotes beyond that are
+    # candidates for escaping
+    # TODO: Stricter conditions
+    escape_budget = len(quote_positions) - 2 * len(keys) - 2
+    best = None
+    for count in range(1, escape_budget + 1):
+        for chosen in combinations(quote_positions, count):
+            # Cut the text at every chosen quote and rejoin with backslashes
+            bounds = [0, *chosen, len(json_str)]
+            pieces = [
+                json_str[start:end]
+                for start, end in zip(bounds[:-1], bounds[1:], strict=True)
+            ]
+            escaped = "\\".join(pieces)
+            try:
+                parsed = llm_parse_json(escaped, keys, autoescape=False)
+                if all(key in parsed for key in keys):
+                    dumped = json.dumps(parsed)
+                    # Get the largest valid JSON
+                    if best is None or len(dumped) > len(best):
+                        best = dumped
+            except Exception:
+                pass
+    return best
+def fuzzy_match_keys(json_dict: dict, gold_keys: list[str] = None, min_score=80):
+    """
+    Rename the keys of ``json_dict`` that closely match an expected key
+    Args:
+        json_dict: Dict to fix up; it is modified in place.
+        gold_keys: Expected keys. When empty or ``None`` nothing is changed.
+        min_score: Minimum fuzzy score for a key to be renamed.
+    Returns:
+        The same ``json_dict`` object.
+    """
+    if not gold_keys:
+        return json_dict
+    # Iterate over a snapshot because the dict is mutated in the loop
+    for key in list(json_dict):
+        closest_key, score = process.extractOne(key, gold_keys)
+        if score == 100 or score < min_score:
+            continue
+        # Never overwrite a value already stored under the expected key
+        if closest_key in json_dict:
+            continue
+        json_dict[closest_key] = json_dict.pop(key)
+    return json_dict

stores/utils.py ADDED Viewed

@@ -0,0 +1,8 @@
+from collections import Counter
+def check_duplicates(input_list: list):
+    """
+    Raise ``ValueError`` when any item occurs more than once in ``input_list``
+    The error message lists each duplicated item once, in order of first
+    appearance.
+    """
+    duplicates = [item for item, count in Counter(input_list).items() if count > 1]
+    if duplicates:
+        raise ValueError(f"Found duplicate(s): {duplicates}")

stores-0.1.1.dist-info/METADATA ADDED Viewed

@@ -0,0 +1,85 @@
+Metadata-Version: 2.4
+Name: stores
+Version: 0.1.1
+Summary: Repository of Python functions and tools for LLMs
+License-File: LICENSE
+Requires-Python: >=3.10
+Requires-Dist: dirtyjson>=1.0.8
+Requires-Dist: dotenv>=0.9.9
+Requires-Dist: fuzzywuzzy>=0.18.0
+Requires-Dist: gitpython>=3.1.44
+Requires-Dist: makefun>=1.15.6
+Requires-Dist: python-levenshtein>=0.27.1
+Requires-Dist: requests>=2.32.3
+Requires-Dist: tomli>=1.1.0; python_version < '3.11'
+Provides-Extra: anthropic
+Requires-Dist: anthropic>=0.49.0; extra == 'anthropic'
+Provides-Extra: google
+Requires-Dist: google-genai>=1.7.0; extra == 'google'
+Provides-Extra: langchain
+Requires-Dist: langchain-google-genai>=2.1.0; extra == 'langchain'
+Provides-Extra: langgraph
+Requires-Dist: langchain-core>=0.3.45; extra == 'langgraph'
+Requires-Dist: langchain-google-genai>=2.1.0; extra == 'langgraph'
+Requires-Dist: langgraph>=0.3.16; extra == 'langgraph'
+Provides-Extra: litellm
+Requires-Dist: litellm>=1.63.11; extra == 'litellm'
+Provides-Extra: llamaindex
+Requires-Dist: llama-index-llms-google-genai>=0.1.4; extra == 'llamaindex'
+Requires-Dist: llama-index>=0.12.25; extra == 'llamaindex'
+Provides-Extra: openai
+Requires-Dist: openai>=1.66.5; extra == 'openai'
+Provides-Extra: openai-agent
+Requires-Dist: openai-agents>=0.0.7; extra == 'openai-agent'
+Description-Content-Type: text/markdown
+# stores
+Repository of Python functions and tools for LLMs
+## Why we built Stores
+Just as tool use is often cited as a key development in human civilization, we believe that tool use represents a major transition in AI development.
+**The aim of Stores is to make it super simple to build LLM Agents that use tools.**
+There are two main elements:
+1. A public repository of [tools](https://stores-tools.vercel.app) that anyone can contribute to
+2. This Python library that handles tool installation and formatting
+For more details, check out the [documentation](https://stores-tools.vercel.app/docs).
+## Design principles
+- **Open-source**: Each set of tools in the Stores collection is a public git repository. In the event the Stores database is no longer operational, the library and tools will still work as long as the git repositories exist.
+- **Isolation**: Tools are isolated in their own virtual environments. This makes it trivial to manage tools with conflicting dependencies and reduces unnecessary access to sensitive environment variables.
+- **Framework compatibility**: In order to pass information about tools, LLM providers often require different formats that can make it cumbersome to switch between providers. Stores makes it easy to output the required formats across providers.
+## Usage
+```sh
+pip install stores
+```
+Or if you are using `uv`:
+```sh
+uv add stores
+```
+Then load one of the available indexes and use it with your favorite LLM package.
+```python {6, 11}
+import anthropic
+import stores
+client = anthropic.Anthropic()
+index = stores.Index(["silanthro/hackernews"])
+response = client.messages.create(
+    model=model,
+    messages=messages,
+    tools=index.format_tools("anthropic"),
+)
+```

stores 0.0.0__py3-none-any.whl → 0.1.1__py3-none-any.whl

stores 0.0.0__py3-none-any.whl → 0.1.1__py3-none-any.whl