vision-agent 0.0.9__tar.gz → 0.0.11__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {vision_agent-0.0.9 → vision_agent-0.0.11}/PKG-INFO +3 -3
- {vision_agent-0.0.9 → vision_agent-0.0.11}/README.md +2 -2
- {vision_agent-0.0.9 → vision_agent-0.0.11}/pyproject.toml +1 -1
- {vision_agent-0.0.9 → vision_agent-0.0.11}/vision_agent/data/data.py +10 -3
- {vision_agent-0.0.9 → vision_agent-0.0.11}/vision_agent/lmm/lmm.py +21 -5
- vision_agent-0.0.9/vision_agent/config.py +0 -2
- {vision_agent-0.0.9 → vision_agent-0.0.11}/LICENSE +0 -0
- {vision_agent-0.0.9 → vision_agent-0.0.11}/vision_agent/__init__.py +0 -0
- {vision_agent-0.0.9 → vision_agent-0.0.11}/vision_agent/data/__init__.py +0 -0
- {vision_agent-0.0.9 → vision_agent-0.0.11}/vision_agent/emb/__init__.py +0 -0
- {vision_agent-0.0.9 → vision_agent-0.0.11}/vision_agent/emb/emb.py +0 -0
- {vision_agent-0.0.9 → vision_agent-0.0.11}/vision_agent/lmm/__init__.py +0 -0
{vision_agent-0.0.9 → vision_agent-0.0.11}/PKG-INFO

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: vision-agent
-Version: 0.0.9
+Version: 0.0.11
 Summary: Toolset for Vision Agent
 Author: Landing AI
 Author-email: dev@landing.ai
@@ -33,7 +33,7 @@ To get started you can create an LMM and start generating text from images. The
 ```python
 import vision_agent as va

-model = va.lmm.
+model = va.lmm.get_lmm("llava")
 model.generate("Describe this image", "image.png")
 >>> "A yellow house with a green lawn."
 ```
@@ -49,7 +49,7 @@ import pandas as pd

 df = pd.DataFrame({"image_paths": ["image1.png", "image2.png", "image3.png"]})
 ds = va.data.DataStore(df)
-ds = ds.add_lmm(va.lmm.
+ds = ds.add_lmm(va.lmm.get_lmm("llava"))
 ds = ds.add_embedder(va.emb.get_embedder("sentence-transformer"))

 ds = ds.add_column("descriptions", "Describe this image.")
{vision_agent-0.0.9 → vision_agent-0.0.11}/README.md

@@ -8,7 +8,7 @@ To get started you can create an LMM and start generating text from images. The
 ```python
 import vision_agent as va

-model = va.lmm.
+model = va.lmm.get_lmm("llava")
 model.generate("Describe this image", "image.png")
 >>> "A yellow house with a green lawn."
 ```
@@ -24,7 +24,7 @@ import pandas as pd

 df = pd.DataFrame({"image_paths": ["image1.png", "image2.png", "image3.png"]})
 ds = va.data.DataStore(df)
-ds = ds.add_lmm(va.lmm.
+ds = ds.add_lmm(va.lmm.get_lmm("llava"))
 ds = ds.add_embedder(va.emb.get_embedder("sentence-transformer"))

 ds = ds.add_column("descriptions", "Describe this image.")
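Both README snippets switch from constructing a model directly to a `va.lmm.get_lmm("llava")` factory call. The factory's implementation is not part of this diff; the sketch below is only a hypothetical illustration of how such a factory could dispatch to the `LLaVALMM` and `OpenAILMM` classes that appear in the lmm.py hunks further down, not the package's actual code:

```python
# Hypothetical sketch of what va.lmm.get_lmm(name) might do; the real
# vision_agent.lmm implementation may differ.
def get_lmm(name: str) -> "LMM":
    if name == "llava":
        return LLaVALMM(name)   # class defined in vision_agent/lmm/lmm.py
    elif name == "openai":
        return OpenAILMM(name)  # class defined in vision_agent/lmm/lmm.py
    raise ValueError(f"Unknown LMM: {name}")
```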
{vision_agent-0.0.9 → vision_agent-0.0.11}/vision_agent/data/data.py

@@ -2,7 +2,7 @@ from __future__ import annotations

 import uuid
 from pathlib import Path
-from typing import Dict, List, Optional, Union, cast
+from typing import Dict, List, Optional, Union, cast, Callable

 import faiss
 import numpy as np
@@ -44,18 +44,25 @@ class DataStore:
         self.lmm = lmm
         return self

-    def add_column(
+    def add_column(
+        self, name: str, prompt: str, func: Optional[Callable[[str], str]] = None
+    ) -> Self:
         r"""Adds a new column to the DataFrame containing the generated metadata from the LMM.

         Args:
             name (str): The name of the column to be added.
             prompt (str): The prompt to be used to generate the metadata.
+            func (Optional[Callable[[Any], Any]]): A Python function to be applied on the output of `lmm.generate`. Defaults to None.
         """
         if self.lmm is None:
             raise ValueError("LMM not set yet")

         self.df[name] = self.df["image_paths"].progress_apply( # type: ignore
-            lambda x:
+            lambda x: (
+                func(self.lmm.generate(prompt, image=x))
+                if func
+                else self.lmm.generate(prompt, image=x)
+            )
         )
         return self

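The new optional `func` argument lets callers post-process each `lmm.generate` output before it is stored in the column. A short usage sketch based on the README example follows; the lambda is illustrative only, not taken from the package:

```python
import pandas as pd
import vision_agent as va

df = pd.DataFrame({"image_paths": ["image1.png", "image2.png", "image3.png"]})
ds = va.data.DataStore(df)
ds = ds.add_lmm(va.lmm.get_lmm("llava"))

# Post-process the raw LMM text (e.g. strip whitespace and lowercase it)
# before it is written into the "descriptions" column.
ds = ds.add_column("descriptions", "Describe this image.", func=lambda s: s.strip().lower())
```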
{vision_agent-0.0.9 → vision_agent-0.0.11}/vision_agent/lmm/lmm.py

@@ -1,11 +1,16 @@
 import base64
+import logging
 from abc import ABC, abstractmethod
 from pathlib import Path
 from typing import Any, Dict, List, Optional, Union, cast

 import requests

-
+logging.basicConfig(level=logging.INFO)
+
+_LOGGER = logging.getLogger(__name__)
+
+_LLAVA_ENDPOINT = "https://cpvlqoxw6vhpdro27uhkvceady0kvvqk.lambda-url.us-east-2.on.aws"


 def encode_image(image: Union[str, Path]) -> str:
@@ -26,16 +31,27 @@ class LLaVALMM(LMM):
     def __init__(self, name: str):
         self.name = name

-    def generate(
+    def generate(
+        self,
+        prompt: str,
+        image: Optional[Union[str, Path]] = None,
+        temperature: float = 0.1,
+        max_new_tokens: int = 1500,
+    ) -> str:
         data = {"prompt": prompt}
         if image:
             data["image"] = encode_image(image)
+        data["temperature"] = temperature # type: ignore
+        data["max_new_tokens"] = max_new_tokens # type: ignore
         res = requests.post(
-
-            headers={"
+            _LLAVA_ENDPOINT,
+            headers={"Content-Type": "application/json"},
             json=data,
         )
-
+        resp_json: Dict[str, Any] = res.json()
+        if resp_json["statusCode"] != 200:
+            _LOGGER.error(f"Request failed: {resp_json['data']}")
+        return cast(str, resp_json["data"])


 class OpenAILMM(LMM):
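With the new keyword arguments, callers can now control the sampling temperature and the maximum output length per request instead of relying on server defaults. A short sketch of how the updated `generate` call could be used (the specific values are illustrative):

```python
import vision_agent as va

model = va.lmm.get_lmm("llava")

# temperature and max_new_tokens default to 0.1 and 1500; override per call as needed.
text = model.generate(
    "Describe this image",
    image="image.png",
    temperature=0.2,
    max_new_tokens=256,
)
print(text)
```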