vision-agent 0.0.9__tar.gz → 0.0.11__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: vision-agent
-Version: 0.0.9
+Version: 0.0.11
 Summary: Toolset for Vision Agent
 Author: Landing AI
 Author-email: dev@landing.ai
@@ -33,7 +33,7 @@ To get started you can create an LMM and start generating text from images. The
 ```python
 import vision_agent as va
 
-model = va.lmm.get_model("llava")
+model = va.lmm.get_lmm("llava")
 model.generate("Describe this image", "image.png")
 >>> "A yellow house with a green lawn."
 ```
@@ -49,7 +49,7 @@ import pandas as pd
 
 df = pd.DataFrame({"image_paths": ["image1.png", "image2.png", "image3.png"]})
 ds = va.data.DataStore(df)
-ds = ds.add_lmm(va.lmm.get_model("llava"))
+ds = ds.add_lmm(va.lmm.get_lmm("llava"))
 ds = ds.add_embedder(va.emb.get_embedder("sentence-transformer"))
 
 ds = ds.add_column("descriptions", "Describe this image.")
@@ -8,7 +8,7 @@ To get started you can create an LMM and start generating text from images. The
 ```python
 import vision_agent as va
 
-model = va.lmm.get_model("llava")
+model = va.lmm.get_lmm("llava")
 model.generate("Describe this image", "image.png")
 >>> "A yellow house with a green lawn."
 ```
@@ -24,7 +24,7 @@ import pandas as pd
 
 df = pd.DataFrame({"image_paths": ["image1.png", "image2.png", "image3.png"]})
 ds = va.data.DataStore(df)
-ds = ds.add_lmm(va.lmm.get_model("llava"))
+ds = ds.add_lmm(va.lmm.get_lmm("llava"))
 ds = ds.add_embedder(va.emb.get_embedder("sentence-transformer"))
 
 ds = ds.add_column("descriptions", "Describe this image.")
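The only user-facing API change in the README examples (which appear twice, in the package metadata's long description and in README.md) is the factory rename from `va.lmm.get_model` to `va.lmm.get_lmm`. A minimal before/after sketch of the call-site migration, reusing the `"llava"` model name from the examples above:

```python
import vision_agent as va

# 0.0.9 (old factory name, removed in 0.0.11):
#   model = va.lmm.get_model("llava")

# 0.0.11 (renamed factory):
model = va.lmm.get_lmm("llava")
model.generate("Describe this image", "image.png")
```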
@@ -4,7 +4,7 @@ build-backend = "poetry.core.masonry.api"
 
 [tool.poetry]
 name = "vision-agent"
-version = "0.0.9"
+version = "0.0.11"
 description = "Toolset for Vision Agent"
 authors = ["Landing AI <dev@landing.ai>"]
 readme = "README.md"
@@ -2,7 +2,7 @@ from __future__ import annotations
 
 import uuid
 from pathlib import Path
-from typing import Dict, List, Optional, Union, cast
+from typing import Dict, List, Optional, Union, cast, Callable
 
 import faiss
 import numpy as np
@@ -44,18 +44,25 @@ class DataStore:
         self.lmm = lmm
         return self
 
-    def add_column(self, name: str, prompt: str) -> Self:
+    def add_column(
+        self, name: str, prompt: str, func: Optional[Callable[[str], str]] = None
+    ) -> Self:
         r"""Adds a new column to the DataFrame containing the generated metadata from the LMM.
 
         Args:
             name (str): The name of the column to be added.
             prompt (str): The prompt to be used to generate the metadata.
+            func (Optional[Callable[[Any], Any]]): A Python function to be applied on the output of `lmm.generate`. Defaults to None.
         """
         if self.lmm is None:
             raise ValueError("LMM not set yet")
 
         self.df[name] = self.df["image_paths"].progress_apply(  # type: ignore
-            lambda x: self.lmm.generate(prompt, image=x)
+            lambda x: (
+                func(self.lmm.generate(prompt, image=x))
+                if func
+                else self.lmm.generate(prompt, image=x)
+            )
         )
         return self
 
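`add_column` gains an optional `func` callback that is applied to each `lmm.generate` output before the value is stored in the new DataFrame column; when `func` is None the raw string is stored, as before. A sketch of the new parameter in use; the `str.strip` post-processor is illustrative, not something shipped with the package:

```python
import pandas as pd
import vision_agent as va

df = pd.DataFrame({"image_paths": ["image1.png", "image2.png"]})
ds = va.data.DataStore(df).add_lmm(va.lmm.get_lmm("llava"))

# Old behavior (func=None): the raw generate() output is stored.
ds = ds.add_column("descriptions", "Describe this image.")

# New in 0.0.11: post-process each output before storing it
# (illustrative callback; any Callable[[str], str] works).
ds = ds.add_column("descriptions_clean", "Describe this image.", func=str.strip)
```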
@@ -1,11 +1,16 @@
 import base64
+import logging
 from abc import ABC, abstractmethod
 from pathlib import Path
 from typing import Any, Dict, List, Optional, Union, cast
 
 import requests
 
-from vision_agent.config import BASETEN_API_KEY, BASETEN_URL
+logging.basicConfig(level=logging.INFO)
+
+_LOGGER = logging.getLogger(__name__)
+
+_LLAVA_ENDPOINT = "https://cpvlqoxw6vhpdro27uhkvceady0kvvqk.lambda-url.us-east-2.on.aws"
 
 
 def encode_image(image: Union[str, Path]) -> str:
@@ -26,16 +31,27 @@ class LLaVALMM(LMM):
     def __init__(self, name: str):
         self.name = name
 
-    def generate(self, prompt: str, image: Optional[Union[str, Path]] = None) -> str:
+    def generate(
+        self,
+        prompt: str,
+        image: Optional[Union[str, Path]] = None,
+        temperature: float = 0.1,
+        max_new_tokens: int = 1500,
+    ) -> str:
         data = {"prompt": prompt}
         if image:
             data["image"] = encode_image(image)
+        data["temperature"] = temperature  # type: ignore
+        data["max_new_tokens"] = max_new_tokens  # type: ignore
         res = requests.post(
-            BASETEN_URL,
-            headers={"Authorization": f"Api-Key {BASETEN_API_KEY}"},
+            _LLAVA_ENDPOINT,
+            headers={"Content-Type": "application/json"},
             json=data,
         )
-        return res.text
+        resp_json: Dict[str, Any] = res.json()
+        if resp_json["statusCode"] != 200:
+            _LOGGER.error(f"Request failed: {resp_json['data']}")
+        return cast(str, resp_json["data"])
 
 
 class OpenAILMM(LMM):
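`LLaVALMM.generate` now forwards `temperature` and `max_new_tokens` in the request body, so sampling can be tuned per call; the defaults (0.1 and 1500) apply when the arguments are omitted. The failure mode also changes: instead of returning raw `res.text`, a non-200 `statusCode` is logged via `_LOGGER` and `resp_json["data"]` is returned either way. An illustrative call with tuned parameters:

```python
model = va.lmm.get_lmm("llava")

# Defaults: temperature=0.1, max_new_tokens=1500.
caption = model.generate("Describe this image", image="image.png")

# Tuned call (illustrative values): less randomness, shorter output.
short_caption = model.generate(
    "Describe this image in one sentence",
    image="image.png",
    temperature=0.0,
    max_new_tokens=100,
)
```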
@@ -1,2 +0,0 @@
-BASETEN_API_KEY = "PRxjuebe.VQJQ7rCvswimP5y8GeSmZA03I4zw6dgB"
-BASETEN_URL = "https://model-232pg41q.api.baseten.co/production/predict"