vision-agent 0.0.9__tar.gz → 0.0.11__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {vision_agent-0.0.9 → vision_agent-0.0.11}/PKG-INFO +3 -3
- {vision_agent-0.0.9 → vision_agent-0.0.11}/README.md +2 -2
- {vision_agent-0.0.9 → vision_agent-0.0.11}/pyproject.toml +1 -1
- {vision_agent-0.0.9 → vision_agent-0.0.11}/vision_agent/data/data.py +10 -3
- {vision_agent-0.0.9 → vision_agent-0.0.11}/vision_agent/lmm/lmm.py +21 -5
- vision_agent-0.0.9/vision_agent/config.py +0 -2
- {vision_agent-0.0.9 → vision_agent-0.0.11}/LICENSE +0 -0
- {vision_agent-0.0.9 → vision_agent-0.0.11}/vision_agent/__init__.py +0 -0
- {vision_agent-0.0.9 → vision_agent-0.0.11}/vision_agent/data/__init__.py +0 -0
- {vision_agent-0.0.9 → vision_agent-0.0.11}/vision_agent/emb/__init__.py +0 -0
- {vision_agent-0.0.9 → vision_agent-0.0.11}/vision_agent/emb/emb.py +0 -0
- {vision_agent-0.0.9 → vision_agent-0.0.11}/vision_agent/lmm/__init__.py +0 -0
{vision_agent-0.0.9 → vision_agent-0.0.11}/PKG-INFO

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: vision-agent
-Version: 0.0.9
+Version: 0.0.11
 Summary: Toolset for Vision Agent
 Author: Landing AI
 Author-email: dev@landing.ai
@@ -33,7 +33,7 @@ To get started you can create an LMM and start generating text from images. The
 ```python
 import vision_agent as va

-model = va.lmm.
+model = va.lmm.get_lmm("llava")
 model.generate("Describe this image", "image.png")
 >>> "A yellow house with a green lawn."
 ```
@@ -49,7 +49,7 @@ import pandas as pd

 df = pd.DataFrame({"image_paths": ["image1.png", "image2.png", "image3.png"]})
 ds = va.data.DataStore(df)
-ds = ds.add_lmm(va.lmm.
+ds = ds.add_lmm(va.lmm.get_lmm("llava"))
 ds = ds.add_embedder(va.emb.get_embedder("sentence-transformer"))

 ds = ds.add_column("descriptions", "Describe this image.")
{vision_agent-0.0.9 → vision_agent-0.0.11}/README.md

@@ -8,7 +8,7 @@ To get started you can create an LMM and start generating text from images. The
 ```python
 import vision_agent as va

-model = va.lmm.
+model = va.lmm.get_lmm("llava")
 model.generate("Describe this image", "image.png")
 >>> "A yellow house with a green lawn."
 ```
@@ -24,7 +24,7 @@ import pandas as pd

 df = pd.DataFrame({"image_paths": ["image1.png", "image2.png", "image3.png"]})
 ds = va.data.DataStore(df)
-ds = ds.add_lmm(va.lmm.
+ds = ds.add_lmm(va.lmm.get_lmm("llava"))
 ds = ds.add_embedder(va.emb.get_embedder("sentence-transformer"))

 ds = ds.add_column("descriptions", "Describe this image.")
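Both README snippets switch from constructing a model directly to a `va.lmm.get_lmm("llava")` factory call. The factory's implementation is not part of this diff; the sketch below is only a hypothetical illustration of how such a factory could dispatch to the `LLaVALMM` and `OpenAILMM` classes that appear in the lmm.py hunks further down, not the package's actual code:

```python
# Hypothetical sketch of what va.lmm.get_lmm(name) might do; the real
# vision_agent.lmm implementation may differ.
def get_lmm(name: str) -> "LMM":
    if name == "llava":
        return LLaVALMM(name)   # class defined in vision_agent/lmm/lmm.py
    elif name == "openai":
        return OpenAILMM(name)  # class defined in vision_agent/lmm/lmm.py
    raise ValueError(f"Unknown LMM: {name}")
```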
{vision_agent-0.0.9 → vision_agent-0.0.11}/vision_agent/data/data.py

@@ -2,7 +2,7 @@ from __future__ import annotations

 import uuid
 from pathlib import Path
-from typing import Dict, List, Optional, Union, cast
+from typing import Dict, List, Optional, Union, cast, Callable

 import faiss
 import numpy as np
@@ -44,18 +44,25 @@ class DataStore:
         self.lmm = lmm
         return self

-    def add_column(
+    def add_column(
+        self, name: str, prompt: str, func: Optional[Callable[[str], str]] = None
+    ) -> Self:
         r"""Adds a new column to the DataFrame containing the generated metadata from the LMM.

         Args:
             name (str): The name of the column to be added.
             prompt (str): The prompt to be used to generate the metadata.
+            func (Optional[Callable[[Any], Any]]): A Python function to be applied on the output of `lmm.generate`. Defaults to None.
         """
         if self.lmm is None:
             raise ValueError("LMM not set yet")

         self.df[name] = self.df["image_paths"].progress_apply( # type: ignore
-            lambda x:
+            lambda x: (
+                func(self.lmm.generate(prompt, image=x))
+                if func
+                else self.lmm.generate(prompt, image=x)
+            )
         )
         return self

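The new optional `func` argument lets callers post-process each `lmm.generate` output before it is stored in the column. A short usage sketch based on the README example follows; the lambda is illustrative only, not taken from the package:

```python
import pandas as pd
import vision_agent as va

df = pd.DataFrame({"image_paths": ["image1.png", "image2.png", "image3.png"]})
ds = va.data.DataStore(df)
ds = ds.add_lmm(va.lmm.get_lmm("llava"))

# Post-process the raw LMM text (e.g. strip whitespace and lowercase it)
# before it is written into the "descriptions" column.
ds = ds.add_column("descriptions", "Describe this image.", func=lambda s: s.strip().lower())
```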
{vision_agent-0.0.9 → vision_agent-0.0.11}/vision_agent/lmm/lmm.py

@@ -1,11 +1,16 @@
 import base64
+import logging
 from abc import ABC, abstractmethod
 from pathlib import Path
 from typing import Any, Dict, List, Optional, Union, cast

 import requests

-
+logging.basicConfig(level=logging.INFO)
+
+_LOGGER = logging.getLogger(__name__)
+
+_LLAVA_ENDPOINT = "https://cpvlqoxw6vhpdro27uhkvceady0kvvqk.lambda-url.us-east-2.on.aws"


 def encode_image(image: Union[str, Path]) -> str:
@@ -26,16 +31,27 @@ class LLaVALMM(LMM):
     def __init__(self, name: str):
         self.name = name

-    def generate(
+    def generate(
+        self,
+        prompt: str,
+        image: Optional[Union[str, Path]] = None,
+        temperature: float = 0.1,
+        max_new_tokens: int = 1500,
+    ) -> str:
         data = {"prompt": prompt}
         if image:
             data["image"] = encode_image(image)
+        data["temperature"] = temperature # type: ignore
+        data["max_new_tokens"] = max_new_tokens # type: ignore
         res = requests.post(
-
-            headers={"
+            _LLAVA_ENDPOINT,
+            headers={"Content-Type": "application/json"},
             json=data,
         )
-
+        resp_json: Dict[str, Any] = res.json()
+        if resp_json["statusCode"] != 200:
+            _LOGGER.error(f"Request failed: {resp_json['data']}")
+        return cast(str, resp_json["data"])


 class OpenAILMM(LMM):
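With the new keyword arguments, callers can now control the sampling temperature and the maximum output length per request instead of relying on server defaults. A short sketch of how the updated `generate` call could be used (the specific values are illustrative):

```python
import vision_agent as va

model = va.lmm.get_lmm("llava")

# temperature and max_new_tokens default to 0.1 and 1500; override per call as needed.
text = model.generate(
    "Describe this image",
    image="image.png",
    temperature=0.2,
    max_new_tokens=256,
)
print(text)
```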