vision-agent 0.1.6__py3-none-any.whl → 0.2.2__py3-none-any.whl
This diff compares the contents of two publicly released versions of the package, as they appear in their respective public registries, and is provided for informational purposes only.
- vision_agent/__init__.py +0 -2
- vision_agent/agent/vision_agent.py +23 -4
- vision_agent/image_utils.py +2 -2
- {vision_agent-0.1.6.dist-info → vision_agent-0.2.2.dist-info}/METADATA +2 -5
- {vision_agent-0.1.6.dist-info → vision_agent-0.2.2.dist-info}/RECORD +7 -11
- vision_agent/data/__init__.py +0 -1
- vision_agent/data/data.py +0 -142
- vision_agent/emb/__init__.py +0 -1
- vision_agent/emb/emb.py +0 -47
- {vision_agent-0.1.6.dist-info → vision_agent-0.2.2.dist-info}/LICENSE +0 -0
- {vision_agent-0.1.6.dist-info → vision_agent-0.2.2.dist-info}/WHEEL +0 -0
vision_agent/agent/vision_agent.py
CHANGED
@@ -8,7 +8,12 @@ from typing import Any, Callable, Dict, List, Optional, Sequence, Tuple, Union
 from PIL import Image
 from tabulate import tabulate
 
-from vision_agent.image_utils import …
+from vision_agent.image_utils import (
+    convert_to_b64,
+    overlay_bboxes,
+    overlay_heat_map,
+    overlay_masks,
+)
 from vision_agent.llm import LLM, OpenAILLM
 from vision_agent.lmm import LMM, OpenAILMM
 from vision_agent.tools import TOOLS
@@ -481,6 +486,17 @@ class VisionAgent(Agent):
         if self.report_progress_callback:
             self.report_progress_callback(description)
 
+    def _report_visualization_via_callback(
+        self, images: Sequence[Union[str, Path]]
+    ) -> None:
+        """This is intended for streaming the visualization images via the callback to the client side."""
+        if self.report_progress_callback:
+            self.report_progress_callback("<VIZ>")
+            if images:
+                for img in images:
+                    self.report_progress_callback(f"<IMG>{convert_to_b64(img)}</IMG>")
+            self.report_progress_callback("</VIZ>")
+
     def chat_with_workflow(
         self,
         chat: List[Dict[str, str]],
@@ -577,9 +593,12 @@ class VisionAgent(Agent):
         )
 
         if visualize_output:
-            visualized_output = all_tool_results[-1]["visualized_output"]
-            for image in visualized_output:
-                Image.open(image).show()
+            viz_images: Sequence[Union[str, Path]] = all_tool_results[-1][
+                "visualized_output"
+            ]
+            self._report_visualization_via_callback(viz_images)
+            for img in viz_images:
+                Image.open(img).show()
 
         return final_answer, all_tool_results
 
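The new `_report_visualization_via_callback` streams each visualization image as a base64-encoded `<IMG>…</IMG>` message between `<VIZ>` and `</VIZ>` markers, so a client on the other end of the progress callback can reassemble the images. A minimal receiving sketch, assuming the callback is passed in as the agent's `report_progress_callback`; the `make_viz_collector` helper is illustrative, not part of the package:

import base64
import re
from io import BytesIO
from typing import List

from PIL import Image


def make_viz_collector(images: List[Image.Image]):
    """Build a progress callback that decodes images streamed between <VIZ> markers."""
    state = {"in_viz": False}

    def callback(message: str) -> None:
        if message == "<VIZ>":
            state["in_viz"] = True
        elif message == "</VIZ>":
            state["in_viz"] = False
        elif state["in_viz"]:
            # Each image arrives as one <IMG>...</IMG> message holding base64 bytes.
            match = re.fullmatch(r"<IMG>(.*)</IMG>", message, re.DOTALL)
            if match:
                images.append(Image.open(BytesIO(base64.b64decode(match.group(1)))))

    return callback


# collected: List[Image.Image] = []
# agent = VisionAgent(report_progress_callback=make_viz_collector(collected))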
vision_agent/image_utils.py
CHANGED
@@ -4,7 +4,7 @@ import base64
 from importlib import resources
 from io import BytesIO
 from pathlib import Path
-from typing import Dict, Tuple, Union
+from typing import Dict, List, Tuple, Union
 
 import numpy as np
 from PIL import Image, ImageDraw, ImageFont
@@ -108,7 +108,7 @@ def convert_to_b64(data: Union[str, Path, np.ndarray, ImageType]) -> str:
         data = Image.open(data)
     if isinstance(data, Image.Image):
         buffer = BytesIO()
-        data.convert("RGB").save(buffer, format="…
+        data.convert("RGB").save(buffer, format="PNG")
         return base64.b64encode(buffer.getvalue()).decode("utf-8")
     else:
         arr_bytes = data.tobytes()
{vision_agent-0.1.6.dist-info → vision_agent-0.2.2.dist-info}/METADATA
CHANGED
@@ -1,15 +1,14 @@
 Metadata-Version: 2.1
 Name: vision-agent
-Version: 0.1.6
+Version: 0.2.2
 Summary: Toolset for Vision Agent
 Author: Landing AI
 Author-email: dev@landing.ai
-Requires-Python: >=3.9…
+Requires-Python: >=3.9…
 Classifier: Programming Language :: Python :: 3
 Classifier: Programming Language :: Python :: 3.9
 Classifier: Programming Language :: Python :: 3.10
 Classifier: Programming Language :: Python :: 3.11
-Requires-Dist: faiss-cpu (>=1.0.0,<2.0.0)
 Requires-Dist: moviepy (>=1.0.0,<2.0.0)
 Requires-Dist: numpy (>=1.21.0,<2.0.0)
 Requires-Dist: openai (>=1.0.0,<2.0.0)
@@ -18,9 +17,7 @@ Requires-Dist: pandas (>=2.0.0,<3.0.0)
 Requires-Dist: pillow (>=10.0.0,<11.0.0)
 Requires-Dist: pydantic-settings (>=2.2.1,<3.0.0)
 Requires-Dist: requests (>=2.0.0,<3.0.0)
-Requires-Dist: sentence-transformers (>=2.0.0,<3.0.0)
 Requires-Dist: tabulate (>=0.9.0,<0.10.0)
-Requires-Dist: torch (>=2.1.0,<2.2.0)
 Requires-Dist: tqdm (>=4.64.0,<5.0.0)
 Requires-Dist: typing_extensions (>=4.0.0,<5.0.0)
 Project-URL: Homepage, https://landing.ai
{vision_agent-0.1.6.dist-info → vision_agent-0.2.2.dist-info}/RECORD
CHANGED
@@ -1,19 +1,15 @@
-vision_agent/__init__.py,sha256=…
+vision_agent/__init__.py,sha256=GVLHCeK_R-zgldpbcPmOzJat-BkadvkuRCMxDvTIcXs,108
 vision_agent/agent/__init__.py,sha256=B4JVrbY4IRVCJfjmrgvcp7h1mTUEk8MZvL0Zmej4Ka0,127
 vision_agent/agent/agent.py,sha256=X7kON-g9ePUKumCDaYfQNBX_MEFE-ax5PnRp7-Cc5Wo,529
 vision_agent/agent/easytool.py,sha256=oMHnBg7YBtIPgqQUNcZgq7uMgpPThs99_UnO7ERkMVg,11511
 vision_agent/agent/easytool_prompts.py,sha256=zdQQw6WpXOmvwOMtlBlNKY5a3WNlr65dbUvMIGiqdeo,4526
 vision_agent/agent/reflexion.py,sha256=4gz30BuFMeGxSsTzoDV4p91yE0R8LISXp28IaOI6wdM,10506
 vision_agent/agent/reflexion_prompts.py,sha256=G7UAeNz_g2qCb2yN6OaIC7bQVUkda4m3z42EG8wAyfE,9342
-vision_agent/agent/vision_agent.py,sha256=…
+vision_agent/agent/vision_agent.py,sha256=2VUMRVI6KAnmaUK-34wrgyfSQ2DAUm4g4QQcpqa2zao,24235
 vision_agent/agent/vision_agent_prompts.py,sha256=W3Z72FpUt71UIJSkjAcgtQqxeMqkYuATqHAN5fYY26c,7342
-vision_agent/data/__init__.py,sha256=YU-5g3LbEQ6a4drz0RLGTagXMVU2Z4Xr3RlfWE-R0jU,46
-vision_agent/data/data.py,sha256=Z2l76OrT0GgyuN52OeJqDitUcP0q1rhfdXd1of3GsVo,5128
-vision_agent/emb/__init__.py,sha256=YmCkGrJBtXb6X6Z3lnKiFoQYKXMgHMJp8JJyMLVvqcI,75
-vision_agent/emb/emb.py,sha256=la9lhEzk7jqUCjYYQ5oRgVNSnC9_EJBJIpE_B9c6PJo,1375
 vision_agent/fonts/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 vision_agent/fonts/default_font_ch_en.ttf,sha256=1YM0Z3XqLDjSNbF7ihQFSAIUdjF9m1rtHiNC_6QosTE,1594400
-vision_agent/image_utils.py,sha256=…
+vision_agent/image_utils.py,sha256=YvP5KE9NrWdgJKuHW2NR1glzfObkxtcXBknpmj3Gsbs,7554
 vision_agent/llm/__init__.py,sha256=BoUm_zSAKnLlE8s-gKTSQugXDqVZKPqYlWwlTLdhcz4,48
 vision_agent/llm/llm.py,sha256=gwDQ9-p9wEn24xi1019e5jzTGQg4xWDSqBCsqIqGcU4,5168
 vision_agent/lmm/__init__.py,sha256=nnNeKD1k7q_4vLb1x51O_EUTYaBgGfeiCx5F433gr3M,67
@@ -23,7 +19,7 @@ vision_agent/tools/prompts.py,sha256=V1z4YJLXZuUl_iZ5rY0M5hHc_2tmMEUKr0WocXKGt4E…
 vision_agent/tools/tools.py,sha256=gCjHs5vJuGNBFsnJWFT7PX3wTyfHgtrgX1Eq9vqknN0,34979
 vision_agent/tools/video.py,sha256=xTElFSFp1Jw4ulOMnk81Vxsh-9dTxcWUO6P9fzEi3AM,7653
 vision_agent/type_defs.py,sha256=4LTnTL4HNsfYqCrDn9Ppjg9bSG2ZGcoKSSd9YeQf4Bw,1792
-vision_agent-0.1.6.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
-vision_agent-0.1.6.dist-info/METADATA,sha256=…
-vision_agent-0.1.6.dist-info/WHEEL,sha256=7Z8_27uaHI_UZAc4Uox4PpBhQ9Y5_modZXWMxtUi4NU,88
-vision_agent-0.1.6.dist-info/RECORD,,
+vision_agent-0.2.2.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
+vision_agent-0.2.2.dist-info/METADATA,sha256=dOZ9KWmhuVb5wvschxYBis8x79HwgOD3MmTKqyupggg,6434
+vision_agent-0.2.2.dist-info/WHEEL,sha256=7Z8_27uaHI_UZAc4Uox4PpBhQ9Y5_modZXWMxtUi4NU,88
+vision_agent-0.2.2.dist-info/RECORD,,
vision_agent/data/__init__.py
DELETED
@@ -1 +0,0 @@
-from .data import DataStore, build_data_store
vision_agent/data/data.py
DELETED
@@ -1,142 +0,0 @@
-from __future__ import annotations
-
-import uuid
-from pathlib import Path
-from typing import Callable, Dict, List, Optional, Union, cast
-
-import faiss
-import numpy as np
-import numpy.typing as npt
-import pandas as pd
-from faiss import read_index, write_index
-from tqdm import tqdm
-from typing_extensions import Self
-
-from vision_agent.emb import Embedder
-from vision_agent.lmm import LMM
-
-tqdm.pandas()
-
-
-class DataStore:
-    r"""A class to store and manage image data along with its generated metadata from an LMM."""
-
-    def __init__(self, df: pd.DataFrame):
-        r"""Initializes the DataStore with a DataFrame containing image paths and image
-        IDs. If the image IDs are not present, they are generated using UUID4. The
-        DataFrame must contain an 'image_paths' column.
-
-        Args:
-            df: The DataFrame containing "image_paths" and "image_id" columns.
-        """
-        self.df = df
-        self.lmm: Optional[LMM] = None
-        self.emb: Optional[Embedder] = None
-        self.index: Optional[faiss.IndexFlatIP] = None  # type: ignore
-        if "image_paths" not in self.df.columns:
-            raise ValueError("image_paths column must be present in DataFrame")
-        if "image_id" not in self.df.columns:
-            self.df["image_id"] = [str(uuid.uuid4()) for _ in range(len(df))]
-
-    def add_embedder(self, emb: Embedder) -> Self:
-        self.emb = emb
-        return self
-
-    def add_lmm(self, lmm: LMM) -> Self:
-        self.lmm = lmm
-        return self
-
-    def add_column(
-        self, name: str, prompt: str, func: Optional[Callable[[str], str]] = None
-    ) -> Self:
-        r"""Adds a new column to the DataFrame containing the generated metadata from
-        the LMM.
-
-        Args:
-            name: The name of the column to be added.
-            prompt: The prompt to be used to generate the metadata.
-            func: A Python function to be applied on the output of `lmm.generate`.
-                Defaults to None.
-        """
-        if self.lmm is None:
-            raise ValueError("LMM not set yet")
-
-        self.df[name] = self.df["image_paths"].progress_apply(  # type: ignore
-            lambda x: (
-                func(self.lmm.generate(prompt, images=[x]))
-                if func
-                else self.lmm.generate(prompt, images=[x])
-            )
-        )
-        return self
-
-    def build_index(self, target_col: str) -> Self:
-        r"""This will generate embeddings for the `target_col` and build a searchable
-        index over them, so next time you run search it will search over this index.
-
-        Args:
-            target_col: The column name containing the data to be indexed."""
-        if self.emb is None:
-            raise ValueError("Embedder not set yet")
-
-        embeddings: pd.Series = self.df[target_col].progress_apply(lambda x: self.emb.embed(x))  # type: ignore
-        embeddings_np = np.array(embeddings.tolist()).astype(np.float32)
-        self.index = faiss.IndexFlatIP(embeddings_np.shape[1])
-        self.index.add(embeddings_np)
-        return self
-
-    def get_embeddings(self) -> npt.NDArray[np.float32]:
-        if self.index is None:
-            raise ValueError("Index not built yet")
-
-        ntotal = self.index.ntotal
-        d: int = self.index.d
-        return cast(
-            npt.NDArray[np.float32],
-            faiss.rev_swig_ptr(self.index.get_xb(), ntotal * d).reshape(ntotal, d),
-        )
-
-    def search(self, query: str, top_k: int = 10) -> List[Dict]:
-        r"""Searches the index for the most similar images to the query and returns
-        the top_k results.
-
-        Args:
-            query: The query to search for.
-            top_k: The number of results to return. Defaults to 10."""
-        if self.index is None:
-            raise ValueError("Index not built yet")
-        if self.emb is None:
-            raise ValueError("Embedder not set yet")
-
-        query_embedding: npt.NDArray[np.float32] = self.emb.embed(query)
-        _, idx = self.index.search(query_embedding.reshape(1, -1), top_k)
-        return cast(List[Dict], self.df.iloc[idx[0]].to_dict(orient="records"))
-
-    def save(self, path: Union[str, Path]) -> None:
-        path = Path(path)
-        path.mkdir(parents=True)
-        self.df.to_csv(path / "data.csv")
-        if self.index is not None:
-            write_index(self.index, str(path / "data.index"))
-
-    @classmethod
-    def load(cls, path: Union[str, Path]) -> DataStore:
-        path = Path(path)
-        df = pd.read_csv(path / "data.csv", index_col=0)
-        ds = DataStore(df)
-        if Path(path / "data.index").exists():
-            ds.index = read_index(str(path / "data.index"))
-        return ds
-
-
-def build_data_store(data: Union[str, Path, list[Union[str, Path]]]) -> DataStore:
-    if isinstance(data, Path) or isinstance(data, str):
-        data = Path(data)
-        data_files = list(Path(data).glob("*"))
-    elif isinstance(data, list):
-        data_files = [Path(d) for d in data]
-
-    df = pd.DataFrame()
-    df["image_paths"] = data_files
-    df["image_id"] = [uuid.uuid4() for _ in range(len(data_files))]
-    return DataStore(df)
vision_agent/emb/__init__.py
DELETED
@@ -1 +0,0 @@
-from .emb import Embedder, OpenAIEmb, SentenceTransformerEmb, get_embedder
vision_agent/emb/emb.py
DELETED
@@ -1,47 +0,0 @@
-from abc import ABC, abstractmethod
-from typing import cast
-
-import numpy as np
-import numpy.typing as npt
-
-
-class Embedder(ABC):
-    @abstractmethod
-    def embed(self, text: str) -> npt.NDArray[np.float32]:
-        pass
-
-
-class SentenceTransformerEmb(Embedder):
-    def __init__(self, model_name: str = "BAAI/bge-small-en-v1.5"):
-        from sentence_transformers import SentenceTransformer
-
-        self.model = SentenceTransformer(model_name)
-
-    def embed(self, text: str) -> npt.NDArray[np.float32]:
-        return cast(
-            npt.NDArray[np.float32],
-            self.model.encode([text]).flatten().astype(np.float32),
-        )
-
-
-class OpenAIEmb(Embedder):
-    def __init__(self, model_name: str = "text-embedding-3-small"):
-        from openai import OpenAI
-
-        self.client = OpenAI()
-        self.model_name = model_name
-
-    def embed(self, text: str) -> npt.NDArray[np.float32]:
-        response = self.client.embeddings.create(input=text, model=self.model_name)
-        return np.array(response.data[0].embedding).astype(np.float32)
-
-
-def get_embedder(name: str) -> Embedder:
-    if name == "sentence-transformer":
-        return SentenceTransformerEmb()
-    elif name == "openai":
-        return OpenAIEmb()
-    else:
-        raise ValueError(
-            f"Unknown embedder name: {name}, currently support sentence-transformer, openai."
-        )
{vision_agent-0.1.6.dist-info → vision_agent-0.2.2.dist-info}/LICENSE
File without changes

{vision_agent-0.1.6.dist-info → vision_agent-0.2.2.dist-info}/WHEEL
File without changes