vision-agent 0.2.216__tar.gz → 0.2.218__tar.gz
Sign up to get free protection for your applications and to get access to all the features.
- {vision_agent-0.2.216 → vision_agent-0.2.218}/PKG-INFO +2 -1
- {vision_agent-0.2.216 → vision_agent-0.2.218}/pyproject.toml +2 -1
- {vision_agent-0.2.216 → vision_agent-0.2.218}/vision_agent/tools/__init__.py +1 -0
- {vision_agent-0.2.216 → vision_agent-0.2.218}/vision_agent/tools/tools.py +71 -0
- {vision_agent-0.2.216 → vision_agent-0.2.218}/vision_agent/utils/execute.py +40 -17
- {vision_agent-0.2.216 → vision_agent-0.2.218}/LICENSE +0 -0
- {vision_agent-0.2.216 → vision_agent-0.2.218}/README.md +0 -0
- {vision_agent-0.2.216 → vision_agent-0.2.218}/vision_agent/.sim_tools/df.csv +0 -0
- {vision_agent-0.2.216 → vision_agent-0.2.218}/vision_agent/.sim_tools/embs.npy +0 -0
- {vision_agent-0.2.216 → vision_agent-0.2.218}/vision_agent/__init__.py +0 -0
- {vision_agent-0.2.216 → vision_agent-0.2.218}/vision_agent/agent/README.md +0 -0
- {vision_agent-0.2.216 → vision_agent-0.2.218}/vision_agent/agent/__init__.py +0 -0
- {vision_agent-0.2.216 → vision_agent-0.2.218}/vision_agent/agent/agent.py +0 -0
- {vision_agent-0.2.216 → vision_agent-0.2.218}/vision_agent/agent/agent_utils.py +0 -0
- {vision_agent-0.2.216 → vision_agent-0.2.218}/vision_agent/agent/types.py +0 -0
- {vision_agent-0.2.216 → vision_agent-0.2.218}/vision_agent/agent/vision_agent.py +0 -0
- {vision_agent-0.2.216 → vision_agent-0.2.218}/vision_agent/agent/vision_agent_coder.py +0 -0
- {vision_agent-0.2.216 → vision_agent-0.2.218}/vision_agent/agent/vision_agent_coder_prompts.py +0 -0
- {vision_agent-0.2.216 → vision_agent-0.2.218}/vision_agent/agent/vision_agent_coder_prompts_v2.py +0 -0
- {vision_agent-0.2.216 → vision_agent-0.2.218}/vision_agent/agent/vision_agent_coder_v2.py +0 -0
- {vision_agent-0.2.216 → vision_agent-0.2.218}/vision_agent/agent/vision_agent_planner.py +0 -0
- {vision_agent-0.2.216 → vision_agent-0.2.218}/vision_agent/agent/vision_agent_planner_prompts.py +0 -0
- {vision_agent-0.2.216 → vision_agent-0.2.218}/vision_agent/agent/vision_agent_planner_prompts_v2.py +0 -0
- {vision_agent-0.2.216 → vision_agent-0.2.218}/vision_agent/agent/vision_agent_planner_v2.py +0 -0
- {vision_agent-0.2.216 → vision_agent-0.2.218}/vision_agent/agent/vision_agent_prompts.py +0 -0
- {vision_agent-0.2.216 → vision_agent-0.2.218}/vision_agent/agent/vision_agent_prompts_v2.py +0 -0
- {vision_agent-0.2.216 → vision_agent-0.2.218}/vision_agent/agent/vision_agent_v2.py +0 -0
- {vision_agent-0.2.216 → vision_agent-0.2.218}/vision_agent/clients/__init__.py +0 -0
- {vision_agent-0.2.216 → vision_agent-0.2.218}/vision_agent/clients/http.py +0 -0
- {vision_agent-0.2.216 → vision_agent-0.2.218}/vision_agent/clients/landing_public_api.py +0 -0
- {vision_agent-0.2.216 → vision_agent-0.2.218}/vision_agent/fonts/__init__.py +0 -0
- {vision_agent-0.2.216 → vision_agent-0.2.218}/vision_agent/fonts/default_font_ch_en.ttf +0 -0
- {vision_agent-0.2.216 → vision_agent-0.2.218}/vision_agent/lmm/__init__.py +0 -0
- {vision_agent-0.2.216 → vision_agent-0.2.218}/vision_agent/lmm/lmm.py +0 -0
- {vision_agent-0.2.216 → vision_agent-0.2.218}/vision_agent/lmm/types.py +0 -0
- {vision_agent-0.2.216 → vision_agent-0.2.218}/vision_agent/tools/meta_tools.py +0 -0
- {vision_agent-0.2.216 → vision_agent-0.2.218}/vision_agent/tools/planner_tools.py +0 -0
- {vision_agent-0.2.216 → vision_agent-0.2.218}/vision_agent/tools/prompts.py +0 -0
- {vision_agent-0.2.216 → vision_agent-0.2.218}/vision_agent/tools/tool_utils.py +0 -0
- {vision_agent-0.2.216 → vision_agent-0.2.218}/vision_agent/tools/tools_types.py +0 -0
- {vision_agent-0.2.216 → vision_agent-0.2.218}/vision_agent/utils/__init__.py +0 -0
- {vision_agent-0.2.216 → vision_agent-0.2.218}/vision_agent/utils/exceptions.py +0 -0
- {vision_agent-0.2.216 → vision_agent-0.2.218}/vision_agent/utils/image_utils.py +0 -0
- {vision_agent-0.2.216 → vision_agent-0.2.218}/vision_agent/utils/sim.py +0 -0
- {vision_agent-0.2.216 → vision_agent-0.2.218}/vision_agent/utils/type_defs.py +0 -0
- {vision_agent-0.2.216 → vision_agent-0.2.218}/vision_agent/utils/video.py +0 -0
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: vision-agent
|
3
|
-
Version: 0.2.216
|
3
|
+
Version: 0.2.218
|
4
4
|
Summary: Toolset for Vision Agent
|
5
5
|
Author: Landing AI
|
6
6
|
Author-email: dev@landing.ai
|
@@ -23,6 +23,7 @@ Requires-Dist: nbformat (>=5.10.4,<6.0.0)
|
|
23
23
|
Requires-Dist: numpy (>=1.21.0,<2.0.0)
|
24
24
|
Requires-Dist: openai (>=1.0.0,<2.0.0)
|
25
25
|
Requires-Dist: opencv-python (>=4.0.0,<5.0.0)
|
26
|
+
Requires-Dist: opentelemetry-api (>=1.29.0,<2.0.0)
|
26
27
|
Requires-Dist: pandas (>=2.0.0,<3.0.0)
|
27
28
|
Requires-Dist: pillow (>=10.0.0,<11.0.0)
|
28
29
|
Requires-Dist: pillow-heif (>=0.16.0,<0.17.0)
|
@@ -4,7 +4,7 @@ build-backend = "poetry.core.masonry.api"
|
|
4
4
|
|
5
5
|
[tool.poetry]
|
6
6
|
name = "vision-agent"
|
7
|
-
version = "0.2.216"
|
7
|
+
version = "0.2.218"
|
8
8
|
description = "Toolset for Vision Agent"
|
9
9
|
authors = ["Landing AI <dev@landing.ai>"]
|
10
10
|
readme = "README.md"
|
@@ -47,6 +47,7 @@ av = "^11.0.0"
|
|
47
47
|
libcst = "^1.5.0"
|
48
48
|
matplotlib = "^3.9.2"
|
49
49
|
scikit-learn = "^1.5.2"
|
50
|
+
opentelemetry-api = "^1.29.0"
|
50
51
|
|
51
52
|
[tool.poetry.group.dev.dependencies]
|
52
53
|
autoflake = "1.*"
|
@@ -2174,6 +2174,77 @@ def document_extraction(image: np.ndarray) -> Dict[str, Any]:
|
|
2174
2174
|
return data
|
2175
2175
|
|
2176
2176
|
|
2177
|
+
def document_qa(
|
2178
|
+
prompt: str,
|
2179
|
+
image: np.ndarray,
|
2180
|
+
) -> str:
|
2181
|
+
"""'document_qa' is a tool that can answer any questions about arbitrary
|
2182
|
+
images of documents or presentations. It answers by analyzing the contextual document data
|
2183
|
+
and then using a model to answer specific questions. It returns text as an answer to the question.
|
2184
|
+
|
2185
|
+
Parameters:
|
2186
|
+
prompt (str): The question to be answered about the document image
|
2187
|
+
image (np.ndarray): The document image to analyze
|
2188
|
+
|
2189
|
+
Returns:
|
2190
|
+
str: The answer to the question based on the document's context.
|
2191
|
+
|
2192
|
+
Example
|
2193
|
+
-------
|
2194
|
+
>>> document_qa(image, question)
|
2195
|
+
'The answer to the question ...'
|
2196
|
+
"""
|
2197
|
+
|
2198
|
+
image_file = numpy_to_bytes(image)
|
2199
|
+
|
2200
|
+
files = [("image", image_file)]
|
2201
|
+
|
2202
|
+
payload = {
|
2203
|
+
"model": "document-analysis",
|
2204
|
+
}
|
2205
|
+
|
2206
|
+
data: dict[str, Any] = send_inference_request(
|
2207
|
+
payload=payload,
|
2208
|
+
endpoint_name="document-analysis",
|
2209
|
+
files=files,
|
2210
|
+
v2=True,
|
2211
|
+
metadata_payload={"function_name": "document_qa"},
|
2212
|
+
)
|
2213
|
+
|
2214
|
+
def normalize(data: Any) -> Dict[str, Any]:
|
2215
|
+
if isinstance(data, Dict):
|
2216
|
+
if "bbox" in data:
|
2217
|
+
data["bbox"] = normalize_bbox(data["bbox"], image.shape[:2])
|
2218
|
+
for key in data:
|
2219
|
+
data[key] = normalize(data[key])
|
2220
|
+
elif isinstance(data, List):
|
2221
|
+
for i in range(len(data)):
|
2222
|
+
data[i] = normalize(data[i])
|
2223
|
+
return data # type: ignore
|
2224
|
+
|
2225
|
+
data = normalize(data)
|
2226
|
+
|
2227
|
+
prompt = f"""
|
2228
|
+
Document Context:
|
2229
|
+
{data}\n
|
2230
|
+
Question: {prompt}\n
|
2231
|
+
Please provide a clear, concise answer using only the information from the document. If the answer is not definitively contained in the document, say "I cannot find the answer in the provided document."
|
2232
|
+
"""
|
2233
|
+
|
2234
|
+
lmm = AnthropicLMM()
|
2235
|
+
llm_output = lmm.generate(prompt=prompt)
|
2236
|
+
llm_output = cast(str, llm_output)
|
2237
|
+
|
2238
|
+
_display_tool_trace(
|
2239
|
+
document_qa.__name__,
|
2240
|
+
payload,
|
2241
|
+
llm_output,
|
2242
|
+
files,
|
2243
|
+
)
|
2244
|
+
|
2245
|
+
return llm_output
|
2246
|
+
|
2247
|
+
|
2177
2248
|
# Utility and visualization functions
|
2178
2249
|
|
2179
2250
|
|
@@ -30,6 +30,8 @@ from nbclient.util import run_sync
|
|
30
30
|
from nbformat.v4 import new_code_cell
|
31
31
|
from pydantic import BaseModel, field_serializer
|
32
32
|
from typing_extensions import Self
|
33
|
+
from opentelemetry.trace import get_tracer, Status, StatusCode, SpanKind
|
34
|
+
from opentelemetry.context import get_current
|
33
35
|
|
34
36
|
from vision_agent.utils.exceptions import (
|
35
37
|
RemoteSandboxCreationError,
|
@@ -633,23 +635,44 @@ Timeout: {self.timeout}"""
|
|
633
635
|
self._new_kernel()
|
634
636
|
|
635
637
|
def exec_cell(self, code: str) -> Execution:
|
636
|
-
|
637
|
-
|
638
|
-
|
639
|
-
|
640
|
-
|
641
|
-
|
642
|
-
|
643
|
-
|
644
|
-
|
645
|
-
|
646
|
-
|
647
|
-
|
648
|
-
|
649
|
-
|
650
|
-
|
651
|
-
|
652
|
-
|
638
|
+
# track the exec_cell with opentelemetry trace
|
639
|
+
tracer = get_tracer(__name__)
|
640
|
+
context = get_current()
|
641
|
+
with tracer.start_as_current_span(
|
642
|
+
"notebook_cell_execution", kind=SpanKind.INTERNAL, context=context
|
643
|
+
) as span:
|
644
|
+
try:
|
645
|
+
# Add code as span attribute
|
646
|
+
span.set_attribute("code", code)
|
647
|
+
span.set_attribute("cell_index", len(self.nb.cells))
|
648
|
+
|
649
|
+
self.nb.cells.append(new_code_cell(code))
|
650
|
+
cell = self.nb.cells[-1]
|
651
|
+
self.nb_client.execute_cell(cell, len(self.nb.cells) - 1)
|
652
|
+
|
653
|
+
result = _parse_local_code_interpreter_outputs(
|
654
|
+
self.nb.cells[-1].outputs
|
655
|
+
)
|
656
|
+
span.set_status(Status(StatusCode.OK))
|
657
|
+
return result
|
658
|
+
except CellTimeoutError as e:
|
659
|
+
run_sync(self.nb_client.km.interrupt_kernel)() # type: ignore
|
660
|
+
sleep(1)
|
661
|
+
span.set_status(Status(StatusCode.ERROR, str(e)))
|
662
|
+
span.record_exception(e)
|
663
|
+
traceback_raw = traceback.format_exc().splitlines()
|
664
|
+
return Execution.from_exception(e, traceback_raw)
|
665
|
+
except DeadKernelError as e:
|
666
|
+
self.restart_kernel()
|
667
|
+
span.set_status(Status(StatusCode.ERROR, str(e)))
|
668
|
+
span.record_exception(e)
|
669
|
+
traceback_raw = traceback.format_exc().splitlines()
|
670
|
+
return Execution.from_exception(e, traceback_raw)
|
671
|
+
except Exception as e:
|
672
|
+
span.set_status(Status(StatusCode.ERROR, str(e)))
|
673
|
+
span.record_exception(e)
|
674
|
+
traceback_raw = traceback.format_exc().splitlines()
|
675
|
+
return Execution.from_exception(e, traceback_raw)
|
653
676
|
|
654
677
|
def upload_file(self, file_path: Union[str, Path]) -> Path:
|
655
678
|
with open(file_path, "rb") as f:
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
{vision_agent-0.2.216 → vision_agent-0.2.218}/vision_agent/agent/vision_agent_coder_prompts.py
RENAMED
File without changes
|
{vision_agent-0.2.216 → vision_agent-0.2.218}/vision_agent/agent/vision_agent_coder_prompts_v2.py
RENAMED
File without changes
|
File without changes
|
File without changes
|
{vision_agent-0.2.216 → vision_agent-0.2.218}/vision_agent/agent/vision_agent_planner_prompts.py
RENAMED
File without changes
|
{vision_agent-0.2.216 → vision_agent-0.2.218}/vision_agent/agent/vision_agent_planner_prompts_v2.py
RENAMED
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|