vision-agent 0.2.216__py3-none-any.whl → 0.2.218__py3-none-any.whl
Sign up to get free protection for your applications and to get access to all the features.
- vision_agent/tools/__init__.py +1 -0
- vision_agent/tools/tools.py +71 -0
- vision_agent/utils/execute.py +40 -17
- {vision_agent-0.2.216.dist-info → vision_agent-0.2.218.dist-info}/METADATA +2 -1
- {vision_agent-0.2.216.dist-info → vision_agent-0.2.218.dist-info}/RECORD +7 -7
- {vision_agent-0.2.216.dist-info → vision_agent-0.2.218.dist-info}/LICENSE +0 -0
- {vision_agent-0.2.216.dist-info → vision_agent-0.2.218.dist-info}/WHEEL +0 -0
vision_agent/tools/__init__.py
CHANGED
vision_agent/tools/tools.py
CHANGED
@@ -2174,6 +2174,77 @@ def document_extraction(image: np.ndarray) -> Dict[str, Any]:
|
|
2174
2174
|
return data
|
2175
2175
|
|
2176
2176
|
|
2177
|
+
def document_qa(
|
2178
|
+
prompt: str,
|
2179
|
+
image: np.ndarray,
|
2180
|
+
) -> str:
|
2181
|
+
"""'document_qa' is a tool that can answer any questions about arbitrary
|
2182
|
+
images of documents or presentations. It answers by analyzing the contextual document data
|
2183
|
+
and then using a model to answer specific questions. It returns text as an answer to the question.
|
2184
|
+
|
2185
|
+
Parameters:
|
2186
|
+
prompt (str): The question to be answered about the document image
|
2187
|
+
image (np.ndarray): The document image to analyze
|
2188
|
+
|
2189
|
+
Returns:
|
2190
|
+
str: The answer to the question based on the document's context.
|
2191
|
+
|
2192
|
+
Example
|
2193
|
+
-------
|
2194
|
+
>>> document_qa(question, image)
|
2195
|
+
'The answer to the question ...'
|
2196
|
+
"""
|
2197
|
+
|
2198
|
+
image_file = numpy_to_bytes(image)
|
2199
|
+
|
2200
|
+
files = [("image", image_file)]
|
2201
|
+
|
2202
|
+
payload = {
|
2203
|
+
"model": "document-analysis",
|
2204
|
+
}
|
2205
|
+
|
2206
|
+
data: dict[str, Any] = send_inference_request(
|
2207
|
+
payload=payload,
|
2208
|
+
endpoint_name="document-analysis",
|
2209
|
+
files=files,
|
2210
|
+
v2=True,
|
2211
|
+
metadata_payload={"function_name": "document_qa"},
|
2212
|
+
)
|
2213
|
+
|
2214
|
+
def normalize(data: Any) -> Dict[str, Any]:
|
2215
|
+
if isinstance(data, Dict):
|
2216
|
+
if "bbox" in data:
|
2217
|
+
data["bbox"] = normalize_bbox(data["bbox"], image.shape[:2])
|
2218
|
+
for key in data:
|
2219
|
+
data[key] = normalize(data[key])
|
2220
|
+
elif isinstance(data, List):
|
2221
|
+
for i in range(len(data)):
|
2222
|
+
data[i] = normalize(data[i])
|
2223
|
+
return data # type: ignore
|
2224
|
+
|
2225
|
+
data = normalize(data)
|
2226
|
+
|
2227
|
+
prompt = f"""
|
2228
|
+
Document Context:
|
2229
|
+
{data}\n
|
2230
|
+
Question: {prompt}\n
|
2231
|
+
Please provide a clear, concise answer using only the information from the document. If the answer is not definitively contained in the document, say "I cannot find the answer in the provided document."
|
2232
|
+
"""
|
2233
|
+
|
2234
|
+
lmm = AnthropicLMM()
|
2235
|
+
llm_output = lmm.generate(prompt=prompt)
|
2236
|
+
llm_output = cast(str, llm_output)
|
2237
|
+
|
2238
|
+
_display_tool_trace(
|
2239
|
+
document_qa.__name__,
|
2240
|
+
payload,
|
2241
|
+
llm_output,
|
2242
|
+
files,
|
2243
|
+
)
|
2244
|
+
|
2245
|
+
return llm_output
|
2246
|
+
|
2247
|
+
|
2177
2248
|
# Utility and visualization functions
|
2178
2249
|
|
2179
2250
|
|
vision_agent/utils/execute.py
CHANGED
@@ -30,6 +30,8 @@ from nbclient.util import run_sync
|
|
30
30
|
from nbformat.v4 import new_code_cell
|
31
31
|
from pydantic import BaseModel, field_serializer
|
32
32
|
from typing_extensions import Self
|
33
|
+
from opentelemetry.trace import get_tracer, Status, StatusCode, SpanKind
|
34
|
+
from opentelemetry.context import get_current
|
33
35
|
|
34
36
|
from vision_agent.utils.exceptions import (
|
35
37
|
RemoteSandboxCreationError,
|
@@ -633,23 +635,44 @@ Timeout: {self.timeout}"""
|
|
633
635
|
self._new_kernel()
|
634
636
|
|
635
637
|
def exec_cell(self, code: str) -> Execution:
|
636
|
-
|
637
|
-
|
638
|
-
|
639
|
-
|
640
|
-
|
641
|
-
|
642
|
-
|
643
|
-
|
644
|
-
|
645
|
-
|
646
|
-
|
647
|
-
|
648
|
-
|
649
|
-
|
650
|
-
|
651
|
-
|
652
|
-
|
638
|
+
# track the exec_cell with opentelemetry trace
|
639
|
+
tracer = get_tracer(__name__)
|
640
|
+
context = get_current()
|
641
|
+
with tracer.start_as_current_span(
|
642
|
+
"notebook_cell_execution", kind=SpanKind.INTERNAL, context=context
|
643
|
+
) as span:
|
644
|
+
try:
|
645
|
+
# Add code as span attribute
|
646
|
+
span.set_attribute("code", code)
|
647
|
+
span.set_attribute("cell_index", len(self.nb.cells))
|
648
|
+
|
649
|
+
self.nb.cells.append(new_code_cell(code))
|
650
|
+
cell = self.nb.cells[-1]
|
651
|
+
self.nb_client.execute_cell(cell, len(self.nb.cells) - 1)
|
652
|
+
|
653
|
+
result = _parse_local_code_interpreter_outputs(
|
654
|
+
self.nb.cells[-1].outputs
|
655
|
+
)
|
656
|
+
span.set_status(Status(StatusCode.OK))
|
657
|
+
return result
|
658
|
+
except CellTimeoutError as e:
|
659
|
+
run_sync(self.nb_client.km.interrupt_kernel)() # type: ignore
|
660
|
+
sleep(1)
|
661
|
+
span.set_status(Status(StatusCode.ERROR, str(e)))
|
662
|
+
span.record_exception(e)
|
663
|
+
traceback_raw = traceback.format_exc().splitlines()
|
664
|
+
return Execution.from_exception(e, traceback_raw)
|
665
|
+
except DeadKernelError as e:
|
666
|
+
self.restart_kernel()
|
667
|
+
span.set_status(Status(StatusCode.ERROR, str(e)))
|
668
|
+
span.record_exception(e)
|
669
|
+
traceback_raw = traceback.format_exc().splitlines()
|
670
|
+
return Execution.from_exception(e, traceback_raw)
|
671
|
+
except Exception as e:
|
672
|
+
span.set_status(Status(StatusCode.ERROR, str(e)))
|
673
|
+
span.record_exception(e)
|
674
|
+
traceback_raw = traceback.format_exc().splitlines()
|
675
|
+
return Execution.from_exception(e, traceback_raw)
|
653
676
|
|
654
677
|
def upload_file(self, file_path: Union[str, Path]) -> Path:
|
655
678
|
with open(file_path, "rb") as f:
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: vision-agent
|
3
|
-
Version: 0.2.216
|
3
|
+
Version: 0.2.218
|
4
4
|
Summary: Toolset for Vision Agent
|
5
5
|
Author: Landing AI
|
6
6
|
Author-email: dev@landing.ai
|
@@ -23,6 +23,7 @@ Requires-Dist: nbformat (>=5.10.4,<6.0.0)
|
|
23
23
|
Requires-Dist: numpy (>=1.21.0,<2.0.0)
|
24
24
|
Requires-Dist: openai (>=1.0.0,<2.0.0)
|
25
25
|
Requires-Dist: opencv-python (>=4.0.0,<5.0.0)
|
26
|
+
Requires-Dist: opentelemetry-api (>=1.29.0,<2.0.0)
|
26
27
|
Requires-Dist: pandas (>=2.0.0,<3.0.0)
|
27
28
|
Requires-Dist: pillow (>=10.0.0,<11.0.0)
|
28
29
|
Requires-Dist: pillow-heif (>=0.16.0,<0.17.0)
|
@@ -26,21 +26,21 @@ vision_agent/fonts/default_font_ch_en.ttf,sha256=1YM0Z3XqLDjSNbF7ihQFSAIUdjF9m1r
|
|
26
26
|
vision_agent/lmm/__init__.py,sha256=jyY1sJb_tYKg5-Wzs3p1lvwFkc-aUNZfMcLy3TOC4Zg,100
|
27
27
|
vision_agent/lmm/lmm.py,sha256=x_nIyDNDZwq4-pfjnJTmcyyJZ2_B7TjkA5jZp88YVO8,17103
|
28
28
|
vision_agent/lmm/types.py,sha256=ZEXR_ptBL0ZwDMTDYkgxUCmSZFmBYPQd2jreNzr_8UY,221
|
29
|
-
vision_agent/tools/__init__.py,sha256=
|
29
|
+
vision_agent/tools/__init__.py,sha256=Jdq34jMw_KuYwk4Wexqm4DRjuLLoL1Q8wukm0NBv1Tc,2812
|
30
30
|
vision_agent/tools/meta_tools.py,sha256=TPeS7QWnc_PmmU_ndiDT03dXbQ5yDSP33E7U8cSj7Ls,28660
|
31
31
|
vision_agent/tools/planner_tools.py,sha256=tU1qz_VIQM_yPKDmuxjMWu68ZlAZ7ePWI1g7zswyWhI,13540
|
32
32
|
vision_agent/tools/prompts.py,sha256=V1z4YJLXZuUl_iZ5rY0M5hHc_2tmMEUKr0WocXKGt4E,1430
|
33
33
|
vision_agent/tools/tool_utils.py,sha256=LAnrb_nY6PNVamqJahRN-J0cuOy4gsKvCtSuXJf0RsI,10075
|
34
|
-
vision_agent/tools/tools.py,sha256=
|
34
|
+
vision_agent/tools/tools.py,sha256=Xcm_9EQdDCR9X5FhIm7VJaTL0qWqhnJUVTRVrRtETrA,96112
|
35
35
|
vision_agent/tools/tools_types.py,sha256=8hYf2OZhI58gvf65KGaeGkt4EQ56nwLFqIQDPHioOBc,2339
|
36
36
|
vision_agent/utils/__init__.py,sha256=7fMgbZiEwbNS0fBOS_hJI5PuEYBblw36zLi_UjUzvj4,244
|
37
37
|
vision_agent/utils/exceptions.py,sha256=booSPSuoULF7OXRr_YbC4dtKt6gM_HyiFQHBuaW86C4,2052
|
38
|
-
vision_agent/utils/execute.py,sha256=
|
38
|
+
vision_agent/utils/execute.py,sha256=Qs-C9lnRBc3frUH_bmrwHLuJ9qjPykIytex8y4E0f7s,29356
|
39
39
|
vision_agent/utils/image_utils.py,sha256=5uoYgXa6E0-lVrXR7K2XE7fe6r_n7pvK64HYQ50vG3w,12182
|
40
40
|
vision_agent/utils/sim.py,sha256=f1emBQM8SmyVKSrhj0NHItnfMHSeTw-Nk2pw-0eBZ5c,7462
|
41
41
|
vision_agent/utils/type_defs.py,sha256=BE12s3JNQy36QvauXHjwyeffVh5enfcvd4vTzSwvEZI,1384
|
42
42
|
vision_agent/utils/video.py,sha256=e1VwKhXzzlC5LcFMyrcQYrPnpnX4wxDpnQ-76sB4jgM,6001
|
43
|
-
vision_agent-0.2.216.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
|
44
|
-
vision_agent-0.2.216.dist-info/METADATA,sha256=
|
45
|
-
vision_agent-0.2.216.dist-info/WHEEL,sha256=7Z8_27uaHI_UZAc4Uox4PpBhQ9Y5_modZXWMxtUi4NU,88
|
46
|
-
vision_agent-0.2.216.dist-info/RECORD,,
|
43
|
+
vision_agent-0.2.218.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
|
44
|
+
vision_agent-0.2.218.dist-info/METADATA,sha256=Bh9yQRcNSytsUOIqztuXkUhSprPu-le7ncfb7owkc24,19122
|
45
|
+
vision_agent-0.2.218.dist-info/WHEEL,sha256=7Z8_27uaHI_UZAc4Uox4PpBhQ9Y5_modZXWMxtUi4NU,88
|
46
|
+
vision_agent-0.2.218.dist-info/RECORD,,
|
File without changes
|
File without changes
|