vision-agent 0.2.216__py3-none-any.whl → 0.2.218__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- vision_agent/tools/__init__.py +1 -0
- vision_agent/tools/tools.py +71 -0
- vision_agent/utils/execute.py +40 -17
- {vision_agent-0.2.216.dist-info → vision_agent-0.2.218.dist-info}/METADATA +2 -1
- {vision_agent-0.2.216.dist-info → vision_agent-0.2.218.dist-info}/RECORD +7 -7
- {vision_agent-0.2.216.dist-info → vision_agent-0.2.218.dist-info}/LICENSE +0 -0
- {vision_agent-0.2.216.dist-info → vision_agent-0.2.218.dist-info}/WHEEL +0 -0
vision_agent/tools/__init__.py
CHANGED
vision_agent/tools/tools.py
CHANGED
@@ -2174,6 +2174,77 @@ def document_extraction(image: np.ndarray) -> Dict[str, Any]:
|
|
2174
2174
|
return data
|
2175
2175
|
|
2176
2176
|
|
2177
|
+
def document_qa(
|
2178
|
+
prompt: str,
|
2179
|
+
image: np.ndarray,
|
2180
|
+
) -> str:
|
2181
|
+
"""'document_qa' is a tool that can answer any questions about arbitrary
|
2182
|
+
images of documents or presentations. It answers by analyzing the contextual document data
|
2183
|
+
and then using a model to answer specific questions. It returns text as an answer to the question.
|
2184
|
+
|
2185
|
+
Parameters:
|
2186
|
+
prompt (str): The question to be answered about the document image
|
2187
|
+
image (np.ndarray): The document image to analyze
|
2188
|
+
|
2189
|
+
Returns:
|
2190
|
+
str: The answer to the question based on the document's context.
|
2191
|
+
|
2192
|
+
Example
|
2193
|
+
-------
|
2194
|
+
>>> document_qa(image, question)
|
2195
|
+
'The answer to the question ...'
|
2196
|
+
"""
|
2197
|
+
|
2198
|
+
image_file = numpy_to_bytes(image)
|
2199
|
+
|
2200
|
+
files = [("image", image_file)]
|
2201
|
+
|
2202
|
+
payload = {
|
2203
|
+
"model": "document-analysis",
|
2204
|
+
}
|
2205
|
+
|
2206
|
+
data: dict[str, Any] = send_inference_request(
|
2207
|
+
payload=payload,
|
2208
|
+
endpoint_name="document-analysis",
|
2209
|
+
files=files,
|
2210
|
+
v2=True,
|
2211
|
+
metadata_payload={"function_name": "document_qa"},
|
2212
|
+
)
|
2213
|
+
|
2214
|
+
def normalize(data: Any) -> Dict[str, Any]:
|
2215
|
+
if isinstance(data, Dict):
|
2216
|
+
if "bbox" in data:
|
2217
|
+
data["bbox"] = normalize_bbox(data["bbox"], image.shape[:2])
|
2218
|
+
for key in data:
|
2219
|
+
data[key] = normalize(data[key])
|
2220
|
+
elif isinstance(data, List):
|
2221
|
+
for i in range(len(data)):
|
2222
|
+
data[i] = normalize(data[i])
|
2223
|
+
return data # type: ignore
|
2224
|
+
|
2225
|
+
data = normalize(data)
|
2226
|
+
|
2227
|
+
prompt = f"""
|
2228
|
+
Document Context:
|
2229
|
+
{data}\n
|
2230
|
+
Question: {prompt}\n
|
2231
|
+
Please provide a clear, concise answer using only the information from the document. If the answer is not definitively contained in the document, say "I cannot find the answer in the provided document."
|
2232
|
+
"""
|
2233
|
+
|
2234
|
+
lmm = AnthropicLMM()
|
2235
|
+
llm_output = lmm.generate(prompt=prompt)
|
2236
|
+
llm_output = cast(str, llm_output)
|
2237
|
+
|
2238
|
+
_display_tool_trace(
|
2239
|
+
document_qa.__name__,
|
2240
|
+
payload,
|
2241
|
+
llm_output,
|
2242
|
+
files,
|
2243
|
+
)
|
2244
|
+
|
2245
|
+
return llm_output
|
2246
|
+
|
2247
|
+
|
2177
2248
|
# Utility and visualization functions
|
2178
2249
|
|
2179
2250
|
|
vision_agent/utils/execute.py
CHANGED
@@ -30,6 +30,8 @@ from nbclient.util import run_sync
|
|
30
30
|
from nbformat.v4 import new_code_cell
|
31
31
|
from pydantic import BaseModel, field_serializer
|
32
32
|
from typing_extensions import Self
|
33
|
+
from opentelemetry.trace import get_tracer, Status, StatusCode, SpanKind
|
34
|
+
from opentelemetry.context import get_current
|
33
35
|
|
34
36
|
from vision_agent.utils.exceptions import (
|
35
37
|
RemoteSandboxCreationError,
|
@@ -633,23 +635,44 @@ Timeout: {self.timeout}"""
|
|
633
635
|
self._new_kernel()
|
634
636
|
|
635
637
|
def exec_cell(self, code: str) -> Execution:
|
636
|
-
|
637
|
-
|
638
|
-
|
639
|
-
|
640
|
-
|
641
|
-
|
642
|
-
|
643
|
-
|
644
|
-
|
645
|
-
|
646
|
-
|
647
|
-
|
648
|
-
|
649
|
-
|
650
|
-
|
651
|
-
|
652
|
-
|
638
|
+
# track the exec_cell with opentelemetry trace
|
639
|
+
tracer = get_tracer(__name__)
|
640
|
+
context = get_current()
|
641
|
+
with tracer.start_as_current_span(
|
642
|
+
"notebook_cell_execution", kind=SpanKind.INTERNAL, context=context
|
643
|
+
) as span:
|
644
|
+
try:
|
645
|
+
# Add code as span attribute
|
646
|
+
span.set_attribute("code", code)
|
647
|
+
span.set_attribute("cell_index", len(self.nb.cells))
|
648
|
+
|
649
|
+
self.nb.cells.append(new_code_cell(code))
|
650
|
+
cell = self.nb.cells[-1]
|
651
|
+
self.nb_client.execute_cell(cell, len(self.nb.cells) - 1)
|
652
|
+
|
653
|
+
result = _parse_local_code_interpreter_outputs(
|
654
|
+
self.nb.cells[-1].outputs
|
655
|
+
)
|
656
|
+
span.set_status(Status(StatusCode.OK))
|
657
|
+
return result
|
658
|
+
except CellTimeoutError as e:
|
659
|
+
run_sync(self.nb_client.km.interrupt_kernel)() # type: ignore
|
660
|
+
sleep(1)
|
661
|
+
span.set_status(Status(StatusCode.ERROR, str(e)))
|
662
|
+
span.record_exception(e)
|
663
|
+
traceback_raw = traceback.format_exc().splitlines()
|
664
|
+
return Execution.from_exception(e, traceback_raw)
|
665
|
+
except DeadKernelError as e:
|
666
|
+
self.restart_kernel()
|
667
|
+
span.set_status(Status(StatusCode.ERROR, str(e)))
|
668
|
+
span.record_exception(e)
|
669
|
+
traceback_raw = traceback.format_exc().splitlines()
|
670
|
+
return Execution.from_exception(e, traceback_raw)
|
671
|
+
except Exception as e:
|
672
|
+
span.set_status(Status(StatusCode.ERROR, str(e)))
|
673
|
+
span.record_exception(e)
|
674
|
+
traceback_raw = traceback.format_exc().splitlines()
|
675
|
+
return Execution.from_exception(e, traceback_raw)
|
653
676
|
|
654
677
|
def upload_file(self, file_path: Union[str, Path]) -> Path:
|
655
678
|
with open(file_path, "rb") as f:
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: vision-agent
|
3
|
-
Version: 0.2.216
|
3
|
+
Version: 0.2.218
|
4
4
|
Summary: Toolset for Vision Agent
|
5
5
|
Author: Landing AI
|
6
6
|
Author-email: dev@landing.ai
|
@@ -23,6 +23,7 @@ Requires-Dist: nbformat (>=5.10.4,<6.0.0)
|
|
23
23
|
Requires-Dist: numpy (>=1.21.0,<2.0.0)
|
24
24
|
Requires-Dist: openai (>=1.0.0,<2.0.0)
|
25
25
|
Requires-Dist: opencv-python (>=4.0.0,<5.0.0)
|
26
|
+
Requires-Dist: opentelemetry-api (>=1.29.0,<2.0.0)
|
26
27
|
Requires-Dist: pandas (>=2.0.0,<3.0.0)
|
27
28
|
Requires-Dist: pillow (>=10.0.0,<11.0.0)
|
28
29
|
Requires-Dist: pillow-heif (>=0.16.0,<0.17.0)
|
@@ -26,21 +26,21 @@ vision_agent/fonts/default_font_ch_en.ttf,sha256=1YM0Z3XqLDjSNbF7ihQFSAIUdjF9m1r
|
|
26
26
|
vision_agent/lmm/__init__.py,sha256=jyY1sJb_tYKg5-Wzs3p1lvwFkc-aUNZfMcLy3TOC4Zg,100
|
27
27
|
vision_agent/lmm/lmm.py,sha256=x_nIyDNDZwq4-pfjnJTmcyyJZ2_B7TjkA5jZp88YVO8,17103
|
28
28
|
vision_agent/lmm/types.py,sha256=ZEXR_ptBL0ZwDMTDYkgxUCmSZFmBYPQd2jreNzr_8UY,221
|
29
|
-
vision_agent/tools/__init__.py,sha256=
|
29
|
+
vision_agent/tools/__init__.py,sha256=Jdq34jMw_KuYwk4Wexqm4DRjuLLoL1Q8wukm0NBv1Tc,2812
|
30
30
|
vision_agent/tools/meta_tools.py,sha256=TPeS7QWnc_PmmU_ndiDT03dXbQ5yDSP33E7U8cSj7Ls,28660
|
31
31
|
vision_agent/tools/planner_tools.py,sha256=tU1qz_VIQM_yPKDmuxjMWu68ZlAZ7ePWI1g7zswyWhI,13540
|
32
32
|
vision_agent/tools/prompts.py,sha256=V1z4YJLXZuUl_iZ5rY0M5hHc_2tmMEUKr0WocXKGt4E,1430
|
33
33
|
vision_agent/tools/tool_utils.py,sha256=LAnrb_nY6PNVamqJahRN-J0cuOy4gsKvCtSuXJf0RsI,10075
|
34
|
-
vision_agent/tools/tools.py,sha256=
|
34
|
+
vision_agent/tools/tools.py,sha256=Xcm_9EQdDCR9X5FhIm7VJaTL0qWqhnJUVTRVrRtETrA,96112
|
35
35
|
vision_agent/tools/tools_types.py,sha256=8hYf2OZhI58gvf65KGaeGkt4EQ56nwLFqIQDPHioOBc,2339
|
36
36
|
vision_agent/utils/__init__.py,sha256=7fMgbZiEwbNS0fBOS_hJI5PuEYBblw36zLi_UjUzvj4,244
|
37
37
|
vision_agent/utils/exceptions.py,sha256=booSPSuoULF7OXRr_YbC4dtKt6gM_HyiFQHBuaW86C4,2052
|
38
|
-
vision_agent/utils/execute.py,sha256=
|
38
|
+
vision_agent/utils/execute.py,sha256=Qs-C9lnRBc3frUH_bmrwHLuJ9qjPykIytex8y4E0f7s,29356
|
39
39
|
vision_agent/utils/image_utils.py,sha256=5uoYgXa6E0-lVrXR7K2XE7fe6r_n7pvK64HYQ50vG3w,12182
|
40
40
|
vision_agent/utils/sim.py,sha256=f1emBQM8SmyVKSrhj0NHItnfMHSeTw-Nk2pw-0eBZ5c,7462
|
41
41
|
vision_agent/utils/type_defs.py,sha256=BE12s3JNQy36QvauXHjwyeffVh5enfcvd4vTzSwvEZI,1384
|
42
42
|
vision_agent/utils/video.py,sha256=e1VwKhXzzlC5LcFMyrcQYrPnpnX4wxDpnQ-76sB4jgM,6001
|
43
|
-
vision_agent-0.2.216.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
|
44
|
-
vision_agent-0.2.
|
45
|
-
vision_agent-0.2.216.dist-info/WHEEL,sha256=7Z8_27uaHI_UZAc4Uox4PpBhQ9Y5_modZXWMxtUi4NU,88
|
46
|
-
vision_agent-0.2.216.dist-info/RECORD,,
|
43
|
+
vision_agent-0.2.218.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
|
44
|
+
vision_agent-0.2.218.dist-info/METADATA,sha256=Bh9yQRcNSytsUOIqztuXkUhSprPu-le7ncfb7owkc24,19122
|
45
|
+
vision_agent-0.2.218.dist-info/WHEEL,sha256=7Z8_27uaHI_UZAc4Uox4PpBhQ9Y5_modZXWMxtUi4NU,88
|
46
|
+
vision_agent-0.2.218.dist-info/RECORD,,
|
File without changes
|
File without changes
|