vision-agent 0.2.117__py3-none-any.whl → 0.2.119__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- vision_agent/agent/agent.py +1 -1
- vision_agent/agent/vision_agent.py +107 -49
- vision_agent/agent/vision_agent_coder.py +46 -23
- vision_agent/agent/vision_agent_prompts.py +43 -22
- vision_agent/clients/landing_public_api.py +2 -2
- vision_agent/lmm/lmm.py +15 -6
- vision_agent/lmm/types.py +3 -1
- vision_agent/tools/__init__.py +2 -2
- vision_agent/tools/meta_tools.py +281 -273
- vision_agent/tools/tools.py +36 -14
- vision_agent/tools/tools_types.py +3 -3
- vision_agent/utils/execute.py +69 -22
- vision_agent/utils/image_utils.py +2 -2
- {vision_agent-0.2.117.dist-info → vision_agent-0.2.119.dist-info}/METADATA +12 -8
- {vision_agent-0.2.117.dist-info → vision_agent-0.2.119.dist-info}/RECORD +17 -17
- {vision_agent-0.2.117.dist-info → vision_agent-0.2.119.dist-info}/LICENSE +0 -0
- {vision_agent-0.2.117.dist-info → vision_agent-0.2.119.dist-info}/WHEEL +0 -0
vision_agent/tools/tools.py
CHANGED
@@ -1,7 +1,9 @@
|
|
1
1
|
import io
|
2
2
|
import json
|
3
3
|
import logging
|
4
|
+
import os
|
4
5
|
import tempfile
|
6
|
+
import urllib.request
|
5
7
|
from importlib import resources
|
6
8
|
from pathlib import Path
|
7
9
|
from typing import Any, Dict, List, Optional, Tuple, Union, cast
|
@@ -760,10 +762,10 @@ def florence2_image_caption(image: np.ndarray, detail_caption: bool = True) -> s
|
|
760
762
|
return answer[task] # type: ignore
|
761
763
|
|
762
764
|
|
763
|
-
def
|
764
|
-
"""'
|
765
|
-
objects given a text prompt
|
766
|
-
can optionally separate the
|
765
|
+
def florence2_phrase_grounding(prompt: str, image: np.ndarray) -> List[Dict[str, Any]]:
|
766
|
+
"""'florence2_phrase_grounding' is a tool that can detect multiple
|
767
|
+
objects given a text prompt which can be object names or a caption. You
|
768
|
+
can optionally separate the object names in the text with commas. It returns a list
|
767
769
|
of bounding boxes with normalized coordinates, label names and associated
|
768
770
|
probability scores of 1.0.
|
769
771
|
|
@@ -780,7 +782,7 @@ def florence2_object_detection(prompt: str, image: np.ndarray) -> List[Dict[str,
|
|
780
782
|
|
781
783
|
Example
|
782
784
|
-------
|
783
|
-
>>>
|
785
|
+
>>> florence2_phrase_grounding('person looking at a coyote', image)
|
784
786
|
[
|
785
787
|
{'score': 1.0, 'label': 'person', 'bbox': [0.1, 0.11, 0.35, 0.4]},
|
786
788
|
{'score': 1.0, 'label': 'coyote', 'bbox': [0.34, 0.21, 0.85, 0.5]},
|
@@ -792,7 +794,7 @@ def florence2_object_detection(prompt: str, image: np.ndarray) -> List[Dict[str,
|
|
792
794
|
"image": image_b64,
|
793
795
|
"task": "<CAPTION_TO_PHRASE_GROUNDING>",
|
794
796
|
"prompt": prompt,
|
795
|
-
"function_name": "
|
797
|
+
"function_name": "florence2_phrase_grounding",
|
796
798
|
}
|
797
799
|
|
798
800
|
detections = send_inference_request(data, "florence2", v2=True)
|
@@ -1220,6 +1222,13 @@ def extract_frames(
|
|
1220
1222
|
video_file_path = video.download(output_path=temp_dir)
|
1221
1223
|
|
1222
1224
|
return extract_frames_from_video(video_file_path, fps)
|
1225
|
+
elif str(video_uri).startswith(("http", "https")):
|
1226
|
+
_, image_suffix = os.path.splitext(video_uri)
|
1227
|
+
with tempfile.NamedTemporaryFile(delete=False, suffix=image_suffix) as tmp_file:
|
1228
|
+
# Download the video and save it to the temporary file
|
1229
|
+
with urllib.request.urlopen(str(video_uri)) as response:
|
1230
|
+
tmp_file.write(response.read())
|
1231
|
+
return extract_frames_from_video(tmp_file.name, fps)
|
1223
1232
|
|
1224
1233
|
return extract_frames_from_video(str(video_uri), fps)
|
1225
1234
|
|
@@ -1250,10 +1259,10 @@ def save_json(data: Any, file_path: str) -> None:
|
|
1250
1259
|
|
1251
1260
|
|
1252
1261
|
def load_image(image_path: str) -> np.ndarray:
|
1253
|
-
"""'load_image' is a utility function that loads an image from the given file path string.
|
1262
|
+
"""'load_image' is a utility function that loads an image from the given file path string or an URL.
|
1254
1263
|
|
1255
1264
|
Parameters:
|
1256
|
-
image_path (str): The path to the image.
|
1265
|
+
image_path (str): The path or URL to the image.
|
1257
1266
|
|
1258
1267
|
Returns:
|
1259
1268
|
np.ndarray: The image as a NumPy array.
|
@@ -1265,6 +1274,13 @@ def load_image(image_path: str) -> np.ndarray:
|
|
1265
1274
|
# NOTE: sometimes the generated code pass in a NumPy array
|
1266
1275
|
if isinstance(image_path, np.ndarray):
|
1267
1276
|
return image_path
|
1277
|
+
if image_path.startswith(("http", "https")):
|
1278
|
+
_, image_suffix = os.path.splitext(image_path)
|
1279
|
+
with tempfile.NamedTemporaryFile(delete=False, suffix=image_suffix) as tmp_file:
|
1280
|
+
# Download the image and save it to the temporary file
|
1281
|
+
with urllib.request.urlopen(image_path) as response:
|
1282
|
+
tmp_file.write(response.read())
|
1283
|
+
image_path = tmp_file.name
|
1268
1284
|
image = Image.open(image_path).convert("RGB")
|
1269
1285
|
return np.array(image)
|
1270
1286
|
|
@@ -1316,7 +1332,7 @@ def save_video(
|
|
1316
1332
|
video.write_videofile(f.name, codec="libx264")
|
1317
1333
|
f.close()
|
1318
1334
|
_save_video_to_result(f.name)
|
1319
|
-
|
1335
|
+
return f.name
|
1320
1336
|
|
1321
1337
|
|
1322
1338
|
def _save_video_to_result(video_uri: str) -> None:
|
@@ -1418,6 +1434,7 @@ def overlay_segmentation_masks(
|
|
1418
1434
|
medias: Union[np.ndarray, List[np.ndarray]],
|
1419
1435
|
masks: Union[List[Dict[str, Any]], List[List[Dict[str, Any]]]],
|
1420
1436
|
draw_label: bool = True,
|
1437
|
+
secondary_label_key: str = "tracking_label",
|
1421
1438
|
) -> Union[np.ndarray, List[np.ndarray]]:
|
1422
1439
|
"""'overlay_segmentation_masks' is a utility function that displays segmentation
|
1423
1440
|
masks.
|
@@ -1426,7 +1443,10 @@ def overlay_segmentation_masks(
|
|
1426
1443
|
medias (Union[np.ndarray, List[np.ndarray]]): The image or frames to display
|
1427
1444
|
the masks on.
|
1428
1445
|
masks (Union[List[Dict[str, Any]], List[List[Dict[str, Any]]]]): A list of
|
1429
|
-
dictionaries containing the masks.
|
1446
|
+
dictionaries containing the masks, labels and scores.
|
1447
|
+
draw_label (bool, optional): If True, the labels will be displayed on the image.
|
1448
|
+
secondary_label_key (str, optional): The key to use for the secondary
|
1449
|
+
tracking label which is needed in videos to display tracking information.
|
1430
1450
|
|
1431
1451
|
Returns:
|
1432
1452
|
np.ndarray: The image with the masks displayed.
|
@@ -1471,6 +1491,7 @@ def overlay_segmentation_masks(
|
|
1471
1491
|
for elt in masks_int[i]:
|
1472
1492
|
mask = elt["mask"]
|
1473
1493
|
label = elt["label"]
|
1494
|
+
tracking_lbl = elt.get(secondary_label_key, None)
|
1474
1495
|
np_mask = np.zeros((pil_image.size[1], pil_image.size[0], 4))
|
1475
1496
|
np_mask[mask > 0, :] = color[label] + (255 * 0.5,)
|
1476
1497
|
mask_img = Image.fromarray(np_mask.astype(np.uint8))
|
@@ -1478,16 +1499,17 @@ def overlay_segmentation_masks(
|
|
1478
1499
|
|
1479
1500
|
if draw_label:
|
1480
1501
|
draw = ImageDraw.Draw(pil_image)
|
1481
|
-
|
1502
|
+
text = tracking_lbl if tracking_lbl else label
|
1503
|
+
text_box = draw.textbbox((0, 0), text=text, font=font)
|
1482
1504
|
x, y = _get_text_coords_from_mask(
|
1483
1505
|
mask,
|
1484
1506
|
v_gap=(text_box[3] - text_box[1]) + 10,
|
1485
1507
|
h_gap=(text_box[2] - text_box[0]) // 2,
|
1486
1508
|
)
|
1487
1509
|
if x != 0 and y != 0:
|
1488
|
-
text_box = draw.textbbox((x, y), text=
|
1510
|
+
text_box = draw.textbbox((x, y), text=text, font=font)
|
1489
1511
|
draw.rectangle((x, y, text_box[2], text_box[3]), fill=color[label])
|
1490
|
-
draw.text((x, y),
|
1512
|
+
draw.text((x, y), text, fill="black", font=font)
|
1491
1513
|
frame_out.append(np.array(pil_image))
|
1492
1514
|
return frame_out[0] if len(frame_out) == 1 else frame_out
|
1493
1515
|
|
@@ -1663,7 +1685,7 @@ FUNCTION_TOOLS = [
|
|
1663
1685
|
florence2_ocr,
|
1664
1686
|
florence2_sam2_image,
|
1665
1687
|
florence2_sam2_video,
|
1666
|
-
|
1688
|
+
florence2_phrase_grounding,
|
1667
1689
|
ixc25_image_vqa,
|
1668
1690
|
ixc25_video_vqa,
|
1669
1691
|
detr_segmentation,
|
@@ -1,8 +1,8 @@
|
|
1
|
-
from uuid import UUID
|
2
1
|
from enum import Enum
|
3
|
-
from typing import List,
|
2
|
+
from typing import List, Optional, Tuple
|
3
|
+
from uuid import UUID
|
4
4
|
|
5
|
-
from pydantic import BaseModel, ConfigDict, Field,
|
5
|
+
from pydantic import BaseModel, ConfigDict, Field, SerializationInfo, field_serializer
|
6
6
|
|
7
7
|
|
8
8
|
class BboxInput(BaseModel):
|
vision_agent/utils/execute.py
CHANGED
@@ -5,7 +5,6 @@ import os
|
|
5
5
|
import platform
|
6
6
|
import re
|
7
7
|
import sys
|
8
|
-
import tempfile
|
9
8
|
import traceback
|
10
9
|
import warnings
|
11
10
|
from enum import Enum
|
@@ -40,6 +39,7 @@ from vision_agent.utils.exceptions import (
|
|
40
39
|
load_dotenv()
|
41
40
|
_LOGGER = logging.getLogger(__name__)
|
42
41
|
_SESSION_TIMEOUT = 600 # 10 minutes
|
42
|
+
WORKSPACE = Path(os.getenv("WORKSPACE", ""))
|
43
43
|
|
44
44
|
|
45
45
|
class MimeType(str, Enum):
|
@@ -384,8 +384,15 @@ class Execution(BaseModel):
|
|
384
384
|
class CodeInterpreter(abc.ABC):
|
385
385
|
"""Code interpreter interface."""
|
386
386
|
|
387
|
-
def __init__(
|
387
|
+
def __init__(
|
388
|
+
self,
|
389
|
+
timeout: int,
|
390
|
+
remote_path: Optional[Union[str, Path]] = None,
|
391
|
+
*args: Any,
|
392
|
+
**kwargs: Any,
|
393
|
+
) -> None:
|
388
394
|
self.timeout = timeout
|
395
|
+
self.remote_path = Path(remote_path if remote_path is not None else WORKSPACE)
|
389
396
|
|
390
397
|
def __enter__(self) -> Self:
|
391
398
|
return self
|
@@ -406,17 +413,21 @@ class CodeInterpreter(abc.ABC):
|
|
406
413
|
self.restart_kernel()
|
407
414
|
return self.exec_cell(code)
|
408
415
|
|
409
|
-
def upload_file(self, file: Union[str, Path]) ->
|
416
|
+
def upload_file(self, file: Union[str, Path]) -> Path:
|
410
417
|
# Default behavior is a no-op (for local code interpreter)
|
411
|
-
return
|
418
|
+
return Path(file)
|
412
419
|
|
413
|
-
def download_file(
|
420
|
+
def download_file(
|
421
|
+
self, remote_file_path: Union[str, Path], local_file_path: Union[str, Path]
|
422
|
+
) -> Path:
|
414
423
|
# Default behavior is a no-op (for local code interpreter)
|
415
|
-
return Path(
|
424
|
+
return Path(local_file_path)
|
416
425
|
|
417
426
|
|
418
427
|
class E2BCodeInterpreter(CodeInterpreter):
|
419
|
-
def __init__(
|
428
|
+
def __init__(
|
429
|
+
self, remote_path: Optional[Union[str, Path]] = None, *args: Any, **kwargs: Any
|
430
|
+
) -> None:
|
420
431
|
super().__init__(*args, **kwargs)
|
421
432
|
assert os.getenv("E2B_API_KEY"), "E2B_API_KEY environment variable must be set"
|
422
433
|
try:
|
@@ -443,6 +454,9 @@ print(f"Vision Agent version: {va_version}")"""
|
|
443
454
|
_LOGGER.info(
|
444
455
|
f"E2BCodeInterpreter (sandbox id: {self.interpreter.sandbox_id}) initialized:\n{sys_versions}"
|
445
456
|
)
|
457
|
+
self.remote_path = Path(
|
458
|
+
remote_path if remote_path is not None else "/home/user"
|
459
|
+
)
|
446
460
|
|
447
461
|
def close(self, *args: Any, **kwargs: Any) -> None:
|
448
462
|
try:
|
@@ -516,19 +530,22 @@ print(f"Vision Agent version: {va_version}")"""
|
|
516
530
|
before_sleep=tenacity.before_sleep_log(_LOGGER, logging.INFO),
|
517
531
|
after=tenacity.after_log(_LOGGER, logging.INFO),
|
518
532
|
)
|
519
|
-
def upload_file(self, file: Union[str, Path]) ->
|
533
|
+
def upload_file(self, file: Union[str, Path]) -> Path:
|
520
534
|
file_name = Path(file).name
|
521
|
-
remote_path = f"/home/user/{file_name}"
|
522
535
|
with open(file, "rb") as f:
|
523
|
-
self.interpreter.files.write(path=remote_path, data=f)
|
524
|
-
|
525
|
-
|
526
|
-
|
527
|
-
def download_file(
|
528
|
-
|
529
|
-
|
530
|
-
|
531
|
-
|
536
|
+
self.interpreter.files.write(path=str(self.remote_path / file_name), data=f)
|
537
|
+
_LOGGER.info(f"File ({file}) is uploaded to: {str(self.remote_path)}")
|
538
|
+
return self.remote_path / file_name
|
539
|
+
|
540
|
+
def download_file(
|
541
|
+
self, remote_file_path: Union[str, Path], local_file_path: Union[str, Path]
|
542
|
+
) -> Path:
|
543
|
+
with open(local_file_path, "w+b") as f:
|
544
|
+
f.write(
|
545
|
+
self.interpreter.files.read(path=str(remote_file_path), format="bytes")
|
546
|
+
)
|
547
|
+
_LOGGER.info(f"File ({remote_file_path}) is downloaded to: {local_file_path}")
|
548
|
+
return Path(local_file_path)
|
532
549
|
|
533
550
|
@staticmethod
|
534
551
|
def _new_e2b_interpreter_impl(*args, **kwargs) -> E2BCodeInterpreterImpl: # type: ignore
|
@@ -540,7 +557,11 @@ print(f"Vision Agent version: {va_version}")"""
|
|
540
557
|
|
541
558
|
|
542
559
|
class LocalCodeInterpreter(CodeInterpreter):
|
543
|
-
def __init__(
|
560
|
+
def __init__(
|
561
|
+
self,
|
562
|
+
timeout: int = _SESSION_TIMEOUT,
|
563
|
+
remote_path: Optional[Union[str, Path]] = None,
|
564
|
+
) -> None:
|
544
565
|
super().__init__(timeout=timeout)
|
545
566
|
self.nb = nbformat.v4.new_notebook()
|
546
567
|
self.nb_client = NotebookClient(self.nb, timeout=self.timeout)
|
@@ -554,6 +575,7 @@ Timeout: {self.timeout}"""
|
|
554
575
|
)
|
555
576
|
sleep(1)
|
556
577
|
self._new_kernel()
|
578
|
+
self.remote_path = Path(remote_path if remote_path is not None else WORKSPACE)
|
557
579
|
|
558
580
|
def _new_kernel(self) -> None:
|
559
581
|
if self.nb_client.kc is None or not run_sync(self.nb_client.kc.is_alive)(): # type: ignore
|
@@ -607,6 +629,25 @@ Timeout: {self.timeout}"""
|
|
607
629
|
traceback_raw = traceback.format_exc().splitlines()
|
608
630
|
return Execution.from_exception(e, traceback_raw)
|
609
631
|
|
632
|
+
def upload_file(self, file_path: Union[str, Path]) -> Path:
|
633
|
+
with open(file_path, "rb") as f:
|
634
|
+
contents = f.read()
|
635
|
+
with open(self.remote_path / Path(file_path).name, "wb") as f:
|
636
|
+
f.write(contents)
|
637
|
+
_LOGGER.info(f"File ({file_path}) is uploaded to: {str(self.remote_path)}")
|
638
|
+
|
639
|
+
return Path(self.remote_path / file_path)
|
640
|
+
|
641
|
+
def download_file(
|
642
|
+
self, remote_file_path: Union[str, Path], local_file_path: Union[str, Path]
|
643
|
+
) -> Path:
|
644
|
+
with open(self.remote_path / remote_file_path, "rb") as f:
|
645
|
+
contents = f.read()
|
646
|
+
with open(local_file_path, "wb") as f:
|
647
|
+
f.write(contents)
|
648
|
+
_LOGGER.info(f"File ({remote_file_path}) is downloaded to: {local_file_path}")
|
649
|
+
return Path(local_file_path)
|
650
|
+
|
610
651
|
|
611
652
|
class CodeInterpreterFactory:
|
612
653
|
"""Factory class for creating code interpreters.
|
@@ -630,13 +671,19 @@ class CodeInterpreterFactory:
|
|
630
671
|
return instance
|
631
672
|
|
632
673
|
@staticmethod
|
633
|
-
def new_instance(
|
674
|
+
def new_instance(
|
675
|
+
code_sandbox_runtime: Optional[str] = None, remote_path: Optional[str] = None
|
676
|
+
) -> CodeInterpreter:
|
634
677
|
if not code_sandbox_runtime:
|
635
678
|
code_sandbox_runtime = os.getenv("CODE_SANDBOX_RUNTIME", "local")
|
636
679
|
if code_sandbox_runtime == "e2b":
|
637
|
-
instance: CodeInterpreter = E2BCodeInterpreter(
|
680
|
+
instance: CodeInterpreter = E2BCodeInterpreter(
|
681
|
+
timeout=_SESSION_TIMEOUT, remote_path=remote_path
|
682
|
+
)
|
638
683
|
elif code_sandbox_runtime == "local":
|
639
|
-
instance = LocalCodeInterpreter(
|
684
|
+
instance = LocalCodeInterpreter(
|
685
|
+
timeout=_SESSION_TIMEOUT, remote_path=remote_path
|
686
|
+
)
|
640
687
|
else:
|
641
688
|
raise ValueError(
|
642
689
|
f"Unsupported code sandbox runtime: {code_sandbox_runtime}. Supported runtimes: e2b, local"
|
@@ -70,7 +70,7 @@ def rle_decode_array(rle: Dict[str, List[int]]) -> np.ndarray:
|
|
70
70
|
r"""Decode a run-length encoded mask. Returns numpy array, 1 - mask, 0 - background.
|
71
71
|
|
72
72
|
Parameters:
|
73
|
-
|
73
|
+
rle: The run-length encoded mask.
|
74
74
|
"""
|
75
75
|
size = rle["size"]
|
76
76
|
counts = rle["counts"]
|
@@ -100,7 +100,7 @@ def frames_to_bytes(
|
|
100
100
|
"""
|
101
101
|
with tempfile.NamedTemporaryFile(delete=True) as temp_file:
|
102
102
|
clip = ImageSequenceClip(frames, fps=fps)
|
103
|
-
clip.write_videofile(temp_file.name + f".{file_ext}", fps=fps)
|
103
|
+
clip.write_videofile(temp_file.name + f".{file_ext}", fps=fps, codec="libx264")
|
104
104
|
with open(temp_file.name + f".{file_ext}", "rb") as f:
|
105
105
|
buffer_bytes = f.read()
|
106
106
|
return buffer_bytes
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: vision-agent
|
3
|
-
Version: 0.2.
|
3
|
+
Version: 0.2.119
|
4
4
|
Summary: Toolset for Vision Agent
|
5
5
|
Author: Landing AI
|
6
6
|
Author-email: dev@landing.ai
|
@@ -81,15 +81,15 @@ export OPENAI_API_KEY="your-api-key"
|
|
81
81
|
```
|
82
82
|
|
83
83
|
### Vision Agent
|
84
|
-
There are two agents that you can use.
|
84
|
+
There are two agents that you can use. `VisionAgent` is a conversational agent that has
|
85
85
|
access to tools that allow it to write and navigate Python code and file systems. It can
|
86
|
-
converse with the user in natural language. VisionAgentCoder is an agent
|
87
|
-
code for vision tasks, such as counting people in an image. However, it
|
88
|
-
and can only respond with code. VisionAgent can call
|
89
|
-
code.
|
86
|
+
converse with the user in natural language. `VisionAgentCoder` is an agent specifically
|
87
|
+
for writing code for vision tasks, such as counting people in an image. However, it
|
88
|
+
cannot chat with you and can only respond with code. `VisionAgent` can call
|
89
|
+
`VisionAgentCoder` to write vision code.
|
90
90
|
|
91
91
|
#### Basic Usage
|
92
|
-
To run the streamlit app locally to chat with
|
92
|
+
To run the streamlit app locally to chat with `VisionAgent`, you can run the following
|
93
93
|
command:
|
94
94
|
|
95
95
|
```bash
|
@@ -186,7 +186,7 @@ the code and having it update. You just need to add the code as a response from
|
|
186
186
|
assistant:
|
187
187
|
|
188
188
|
```python
|
189
|
-
agent = va.agent.
|
189
|
+
agent = va.agent.VisionAgentCoder(verbosity=2)
|
190
190
|
conv = [
|
191
191
|
{
|
192
192
|
"role": "user",
|
@@ -252,6 +252,10 @@ function. Make sure the documentation is in the same format above with descripti
|
|
252
252
|
`Parameters:`, `Returns:`, and `Example\n-------`. You can find an example use case
|
253
253
|
[here](examples/custom_tools/) as this is what the agent uses to pick and use the tool.
|
254
254
|
|
255
|
+
Can't find the tool you need and want to add it to `VisionAgent`? Check out our
|
256
|
+
[vision-agent-tools](https://github.com/landing-ai/vision-agent-tools) repository where
|
257
|
+
we add the source code for all the tools used in `VisionAgent`.
|
258
|
+
|
255
259
|
## Additional Backends
|
256
260
|
### Ollama
|
257
261
|
We also provide a `VisionAgentCoder` that uses Ollama. To get started you must download
|
@@ -1,33 +1,33 @@
|
|
1
1
|
vision_agent/__init__.py,sha256=EAb4-f9iyuEYkBrX4ag1syM8Syx8118_t0R6_C34M9w,57
|
2
2
|
vision_agent/agent/__init__.py,sha256=FRwiux1FGvGccetyUCtY46KP01fQteqorm-JtFepovI,176
|
3
|
-
vision_agent/agent/agent.py,sha256=
|
3
|
+
vision_agent/agent/agent.py,sha256=2cjIOxEuSJrqbfPXYoV0qER5ihXsPFCoEFJa4jpqan0,597
|
4
4
|
vision_agent/agent/agent_utils.py,sha256=22LiPhkJlS5mVeo2dIi259pc2NgA7PGHRpcbnrtKo78,1930
|
5
|
-
vision_agent/agent/vision_agent.py,sha256=
|
6
|
-
vision_agent/agent/vision_agent_coder.py,sha256=
|
5
|
+
vision_agent/agent/vision_agent.py,sha256=IEyXT_JPCuWmBHdEnM1Wrsj7hmCe5pKLf0gnZFJTddI,11046
|
6
|
+
vision_agent/agent/vision_agent_coder.py,sha256=DOTmDdGPxcI06Jp6yx4ekRMP0vhiVaK9B9Dl8UyJHeo,34396
|
7
7
|
vision_agent/agent/vision_agent_coder_prompts.py,sha256=xIya1txRZM8qoQHAWTEkEFCL8L3iZD7QD09t3ZtdxSE,11305
|
8
|
-
vision_agent/agent/vision_agent_prompts.py,sha256=
|
8
|
+
vision_agent/agent/vision_agent_prompts.py,sha256=0GliXFtBf32aPu2ClU63FI5ii5CTxWYsvrsmnnDp-gs,7134
|
9
9
|
vision_agent/clients/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
10
10
|
vision_agent/clients/http.py,sha256=k883i6M_4nl7zwwHSI-yP5sAgQZIDPM1nrKD6YFJ3Xs,2009
|
11
|
-
vision_agent/clients/landing_public_api.py,sha256=
|
11
|
+
vision_agent/clients/landing_public_api.py,sha256=rGtACkr8o5egDuMHQ5MBO4NuvsgPTp9Ew3rbq4R-vs0,1507
|
12
12
|
vision_agent/fonts/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
13
13
|
vision_agent/fonts/default_font_ch_en.ttf,sha256=1YM0Z3XqLDjSNbF7ihQFSAIUdjF9m1rtHiNC_6QosTE,1594400
|
14
14
|
vision_agent/lmm/__init__.py,sha256=YuUZRsMHdn8cMOv6iBU8yUqlIOLrbZQqZl9KPnofsHQ,103
|
15
|
-
vision_agent/lmm/lmm.py,sha256=
|
16
|
-
vision_agent/lmm/types.py,sha256=
|
17
|
-
vision_agent/tools/__init__.py,sha256=
|
18
|
-
vision_agent/tools/meta_tools.py,sha256=
|
15
|
+
vision_agent/lmm/lmm.py,sha256=AYrZNdhghG293wd3aKZ1jK1lUm2NLWwALktbM4wNais,20862
|
16
|
+
vision_agent/lmm/types.py,sha256=ZEXR_ptBL0ZwDMTDYkgxUCmSZFmBYPQd2jreNzr_8UY,221
|
17
|
+
vision_agent/tools/__init__.py,sha256=i7JOLxRaLdcY7-vCNOGAeOFMBfiAUIwWhnT32FO97VE,2201
|
18
|
+
vision_agent/tools/meta_tools.py,sha256=Vu9WnKicGhafx9dPzDbQjQdcIzRCYYFPF68o79hDP-8,14616
|
19
19
|
vision_agent/tools/prompts.py,sha256=V1z4YJLXZuUl_iZ5rY0M5hHc_2tmMEUKr0WocXKGt4E,1430
|
20
20
|
vision_agent/tools/tool_utils.py,sha256=qMsb9d8QtpXGgF9rpPO2dA390BewKdYO68oWKDu-TGg,6504
|
21
|
-
vision_agent/tools/tools.py,sha256=
|
22
|
-
vision_agent/tools/tools_types.py,sha256=
|
21
|
+
vision_agent/tools/tools.py,sha256=kbbMToAaHxl42dDEvyz9Mvtpqts0l0hGoC5YQQyozr8,59953
|
22
|
+
vision_agent/tools/tools_types.py,sha256=iLWSirheC87fKQolIhx_O4Jk8Lv7DRiLuE8PJqLGiVQ,2216
|
23
23
|
vision_agent/utils/__init__.py,sha256=pWk0ktvR4aUEhuEIzSLM9kSgW4WDVqptdvOTeGLkJ6M,230
|
24
24
|
vision_agent/utils/exceptions.py,sha256=booSPSuoULF7OXRr_YbC4dtKt6gM_HyiFQHBuaW86C4,2052
|
25
|
-
vision_agent/utils/execute.py,sha256=
|
26
|
-
vision_agent/utils/image_utils.py,sha256=
|
25
|
+
vision_agent/utils/execute.py,sha256=Ap8Yx80spQq5f2QtKGx1MK03BR45mJKhlp1kfh-rIao,26751
|
26
|
+
vision_agent/utils/image_utils.py,sha256=eNghu_2L8624jEXy8ZZS9OX46Mv0DT9bcvLForujwTs,9848
|
27
27
|
vision_agent/utils/sim.py,sha256=ebE9Cs00pVEDI1HMjAzUBk88tQQmc2U-yAzIDinnekU,5572
|
28
28
|
vision_agent/utils/type_defs.py,sha256=BE12s3JNQy36QvauXHjwyeffVh5enfcvd4vTzSwvEZI,1384
|
29
29
|
vision_agent/utils/video.py,sha256=rNmU9KEIkZB5-EztZNlUiKYN0mm_55A_2VGUM0QpqLA,8779
|
30
|
-
vision_agent-0.2.
|
31
|
-
vision_agent-0.2.
|
32
|
-
vision_agent-0.2.
|
33
|
-
vision_agent-0.2.
|
30
|
+
vision_agent-0.2.119.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
|
31
|
+
vision_agent-0.2.119.dist-info/METADATA,sha256=ag8Cf800dZJtJqJtwEcf4gqf7Qjf-K1JMoeisDI7RWQ,12255
|
32
|
+
vision_agent-0.2.119.dist-info/WHEEL,sha256=7Z8_27uaHI_UZAc4Uox4PpBhQ9Y5_modZXWMxtUi4NU,88
|
33
|
+
vision_agent-0.2.119.dist-info/RECORD,,
|
File without changes
|
File without changes
|