vision-agent 0.2.117__py3-none-any.whl → 0.2.119__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,7 +1,9 @@
  import io
  import json
  import logging
+ import os
  import tempfile
+ import urllib.request
  from importlib import resources
  from pathlib import Path
  from typing import Any, Dict, List, Optional, Tuple, Union, cast
@@ -760,10 +762,10 @@ def florence2_image_caption(image: np.ndarray, detail_caption: bool = True) -> s
  return answer[task] # type: ignore


- def florence2_object_detection(prompt: str, image: np.ndarray) -> List[Dict[str, Any]]:
- """'florencev2_object_detection' is a tool that can detect and count multiple
- objects given a text prompt such as category names or referring expressions. You
- can optionally separate the categories in the text with commas. It returns a list
+ def florence2_phrase_grounding(prompt: str, image: np.ndarray) -> List[Dict[str, Any]]:
+ """'florence2_phrase_grounding' is a tool that can detect multiple
+ objects given a text prompt which can be object names or caption. You
+ can optionally separate the object names in the text with commas. It returns a list
  of bounding boxes with normalized coordinates, label names and associated
  probability scores of 1.0.

@@ -780,7 +782,7 @@ def florence2_object_detection(prompt: str, image: np.ndarray) -> List[Dict[str,

  Example
  -------
- >>> florence2_object_detection('person looking at a coyote', image)
+ >>> florence2_phrase_grounding('person looking at a coyote', image)
  [
  {'score': 1.0, 'label': 'person', 'bbox': [0.1, 0.11, 0.35, 0.4]},
  {'score': 1.0, 'label': 'coyote', 'bbox': [0.34, 0.21, 0.85, 0.5},
@@ -792,7 +794,7 @@ def florence2_object_detection(prompt: str, image: np.ndarray) -> List[Dict[str,
  "image": image_b64,
  "task": "<CAPTION_TO_PHRASE_GROUNDING>",
  "prompt": prompt,
- "function_name": "florence2_object_detection",
+ "function_name": "florence2_phrase_grounding",
  }

  detections = send_inference_request(data, "florence2", v2=True)
@@ -1220,6 +1222,13 @@ def extract_frames(
  video_file_path = video.download(output_path=temp_dir)

  return extract_frames_from_video(video_file_path, fps)
+ elif str(video_uri).startswith(("http", "https")):
+ _, image_suffix = os.path.splitext(video_uri)
+ with tempfile.NamedTemporaryFile(delete=False, suffix=image_suffix) as tmp_file:
+ # Download the video and save it to the temporary file
+ with urllib.request.urlopen(str(video_uri)) as response:
+ tmp_file.write(response.read())
+ return extract_frames_from_video(tmp_file.name, fps)

  return extract_frames_from_video(str(video_uri), fps)
 
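As a rough illustration of the new URL handling in `extract_frames` above, a minimal sketch; it assumes `extract_frames` is exported from `vision_agent.tools` like the other tools, and the URL, local path, and `fps` values are placeholders:

```python
from vision_agent.tools import extract_frames

# 0.2.119 also accepts an http(s) URL: the video is downloaded to a temporary
# file (keeping its suffix) and then handed to extract_frames_from_video.
frames = extract_frames("https://example.com/videos/demo.mp4", fps=1)  # placeholder URL

# Local file paths keep working exactly as before.
frames_local = extract_frames("videos/demo.mp4", fps=1)
```
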
@@ -1250,10 +1259,10 @@ def save_json(data: Any, file_path: str) -> None:


  def load_image(image_path: str) -> np.ndarray:
- """'load_image' is a utility function that loads an image from the given file path string.
+ """'load_image' is a utility function that loads an image from the given file path string or an URL.

  Parameters:
- image_path (str): The path to the image.
+ image_path (str): The path or URL to the image.

  Returns:
  np.ndarray: The image as a NumPy array.
@@ -1265,6 +1274,13 @@ def load_image(image_path: str) -> np.ndarray:
  # NOTE: sometimes the generated code pass in a NumPy array
  if isinstance(image_path, np.ndarray):
  return image_path
+ if image_path.startswith(("http", "https")):
+ _, image_suffix = os.path.splitext(image_path)
+ with tempfile.NamedTemporaryFile(delete=False, suffix=image_suffix) as tmp_file:
+ # Download the image and save it to the temporary file
+ with urllib.request.urlopen(image_path) as response:
+ tmp_file.write(response.read())
+ image_path = tmp_file.name
  image = Image.open(image_path).convert("RGB")
  return np.array(image)
 
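A similar sketch for the updated `load_image`, again assuming the import from `vision_agent.tools`; the URL and path below are placeholders:

```python
import numpy as np

from vision_agent.tools import load_image

# With 0.2.119, an http(s) URL is first downloaded to a temporary file and then
# opened with PIL, so the return value is still an RGB image as a NumPy array.
image = load_image("https://example.com/images/sample.jpg")  # placeholder URL
assert isinstance(image, np.ndarray)

# Plain file paths (and already-loaded NumPy arrays) behave as before.
image_local = load_image("images/sample.jpg")
```
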
@@ -1316,7 +1332,7 @@ def save_video(
  video.write_videofile(f.name, codec="libx264")
  f.close()
  _save_video_to_result(f.name)
- return f.name
+ return f.name


  def _save_video_to_result(video_uri: str) -> None:
@@ -1418,6 +1434,7 @@ def overlay_segmentation_masks(
  medias: Union[np.ndarray, List[np.ndarray]],
  masks: Union[List[Dict[str, Any]], List[List[Dict[str, Any]]]],
  draw_label: bool = True,
+ secondary_label_key: str = "tracking_label",
  ) -> Union[np.ndarray, List[np.ndarray]]:
  """'overlay_segmentation_masks' is a utility function that displays segmentation
  masks.
@@ -1426,7 +1443,10 @@ def overlay_segmentation_masks(
  medias (Union[np.ndarray, List[np.ndarray]]): The image or frames to display
  the masks on.
  masks (Union[List[Dict[str, Any]], List[List[Dict[str, Any]]]]): A list of
- dictionaries containing the masks.
+ dictionaries containing the masks, labels and scores.
+ draw_label (bool, optional): If True, the labels will be displayed on the image.
+ secondary_label_key (str, optional): The key to use for the secondary
+ tracking label which is needed in videos to display tracking information.

  Returns:
  np.ndarray: The image with the masks displayed.
@@ -1471,6 +1491,7 @@ def overlay_segmentation_masks(
  for elt in masks_int[i]:
  mask = elt["mask"]
  label = elt["label"]
+ tracking_lbl = elt.get(secondary_label_key, None)
  np_mask = np.zeros((pil_image.size[1], pil_image.size[0], 4))
  np_mask[mask > 0, :] = color[label] + (255 * 0.5,)
  mask_img = Image.fromarray(np_mask.astype(np.uint8))
@@ -1478,16 +1499,17 @@ def overlay_segmentation_masks(

  if draw_label:
  draw = ImageDraw.Draw(pil_image)
- text_box = draw.textbbox((0, 0), text=label, font=font)
+ text = tracking_lbl if tracking_lbl else label
+ text_box = draw.textbbox((0, 0), text=text, font=font)
  x, y = _get_text_coords_from_mask(
  mask,
  v_gap=(text_box[3] - text_box[1]) + 10,
  h_gap=(text_box[2] - text_box[0]) // 2,
  )
  if x != 0 and y != 0:
- text_box = draw.textbbox((x, y), text=label, font=font)
+ text_box = draw.textbbox((x, y), text=text, font=font)
  draw.rectangle((x, y, text_box[2], text_box[3]), fill=color[label])
- draw.text((x, y), label, fill="black", font=font)
+ draw.text((x, y), text, fill="black", font=font)
  frame_out.append(np.array(pil_image))
  return frame_out[0] if len(frame_out) == 1 else frame_out
 
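To show how the new `secondary_label_key` parameter is meant to be used, a small sketch based on the docstring above; the frame, mask, and `tracking_label` values are invented for the example:

```python
import numpy as np

from vision_agent.tools import overlay_segmentation_masks

frame = np.zeros((480, 640, 3), dtype=np.uint8)  # dummy frame
mask = np.zeros((480, 640), dtype=np.uint8)
mask[100:200, 100:200] = 1                       # dummy mask region

detections = [
    {
        "score": 1.0,
        "label": "person",
        "mask": mask,
        # Optional per-object tracking label; when present it is drawn instead
        # of "label". The key name is configurable via secondary_label_key.
        "tracking_label": "person_1",
    }
]

out = overlay_segmentation_masks(
    frame, detections, draw_label=True, secondary_label_key="tracking_label"
)
```
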
@@ -1663,7 +1685,7 @@ FUNCTION_TOOLS = [
  florence2_ocr,
  florence2_sam2_image,
  florence2_sam2_video,
- florence2_object_detection,
+ florence2_phrase_grounding,
  ixc25_image_vqa,
  ixc25_video_vqa,
  detr_segmentation,
@@ -1,8 +1,8 @@
- from uuid import UUID
  from enum import Enum
- from typing import List, Tuple, Optional
+ from typing import List, Optional, Tuple
+ from uuid import UUID

- from pydantic import BaseModel, ConfigDict, Field, field_serializer, SerializationInfo
+ from pydantic import BaseModel, ConfigDict, Field, SerializationInfo, field_serializer


  class BboxInput(BaseModel):
@@ -5,7 +5,6 @@ import os
  import platform
  import re
  import sys
- import tempfile
  import traceback
  import warnings
  from enum import Enum
@@ -40,6 +39,7 @@ from vision_agent.utils.exceptions import (
  load_dotenv()
  _LOGGER = logging.getLogger(__name__)
  _SESSION_TIMEOUT = 600 # 10 minutes
+ WORKSPACE = Path(os.getenv("WORKSPACE", ""))


  class MimeType(str, Enum):
@@ -384,8 +384,15 @@ class Execution(BaseModel):
  class CodeInterpreter(abc.ABC):
  """Code interpreter interface."""

- def __init__(self, timeout: int, *args: Any, **kwargs: Any) -> None:
+ def __init__(
+ self,
+ timeout: int,
+ remote_path: Optional[Union[str, Path]] = None,
+ *args: Any,
+ **kwargs: Any,
+ ) -> None:
  self.timeout = timeout
+ self.remote_path = Path(remote_path if remote_path is not None else WORKSPACE)

  def __enter__(self) -> Self:
  return self
@@ -406,17 +413,21 @@ class CodeInterpreter(abc.ABC):
  self.restart_kernel()
  return self.exec_cell(code)

- def upload_file(self, file: Union[str, Path]) -> str:
+ def upload_file(self, file: Union[str, Path]) -> Path:
  # Default behavior is a no-op (for local code interpreter)
- return str(file)
+ return Path(file)

- def download_file(self, file_path: str) -> Path:
+ def download_file(
+ self, remote_file_path: Union[str, Path], local_file_path: Union[str, Path]
+ ) -> Path:
  # Default behavior is a no-op (for local code interpreter)
- return Path(file_path)
+ return Path(local_file_path)


  class E2BCodeInterpreter(CodeInterpreter):
- def __init__(self, *args: Any, **kwargs: Any) -> None:
+ def __init__(
+ self, remote_path: Optional[Union[str, Path]] = None, *args: Any, **kwargs: Any
+ ) -> None:
  super().__init__(*args, **kwargs)
  assert os.getenv("E2B_API_KEY"), "E2B_API_KEY environment variable must be set"
  try:
@@ -443,6 +454,9 @@ print(f"Vision Agent version: {va_version}")"""
  _LOGGER.info(
  f"E2BCodeInterpreter (sandbox id: {self.interpreter.sandbox_id}) initialized:\n{sys_versions}"
  )
+ self.remote_path = Path(
+ remote_path if remote_path is not None else "/home/user"
+ )

  def close(self, *args: Any, **kwargs: Any) -> None:
  try:
@@ -516,19 +530,22 @@ print(f"Vision Agent version: {va_version}")"""
  before_sleep=tenacity.before_sleep_log(_LOGGER, logging.INFO),
  after=tenacity.after_log(_LOGGER, logging.INFO),
  )
- def upload_file(self, file: Union[str, Path]) -> str:
+ def upload_file(self, file: Union[str, Path]) -> Path:
  file_name = Path(file).name
- remote_path = f"/home/user/{file_name}"
  with open(file, "rb") as f:
- self.interpreter.files.write(path=remote_path, data=f)
- _LOGGER.info(f"File ({file}) is uploaded to: {remote_path}")
- return remote_path
-
- def download_file(self, file_path: str) -> Path:
- with tempfile.NamedTemporaryFile(mode="w+b", delete=False) as file:
- file.write(self.interpreter.files.read(path=file_path, format="bytes"))
- _LOGGER.info(f"File ({file_path}) is downloaded to: {file.name}")
- return Path(file.name)
+ self.interpreter.files.write(path=str(self.remote_path / file_name), data=f)
+ _LOGGER.info(f"File ({file}) is uploaded to: {str(self.remote_path)}")
+ return self.remote_path / file_name
+
+ def download_file(
+ self, remote_file_path: Union[str, Path], local_file_path: Union[str, Path]
+ ) -> Path:
+ with open(local_file_path, "w+b") as f:
+ f.write(
+ self.interpreter.files.read(path=str(remote_file_path), format="bytes")
+ )
+ _LOGGER.info(f"File ({remote_file_path}) is downloaded to: {local_file_path}")
+ return Path(local_file_path)

  @staticmethod
  def _new_e2b_interpreter_impl(*args, **kwargs) -> E2BCodeInterpreterImpl: # type: ignore
@@ -540,7 +557,11 @@ print(f"Vision Agent version: {va_version}")"""


  class LocalCodeInterpreter(CodeInterpreter):
- def __init__(self, timeout: int = _SESSION_TIMEOUT) -> None:
+ def __init__(
+ self,
+ timeout: int = _SESSION_TIMEOUT,
+ remote_path: Optional[Union[str, Path]] = None,
+ ) -> None:
  super().__init__(timeout=timeout)
  self.nb = nbformat.v4.new_notebook()
  self.nb_client = NotebookClient(self.nb, timeout=self.timeout)
@@ -554,6 +575,7 @@ Timeout: {self.timeout}"""
  )
  sleep(1)
  self._new_kernel()
+ self.remote_path = Path(remote_path if remote_path is not None else WORKSPACE)

  def _new_kernel(self) -> None:
  if self.nb_client.kc is None or not run_sync(self.nb_client.kc.is_alive)(): # type: ignore
@@ -607,6 +629,25 @@ Timeout: {self.timeout}"""
  traceback_raw = traceback.format_exc().splitlines()
  return Execution.from_exception(e, traceback_raw)

+ def upload_file(self, file_path: Union[str, Path]) -> Path:
+ with open(file_path, "rb") as f:
+ contents = f.read()
+ with open(self.remote_path / Path(file_path).name, "wb") as f:
+ f.write(contents)
+ _LOGGER.info(f"File ({file_path}) is uploaded to: {str(self.remote_path)}")
+
+ return Path(self.remote_path / file_path)
+
+ def download_file(
+ self, remote_file_path: Union[str, Path], local_file_path: Union[str, Path]
+ ) -> Path:
+ with open(self.remote_path / remote_file_path, "rb") as f:
+ contents = f.read()
+ with open(local_file_path, "wb") as f:
+ f.write(contents)
+ _LOGGER.info(f"File ({remote_file_path}) is downloaded to: {local_file_path}")
+ return Path(local_file_path)
+


  class CodeInterpreterFactory:
@@ -630,13 +671,19 @@ class CodeInterpreterFactory:
  return instance

  @staticmethod
- def new_instance(code_sandbox_runtime: Optional[str] = None) -> CodeInterpreter:
+ def new_instance(
+ code_sandbox_runtime: Optional[str] = None, remote_path: Optional[str] = None
+ ) -> CodeInterpreter:
  if not code_sandbox_runtime:
  code_sandbox_runtime = os.getenv("CODE_SANDBOX_RUNTIME", "local")
  if code_sandbox_runtime == "e2b":
- instance: CodeInterpreter = E2BCodeInterpreter(timeout=_SESSION_TIMEOUT)
+ instance: CodeInterpreter = E2BCodeInterpreter(
+ timeout=_SESSION_TIMEOUT, remote_path=remote_path
+ )
  elif code_sandbox_runtime == "local":
- instance = LocalCodeInterpreter(timeout=_SESSION_TIMEOUT)
+ instance = LocalCodeInterpreter(
+ timeout=_SESSION_TIMEOUT, remote_path=remote_path
+ )
  else:
  raise ValueError(
  f"Unsupported code sandbox runtime: {code_sandbox_runtime}. Supported runtimes: e2b, local"
@@ -70,7 +70,7 @@ def rle_decode_array(rle: Dict[str, List[int]]) -> np.ndarray:
  r"""Decode a run-length encoded mask. Returns numpy array, 1 - mask, 0 - background.

  Parameters:
- mask: The mask in run-length encoded as an array.
+ rle: The run-length encoded mask.
  """
  size = rle["size"]
  counts = rle["counts"]
@@ -100,7 +100,7 @@ def frames_to_bytes(
  """
  with tempfile.NamedTemporaryFile(delete=True) as temp_file:
  clip = ImageSequenceClip(frames, fps=fps)
- clip.write_videofile(temp_file.name + f".{file_ext}", fps=fps)
+ clip.write_videofile(temp_file.name + f".{file_ext}", fps=fps, codec="libx264")
  with open(temp_file.name + f".{file_ext}", "rb") as f:
  buffer_bytes = f.read()
  return buffer_bytes
@@ -1,6 +1,6 @@
  Metadata-Version: 2.1
  Name: vision-agent
- Version: 0.2.117
+ Version: 0.2.119
  Summary: Toolset for Vision Agent
  Author: Landing AI
  Author-email: dev@landing.ai
@@ -81,15 +81,15 @@ export OPENAI_API_KEY="your-api-key"
  ```

  ### Vision Agent
- There are two agents that you can use. Vision Agent is a conversational agent that has
+ There are two agents that you can use. `VisionAgent` is a conversational agent that has
  access to tools that allow it to write an navigate python code and file systems. It can
- converse with the user in natural language. VisionAgentCoder is an agent that can write
- code for vision tasks, such as counting people in an image. However, it cannot converse
- and can only respond with code. VisionAgent can call VisionAgentCoder to write vision
- code.
+ converse with the user in natural language. `VisionAgentCoder` is an agent specifically
+ for writing code for vision tasks, such as counting people in an image. However, it
+ cannot chat with you and can only respond with code. `VisionAgent` can call
+ `VisionAgentCoder` to write vision code.

  #### Basic Usage
- To run the streamlit app locally to chat with Vision Agent, you can run the following
+ To run the streamlit app locally to chat with `VisionAgent`, you can run the following
  command:

  ```bash
@@ -186,7 +186,7 @@ the code and having it update. You just need to add the code as a response from
  assistant:

  ```python
- agent = va.agent.VisionAgent(verbosity=2)
+ agent = va.agent.VisionAgentCoder(verbosity=2)
  conv = [
  {
  "role": "user",
@@ -252,6 +252,10 @@ function. Make sure the documentation is in the same format above with descripti
  `Parameters:`, `Returns:`, and `Example\n-------`. You can find an example use case
  [here](examples/custom_tools/) as this is what the agent uses to pick and use the tool.

+ Can't find the tool you need and want add it to `VisionAgent`? Check out our
+ [vision-agent-tools](https://github.com/landing-ai/vision-agent-tools) repository where
+ we add the source code for all the tools used in `VisionAgent`.
+
  ## Additional Backends
  ### Ollama
  We also provide a `VisionAgentCoder` that uses Ollama. To get started you must download
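
For the docstring format the README refers to (description, `Parameters:`, `Returns:`, and `Example\n-------`), a sketch of what a custom tool might look like; the function itself is hypothetical and simply mirrors the format of the built-in tools shown earlier in this diff:

```python
import numpy as np


def count_blue_pixels(image: np.ndarray) -> int:
    """'count_blue_pixels' is a hypothetical custom tool that counts the pixels
    whose blue channel dominates the red and green channels.

    Parameters:
        image (np.ndarray): The image in RGB order.

    Returns:
        int: The number of blue-dominant pixels.

    Example
    -------
        >>> count_blue_pixels(image)
        1024
    """
    blue = image[:, :, 2]
    return int(np.sum((blue > image[:, :, 0]) & (blue > image[:, :, 1])))
```
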
@@ -1,33 +1,33 @@
  vision_agent/__init__.py,sha256=EAb4-f9iyuEYkBrX4ag1syM8Syx8118_t0R6_C34M9w,57
  vision_agent/agent/__init__.py,sha256=FRwiux1FGvGccetyUCtY46KP01fQteqorm-JtFepovI,176
- vision_agent/agent/agent.py,sha256=Bt8yhjCFXuRdZaHxKEesG40V09nWRt45sZluri1R3AA,575
+ vision_agent/agent/agent.py,sha256=2cjIOxEuSJrqbfPXYoV0qER5ihXsPFCoEFJa4jpqan0,597
  vision_agent/agent/agent_utils.py,sha256=22LiPhkJlS5mVeo2dIi259pc2NgA7PGHRpcbnrtKo78,1930
- vision_agent/agent/vision_agent.py,sha256=5rgO-pScVOS3t4sWnLBnGYYkGftGgF4U0FpZzFVrDAY,8447
- vision_agent/agent/vision_agent_coder.py,sha256=qRSv_krY6-uHJC8exo3Nw0dPJ81jSzhKw2WTCHw1XVE,33733
+ vision_agent/agent/vision_agent.py,sha256=IEyXT_JPCuWmBHdEnM1Wrsj7hmCe5pKLf0gnZFJTddI,11046
+ vision_agent/agent/vision_agent_coder.py,sha256=DOTmDdGPxcI06Jp6yx4ekRMP0vhiVaK9B9Dl8UyJHeo,34396
  vision_agent/agent/vision_agent_coder_prompts.py,sha256=xIya1txRZM8qoQHAWTEkEFCL8L3iZD7QD09t3ZtdxSE,11305
- vision_agent/agent/vision_agent_prompts.py,sha256=ydUU_Wvw-jqdL_vObSUr-VCQvjSwA5Fd74TbbhUzyxk,6112
+ vision_agent/agent/vision_agent_prompts.py,sha256=0GliXFtBf32aPu2ClU63FI5ii5CTxWYsvrsmnnDp-gs,7134
  vision_agent/clients/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  vision_agent/clients/http.py,sha256=k883i6M_4nl7zwwHSI-yP5sAgQZIDPM1nrKD6YFJ3Xs,2009
- vision_agent/clients/landing_public_api.py,sha256=6L15zh5lP5JHCpGnYpHMREgrrKiJin_OYdf2vT9HHZQ,1507
+ vision_agent/clients/landing_public_api.py,sha256=rGtACkr8o5egDuMHQ5MBO4NuvsgPTp9Ew3rbq4R-vs0,1507
  vision_agent/fonts/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  vision_agent/fonts/default_font_ch_en.ttf,sha256=1YM0Z3XqLDjSNbF7ihQFSAIUdjF9m1rtHiNC_6QosTE,1594400
  vision_agent/lmm/__init__.py,sha256=YuUZRsMHdn8cMOv6iBU8yUqlIOLrbZQqZl9KPnofsHQ,103
- vision_agent/lmm/lmm.py,sha256=cuXtfFb7kJwVTyHTeK_t1bYItPiNjmDI2gF8vJs4gsM,20231
- vision_agent/lmm/types.py,sha256=8TSRoTbXyCKVJiH-wHXI2OiGOMSkYv1vLGYeAXtNpOQ,153
- vision_agent/tools/__init__.py,sha256=Y6Y7McmdC8cm6UsJgExBLEPi4StBkqfY4y8_Mp7LlWU,2190
- vision_agent/tools/meta_tools.py,sha256=q6h7hZarZrsWRloVE6PbTZwW8J2N1uUM9Ac-XxsT6hk,13365
+ vision_agent/lmm/lmm.py,sha256=AYrZNdhghG293wd3aKZ1jK1lUm2NLWwALktbM4wNais,20862
+ vision_agent/lmm/types.py,sha256=ZEXR_ptBL0ZwDMTDYkgxUCmSZFmBYPQd2jreNzr_8UY,221
+ vision_agent/tools/__init__.py,sha256=i7JOLxRaLdcY7-vCNOGAeOFMBfiAUIwWhnT32FO97VE,2201
+ vision_agent/tools/meta_tools.py,sha256=Vu9WnKicGhafx9dPzDbQjQdcIzRCYYFPF68o79hDP-8,14616
  vision_agent/tools/prompts.py,sha256=V1z4YJLXZuUl_iZ5rY0M5hHc_2tmMEUKr0WocXKGt4E,1430
  vision_agent/tools/tool_utils.py,sha256=qMsb9d8QtpXGgF9rpPO2dA390BewKdYO68oWKDu-TGg,6504
- vision_agent/tools/tools.py,sha256=JscejDn05jpYW6psPkRDesegPtZJshNWCncGFPOpI7c,58626
- vision_agent/tools/tools_types.py,sha256=z6_XtUhWgh201yM7Z0CYtiLBEGdHPc_QUydMDHZ84EA,2216
+ vision_agent/tools/tools.py,sha256=kbbMToAaHxl42dDEvyz9Mvtpqts0l0hGoC5YQQyozr8,59953
+ vision_agent/tools/tools_types.py,sha256=iLWSirheC87fKQolIhx_O4Jk8Lv7DRiLuE8PJqLGiVQ,2216
  vision_agent/utils/__init__.py,sha256=pWk0ktvR4aUEhuEIzSLM9kSgW4WDVqptdvOTeGLkJ6M,230
  vision_agent/utils/exceptions.py,sha256=booSPSuoULF7OXRr_YbC4dtKt6gM_HyiFQHBuaW86C4,2052
- vision_agent/utils/execute.py,sha256=1_pyu16WRlFD81W8Uy_Sv7_jD_qkrkxHdUNeFstBzaA,25082
- vision_agent/utils/image_utils.py,sha256=c1LrmaHD331za8DbA1myJpgUmWoDzePaOK6-dsdpZQo,9847
+ vision_agent/utils/execute.py,sha256=Ap8Yx80spQq5f2QtKGx1MK03BR45mJKhlp1kfh-rIao,26751
+ vision_agent/utils/image_utils.py,sha256=eNghu_2L8624jEXy8ZZS9OX46Mv0DT9bcvLForujwTs,9848
  vision_agent/utils/sim.py,sha256=ebE9Cs00pVEDI1HMjAzUBk88tQQmc2U-yAzIDinnekU,5572
  vision_agent/utils/type_defs.py,sha256=BE12s3JNQy36QvauXHjwyeffVh5enfcvd4vTzSwvEZI,1384
  vision_agent/utils/video.py,sha256=rNmU9KEIkZB5-EztZNlUiKYN0mm_55A_2VGUM0QpqLA,8779
- vision_agent-0.2.117.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
- vision_agent-0.2.117.dist-info/METADATA,sha256=GxjyDhGf-9M3VAM5T2A1DHuR4A4KbceN4q-wGDw8Vhw,11997
- vision_agent-0.2.117.dist-info/WHEEL,sha256=7Z8_27uaHI_UZAc4Uox4PpBhQ9Y5_modZXWMxtUi4NU,88
- vision_agent-0.2.117.dist-info/RECORD,,
+ vision_agent-0.2.119.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
+ vision_agent-0.2.119.dist-info/METADATA,sha256=ag8Cf800dZJtJqJtwEcf4gqf7Qjf-K1JMoeisDI7RWQ,12255
+ vision_agent-0.2.119.dist-info/WHEEL,sha256=7Z8_27uaHI_UZAc4Uox4PpBhQ9Y5_modZXWMxtUi4NU,88
+ vision_agent-0.2.119.dist-info/RECORD,,