vision-agent 0.2.117__py3-none-any.whl → 0.2.119__py3-none-any.whl

Sign up to get free protection for your applications and to get access to all the features.
@@ -1,7 +1,9 @@
1
1
  import io
2
2
  import json
3
3
  import logging
4
+ import os
4
5
  import tempfile
6
+ import urllib.request
5
7
  from importlib import resources
6
8
  from pathlib import Path
7
9
  from typing import Any, Dict, List, Optional, Tuple, Union, cast
@@ -760,10 +762,10 @@ def florence2_image_caption(image: np.ndarray, detail_caption: bool = True) -> s
760
762
  return answer[task] # type: ignore
761
763
 
762
764
 
763
- def florence2_object_detection(prompt: str, image: np.ndarray) -> List[Dict[str, Any]]:
764
- """'florencev2_object_detection' is a tool that can detect and count multiple
765
- objects given a text prompt such as category names or referring expressions. You
766
- can optionally separate the categories in the text with commas. It returns a list
765
+ def florence2_phrase_grounding(prompt: str, image: np.ndarray) -> List[Dict[str, Any]]:
766
+ """'florence2_phrase_grounding' is a tool that can detect multiple
767
+ objects given a text prompt which can be object names or caption. You
768
+ can optionally separate the object names in the text with commas. It returns a list
767
769
  of bounding boxes with normalized coordinates, label names and associated
768
770
  probability scores of 1.0.
769
771
 
@@ -780,7 +782,7 @@ def florence2_object_detection(prompt: str, image: np.ndarray) -> List[Dict[str,
780
782
 
781
783
  Example
782
784
  -------
783
- >>> florence2_object_detection('person looking at a coyote', image)
785
+ >>> florence2_phrase_grounding('person looking at a coyote', image)
784
786
  [
785
787
  {'score': 1.0, 'label': 'person', 'bbox': [0.1, 0.11, 0.35, 0.4]},
786
788
  {'score': 1.0, 'label': 'coyote', 'bbox': [0.34, 0.21, 0.85, 0.5},
@@ -792,7 +794,7 @@ def florence2_object_detection(prompt: str, image: np.ndarray) -> List[Dict[str,
792
794
  "image": image_b64,
793
795
  "task": "<CAPTION_TO_PHRASE_GROUNDING>",
794
796
  "prompt": prompt,
795
- "function_name": "florence2_object_detection",
797
+ "function_name": "florence2_phrase_grounding",
796
798
  }
797
799
 
798
800
  detections = send_inference_request(data, "florence2", v2=True)
@@ -1220,6 +1222,13 @@ def extract_frames(
1220
1222
  video_file_path = video.download(output_path=temp_dir)
1221
1223
 
1222
1224
  return extract_frames_from_video(video_file_path, fps)
1225
+ elif str(video_uri).startswith(("http", "https")):
1226
+ _, image_suffix = os.path.splitext(video_uri)
1227
+ with tempfile.NamedTemporaryFile(delete=False, suffix=image_suffix) as tmp_file:
1228
+ # Download the video and save it to the temporary file
1229
+ with urllib.request.urlopen(str(video_uri)) as response:
1230
+ tmp_file.write(response.read())
1231
+ return extract_frames_from_video(tmp_file.name, fps)
1223
1232
 
1224
1233
  return extract_frames_from_video(str(video_uri), fps)
1225
1234
 
@@ -1250,10 +1259,10 @@ def save_json(data: Any, file_path: str) -> None:
1250
1259
 
1251
1260
 
1252
1261
  def load_image(image_path: str) -> np.ndarray:
1253
- """'load_image' is a utility function that loads an image from the given file path string.
1262
+ """'load_image' is a utility function that loads an image from the given file path string or an URL.
1254
1263
 
1255
1264
  Parameters:
1256
- image_path (str): The path to the image.
1265
+ image_path (str): The path or URL to the image.
1257
1266
 
1258
1267
  Returns:
1259
1268
  np.ndarray: The image as a NumPy array.
@@ -1265,6 +1274,13 @@ def load_image(image_path: str) -> np.ndarray:
1265
1274
  # NOTE: sometimes the generated code pass in a NumPy array
1266
1275
  if isinstance(image_path, np.ndarray):
1267
1276
  return image_path
1277
+ if image_path.startswith(("http", "https")):
1278
+ _, image_suffix = os.path.splitext(image_path)
1279
+ with tempfile.NamedTemporaryFile(delete=False, suffix=image_suffix) as tmp_file:
1280
+ # Download the image and save it to the temporary file
1281
+ with urllib.request.urlopen(image_path) as response:
1282
+ tmp_file.write(response.read())
1283
+ image_path = tmp_file.name
1268
1284
  image = Image.open(image_path).convert("RGB")
1269
1285
  return np.array(image)
1270
1286
 
@@ -1316,7 +1332,7 @@ def save_video(
1316
1332
  video.write_videofile(f.name, codec="libx264")
1317
1333
  f.close()
1318
1334
  _save_video_to_result(f.name)
1319
- return f.name
1335
+ return f.name
1320
1336
 
1321
1337
 
1322
1338
  def _save_video_to_result(video_uri: str) -> None:
@@ -1418,6 +1434,7 @@ def overlay_segmentation_masks(
1418
1434
  medias: Union[np.ndarray, List[np.ndarray]],
1419
1435
  masks: Union[List[Dict[str, Any]], List[List[Dict[str, Any]]]],
1420
1436
  draw_label: bool = True,
1437
+ secondary_label_key: str = "tracking_label",
1421
1438
  ) -> Union[np.ndarray, List[np.ndarray]]:
1422
1439
  """'overlay_segmentation_masks' is a utility function that displays segmentation
1423
1440
  masks.
@@ -1426,7 +1443,10 @@ def overlay_segmentation_masks(
1426
1443
  medias (Union[np.ndarray, List[np.ndarray]]): The image or frames to display
1427
1444
  the masks on.
1428
1445
  masks (Union[List[Dict[str, Any]], List[List[Dict[str, Any]]]]): A list of
1429
- dictionaries containing the masks.
1446
+ dictionaries containing the masks, labels and scores.
1447
+ draw_label (bool, optional): If True, the labels will be displayed on the image.
1448
+ secondary_label_key (str, optional): The key to use for the secondary
1449
+ tracking label which is needed in videos to display tracking information.
1430
1450
 
1431
1451
  Returns:
1432
1452
  np.ndarray: The image with the masks displayed.
@@ -1471,6 +1491,7 @@ def overlay_segmentation_masks(
1471
1491
  for elt in masks_int[i]:
1472
1492
  mask = elt["mask"]
1473
1493
  label = elt["label"]
1494
+ tracking_lbl = elt.get(secondary_label_key, None)
1474
1495
  np_mask = np.zeros((pil_image.size[1], pil_image.size[0], 4))
1475
1496
  np_mask[mask > 0, :] = color[label] + (255 * 0.5,)
1476
1497
  mask_img = Image.fromarray(np_mask.astype(np.uint8))
@@ -1478,16 +1499,17 @@ def overlay_segmentation_masks(
1478
1499
 
1479
1500
  if draw_label:
1480
1501
  draw = ImageDraw.Draw(pil_image)
1481
- text_box = draw.textbbox((0, 0), text=label, font=font)
1502
+ text = tracking_lbl if tracking_lbl else label
1503
+ text_box = draw.textbbox((0, 0), text=text, font=font)
1482
1504
  x, y = _get_text_coords_from_mask(
1483
1505
  mask,
1484
1506
  v_gap=(text_box[3] - text_box[1]) + 10,
1485
1507
  h_gap=(text_box[2] - text_box[0]) // 2,
1486
1508
  )
1487
1509
  if x != 0 and y != 0:
1488
- text_box = draw.textbbox((x, y), text=label, font=font)
1510
+ text_box = draw.textbbox((x, y), text=text, font=font)
1489
1511
  draw.rectangle((x, y, text_box[2], text_box[3]), fill=color[label])
1490
- draw.text((x, y), label, fill="black", font=font)
1512
+ draw.text((x, y), text, fill="black", font=font)
1491
1513
  frame_out.append(np.array(pil_image))
1492
1514
  return frame_out[0] if len(frame_out) == 1 else frame_out
1493
1515
 
@@ -1663,7 +1685,7 @@ FUNCTION_TOOLS = [
1663
1685
  florence2_ocr,
1664
1686
  florence2_sam2_image,
1665
1687
  florence2_sam2_video,
1666
- florence2_object_detection,
1688
+ florence2_phrase_grounding,
1667
1689
  ixc25_image_vqa,
1668
1690
  ixc25_video_vqa,
1669
1691
  detr_segmentation,
@@ -1,8 +1,8 @@
1
- from uuid import UUID
2
1
  from enum import Enum
3
- from typing import List, Tuple, Optional
2
+ from typing import List, Optional, Tuple
3
+ from uuid import UUID
4
4
 
5
- from pydantic import BaseModel, ConfigDict, Field, field_serializer, SerializationInfo
5
+ from pydantic import BaseModel, ConfigDict, Field, SerializationInfo, field_serializer
6
6
 
7
7
 
8
8
  class BboxInput(BaseModel):
@@ -5,7 +5,6 @@ import os
5
5
  import platform
6
6
  import re
7
7
  import sys
8
- import tempfile
9
8
  import traceback
10
9
  import warnings
11
10
  from enum import Enum
@@ -40,6 +39,7 @@ from vision_agent.utils.exceptions import (
40
39
  load_dotenv()
41
40
  _LOGGER = logging.getLogger(__name__)
42
41
  _SESSION_TIMEOUT = 600 # 10 minutes
42
+ WORKSPACE = Path(os.getenv("WORKSPACE", ""))
43
43
 
44
44
 
45
45
  class MimeType(str, Enum):
@@ -384,8 +384,15 @@ class Execution(BaseModel):
384
384
  class CodeInterpreter(abc.ABC):
385
385
  """Code interpreter interface."""
386
386
 
387
- def __init__(self, timeout: int, *args: Any, **kwargs: Any) -> None:
387
+ def __init__(
388
+ self,
389
+ timeout: int,
390
+ remote_path: Optional[Union[str, Path]] = None,
391
+ *args: Any,
392
+ **kwargs: Any,
393
+ ) -> None:
388
394
  self.timeout = timeout
395
+ self.remote_path = Path(remote_path if remote_path is not None else WORKSPACE)
389
396
 
390
397
  def __enter__(self) -> Self:
391
398
  return self
@@ -406,17 +413,21 @@ class CodeInterpreter(abc.ABC):
406
413
  self.restart_kernel()
407
414
  return self.exec_cell(code)
408
415
 
409
- def upload_file(self, file: Union[str, Path]) -> str:
416
+ def upload_file(self, file: Union[str, Path]) -> Path:
410
417
  # Default behavior is a no-op (for local code interpreter)
411
- return str(file)
418
+ return Path(file)
412
419
 
413
- def download_file(self, file_path: str) -> Path:
420
+ def download_file(
421
+ self, remote_file_path: Union[str, Path], local_file_path: Union[str, Path]
422
+ ) -> Path:
414
423
  # Default behavior is a no-op (for local code interpreter)
415
- return Path(file_path)
424
+ return Path(local_file_path)
416
425
 
417
426
 
418
427
  class E2BCodeInterpreter(CodeInterpreter):
419
- def __init__(self, *args: Any, **kwargs: Any) -> None:
428
+ def __init__(
429
+ self, remote_path: Optional[Union[str, Path]] = None, *args: Any, **kwargs: Any
430
+ ) -> None:
420
431
  super().__init__(*args, **kwargs)
421
432
  assert os.getenv("E2B_API_KEY"), "E2B_API_KEY environment variable must be set"
422
433
  try:
@@ -443,6 +454,9 @@ print(f"Vision Agent version: {va_version}")"""
443
454
  _LOGGER.info(
444
455
  f"E2BCodeInterpreter (sandbox id: {self.interpreter.sandbox_id}) initialized:\n{sys_versions}"
445
456
  )
457
+ self.remote_path = Path(
458
+ remote_path if remote_path is not None else "/home/user"
459
+ )
446
460
 
447
461
  def close(self, *args: Any, **kwargs: Any) -> None:
448
462
  try:
@@ -516,19 +530,22 @@ print(f"Vision Agent version: {va_version}")"""
516
530
  before_sleep=tenacity.before_sleep_log(_LOGGER, logging.INFO),
517
531
  after=tenacity.after_log(_LOGGER, logging.INFO),
518
532
  )
519
- def upload_file(self, file: Union[str, Path]) -> str:
533
+ def upload_file(self, file: Union[str, Path]) -> Path:
520
534
  file_name = Path(file).name
521
- remote_path = f"/home/user/{file_name}"
522
535
  with open(file, "rb") as f:
523
- self.interpreter.files.write(path=remote_path, data=f)
524
- _LOGGER.info(f"File ({file}) is uploaded to: {remote_path}")
525
- return remote_path
526
-
527
- def download_file(self, file_path: str) -> Path:
528
- with tempfile.NamedTemporaryFile(mode="w+b", delete=False) as file:
529
- file.write(self.interpreter.files.read(path=file_path, format="bytes"))
530
- _LOGGER.info(f"File ({file_path}) is downloaded to: {file.name}")
531
- return Path(file.name)
536
+ self.interpreter.files.write(path=str(self.remote_path / file_name), data=f)
537
+ _LOGGER.info(f"File ({file}) is uploaded to: {str(self.remote_path)}")
538
+ return self.remote_path / file_name
539
+
540
+ def download_file(
541
+ self, remote_file_path: Union[str, Path], local_file_path: Union[str, Path]
542
+ ) -> Path:
543
+ with open(local_file_path, "w+b") as f:
544
+ f.write(
545
+ self.interpreter.files.read(path=str(remote_file_path), format="bytes")
546
+ )
547
+ _LOGGER.info(f"File ({remote_file_path}) is downloaded to: {local_file_path}")
548
+ return Path(local_file_path)
532
549
 
533
550
  @staticmethod
534
551
  def _new_e2b_interpreter_impl(*args, **kwargs) -> E2BCodeInterpreterImpl: # type: ignore
@@ -540,7 +557,11 @@ print(f"Vision Agent version: {va_version}")"""
540
557
 
541
558
 
542
559
  class LocalCodeInterpreter(CodeInterpreter):
543
- def __init__(self, timeout: int = _SESSION_TIMEOUT) -> None:
560
+ def __init__(
561
+ self,
562
+ timeout: int = _SESSION_TIMEOUT,
563
+ remote_path: Optional[Union[str, Path]] = None,
564
+ ) -> None:
544
565
  super().__init__(timeout=timeout)
545
566
  self.nb = nbformat.v4.new_notebook()
546
567
  self.nb_client = NotebookClient(self.nb, timeout=self.timeout)
@@ -554,6 +575,7 @@ Timeout: {self.timeout}"""
554
575
  )
555
576
  sleep(1)
556
577
  self._new_kernel()
578
+ self.remote_path = Path(remote_path if remote_path is not None else WORKSPACE)
557
579
 
558
580
  def _new_kernel(self) -> None:
559
581
  if self.nb_client.kc is None or not run_sync(self.nb_client.kc.is_alive)(): # type: ignore
@@ -607,6 +629,25 @@ Timeout: {self.timeout}"""
607
629
  traceback_raw = traceback.format_exc().splitlines()
608
630
  return Execution.from_exception(e, traceback_raw)
609
631
 
632
+ def upload_file(self, file_path: Union[str, Path]) -> Path:
633
+ with open(file_path, "rb") as f:
634
+ contents = f.read()
635
+ with open(self.remote_path / Path(file_path).name, "wb") as f:
636
+ f.write(contents)
637
+ _LOGGER.info(f"File ({file_path}) is uploaded to: {str(self.remote_path)}")
638
+
639
+ return Path(self.remote_path / file_path)
640
+
641
+ def download_file(
642
+ self, remote_file_path: Union[str, Path], local_file_path: Union[str, Path]
643
+ ) -> Path:
644
+ with open(self.remote_path / remote_file_path, "rb") as f:
645
+ contents = f.read()
646
+ with open(local_file_path, "wb") as f:
647
+ f.write(contents)
648
+ _LOGGER.info(f"File ({remote_file_path}) is downloaded to: {local_file_path}")
649
+ return Path(local_file_path)
650
+
610
651
 
611
652
  class CodeInterpreterFactory:
612
653
  """Factory class for creating code interpreters.
@@ -630,13 +671,19 @@ class CodeInterpreterFactory:
630
671
  return instance
631
672
 
632
673
  @staticmethod
633
- def new_instance(code_sandbox_runtime: Optional[str] = None) -> CodeInterpreter:
674
+ def new_instance(
675
+ code_sandbox_runtime: Optional[str] = None, remote_path: Optional[str] = None
676
+ ) -> CodeInterpreter:
634
677
  if not code_sandbox_runtime:
635
678
  code_sandbox_runtime = os.getenv("CODE_SANDBOX_RUNTIME", "local")
636
679
  if code_sandbox_runtime == "e2b":
637
- instance: CodeInterpreter = E2BCodeInterpreter(timeout=_SESSION_TIMEOUT)
680
+ instance: CodeInterpreter = E2BCodeInterpreter(
681
+ timeout=_SESSION_TIMEOUT, remote_path=remote_path
682
+ )
638
683
  elif code_sandbox_runtime == "local":
639
- instance = LocalCodeInterpreter(timeout=_SESSION_TIMEOUT)
684
+ instance = LocalCodeInterpreter(
685
+ timeout=_SESSION_TIMEOUT, remote_path=remote_path
686
+ )
640
687
  else:
641
688
  raise ValueError(
642
689
  f"Unsupported code sandbox runtime: {code_sandbox_runtime}. Supported runtimes: e2b, local"
@@ -70,7 +70,7 @@ def rle_decode_array(rle: Dict[str, List[int]]) -> np.ndarray:
70
70
  r"""Decode a run-length encoded mask. Returns numpy array, 1 - mask, 0 - background.
71
71
 
72
72
  Parameters:
73
- mask: The mask in run-length encoded as an array.
73
+ rle: The run-length encoded mask.
74
74
  """
75
75
  size = rle["size"]
76
76
  counts = rle["counts"]
@@ -100,7 +100,7 @@ def frames_to_bytes(
100
100
  """
101
101
  with tempfile.NamedTemporaryFile(delete=True) as temp_file:
102
102
  clip = ImageSequenceClip(frames, fps=fps)
103
- clip.write_videofile(temp_file.name + f".{file_ext}", fps=fps)
103
+ clip.write_videofile(temp_file.name + f".{file_ext}", fps=fps, codec="libx264")
104
104
  with open(temp_file.name + f".{file_ext}", "rb") as f:
105
105
  buffer_bytes = f.read()
106
106
  return buffer_bytes
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: vision-agent
3
- Version: 0.2.117
3
+ Version: 0.2.119
4
4
  Summary: Toolset for Vision Agent
5
5
  Author: Landing AI
6
6
  Author-email: dev@landing.ai
@@ -81,15 +81,15 @@ export OPENAI_API_KEY="your-api-key"
81
81
  ```
82
82
 
83
83
  ### Vision Agent
84
- There are two agents that you can use. Vision Agent is a conversational agent that has
84
+ There are two agents that you can use. `VisionAgent` is a conversational agent that has
85
85
  access to tools that allow it to write and navigate Python code and file systems. It can
86
- converse with the user in natural language. VisionAgentCoder is an agent that can write
87
- code for vision tasks, such as counting people in an image. However, it cannot converse
88
- and can only respond with code. VisionAgent can call VisionAgentCoder to write vision
89
- code.
86
+ converse with the user in natural language. `VisionAgentCoder` is an agent specifically
87
+ for writing code for vision tasks, such as counting people in an image. However, it
88
+ cannot chat with you and can only respond with code. `VisionAgent` can call
89
+ `VisionAgentCoder` to write vision code.
90
90
 
91
91
  #### Basic Usage
92
- To run the streamlit app locally to chat with Vision Agent, you can run the following
92
+ To run the streamlit app locally to chat with `VisionAgent`, you can run the following
93
93
  command:
94
94
 
95
95
  ```bash
@@ -186,7 +186,7 @@ the code and having it update. You just need to add the code as a response from
186
186
  assistant:
187
187
 
188
188
  ```python
189
- agent = va.agent.VisionAgent(verbosity=2)
189
+ agent = va.agent.VisionAgentCoder(verbosity=2)
190
190
  conv = [
191
191
  {
192
192
  "role": "user",
@@ -252,6 +252,10 @@ function. Make sure the documentation is in the same format above with descripti
252
252
  `Parameters:`, `Returns:`, and `Example\n-------`. You can find an example use case
253
253
  [here](examples/custom_tools/) as this is what the agent uses to pick and use the tool.
254
254
 
255
+ Can't find the tool you need and want to add it to `VisionAgent`? Check out our
256
+ [vision-agent-tools](https://github.com/landing-ai/vision-agent-tools) repository where
257
+ we add the source code for all the tools used in `VisionAgent`.
258
+
255
259
  ## Additional Backends
256
260
  ### Ollama
257
261
  We also provide a `VisionAgentCoder` that uses Ollama. To get started you must download
@@ -1,33 +1,33 @@
1
1
  vision_agent/__init__.py,sha256=EAb4-f9iyuEYkBrX4ag1syM8Syx8118_t0R6_C34M9w,57
2
2
  vision_agent/agent/__init__.py,sha256=FRwiux1FGvGccetyUCtY46KP01fQteqorm-JtFepovI,176
3
- vision_agent/agent/agent.py,sha256=Bt8yhjCFXuRdZaHxKEesG40V09nWRt45sZluri1R3AA,575
3
+ vision_agent/agent/agent.py,sha256=2cjIOxEuSJrqbfPXYoV0qER5ihXsPFCoEFJa4jpqan0,597
4
4
  vision_agent/agent/agent_utils.py,sha256=22LiPhkJlS5mVeo2dIi259pc2NgA7PGHRpcbnrtKo78,1930
5
- vision_agent/agent/vision_agent.py,sha256=5rgO-pScVOS3t4sWnLBnGYYkGftGgF4U0FpZzFVrDAY,8447
6
- vision_agent/agent/vision_agent_coder.py,sha256=qRSv_krY6-uHJC8exo3Nw0dPJ81jSzhKw2WTCHw1XVE,33733
5
+ vision_agent/agent/vision_agent.py,sha256=IEyXT_JPCuWmBHdEnM1Wrsj7hmCe5pKLf0gnZFJTddI,11046
6
+ vision_agent/agent/vision_agent_coder.py,sha256=DOTmDdGPxcI06Jp6yx4ekRMP0vhiVaK9B9Dl8UyJHeo,34396
7
7
  vision_agent/agent/vision_agent_coder_prompts.py,sha256=xIya1txRZM8qoQHAWTEkEFCL8L3iZD7QD09t3ZtdxSE,11305
8
- vision_agent/agent/vision_agent_prompts.py,sha256=ydUU_Wvw-jqdL_vObSUr-VCQvjSwA5Fd74TbbhUzyxk,6112
8
+ vision_agent/agent/vision_agent_prompts.py,sha256=0GliXFtBf32aPu2ClU63FI5ii5CTxWYsvrsmnnDp-gs,7134
9
9
  vision_agent/clients/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
10
10
  vision_agent/clients/http.py,sha256=k883i6M_4nl7zwwHSI-yP5sAgQZIDPM1nrKD6YFJ3Xs,2009
11
- vision_agent/clients/landing_public_api.py,sha256=6L15zh5lP5JHCpGnYpHMREgrrKiJin_OYdf2vT9HHZQ,1507
11
+ vision_agent/clients/landing_public_api.py,sha256=rGtACkr8o5egDuMHQ5MBO4NuvsgPTp9Ew3rbq4R-vs0,1507
12
12
  vision_agent/fonts/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
13
13
  vision_agent/fonts/default_font_ch_en.ttf,sha256=1YM0Z3XqLDjSNbF7ihQFSAIUdjF9m1rtHiNC_6QosTE,1594400
14
14
  vision_agent/lmm/__init__.py,sha256=YuUZRsMHdn8cMOv6iBU8yUqlIOLrbZQqZl9KPnofsHQ,103
15
- vision_agent/lmm/lmm.py,sha256=cuXtfFb7kJwVTyHTeK_t1bYItPiNjmDI2gF8vJs4gsM,20231
16
- vision_agent/lmm/types.py,sha256=8TSRoTbXyCKVJiH-wHXI2OiGOMSkYv1vLGYeAXtNpOQ,153
17
- vision_agent/tools/__init__.py,sha256=Y6Y7McmdC8cm6UsJgExBLEPi4StBkqfY4y8_Mp7LlWU,2190
18
- vision_agent/tools/meta_tools.py,sha256=q6h7hZarZrsWRloVE6PbTZwW8J2N1uUM9Ac-XxsT6hk,13365
15
+ vision_agent/lmm/lmm.py,sha256=AYrZNdhghG293wd3aKZ1jK1lUm2NLWwALktbM4wNais,20862
16
+ vision_agent/lmm/types.py,sha256=ZEXR_ptBL0ZwDMTDYkgxUCmSZFmBYPQd2jreNzr_8UY,221
17
+ vision_agent/tools/__init__.py,sha256=i7JOLxRaLdcY7-vCNOGAeOFMBfiAUIwWhnT32FO97VE,2201
18
+ vision_agent/tools/meta_tools.py,sha256=Vu9WnKicGhafx9dPzDbQjQdcIzRCYYFPF68o79hDP-8,14616
19
19
  vision_agent/tools/prompts.py,sha256=V1z4YJLXZuUl_iZ5rY0M5hHc_2tmMEUKr0WocXKGt4E,1430
20
20
  vision_agent/tools/tool_utils.py,sha256=qMsb9d8QtpXGgF9rpPO2dA390BewKdYO68oWKDu-TGg,6504
21
- vision_agent/tools/tools.py,sha256=JscejDn05jpYW6psPkRDesegPtZJshNWCncGFPOpI7c,58626
22
- vision_agent/tools/tools_types.py,sha256=z6_XtUhWgh201yM7Z0CYtiLBEGdHPc_QUydMDHZ84EA,2216
21
+ vision_agent/tools/tools.py,sha256=kbbMToAaHxl42dDEvyz9Mvtpqts0l0hGoC5YQQyozr8,59953
22
+ vision_agent/tools/tools_types.py,sha256=iLWSirheC87fKQolIhx_O4Jk8Lv7DRiLuE8PJqLGiVQ,2216
23
23
  vision_agent/utils/__init__.py,sha256=pWk0ktvR4aUEhuEIzSLM9kSgW4WDVqptdvOTeGLkJ6M,230
24
24
  vision_agent/utils/exceptions.py,sha256=booSPSuoULF7OXRr_YbC4dtKt6gM_HyiFQHBuaW86C4,2052
25
- vision_agent/utils/execute.py,sha256=1_pyu16WRlFD81W8Uy_Sv7_jD_qkrkxHdUNeFstBzaA,25082
26
- vision_agent/utils/image_utils.py,sha256=c1LrmaHD331za8DbA1myJpgUmWoDzePaOK6-dsdpZQo,9847
25
+ vision_agent/utils/execute.py,sha256=Ap8Yx80spQq5f2QtKGx1MK03BR45mJKhlp1kfh-rIao,26751
26
+ vision_agent/utils/image_utils.py,sha256=eNghu_2L8624jEXy8ZZS9OX46Mv0DT9bcvLForujwTs,9848
27
27
  vision_agent/utils/sim.py,sha256=ebE9Cs00pVEDI1HMjAzUBk88tQQmc2U-yAzIDinnekU,5572
28
28
  vision_agent/utils/type_defs.py,sha256=BE12s3JNQy36QvauXHjwyeffVh5enfcvd4vTzSwvEZI,1384
29
29
  vision_agent/utils/video.py,sha256=rNmU9KEIkZB5-EztZNlUiKYN0mm_55A_2VGUM0QpqLA,8779
30
- vision_agent-0.2.117.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
31
- vision_agent-0.2.117.dist-info/METADATA,sha256=GxjyDhGf-9M3VAM5T2A1DHuR4A4KbceN4q-wGDw8Vhw,11997
32
- vision_agent-0.2.117.dist-info/WHEEL,sha256=7Z8_27uaHI_UZAc4Uox4PpBhQ9Y5_modZXWMxtUi4NU,88
33
- vision_agent-0.2.117.dist-info/RECORD,,
30
+ vision_agent-0.2.119.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
31
+ vision_agent-0.2.119.dist-info/METADATA,sha256=ag8Cf800dZJtJqJtwEcf4gqf7Qjf-K1JMoeisDI7RWQ,12255
32
+ vision_agent-0.2.119.dist-info/WHEEL,sha256=7Z8_27uaHI_UZAc4Uox4PpBhQ9Y5_modZXWMxtUi4NU,88
33
+ vision_agent-0.2.119.dist-info/RECORD,,