vision-agent 0.2.123__py3-none-any.whl → 0.2.124__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
vision_agent/lmm/lmm.py CHANGED
@@ -1,77 +1,36 @@
- import base64
- import io
  import json
  import logging
  import os
  from abc import ABC, abstractmethod
  from pathlib import Path
- from typing import Any, Callable, Dict, Iterator, List, Optional, Union, cast
+ from typing import Any, Dict, Iterator, List, Optional, Sequence, Union, cast

  import anthropic
  import requests
  from anthropic.types import ImageBlockParam, MessageParam, TextBlockParam
  from openai import AzureOpenAI, OpenAI
- from PIL import Image

- import vision_agent.tools as T
- from vision_agent.tools.prompts import CHOOSE_PARAMS, SYSTEM_PROMPT
+ from vision_agent.utils.image_utils import encode_media

  from .types import Message

  _LOGGER = logging.getLogger(__name__)


- def encode_image_bytes(image: bytes) -> str:
-     image = Image.open(io.BytesIO(image)).convert("RGB")  # type: ignore
-     buffer = io.BytesIO()
-     image.save(buffer, format="PNG")  # type: ignore
-     encoded_image = base64.b64encode(buffer.getvalue()).decode("utf-8")
-     return encoded_image
-
-
- def encode_media(media: Union[str, Path]) -> str:
-     if type(media) is str and media.startswith(("http", "https")):
-         # for mp4 video url, we assume there is a same url but ends with png
-         # vision-agent-ui will upload this png when uploading the video
-         if media.endswith((".mp4", "mov")) and media.find("vision-agent-dev.s3") != -1:
-             return media[:-4] + ".png"
-         return media
-     extension = "png"
-     extension = Path(media).suffix
-     if extension.lower() not in {
-         ".jpg",
-         ".jpeg",
-         ".png",
-         ".webp",
-         ".bmp",
-         ".mp4",
-         ".mov",
-     }:
-         raise ValueError(f"Unsupported image extension: {extension}")
-
-     image_bytes = b""
-     if extension.lower() in {".mp4", ".mov"}:
-         frames = T.extract_frames(media)
-         image = frames[len(frames) // 2]
-         buffer = io.BytesIO()
-         Image.fromarray(image[0]).convert("RGB").save(buffer, format="PNG")
-         image_bytes = buffer.getvalue()
-     else:
-         image_bytes = open(media, "rb").read()
-     return encode_image_bytes(image_bytes)
-
-
  class LMM(ABC):
      @abstractmethod
      def generate(
-         self, prompt: str, media: Optional[List[Union[str, Path]]] = None, **kwargs: Any
+         self,
+         prompt: str,
+         media: Optional[Sequence[Union[str, Path]]] = None,
+         **kwargs: Any,
      ) -> Union[str, Iterator[Optional[str]]]:
          pass

      @abstractmethod
      def chat(
          self,
-         chat: List[Message],
+         chat: Sequence[Message],
          **kwargs: Any,
      ) -> Union[str, Iterator[Optional[str]]]:
          pass
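The signatures above move from List[...] to Sequence[...]. typing.List is invariant and tied to one concrete container, so a caller holding a tuple of messages fails type checking, while typing.Sequence accepts any read-only ordered collection. A minimal sketch of the difference, using an illustrative stand-in rather than the package's own Message type:

    from typing import Dict, List, Sequence, Union

    # Simplified stand-in for vision_agent.lmm.types.Message (illustrative only).
    Message = Dict[str, Union[str, List[str]]]

    def chat_with_list(chat: List[Message]) -> int:
        return len(chat)

    def chat_with_seq(chat: Sequence[Message]) -> int:
        return len(chat)

    msgs = ({"role": "user", "content": "Hello!"},)  # a tuple, not a list
    # chat_with_list(msgs)   # rejected by mypy: a tuple is not a List
    print(chat_with_seq(msgs))  # accepted: a tuple satisfies Sequence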
@@ -79,7 +38,7 @@ class LMM(ABC):
      @abstractmethod
      def __call__(
          self,
-         input: Union[str, List[Message]],
+         input: Union[str, Sequence[Message]],
          **kwargs: Any,
      ) -> Union[str, Iterator[Optional[str]]]:
          pass
@@ -111,7 +70,7 @@ class OpenAILMM(LMM):

      def __call__(
          self,
-         input: Union[str, List[Message]],
+         input: Union[str, Sequence[Message]],
          **kwargs: Any,
      ) -> Union[str, Iterator[Optional[str]]]:
          if isinstance(input, str):
@@ -120,13 +79,13 @@ class OpenAILMM(LMM):

      def chat(
          self,
-         chat: List[Message],
+         chat: Sequence[Message],
          **kwargs: Any,
      ) -> Union[str, Iterator[Optional[str]]]:
          """Chat with the LMM model.

          Parameters:
-             chat (List[Dict[str, str]]): A list of dictionaries containing the chat
+             chat (Sequence[Dict[str, str]]): A list of dictionaries containing the chat
                  messages. The messages can be in the format:
                  [{"role": "user", "content": "Hello!"}, ...]
                  or if it contains media, it should be in the format:
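For reference, the two message shapes the docstring names might look like the following literal Python; the "media" key is inferred from how the implementation encodes each entry, and the file path is a hypothetical example:

    # Text-only chat, as shown in the docstring.
    chat_text_only = [
        {"role": "user", "content": "Hello!"},
    ]

    # Chat carrying media, presumably keyed under "media"; path is illustrative.
    chat_with_media = [
        {
            "role": "user",
            "content": "Describe this image.",
            "media": ["path/to/image.png"],
        },
    ]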
@@ -147,6 +106,7 @@ class OpenAILMM(LMM):
                          "url": (
                              encoded_media
                              if encoded_media.startswith(("http", "https"))
+                             or encoded_media.startswith("data:image/")
                              else f"data:image/png;base64,{encoded_media}"
                          ),
                          "detail": "low",
@@ -174,7 +134,7 @@ class OpenAILMM(LMM):
      def generate(
          self,
          prompt: str,
-         media: Optional[List[Union[str, Path]]] = None,
+         media: Optional[Sequence[Union[str, Path]]] = None,
          **kwargs: Any,
      ) -> Union[str, Iterator[Optional[str]]]:
          message: List[Dict[str, Any]] = [
@@ -192,7 +152,12 @@ class OpenAILMM(LMM):
                  {
                      "type": "image_url",
                      "image_url": {
-                         "url": f"data:image/png;base64,{encoded_media}",
+                         "url": (
+                             encoded_media
+                             if encoded_media.startswith(("http", "https"))
+                             or encoded_media.startswith("data:image/")
+                             else f"data:image/png;base64,{encoded_media}"
+                         ),
                          "detail": "low",
                      },
                  },
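Both changed call sites above apply the same routing rule: plain http(s) URLs and already-encoded data URLs pass through untouched, and only raw base64 payloads get wrapped. A sketch of that conditional pulled out as a standalone helper (the helper name is hypothetical; the package keeps the expression inline):

    def resolve_image_url(encoded_media: str) -> str:
        # Pass URLs and already-encoded data URLs through unchanged.
        if encoded_media.startswith(("http", "https")) or encoded_media.startswith(
            "data:image/"
        ):
            return encoded_media
        # Otherwise assume raw base64 PNG data and wrap it as a data URL.
        return f"data:image/png;base64,{encoded_media}"

    assert resolve_image_url("https://example.com/a.png") == "https://example.com/a.png"
    assert resolve_image_url("iVBORw0KGgo=") == "data:image/png;base64,iVBORw0KGgo="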
@@ -214,81 +179,6 @@ class OpenAILMM(LMM):
          else:
              return cast(str, response.choices[0].message.content)

-     def generate_classifier(self, question: str) -> Callable:
-         api_doc = T.get_tool_documentation([T.clip])
-         prompt = CHOOSE_PARAMS.format(api_doc=api_doc, question=question)
-         response = self.client.chat.completions.create(
-             model=self.model_name,
-             messages=[
-                 {"role": "system", "content": SYSTEM_PROMPT},
-                 {"role": "user", "content": prompt},
-             ],
-             response_format={"type": "json_object"},
-         )
-
-         try:
-             params = json.loads(cast(str, response.choices[0].message.content))[
-                 "Parameters"
-             ]
-         except json.JSONDecodeError:
-             _LOGGER.error(
-                 f"Failed to decode response: {response.choices[0].message.content}"
-             )
-             raise ValueError("Failed to decode response")
-
-         return lambda x: T.clip(x, params["prompt"])
-
-     def generate_detector(self, question: str) -> Callable:
-         api_doc = T.get_tool_documentation([T.owl_v2])
-         prompt = CHOOSE_PARAMS.format(api_doc=api_doc, question=question)
-         response = self.client.chat.completions.create(
-             model=self.model_name,
-             messages=[
-                 {"role": "system", "content": SYSTEM_PROMPT},
-                 {"role": "user", "content": prompt},
-             ],
-             response_format={"type": "json_object"},
-         )
-
-         try:
-             params = json.loads(cast(str, response.choices[0].message.content))[
-                 "Parameters"
-             ]
-         except json.JSONDecodeError:
-             _LOGGER.error(
-                 f"Failed to decode response: {response.choices[0].message.content}"
-             )
-             raise ValueError("Failed to decode response")
-
-         return lambda x: T.owl_v2(params["prompt"], x)
-
-     def generate_segmentor(self, question: str) -> Callable:
-         api_doc = T.get_tool_documentation([T.grounding_sam])
-         prompt = CHOOSE_PARAMS.format(api_doc=api_doc, question=question)
-         response = self.client.chat.completions.create(
-             model=self.model_name,
-             messages=[
-                 {"role": "system", "content": SYSTEM_PROMPT},
-                 {"role": "user", "content": prompt},
-             ],
-             response_format={"type": "json_object"},
-         )
-
-         try:
-             params = json.loads(cast(str, response.choices[0].message.content))[
-                 "Parameters"
-             ]
-         except json.JSONDecodeError:
-             _LOGGER.error(
-                 f"Failed to decode response: {response.choices[0].message.content}"
-             )
-             raise ValueError("Failed to decode response")
-
-         return lambda x: T.grounding_sam(params["prompt"], x)
-
-     def generate_image_qa_tool(self, question: str) -> Callable:
-         return lambda x: T.git_vqa_v2(question, x)
-

  class AzureOpenAILMM(OpenAILMM):
      def __init__(
@@ -362,7 +252,7 @@ class OllamaLMM(LMM):

      def __call__(
          self,
-         input: Union[str, List[Message]],
+         input: Union[str, Sequence[Message]],
          **kwargs: Any,
      ) -> Union[str, Iterator[Optional[str]]]:
          if isinstance(input, str):
@@ -371,13 +261,13 @@ class OllamaLMM(LMM):

      def chat(
          self,
-         chat: List[Message],
+         chat: Sequence[Message],
          **kwargs: Any,
      ) -> Union[str, Iterator[Optional[str]]]:
          """Chat with the LMM model.

          Parameters:
-             chat (List[Dict[str, str]]): A list of dictionaries containing the chat
+             chat (Sequence[Dict[str, str]]): A list of dictionaries containing the chat
                  messages. The messages can be in the format:
                  [{"role": "user", "content": "Hello!"}, ...]
                  or if it contains media, it should be in the format:
@@ -429,7 +319,7 @@ class OllamaLMM(LMM):
      def generate(
          self,
          prompt: str,
-         media: Optional[List[Union[str, Path]]] = None,
+         media: Optional[Sequence[Union[str, Path]]] = None,
          **kwargs: Any,
      ) -> Union[str, Iterator[Optional[str]]]:
          url = f"{self.url}/generate"
@@ -493,7 +383,7 @@ class ClaudeSonnetLMM(LMM):

      def __call__(
          self,
-         input: Union[str, List[Dict[str, Any]]],
+         input: Union[str, Sequence[Dict[str, Any]]],
          **kwargs: Any,
      ) -> Union[str, Iterator[Optional[str]]]:
          if isinstance(input, str):
@@ -502,7 +392,7 @@ class ClaudeSonnetLMM(LMM):

      def chat(
          self,
-         chat: List[Dict[str, Any]],
+         chat: Sequence[Dict[str, Any]],
          **kwargs: Any,
      ) -> Union[str, Iterator[Optional[str]]]:
          messages: List[MessageParam] = []
@@ -551,7 +441,7 @@ class ClaudeSonnetLMM(LMM):
      def generate(
          self,
          prompt: str,
-         media: Optional[List[Union[str, Path]]] = None,
+         media: Optional[Sequence[Union[str, Path]]] = None,
          **kwargs: Any,
      ) -> Union[str, Iterator[Optional[str]]]:
          content: List[Union[TextBlockParam, ImageBlockParam]] = [
vision_agent/tools/__init__.py CHANGED
@@ -16,6 +16,8 @@ from .tools import (
      clip,
      closest_box_distance,
      closest_mask_distance,
+     countgd_counting,
+     countgd_example_based_counting,
      depth_anything_v2,
      detr_segmentation,
      dpt_hybrid_midas,
@@ -30,6 +32,8 @@ from .tools import (
      generate_soft_edge_image,
      get_tool_documentation,
      git_vqa_v2,
+     gpt4o_image_vqa,
+     gpt4o_video_vqa,
      grounding_dino,
      grounding_sam,
      ixc25_image_vqa,
@@ -37,13 +41,11 @@ from .tools import (
      load_image,
      loca_visual_prompt_counting,
      loca_zero_shot_counting,
-     countgd_counting,
-     countgd_example_based_counting,
      ocr,
      overlay_bounding_boxes,
+     overlay_counting_results,
      overlay_heat_map,
      overlay_segmentation_masks,
-     overlay_counting_results,
      owl_v2,
      save_image,
      save_json,
vision_agent/tools/tool_utils.py CHANGED
@@ -1,6 +1,6 @@
- import os
  import inspect
  import logging
+ import os
  from typing import Any, Callable, Dict, List, MutableMapping, Optional, Tuple

  import pandas as pd
@@ -10,10 +10,10 @@ from requests import Session
  from requests.adapters import HTTPAdapter
  from urllib3.util.retry import Retry

+ from vision_agent.tools.tools_types import BoundingBoxes
  from vision_agent.utils.exceptions import RemoteToolCallFailed
  from vision_agent.utils.execute import Error, MimeType
  from vision_agent.utils.type_defs import LandingaiAPIKey
- from vision_agent.tools.tools_types import BoundingBoxes

  _LOGGER = logging.getLogger(__name__)
  _LND_API_KEY = os.environ.get("LANDINGAI_API_KEY", LandingaiAPIKey().api_key)
vision_agent/tools/tools.py CHANGED
@@ -13,26 +13,27 @@ import cv2
  import numpy as np
  import requests
  from moviepy.editor import ImageSequenceClip
- from PIL import Image, ImageDraw, ImageFont, ImageEnhance
+ from PIL import Image, ImageDraw, ImageEnhance, ImageFont
  from pillow_heif import register_heif_opener  # type: ignore
  from pytube import YouTube  # type: ignore

  from vision_agent.clients.landing_public_api import LandingPublicAPI
+ from vision_agent.lmm.lmm import OpenAILMM
  from vision_agent.tools.tool_utils import (
+     filter_bboxes_by_threshold,
      get_tool_descriptions,
      get_tool_documentation,
      get_tools_df,
      get_tools_info,
      send_inference_request,
      send_task_inference_request,
-     filter_bboxes_by_threshold,
  )
  from vision_agent.tools.tools_types import (
      FineTuning,
      Florence2FtRequest,
      JobStatus,
-     PromptTask,
      ODResponseData,
+     PromptTask,
  )
  from vision_agent.utils import extract_frames_from_video
  from vision_agent.utils.exceptions import FineTuneModelIsNotReady
@@ -42,6 +43,7 @@ from vision_agent.utils.image_utils import (
      convert_quad_box_to_bbox,
      convert_to_b64,
      denormalize_bbox,
+     encode_image_bytes,
      frames_to_bytes,
      get_image_size,
      normalize_bbox,
@@ -691,6 +693,69 @@ def ixc25_video_vqa(prompt: str, frames: List[np.ndarray]) -> str:
      return cast(str, data["answer"])


+ def gpt4o_image_vqa(prompt: str, image: np.ndarray) -> str:
+     """'gpt4o_image_vqa' is a tool that can answer any questions about arbitrary images
+     including regular images or images of documents or presentations. It returns text
+     as an answer to the question.
+
+     Parameters:
+         prompt (str): The question about the image
+         image (np.ndarray): The reference image used for the question
+
+     Returns:
+         str: A string which is the answer to the given prompt.
+
+     Example
+     -------
+         >>> gpt4o_image_vqa('What is the cat doing?', image)
+         'drinking milk'
+     """
+
+     lmm = OpenAILMM()
+     buffer = io.BytesIO()
+     Image.fromarray(image).save(buffer, format="PNG")
+     image_bytes = buffer.getvalue()
+     image_b64 = "data:image/png;base64," + encode_image_bytes(image_bytes)
+     resp = lmm.generate(prompt, [image_b64])
+     return cast(str, resp)
+
+
+ def gpt4o_video_vqa(prompt: str, frames: List[np.ndarray]) -> str:
+     """'gpt4o_video_vqa' is a tool that can answer any questions about arbitrary videos
+     including regular videos or videos of documents or presentations. It returns text
+     as an answer to the question.
+
+     Parameters:
+         prompt (str): The question about the video
+         frames (List[np.ndarray]): The reference frames used for the question
+
+     Returns:
+         str: A string which is the answer to the given prompt.
+
+     Example
+     -------
+         >>> gpt4o_video_vqa('Which football player made the goal?', frames)
+         'Lionel Messi'
+     """
+
+     lmm = OpenAILMM()
+
+     if len(frames) > 10:
+         step = len(frames) / 10
+         frames = [frames[int(i * step)] for i in range(10)]
+
+     frames_b64 = []
+     for frame in frames:
+         buffer = io.BytesIO()
+         Image.fromarray(frame).save(buffer, format="PNG")
+         image_bytes = buffer.getvalue()
+         image_b64 = "data:image/png;base64," + encode_image_bytes(image_bytes)
+         frames_b64.append(image_b64)
+
+     resp = lmm.generate(prompt, frames_b64)
+     return cast(str, resp)
+
+
  def git_vqa_v2(prompt: str, image: np.ndarray) -> str:
      """'git_vqa_v2' is a tool that can answer questions about the visual
      contents of an image given a question and an image. It returns an answer to the
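The 10-frame cap in gpt4o_video_vqa subsamples with a fractional stride rather than truncating, so the selected frames span the whole clip. A standalone sketch of that step, using integer stand-ins for frames:

    # Stand-in frame indices; in the tool these are np.ndarray frames.
    frames = list(range(25))
    if len(frames) > 10:
        step = len(frames) / 10  # 2.5 for 25 frames
        frames = [frames[int(i * step)] for i in range(10)]
    print(frames)  # [0, 2, 5, 7, 10, 12, 15, 17, 20, 22]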
vision_agent/tools/tools_types.py CHANGED
@@ -1,8 +1,8 @@
  from enum import Enum
+ from typing import List, Optional, Tuple, Union
  from uuid import UUID
- from typing import List, Tuple, Optional, Union

- from pydantic import BaseModel, ConfigDict, Field, field_serializer, SerializationInfo
+ from pydantic import BaseModel, ConfigDict, Field, SerializationInfo, field_serializer


  class BboxInput(BaseModel):
vision_agent/utils/image_utils.py CHANGED
@@ -13,6 +13,8 @@ from moviepy.editor import ImageSequenceClip
  from PIL import Image, ImageDraw, ImageFont
  from PIL.Image import Image as ImageType

+ from vision_agent.utils import extract_frames_from_video
+
  COLORS = [
      (158, 218, 229),
      (219, 219, 141),
@@ -172,6 +174,51 @@ def convert_to_b64(data: Union[str, Path, np.ndarray, ImageType]) -> str:
      )


+ def encode_image_bytes(image: bytes) -> str:
+     image = Image.open(io.BytesIO(image)).convert("RGB")  # type: ignore
+     buffer = io.BytesIO()
+     image.save(buffer, format="PNG")  # type: ignore
+     encoded_image = base64.b64encode(buffer.getvalue()).decode("utf-8")
+     return encoded_image
+
+
+ def encode_media(media: Union[str, Path]) -> str:
+     if isinstance(media, str) and media.startswith(("http", "https")):
+         # for mp4 video url, we assume there is a same url but ends with png
+         # vision-agent-ui will upload this png when uploading the video
+         if media.endswith((".mp4", "mov")) and media.find("vision-agent-dev.s3") != -1:
+             return media[:-4] + ".png"
+         return media
+
+     # if media is already a base64 encoded image return
+     if isinstance(media, str) and media.startswith("data:image/"):
+         return media
+
+     extension = "png"
+     extension = Path(media).suffix
+     if extension.lower() not in {
+         ".jpg",
+         ".jpeg",
+         ".png",
+         ".webp",
+         ".bmp",
+         ".mp4",
+         ".mov",
+     }:
+         raise ValueError(f"Unsupported image extension: {extension}")
+
+     image_bytes = b""
+     if extension.lower() in {".mp4", ".mov"}:
+         frames = extract_frames_from_video(str(media), fps=1)
+         image = frames[len(frames) // 2]
+         buffer = io.BytesIO()
+         Image.fromarray(image[0]).convert("RGB").save(buffer, format="PNG")
+         image_bytes = buffer.getvalue()
+     else:
+         image_bytes = open(media, "rb").read()
+     return encode_image_bytes(image_bytes)
+
+
  def denormalize_bbox(
      bbox: List[Union[int, float]], image_size: Tuple[int, ...]
  ) -> List[float]:
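Taken together, the relocated encode_media handles four kinds of input: http(s) URLs (returned as-is, modulo the vision-agent-dev.s3 mp4-to-png rewrite), data URLs (returned as-is), image paths (read and re-encoded), and video paths (middle extracted frame, re-encoded). A hedged usage sketch; photo.png and clip.mp4 are hypothetical local files:

    from vision_agent.utils.image_utils import encode_media

    encode_media("https://example.com/cat.png")         # URL returned unchanged
    encode_media("data:image/png;base64,iVBORw0KGgo=")  # already encoded, passed through
    encode_media("photo.png")  # read from disk, returned as raw base64 PNG
    encode_media("clip.mp4")   # middle frame extracted, returned as raw base64 PNG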
vision_agent-0.2.124.dist-info/METADATA CHANGED
@@ -1,6 +1,6 @@
  Metadata-Version: 2.1
  Name: vision-agent
- Version: 0.2.123
+ Version: 0.2.124
  Summary: Toolset for Vision Agent
  Author: Landing AI
  Author-email: dev@landing.ai
vision_agent-0.2.124.dist-info/RECORD CHANGED
@@ -12,22 +12,22 @@ vision_agent/clients/landing_public_api.py,sha256=lU2ev6E8NICmR8DMUljuGcVFy5VNJQ
  vision_agent/fonts/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  vision_agent/fonts/default_font_ch_en.ttf,sha256=1YM0Z3XqLDjSNbF7ihQFSAIUdjF9m1rtHiNC_6QosTE,1594400
  vision_agent/lmm/__init__.py,sha256=YuUZRsMHdn8cMOv6iBU8yUqlIOLrbZQqZl9KPnofsHQ,103
- vision_agent/lmm/lmm.py,sha256=H3a5V7c073-vXRJfQOblE2j_CsZkH1CNNRoQgLjJZuQ,20751
+ vision_agent/lmm/lmm.py,sha256=092oefI65_QSRvQm2znXkjTdzlZTh-Ni_38610kfbJg,16836
  vision_agent/lmm/types.py,sha256=ZEXR_ptBL0ZwDMTDYkgxUCmSZFmBYPQd2jreNzr_8UY,221
- vision_agent/tools/__init__.py,sha256=TILaqdFYicScvpnCXMxgBsFmSW22NQDIvucvEgo0etw,2289
+ vision_agent/tools/__init__.py,sha256=T8Hi5aHf4J2QJDoPRvu5fxbiqMpAY-1Gi2EFIhJbf3A,2331
  vision_agent/tools/meta_tools.py,sha256=KeGiw2OtY8ARpGbtWjoNAoO1dwevt7LbCupaJX61MkE,18929
  vision_agent/tools/prompts.py,sha256=V1z4YJLXZuUl_iZ5rY0M5hHc_2tmMEUKr0WocXKGt4E,1430
- vision_agent/tools/tool_utils.py,sha256=e_p-G2nwgWOpoaqpDitY3FJ6fFuTEg5GhDOD67wI2bE,7527
- vision_agent/tools/tools.py,sha256=jOBsuN-spY_2TlvpahoRYGvyInhQDTPXXukx9q72lEU,63454
- vision_agent/tools/tools_types.py,sha256=qs11HGLRXc9zytahBtG6TQxCh8Gigvn232at3jk54jI,2356
+ vision_agent/tools/tool_utils.py,sha256=62NVlojPMf9MuJ-3yJEcrB3mzmOxN2HrNQzzjVa-FZg,7527
+ vision_agent/tools/tools.py,sha256=xT-lDC3NCdltK0_CDTOOiU8B2YhlIdzFhuSbvRVFBI8,65545
+ vision_agent/tools/tools_types.py,sha256=rLpCUODPY0yI65SLOTJOxfHFfqWM3WjOq-AYX25Chjk,2356
  vision_agent/utils/__init__.py,sha256=pWk0ktvR4aUEhuEIzSLM9kSgW4WDVqptdvOTeGLkJ6M,230
  vision_agent/utils/exceptions.py,sha256=booSPSuoULF7OXRr_YbC4dtKt6gM_HyiFQHBuaW86C4,2052
  vision_agent/utils/execute.py,sha256=gc4R_0BKUrZyhiKvIxOpYuzQPYVWQEqxr3ANy1lJAw4,27037
- vision_agent/utils/image_utils.py,sha256=UloC4byIQLM4CSCaH41SBciQ7X2OqKvsVvNOVKqIH_k,9856
+ vision_agent/utils/image_utils.py,sha256=lhdvRWMbQmMMLTmJGI1dFjzNeQSLfPYJEsAkq5Ydj3Y,11476
  vision_agent/utils/sim.py,sha256=ebE9Cs00pVEDI1HMjAzUBk88tQQmc2U-yAzIDinnekU,5572
  vision_agent/utils/type_defs.py,sha256=BE12s3JNQy36QvauXHjwyeffVh5enfcvd4vTzSwvEZI,1384
  vision_agent/utils/video.py,sha256=rNmU9KEIkZB5-EztZNlUiKYN0mm_55A_2VGUM0QpqLA,8779
- vision_agent-0.2.123.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
- vision_agent-0.2.123.dist-info/METADATA,sha256=eoydeqKc5SAqpYMoNHLp_rajkn6zYy91wQTaWtjFv2c,12255
- vision_agent-0.2.123.dist-info/WHEEL,sha256=7Z8_27uaHI_UZAc4Uox4PpBhQ9Y5_modZXWMxtUi4NU,88
- vision_agent-0.2.123.dist-info/RECORD,,
+ vision_agent-0.2.124.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
+ vision_agent-0.2.124.dist-info/METADATA,sha256=mDvhJytcxFZW_B18Vkn4egk4HJ8UHYl6YQhEJHQAbPk,12255
+ vision_agent-0.2.124.dist-info/WHEEL,sha256=7Z8_27uaHI_UZAc4Uox4PpBhQ9Y5_modZXWMxtUi4NU,88
+ vision_agent-0.2.124.dist-info/RECORD,,