vision-agent 0.2.98__py3-none-any.whl → 0.2.100__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -28,7 +28,7 @@ class DefaultImports:
     code = [
         "from typing import *",
         "from vision_agent.utils.execute import CodeInterpreter",
-        "from vision_agent.tools.meta_tools import generate_vision_code, edit_vision_code, open_file, create_file, scroll_up, scroll_down, edit_file, get_tool_descriptions",
+        "from vision_agent.tools.meta_tools import generate_vision_code, edit_vision_code, open_file, create_file, scroll_up, scroll_down, edit_file, get_tool_descriptions, florencev2_fine_tuning",
     ]
 
     @staticmethod
@@ -0,0 +1,46 @@
+import json
+import logging
+from typing import Any, Dict, Optional
+
+from requests import Session
+from requests.adapters import HTTPAdapter
+from requests.exceptions import ConnectionError, RequestException, Timeout
+
+_LOGGER = logging.getLogger(__name__)
+
+
+class BaseHTTP:
+    _TIMEOUT = 30  # seconds
+    _MAX_RETRIES = 3
+
+    def __init__(
+        self, base_endpoint: str, *, headers: Optional[Dict[str, Any]] = None
+    ) -> None:
+        self._headers = headers
+        if headers is None:
+            self._headers = {
+                "Content-Type": "application/json",
+            }
+        self._base_endpoint = base_endpoint
+        self._session = Session()
+        self._session.headers.update(self._headers)  # type: ignore
+        self._session.mount(
+            self._base_endpoint, HTTPAdapter(max_retries=self._MAX_RETRIES)
+        )
+
+    def post(self, url: str, payload: Dict[str, Any]) -> Dict[str, Any]:
+        formatted_url = f"{self._base_endpoint}/{url}"
+        _LOGGER.info(f"Sending data to {formatted_url}")
+        try:
+            response = self._session.post(
+                url=formatted_url, json=payload, timeout=self._TIMEOUT
+            )
+            response.raise_for_status()
+            result: Dict[str, Any] = response.json()
+            _LOGGER.info(json.dumps(result))
+        except (ConnectionError, Timeout, RequestException) as err:
+            _LOGGER.warning(f"Error: {err}.")
+        except json.JSONDecodeError:
+            resp_text = response.text
+            _LOGGER.warning(f"Response seems incorrect: '{resp_text}'.")
+        return result
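
The new BaseHTTP client wraps a requests.Session with retries mounted on the base endpoint and a fixed timeout, and post() returns the decoded JSON body. A minimal usage sketch, assuming only what the class above defines (the endpoint and payload are made up):

    from vision_agent.clients.http import BaseHTTP

    client = BaseHTTP("https://example.invalid")          # hypothetical base endpoint
    result = client.post("v1/echo", payload={"ping": 1})  # POSTs JSON to <base>/v1/echo, returns the parsed dict
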
@@ -0,0 +1,26 @@
+import os
+from uuid import UUID
+from typing import List
+
+from vision_agent.clients.http import BaseHTTP
+from vision_agent.utils.type_defs import LandingaiAPIKey
+from vision_agent.tools.meta_tools_types import BboxInputBase64, PromptTask
+
+
+class LandingPublicAPI(BaseHTTP):
+    def __init__(self) -> None:
+        landing_url = os.environ.get("LANDINGAI_URL", "https://api.dev.landing.ai")
+        landing_api_key = os.environ.get("LANDINGAI_API_KEY", LandingaiAPIKey().api_key)
+        headers = {"Content-Type": "application/json", "apikey": landing_api_key}
+        super().__init__(base_endpoint=landing_url, headers=headers)
+
+    def launch_fine_tuning_job(
+        self, model_name: str, task: PromptTask, bboxes: List[BboxInputBase64]
+    ) -> UUID:
+        url = "v1/agent/jobs/fine-tuning"
+        data = {
+            "model": {"name": model_name, "task": task.value},
+            "bboxes": [bbox.model_dump(by_alias=True) for bbox in bboxes],
+        }
+        response = self.post(url, payload=data)
+        return UUID(response["jobId"])
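
LandingPublicAPI builds on BaseHTTP and is what the new fine-tuning meta tool calls under the hood. A sketch of calling it directly, assuming LANDINGAI_API_KEY is set and using placeholder bounding-box data:

    from vision_agent.clients.landing_public_api import LandingPublicAPI
    from vision_agent.tools.meta_tools_types import BboxInputBase64, PromptTask

    api = LandingPublicAPI()  # reads LANDINGAI_URL / LANDINGAI_API_KEY from the environment
    job_id = api.launch_fine_tuning_job(
        "florencev2",
        PromptTask.OBJECT_DETECTION,
        [
            BboxInputBase64(
                image="<base64-encoded image>",  # placeholder, not a real payload
                filename="filename.png",
                labels=["screw"],
                bboxes=[(370, 30, 560, 290)],
            )
        ],
    )  # returns the fine-tuning job id as a UUID
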
@@ -1,6 +1,6 @@
 from typing import Callable, List, Optional
 
-from .meta_tools import META_TOOL_DOCSTRING
+from .meta_tools import META_TOOL_DOCSTRING, florencev2_fine_tuning
 from .prompts import CHOOSE_PARAMS, SYSTEM_PROMPT
 from .tools import (
     TOOL_DESCRIPTIONS,
@@ -1,5 +1,6 @@
 import os
 import subprocess
+from uuid import UUID
 from pathlib import Path
 from typing import Any, Dict, List, Union
 
@@ -7,6 +8,9 @@ import vision_agent as va
 from vision_agent.lmm.types import Message
 from vision_agent.tools.tool_utils import get_tool_documentation
 from vision_agent.tools.tools import TOOL_DESCRIPTIONS
+from vision_agent.utils.image_utils import convert_to_b64
+from vision_agent.clients.landing_public_api import LandingPublicAPI
+from vision_agent.tools.meta_tools_types import BboxInput, BboxInputBase64, PromptTask
 
 # These tools are adapted from SWE-Agent https://github.com/princeton-nlp/SWE-agent
 
@@ -385,6 +389,46 @@ def get_tool_descriptions() -> str:
     return TOOL_DESCRIPTIONS
 
 
+def florencev2_fine_tuning(bboxes: List[Dict[str, Any]], task: str) -> UUID:
+    """'florencev2_fine_tuning' is a tool that fine-tune florencev2 to be able
+    to detect objects in an image based on a given dataset. It returns the fine
+    tuning job id.
+
+    Parameters:
+        bboxes (List[BboxInput]): A list of BboxInput containing the
+            image path, labels and bounding boxes.
+        task (PromptTask): The florencev2 fine-tuning task. The options are
+            CAPTION, CAPTION_TO_PHRASE_GROUNDING and OBJECT_DETECTION.
+
+    Returns:
+        UUID: The fine tuning job id, this id will used to retrieve the fine
+            tuned model.
+
+    Example
+    -------
+    >>> fine_tuning_job_id = florencev2_fine_tuning(
+        [{'image_path': 'filename.png', 'labels': ['screw'], 'bboxes': [[370, 30, 560, 290]]},
+         {'image_path': 'filename.png', 'labels': ['screw'], 'bboxes': [[120, 0, 300, 170]]}],
+        "OBJECT_DETECTION"
+    )
+    """
+    bboxes_input = [BboxInput.model_validate(bbox) for bbox in bboxes]
+    task_input = PromptTask[task]
+    fine_tuning_request = [
+        BboxInputBase64(
+            image=convert_to_b64(bbox_input.image_path),
+            filename=bbox_input.image_path.split("/")[-1],
+            labels=bbox_input.labels,
+            bboxes=bbox_input.bboxes,
+        )
+        for bbox_input in bboxes_input
+    ]
+    landing_api = LandingPublicAPI()
+    return landing_api.launch_fine_tuning_job(
+        "florencev2", task_input, fine_tuning_request
+    )
+
+
 META_TOOL_DOCSTRING = get_tool_documentation(
     [
         get_tool_descriptions,
@@ -398,5 +442,6 @@ META_TOOL_DOCSTRING = get_tool_documentation(
         search_dir,
         search_file,
         find_file,
+        florencev2_fine_tuning,
     ]
 )
@@ -0,0 +1,30 @@
+from enum import Enum
+from typing import List, Tuple
+
+from pydantic import BaseModel
+
+
+class BboxInput(BaseModel):
+    image_path: str
+    labels: List[str]
+    bboxes: List[Tuple[int, int, int, int]]
+
+
+class BboxInputBase64(BaseModel):
+    image: str
+    filename: str
+    labels: List[str]
+    bboxes: List[Tuple[int, int, int, int]]
+
+
+class PromptTask(str, Enum):
+    """
+    Valid task prompts options for the Florencev2 model.
+    """
+
+    CAPTION = "<CAPTION>"
+    """"""
+    CAPTION_TO_PHRASE_GROUNDING = "<CAPTION_TO_PHRASE_GROUNDING>"
+    """"""
+    OBJECT_DETECTION = "<OD>"
+    """"""
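
These pydantic models back the new fine-tuning path: florencev2_fine_tuning validates plain dicts into BboxInput, resolves the task by enum member name, and ships BboxInputBase64 payloads to the API. A small illustration (values are made up):

    from vision_agent.tools.meta_tools_types import BboxInput, PromptTask

    bbox = BboxInput.model_validate(
        {"image_path": "filename.png", "labels": ["screw"], "bboxes": [[370, 30, 560, 290]]}
    )
    task = PromptTask["OBJECT_DETECTION"]  # lookup by member name, as florencev2_fine_tuning does
    print(task.value)                      # "<OD>" is the string the API receives
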
@@ -16,7 +16,8 @@ from vision_agent.utils.type_defs import LandingaiAPIKey
 
 _LOGGER = logging.getLogger(__name__)
 _LND_API_KEY = LandingaiAPIKey().api_key
-_LND_API_URL = "https://api.landing.ai/v1/agent"
+_LND_API_URL = "https://api.landing.ai/v1/agent/model"
+_LND_API_URL_v2 = "https://api.landing.ai/v1/tools"
 
 
 class ToolCallTrace(BaseModel):
@@ -27,13 +28,13 @@ class ToolCallTrace(BaseModel):
 
 
 def send_inference_request(
-    payload: Dict[str, Any], endpoint_name: str
+    payload: Dict[str, Any], endpoint_name: str, v2: bool = False
 ) -> Dict[str, Any]:
     try:
         if runtime_tag := os.environ.get("RUNTIME_TAG", ""):
             payload["runtime_tag"] = runtime_tag
 
-        url = f"{_LND_API_URL}/model/{endpoint_name}"
+        url = f"{_LND_API_URL_v2 if v2 else _LND_API_URL}/{endpoint_name}"
         if "TOOL_ENDPOINT_URL" in os.environ:
             url = os.environ["TOOL_ENDPOINT_URL"]
 
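
The new v2 flag only changes which base URL the request is sent to; the rest of the helper is unchanged. A rough sketch of the routing, with an illustrative call modeled on the loca tools below (image_b64 is assumed to be a base64-encoded image):

    # v2=False (default) -> https://api.landing.ai/v1/agent/model/<endpoint_name>
    # v2=True            -> https://api.landing.ai/v1/tools/<endpoint_name>
    resp = send_inference_request(
        {"image": image_b64, "function_name": "loca_zero_shot_counting"}, "loca", v2=True
    )
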
@@ -2,23 +2,23 @@ import io
 import json
 import logging
 import tempfile
-from importlib import resources
 from pathlib import Path
+from importlib import resources
 from typing import Any, Dict, List, Optional, Tuple, Union, cast
 
 import cv2
-import numpy as np
 import requests
+import numpy as np
+from pytube import YouTube  # type: ignore
 from moviepy.editor import ImageSequenceClip
 from PIL import Image, ImageDraw, ImageFont
 from pillow_heif import register_heif_opener  # type: ignore
-from pytube import YouTube  # type: ignore
 
 from vision_agent.tools.tool_utils import (
+    send_inference_request,
     get_tool_descriptions,
     get_tool_documentation,
     get_tools_df,
-    send_inference_request,
 )
 from vision_agent.utils import extract_frames_from_video
 from vision_agent.utils.execute import FileSerializer, MimeType
@@ -126,7 +126,6 @@ def owl_v2(
     prompt: str,
     image: np.ndarray,
     box_threshold: float = 0.10,
-    iou_threshold: float = 0.10,
 ) -> List[Dict[str, Any]]:
     """'owl_v2' is a tool that can detect and count multiple objects given a text
     prompt such as category names or referring expressions. The categories in text prompt
@@ -138,8 +137,6 @@
         image (np.ndarray): The image to ground the prompt to.
         box_threshold (float, optional): The threshold for the box detection. Defaults
             to 0.10.
-        iou_threshold (float, optional): The threshold for the Intersection over Union
-            (IoU). Defaults to 0.10.
 
     Returns:
         List[Dict[str, Any]]: A list of dictionaries containing the score, label, and
@@ -159,22 +156,22 @@
     image_size = image.shape[:2]
     image_b64 = convert_to_b64(image)
     request_data = {
-        "prompt": prompt,
+        "prompts": prompt.split("."),
         "image": image_b64,
-        "tool": "open_vocab_detection",
-        "kwargs": {"box_threshold": box_threshold, "iou_threshold": iou_threshold},
+        "confidence": box_threshold,
         "function_name": "owl_v2",
     }
-    data: Dict[str, Any] = send_inference_request(request_data, "tools")
+    data: Dict[str, Any] = send_inference_request(request_data, "owlv2", v2=True)
     return_data = []
-    for i in range(len(data["bboxes"])):
-        return_data.append(
-            {
-                "score": round(data["scores"][i], 2),
-                "label": data["labels"][i].strip(),
-                "bbox": normalize_bbox(data["bboxes"][i], image_size),
-            }
-        )
+    if data is not None:
+        for elt in data:
+            return_data.append(
+                {
+                    "bbox": normalize_bbox(elt["bbox"], image_size),  # type: ignore
+                    "label": elt["label"],  # type: ignore
+                    "score": round(elt["score"], 2),  # type: ignore
+                }
+            )
     return return_data
 
 
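
Note that owl_v2 now splits the prompt on periods into a list of prompts and consumes the response as a flat list of detections rather than parallel bboxes/labels/scores arrays. An illustrative payload (values are made up; image_b64 is assumed to be a base64-encoded image):

    prompt = "person. dog"
    request_data = {
        "prompts": prompt.split("."),  # ["person", " dog"]
        "image": image_b64,
        "confidence": 0.10,
        "function_name": "owl_v2",
    }
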
@@ -367,11 +364,10 @@ def loca_zero_shot_counting(image: np.ndarray) -> Dict[str, Any]:
     image_b64 = convert_to_b64(image)
     data = {
         "image": image_b64,
-        "tool": "zero_shot_counting",
         "function_name": "loca_zero_shot_counting",
     }
-    resp_data = send_inference_request(data, "tools")
-    resp_data["heat_map"] = np.array(b64_to_pil(resp_data["heat_map"][0]))
+    resp_data = send_inference_request(data, "loca", v2=True)
+    resp_data["heat_map"] = np.array(resp_data["heat_map"][0]).astype(np.uint8)
     return resp_data
 
 
@@ -397,17 +393,15 @@ def loca_visual_prompt_counting(
 
     image_size = get_image_size(image)
     bbox = visual_prompt["bbox"]
-    bbox_str = ", ".join(map(str, denormalize_bbox(bbox, image_size)))
     image_b64 = convert_to_b64(image)
 
     data = {
         "image": image_b64,
-        "prompt": bbox_str,
-        "tool": "few_shot_counting",
+        "bbox": list(map(int, denormalize_bbox(bbox, image_size))),
         "function_name": "loca_visual_prompt_counting",
     }
-    resp_data = send_inference_request(data, "tools")
-    resp_data["heat_map"] = np.array(b64_to_pil(resp_data["heat_map"][0]))
+    resp_data = send_inference_request(data, "loca", v2=True)
+    resp_data["heat_map"] = np.array(resp_data["heat_map"][0]).astype(np.uint8)
     return resp_data
 
 
@@ -432,13 +426,12 @@ def florencev2_roberta_vqa(prompt: str, image: np.ndarray) -> str:
     image_b64 = convert_to_b64(image)
     data = {
         "image": image_b64,
-        "prompt": prompt,
-        "tool": "image_question_answering_with_context",
+        "question": prompt,
         "function_name": "florencev2_roberta_vqa",
     }
 
-    answer = send_inference_request(data, "tools")
-    return answer["text"][0]  # type: ignore
+    answer = send_inference_request(data, "florence2-qa", v2=True)
+    return answer  # type: ignore
 
 
 def git_vqa_v2(prompt: str, image: np.ndarray) -> str:
@@ -544,17 +537,16 @@ def vit_nsfw_classification(image: np.ndarray) -> Dict[str, Any]:
     Example
     -------
     >>> vit_nsfw_classification(image)
-    {"labels": "normal", "scores": 0.68},
+    {"label": "normal", "scores": 0.68},
     """
 
     image_b64 = convert_to_b64(image)
     data = {
         "image": image_b64,
-        "tool": "nsfw_image_classification",
         "function_name": "vit_nsfw_classification",
     }
-    resp_data = send_inference_request(data, "tools")
-    resp_data["scores"] = round(resp_data["scores"], 4)
+    resp_data = send_inference_request(data, "nsfw-classification", v2=True)
+    resp_data["score"] = round(resp_data["score"], 4)
     return resp_data
 
 
@@ -603,21 +595,21 @@ def florencev2_image_caption(image: np.ndarray, detail_caption: bool = True) ->
         'This image contains a cat sitting on a table with a bowl of milk.'
     """
     image_b64 = convert_to_b64(image)
+    task = "<MORE_DETAILED_CAPTION>" if detail_caption else "<DETAILED_CAPTION>"
     data = {
         "image": image_b64,
-        "tool": "florence2_image_captioning",
-        "detail_caption": detail_caption,
+        "task": task,
         "function_name": "florencev2_image_caption",
     }
 
-    answer = send_inference_request(data, "tools")
-    return answer["text"][0]  # type: ignore
+    answer = send_inference_request(data, "florence2", v2=True)
+    return answer[task]  # type: ignore
 
 
-def florencev2_object_detection(image: np.ndarray) -> List[Dict[str, Any]]:
-    """'florencev2_object_detection' is a tool that can detect common objects in an
-    image without any text prompt or thresholding. It returns a list of detected objects
-    as labels and their location as bounding boxes.
+def florencev2_object_detection(image: np.ndarray, prompt: str) -> List[Dict[str, Any]]:
+    """'florencev2_object_detection' is a tool that can detect objects given a text
+    prompt such as a phrase or class names separated by commas. It returns a list of
+    detected objects as labels and their location as bounding boxes with score of 1.0.
 
     Parameters:
         image (np.ndarray): The image to used to detect objects
@@ -631,29 +623,30 @@ def florencev2_object_detection(image: np.ndarray) -> List[Dict[str, Any]]:
 
     Example
     -------
-    >>> florencev2_object_detection(image)
+    >>> florencev2_object_detection(image, 'person looking at a coyote')
     [
-        {'score': 1.0, 'label': 'window', 'bbox': [0.1, 0.11, 0.35, 0.4]},
-        {'score': 1.0, 'label': 'car', 'bbox': [0.2, 0.21, 0.45, 0.5},
-        {'score': 1.0, 'label': 'person', 'bbox': [0.34, 0.21, 0.85, 0.5},
+        {'score': 1.0, 'label': 'person', 'bbox': [0.1, 0.11, 0.35, 0.4]},
+        {'score': 1.0, 'label': 'coyote', 'bbox': [0.34, 0.21, 0.85, 0.5},
     ]
     """
     image_size = image.shape[:2]
     image_b64 = convert_to_b64(image)
     data = {
         "image": image_b64,
-        "tool": "object_detection",
+        "task": "<CAPTION_TO_PHRASE_GROUNDING>",
+        "prompt": prompt,
         "function_name": "florencev2_object_detection",
     }
 
-    answer = send_inference_request(data, "tools")
+    detections = send_inference_request(data, "florence2", v2=True)
+    detections = detections["<CAPTION_TO_PHRASE_GROUNDING>"]
     return_data = []
-    for i in range(len(answer["bboxes"])):
+    for i in range(len(detections["bboxes"])):
         return_data.append(
             {
-                "score": round(answer["scores"][i], 2),
-                "label": answer["labels"][i],
-                "bbox": normalize_bbox(answer["bboxes"][i], image_size),
+                "score": 1.0,
+                "label": detections["labels"][i],
+                "bbox": normalize_bbox(detections["bboxes"][i], image_size),
             }
         )
     return return_data
@@ -742,13 +735,16 @@ def depth_anything_v2(image: np.ndarray) -> np.ndarray:
     image_b64 = convert_to_b64(image)
     data = {
         "image": image_b64,
-        "tool": "generate_depth",
         "function_name": "depth_anything_v2",
    }
 
-    answer = send_inference_request(data, "tools")
-    return_data = np.array(b64_to_pil(answer["masks"][0]).convert("L"))
-    return return_data
+    depth_map = send_inference_request(data, "depth-anything-v2", v2=True)
+    depth_map_np = np.array(depth_map["map"])
+    depth_map_np = (depth_map_np - depth_map_np.min()) / (
+        depth_map_np.max() - depth_map_np.min()
+    )
+    depth_map_np = (255 * depth_map_np).astype(np.uint8)
+    return depth_map_np
 
 
 def generate_soft_edge_image(image: np.ndarray) -> np.ndarray:
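
depth_anything_v2 now min-max normalizes the returned depth map and rescales it to 8-bit, so callers always receive a uint8 image regardless of the model's raw output range. A quick NumPy illustration of that rescaling with made-up values:

    import numpy as np

    raw = np.array([[0.2, 1.5], [3.0, 0.2]])            # hypothetical raw depth values
    norm = (raw - raw.min()) / (raw.max() - raw.min())  # scaled into [0, 1]
    img = (255 * norm).astype(np.uint8)                 # [[0, 118], [255, 0]]
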
@@ -839,12 +835,11 @@ def generate_pose_image(image: np.ndarray) -> np.ndarray:
     image_b64 = convert_to_b64(image)
     data = {
         "image": image_b64,
-        "tool": "generate_pose",
         "function_name": "generate_pose_image",
     }
 
-    answer = send_inference_request(data, "tools")
-    return_data = np.array(b64_to_pil(answer["masks"][0]).convert("RGB"))
+    pos_img = send_inference_request(data, "pose-detector", v2=True)
+    return_data = np.array(b64_to_pil(pos_img["data"]).convert("RGB"))
     return return_data
 
 
@@ -1063,7 +1058,6 @@ def save_video(
     if fps <= 0:
         _LOGGER.warning(f"Invalid fps value: {fps}. Setting fps to 4 (default value).")
         fps = 4
-
     with ImageSequenceClip(frames, fps=fps) as video:
         if output_video_path:
             f = open(output_video_path, "wb")
@@ -1254,7 +1248,6 @@ TOOLS = [
     loca_visual_prompt_counting,
     florencev2_roberta_vqa,
     florencev2_image_caption,
-    florencev2_object_detection,
     detr_segmentation,
     depth_anything_v2,
     generate_soft_edge_image,
@@ -209,7 +209,7 @@ class Result:
         return formats
 
     @staticmethod
-    def from_e2b_result(result: E2BResult) -> "Result":  # type: ignore
+    def from_e2b_result(result: E2BResult) -> "Result":
         """
         Creates a Result object from an E2BResult object.
         """
@@ -361,7 +361,7 @@ class Execution(BaseModel):
     )
 
     @staticmethod
-    def from_e2b_execution(exec: E2BExecution) -> "Execution":  # type: ignore
+    def from_e2b_execution(exec: E2BExecution) -> "Execution":
         """Creates an Execution object from an E2BResult object."""
         return Execution(
             results=[Result.from_e2b_result(res) for res in exec.results],
@@ -14,7 +14,7 @@ class LandingaiAPIKey(BaseSettings):
     """
 
     api_key: str = Field(
-        default="land_sk_fnmSzD0ksknSfvhyD8UGu9R4ss3bKfLL1Im5gb6tDQTy2z1Oy5",
+        default="land_sk_zKvyPcPV2bVoq7q87KwduoerAxuQpx33DnqP8M1BliOCiZOSoI",
         alias="LANDINGAI_API_KEY",
         description="The API key of LandingAI.",
     )
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: vision-agent
-Version: 0.2.98
+Version: 0.2.100
 Summary: Toolset for Vision Agent
 Author: Landing AI
 Author-email: dev@landing.ai
@@ -2,28 +2,32 @@ vision_agent/__init__.py,sha256=EAb4-f9iyuEYkBrX4ag1syM8Syx8118_t0R6_C34M9w,57
 vision_agent/agent/__init__.py,sha256=qpduQ9YufJQfMmG6jwKC2xmlbtR2qK8_1eQC1sGA9Ks,135
 vision_agent/agent/agent.py,sha256=Bt8yhjCFXuRdZaHxKEesG40V09nWRt45sZluri1R3AA,575
 vision_agent/agent/agent_utils.py,sha256=JXdl2xz14LKQAmScY-MIW23AD2WBFCsnI0JS6dAyj3Q,1412
-vision_agent/agent/vision_agent.py,sha256=i_rNpc7faqHTifp2c9sQE4Js3qYUKuJeiqauTp90OlE,8417
+vision_agent/agent/vision_agent.py,sha256=U7VqUR-Io0xkGHpcF03Kq87Y0YQIdZQGqxuXdwjQzgk,8441
 vision_agent/agent/vision_agent_coder.py,sha256=N8oVwfxrz6emHlucJC5hGQvkA9cQWW2sMLFtshwLdI8,30309
 vision_agent/agent/vision_agent_coder_prompts.py,sha256=a3R_vHlT2FW3-DSn4OWgzF9zEAx-uKM4ZaTi9Kn-K54,11116
 vision_agent/agent/vision_agent_prompts.py,sha256=hjs-m4ZHR7HE1HtOeX_1rOvTQA2FMEAqEkaBbGPBYDo,6072
+vision_agent/clients/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+vision_agent/clients/http.py,sha256=1WMt29F12YFfPH03AttKxnUNXx5sNOD9ZuH4etbB054,1598
+vision_agent/clients/landing_public_api.py,sha256=Tjl8uBZWc3dvrCOKg-PCYjw3RC3X5Y6B50kaKn_QzL0,1050
 vision_agent/fonts/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 vision_agent/fonts/default_font_ch_en.ttf,sha256=1YM0Z3XqLDjSNbF7ihQFSAIUdjF9m1rtHiNC_6QosTE,1594400
 vision_agent/lmm/__init__.py,sha256=YuUZRsMHdn8cMOv6iBU8yUqlIOLrbZQqZl9KPnofsHQ,103
 vision_agent/lmm/lmm.py,sha256=KcS6h-8whGFmwt7t4LNlj0hZ4U-rBojYBLKLmrMsF48,15075
 vision_agent/lmm/types.py,sha256=8TSRoTbXyCKVJiH-wHXI2OiGOMSkYv1vLGYeAXtNpOQ,153
-vision_agent/tools/__init__.py,sha256=UNiaJAOt1C709gaJ-a9h9BzKnY5JmoEUpgKftsOnyPQ,1882
-vision_agent/tools/meta_tools.py,sha256=rmxgVzj-vJKeewHbue3qHru4sYsFLxlSZV-YH-eyH5w,13366
+vision_agent/tools/__init__.py,sha256=e8q4lYD3acyX1ikMKLz4nlaAR_WZpBAIyq2CGYOYnvM,1906
+vision_agent/tools/meta_tools.py,sha256=v2FrLl0YwM7JwsVRfgfnryd9qorbPRiObestexbnNBs,15170
+vision_agent/tools/meta_tools_types.py,sha256=aU4knXEhm0AnDYW958T6Q6qPwN4yq8pQzQOxqFaOjzg,596
 vision_agent/tools/prompts.py,sha256=V1z4YJLXZuUl_iZ5rY0M5hHc_2tmMEUKr0WocXKGt4E,1430
-vision_agent/tools/tool_utils.py,sha256=XoB-iae8hHrBQgJd3fV6-UjZAkClysobUaOM17IcHuE,4597
-vision_agent/tools/tools.py,sha256=fHD4qhn7cGG1O77J_BHfaRfW6LMQuj1OIu9xqYu6AG8,43220
+vision_agent/tools/tool_utils.py,sha256=Zg2aP58UqVRUlEtekWwSwGK5Z5c0eyNrKOvAfEyY4Ik,4694
+vision_agent/tools/tools.py,sha256=jWWioqBNsoNaGa8WKVldKBk_y9ZD1shO52kSE-26MFc,43111
 vision_agent/utils/__init__.py,sha256=CW84HnhqI6XQVuxf2KifkLnSuO7EOhmuL09-gAymAak,219
 vision_agent/utils/exceptions.py,sha256=isVH-SVL4vHj3q5kK4z7cy5_aOapAqHXWkpibfSNbUs,1659
-vision_agent/utils/execute.py,sha256=s43aUtuq7ZNjil2mxrddiz8EvvqlJwttkYlIiZouXqM,25125
+vision_agent/utils/execute.py,sha256=ZRxztUfZwvMvPnFbKx5W_LZzTuKl8Zf5dP3Y8P2-3nk,25093
 vision_agent/utils/image_utils.py,sha256=y69wtNla0xHZ1h1x0-vv7nOyKUq69jtjSJBiDCn6EM0,7703
 vision_agent/utils/sim.py,sha256=7JvtWGN0Ik5ife3qQYWs7Fm3T8AnAXGFd5HnvDC15mQ,4433
-vision_agent/utils/type_defs.py,sha256=oVFJcicB-s_09lqvn61u0A5ncZsTqZArZledXWbrrg0,1384
+vision_agent/utils/type_defs.py,sha256=BE12s3JNQy36QvauXHjwyeffVh5enfcvd4vTzSwvEZI,1384
 vision_agent/utils/video.py,sha256=rNmU9KEIkZB5-EztZNlUiKYN0mm_55A_2VGUM0QpqLA,8779
-vision_agent-0.2.98.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
-vision_agent-0.2.98.dist-info/METADATA,sha256=ANK0JJR0vAu0Tq9W07O6UM4XvUTnoKVqrqwm9gK-DuU,10728
-vision_agent-0.2.98.dist-info/WHEEL,sha256=7Z8_27uaHI_UZAc4Uox4PpBhQ9Y5_modZXWMxtUi4NU,88
-vision_agent-0.2.98.dist-info/RECORD,,
+vision_agent-0.2.100.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
+vision_agent-0.2.100.dist-info/METADATA,sha256=aoWhGb6-cKJpae77m_JsrUP7ljLz1LHVnmYLHSA7-U0,10729
+vision_agent-0.2.100.dist-info/WHEEL,sha256=7Z8_27uaHI_UZAc4Uox4PpBhQ9Y5_modZXWMxtUi4NU,88
+vision_agent-0.2.100.dist-info/RECORD,,