vision-agent 0.2.109__py3-none-any.whl → 0.2.111__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -28,7 +28,7 @@ class DefaultImports:
28
28
  code = [
29
29
  "from typing import *",
30
30
  "from vision_agent.utils.execute import CodeInterpreter",
31
- "from vision_agent.tools.meta_tools import generate_vision_code, edit_vision_code, open_file, create_file, scroll_up, scroll_down, edit_file, get_tool_descriptions, florencev2_fine_tuning",
31
+ "from vision_agent.tools.meta_tools import generate_vision_code, edit_vision_code, open_file, create_file, scroll_up, scroll_down, edit_file, get_tool_descriptions",
32
32
  ]
33
33
 
34
34
  @staticmethod
@@ -4,7 +4,6 @@ from typing import Any, Dict, Optional
4
4
 
5
5
  from requests import Session
6
6
  from requests.adapters import HTTPAdapter
7
- from requests.exceptions import ConnectionError, RequestException, Timeout
8
7
 
9
8
  _LOGGER = logging.getLogger(__name__)
10
9
 
@@ -38,9 +37,22 @@ class BaseHTTP:
38
37
  response.raise_for_status()
39
38
  result: Dict[str, Any] = response.json()
40
39
  _LOGGER.info(json.dumps(result))
41
- except (ConnectionError, Timeout, RequestException) as err:
42
- _LOGGER.warning(f"Error: {err}.")
43
40
  except json.JSONDecodeError:
44
41
  resp_text = response.text
45
42
  _LOGGER.warning(f"Response seems incorrect: '{resp_text}'.")
43
+ raise
44
+ return result
45
+
46
def get(self, url: str) -> Dict[str, Any]:
    """Send a GET request to `url` under the base endpoint and return the JSON body.

    Parameters:
        url (str): Path appended to `self._base_endpoint` with a "/" separator.

    Returns:
        Dict[str, Any]: The decoded JSON payload of the response.

    Raises:
        json.JSONDecodeError: Re-raised, after logging the raw response text,
            when the body cannot be decoded as JSON.
        Any error raised by `raise_for_status()` on a non-success response is
        not handled here and propagates to the caller.
    """
    endpoint = f"{self._base_endpoint}/{url}"
    _LOGGER.info(f"Sending data to {endpoint}")
    try:
        reply = self._session.get(url=endpoint, timeout=self._TIMEOUT)
        reply.raise_for_status()
        body: Dict[str, Any] = reply.json()
        _LOGGER.info(json.dumps(body))
    except json.JSONDecodeError:
        # Only reply.json() can raise this, so `reply` is always bound here.
        _LOGGER.warning(f"Response seems incorrect: '{reply.text}'.")
        raise
    return body
@@ -2,9 +2,12 @@ import os
2
2
  from uuid import UUID
3
3
  from typing import List
4
4
 
5
+ from requests.exceptions import HTTPError
6
+
5
7
  from vision_agent.clients.http import BaseHTTP
6
8
  from vision_agent.utils.type_defs import LandingaiAPIKey
7
- from vision_agent.tools.meta_tools_types import BboxInputBase64, PromptTask
9
+ from vision_agent.utils.exceptions import FineTuneModelNotFound
10
+ from vision_agent.tools.tools_types import BboxInputBase64, PromptTask, JobStatus
8
11
 
9
12
 
10
13
  class LandingPublicAPI(BaseHTTP):
@@ -24,3 +27,12 @@ class LandingPublicAPI(BaseHTTP):
24
27
  }
25
28
  response = self.post(url, payload=data)
26
29
  return UUID(response["jobId"])
30
+
31
def check_fine_tuning_job(self, job_id: UUID) -> JobStatus:
    """Fetch the current status of a fine-tuning job.

    Parameters:
        job_id (UUID): The id of the fine-tuning job to look up.

    Returns:
        JobStatus: The status parsed from the "status" field of the response.

    Raises:
        FineTuneModelNotFound: If the service answers with HTTP 404.
        HTTPError: For any other HTTP error status; re-raised unchanged.
    """
    url = f"v1/agent/jobs/fine-tuning/{job_id}/status"
    try:
        get_job = self.get(url)
    except HTTPError as err:
        if err.response is not None and err.response.status_code == 404:
            raise FineTuneModelNotFound() from err
        # Any other HTTP failure must propagate; swallowing it would leave
        # `get_job` unbound and surface as an unrelated UnboundLocalError.
        raise
    return JobStatus(get_job["status"])
@@ -1,12 +1,15 @@
1
1
  from typing import Callable, List, Optional
2
2
 
3
- from .meta_tools import META_TOOL_DOCSTRING, florencev2_fine_tuning
3
+ from .meta_tools import (
4
+ META_TOOL_DOCSTRING,
5
+ )
4
6
  from .prompts import CHOOSE_PARAMS, SYSTEM_PROMPT
5
7
  from .tools import (
6
8
  TOOL_DESCRIPTIONS,
7
9
  TOOL_DOCSTRING,
8
10
  TOOLS,
9
11
  TOOLS_DF,
12
+ TOOLS_INFO,
10
13
  UTILITIES_DOCSTRING,
11
14
  blip_image_caption,
12
15
  clip,
@@ -52,15 +55,16 @@ def register_tool(imports: Optional[List] = None) -> Callable:
52
55
  def decorator(tool: Callable) -> Callable:
53
56
  import inspect
54
57
 
55
- from .tools import get_tool_descriptions, get_tools_df
58
+ from .tools import get_tool_descriptions, get_tools_df, get_tools_info
56
59
 
57
- global TOOLS, TOOLS_DF, TOOL_DESCRIPTIONS, TOOL_DOCSTRING
60
+ global TOOLS, TOOLS_DF, TOOL_DESCRIPTIONS, TOOL_DOCSTRING, TOOLS_INFO
58
61
 
59
62
  if tool not in TOOLS:
60
63
  TOOLS.append(tool)
61
64
  TOOLS_DF = get_tools_df(TOOLS) # type: ignore
62
65
  TOOL_DESCRIPTIONS = get_tool_descriptions(TOOLS) # type: ignore
63
66
  TOOL_DOCSTRING = get_tool_documentation(TOOLS) # type: ignore
67
+ TOOLS_INFO = get_tools_info(TOOLS) # type: ignore
64
68
 
65
69
  globals()[tool.__name__] = tool
66
70
  if imports is not None:
@@ -1,6 +1,5 @@
1
1
  import os
2
2
  import subprocess
3
- from uuid import UUID
4
3
  from pathlib import Path
5
4
  from typing import Any, Dict, List, Union
6
5
 
@@ -8,9 +7,7 @@ import vision_agent as va
8
7
  from vision_agent.lmm.types import Message
9
8
  from vision_agent.tools.tool_utils import get_tool_documentation
10
9
  from vision_agent.tools.tools import TOOL_DESCRIPTIONS
11
- from vision_agent.utils.image_utils import convert_to_b64
12
- from vision_agent.clients.landing_public_api import LandingPublicAPI
13
- from vision_agent.tools.meta_tools_types import BboxInput, BboxInputBase64, PromptTask
10
+
14
11
 
15
12
  # These tools are adapted from SWE-Agent https://github.com/princeton-nlp/SWE-agent
16
13
 
@@ -384,51 +381,11 @@ def edit_file(file_path: str, start: int, end: int, content: str) -> str:
384
381
 
385
382
  def get_tool_descriptions() -> str:
386
383
  """Returns a description of all the tools that `generate_vision_code` has access to.
387
- Helpful for answerings questions about what types of vision tasks you can do with
384
+ Helpful for answering questions about what types of vision tasks you can do with
388
385
  `generate_vision_code`."""
389
386
  return TOOL_DESCRIPTIONS
390
387
 
391
388
 
392
- def florencev2_fine_tuning(bboxes: List[Dict[str, Any]], task: str) -> UUID:
393
- """'florencev2_fine_tuning' is a tool that fine-tune florencev2 to be able
394
- to detect objects in an image based on a given dataset. It returns the fine
395
- tuning job id.
396
-
397
- Parameters:
398
- bboxes (List[BboxInput]): A list of BboxInput containing the
399
- image path, labels and bounding boxes.
400
- task (PromptTask): The florencev2 fine-tuning task. The options are
401
- CAPTION, CAPTION_TO_PHRASE_GROUNDING and OBJECT_DETECTION.
402
-
403
- Returns:
404
- UUID: The fine tuning job id, this id will used to retrieve the fine
405
- tuned model.
406
-
407
- Example
408
- -------
409
- >>> fine_tuning_job_id = florencev2_fine_tuning(
410
- [{'image_path': 'filename.png', 'labels': ['screw'], 'bboxes': [[370, 30, 560, 290]]},
411
- {'image_path': 'filename.png', 'labels': ['screw'], 'bboxes': [[120, 0, 300, 170]]}],
412
- "OBJECT_DETECTION"
413
- )
414
- """
415
- bboxes_input = [BboxInput.model_validate(bbox) for bbox in bboxes]
416
- task_input = PromptTask[task]
417
- fine_tuning_request = [
418
- BboxInputBase64(
419
- image=convert_to_b64(bbox_input.image_path),
420
- filename=bbox_input.image_path.split("/")[-1],
421
- labels=bbox_input.labels,
422
- bboxes=bbox_input.bboxes,
423
- )
424
- for bbox_input in bboxes_input
425
- ]
426
- landing_api = LandingPublicAPI()
427
- return landing_api.launch_fine_tuning_job(
428
- "florencev2", task_input, fine_tuning_request
429
- )
430
-
431
-
432
389
  META_TOOL_DOCSTRING = get_tool_documentation(
433
390
  [
434
391
  get_tool_descriptions,
@@ -442,6 +399,5 @@ META_TOOL_DOCSTRING = get_tool_documentation(
442
399
  search_dir,
443
400
  search_file,
444
401
  find_file,
445
- florencev2_fine_tuning,
446
402
  ]
447
403
  )
@@ -15,9 +15,10 @@ from vision_agent.utils.execute import Error, MimeType
15
15
  from vision_agent.utils.type_defs import LandingaiAPIKey
16
16
 
17
17
  _LOGGER = logging.getLogger(__name__)
18
- _LND_API_KEY = LandingaiAPIKey().api_key
19
- _LND_API_URL = "https://api.landing.ai/v1/agent/model"
20
- _LND_API_URL_v2 = "https://api.landing.ai/v1/tools"
18
+ _LND_API_KEY = os.environ.get("LANDINGAI_API_KEY", LandingaiAPIKey().api_key)
19
+ _LND_BASE_URL = os.environ.get("LANDINGAI_URL", "https://api.landing.ai")
20
+ _LND_API_URL = f"{_LND_BASE_URL}/v1/agent/model"
21
+ _LND_API_URL_v2 = f"{_LND_BASE_URL}/v1/tools"
21
22
 
22
23
 
23
24
  class ToolCallTrace(BaseModel):
@@ -28,8 +29,13 @@ class ToolCallTrace(BaseModel):
28
29
 
29
30
 
30
31
  def send_inference_request(
31
- payload: Dict[str, Any], endpoint_name: str, v2: bool = False
32
+ payload: Dict[str, Any],
33
+ endpoint_name: str,
34
+ v2: bool = False,
35
+ metadata_payload: Optional[Dict[str, Any]] = None,
32
36
  ) -> Dict[str, Any]:
37
+ # TODO: runtime_tag and function_name should be moved into metadata_payload
38
+ # instead of being included in the service payload
33
39
  try:
34
40
  if runtime_tag := os.environ.get("RUNTIME_TAG", ""):
35
41
  payload["runtime_tag"] = runtime_tag
@@ -62,9 +68,13 @@ def send_inference_request(
62
68
  traceback_raw=[],
63
69
  )
64
70
  _LOGGER.error(f"Request failed: {res.status_code} {res.text}")
65
- raise RemoteToolCallFailed(
66
- payload["function_name"], res.status_code, res.text
67
- )
71
+ # TODO: function_name should be in metadata_payload
72
+ function_name = "unknown"
73
+ if "function_name" in payload:
74
+ function_name = payload["function_name"]
75
+ elif metadata_payload is not None and "function_name" in metadata_payload:
76
+ function_name = metadata_payload["function_name"]
77
+ raise RemoteToolCallFailed(function_name, res.status_code, res.text)
68
78
 
69
79
  resp = res.json()
70
80
  tool_call_trace.response = resp
@@ -142,3 +152,16 @@ def get_tools_df(funcs: List[Callable[..., Any]]) -> pd.DataFrame:
142
152
  data["doc"].append(doc)
143
153
 
144
154
  return pd.DataFrame(data) # type: ignore
155
+
156
+
157
def get_tools_info(funcs: List[Callable[..., Any]]) -> Dict[str, str]:
    """Build a lookup from each tool's name to its signature and docstring.

    Parameters:
        funcs (List[Callable[..., Any]]): The tool functions to describe.

    Returns:
        Dict[str, str]: Maps `func.__name__` to the text
            "<name><signature>:\\n<docstring>". A missing docstring is rendered
            as an empty string. If two functions share a name, the later one
            wins.
    """

    def _describe(tool: Callable[..., Any]) -> str:
        # Tools without a docstring still get a signature line.
        doc = "" if tool.__doc__ is None else tool.__doc__
        return f"{tool.__name__}{inspect.signature(tool)}:\n{doc}"

    return {tool.__name__: _describe(tool) for tool in funcs}
@@ -2,6 +2,7 @@ import io
2
2
  import json
3
3
  import logging
4
4
  import tempfile
5
+ from uuid import UUID
5
6
  from pathlib import Path
6
7
  from importlib import resources
7
8
  from typing import Any, Dict, List, Optional, Tuple, Union, cast
@@ -19,7 +20,9 @@ from vision_agent.tools.tool_utils import (
19
20
  get_tool_descriptions,
20
21
  get_tool_documentation,
21
22
  get_tools_df,
23
+ get_tools_info,
22
24
  )
25
+ from vision_agent.utils.exceptions import FineTuneModelIsNotReady
23
26
  from vision_agent.utils import extract_frames_from_video
24
27
  from vision_agent.utils.execute import FileSerializer, MimeType
25
28
  from vision_agent.utils.image_utils import (
@@ -31,6 +34,15 @@ from vision_agent.utils.image_utils import (
31
34
  convert_quad_box_to_bbox,
32
35
  rle_decode,
33
36
  )
37
+ from vision_agent.tools.tools_types import (
38
+ BboxInput,
39
+ BboxInputBase64,
40
+ PromptTask,
41
+ Florencev2FtRequest,
42
+ FineTuning,
43
+ JobStatus,
44
+ )
45
+ from vision_agent.clients.landing_public_api import LandingPublicAPI
34
46
 
35
47
  register_heif_opener()
36
48
 
@@ -1285,6 +1297,119 @@ def overlay_heat_map(
1285
1297
  return np.array(combined)
1286
1298
 
1287
1299
 
1300
+ # TODO: add this function to the imports so that it is picked up by the agent
1301
def florencev2_fine_tuning(bboxes: List[Dict[str, Any]], task: str) -> UUID:
    """'florencev2_fine_tuning' is a tool that fine-tunes florencev2 so it can
    detect objects in an image based on a given dataset. It launches the
    fine-tuning job and returns the job id.

    Parameters:
        bboxes (List[BboxInput]): A list of BboxInput-shaped dictionaries, each
            containing the image path, labels and bounding boxes.
        task (str): The name of the florencev2 fine-tuning task (a PromptTask
            member). The options are CAPTION, CAPTION_TO_PHRASE_GROUNDING and
            OBJECT_DETECTION.

    Returns:
        UUID: The fine-tuning job id; this id is used later to retrieve the
            fine-tuned model.

    Example
    -------
        >>> fine_tuning_job_id = florencev2_fine_tuning(
            [{'image_path': 'filename.png', 'labels': ['screw'], 'bboxes': [[370, 30, 560, 290]]},
             {'image_path': 'filename.png', 'labels': ['screw'], 'bboxes': [[120, 0, 300, 170]]}],
             "OBJECT_DETECTION"
        )
    """
    # Validate every entry before resolving the task or reading any image.
    validated = list(map(BboxInput.model_validate, bboxes))
    prompt_task = PromptTask[task]

    request_items = []
    for entry in validated:
        request_items.append(
            BboxInputBase64(
                image=convert_to_b64(entry.image_path),
                # Keep only the text after the last "/" as the file name.
                filename=entry.image_path.rsplit("/", 1)[-1],
                labels=entry.labels,
                bboxes=entry.bboxes,
            )
        )

    client = LandingPublicAPI()
    job_id = client.launch_fine_tuning_job("florencev2", prompt_task, request_items)
    return job_id
1339
+
1340
+
1341
+ # TODO: add this function to the imports so that it is picked up by the agent
1342
def florencev2_fine_tuned_object_detection(
    image: np.ndarray, prompt: str, model_id: UUID, task: str
) -> List[Dict[str, Any]]:
    """'florencev2_fine_tuned_object_detection' is a tool that uses a fine tuned model
    to detect objects given a text prompt such as a phrase or class names separated by
    commas. It returns a list of detected objects as labels and their location as
    bounding boxes with score of 1.0.

    Parameters:
        image (np.ndarray): The image used to detect objects.
        prompt (str): The prompt to help find objects in the image. It is ignored
            (replaced by an empty string) when the task is OBJECT_DETECTION.
        model_id (UUID): The fine-tuned model id.
        task (str): The name of the florencev2 fine-tuning task (a PromptTask
            member). The options are CAPTION, CAPTION_TO_PHRASE_GROUNDING and
            OBJECT_DETECTION.

    Returns:
        List[Dict[str, Any]]: A list of dictionaries containing the score, label, and
            bounding box of the detected objects with normalized coordinates between 0
            and 1 (xmin, ymin, xmax, ymax). xmin and ymin are the coordinates of the
            top-left and xmax and ymax are the coordinates of the bottom-right of the
            bounding box. The scores are always 1.0 and cannot be thresholded.

    Raises:
        FineTuneModelIsNotReady: If the fine-tuning job has not reached the
            SUCCEEDED status.
        KeyError: If `task` is not the name of a PromptTask member.

    Example
    -------
        >>> florencev2_fine_tuned_object_detection(
            image,
            'person looking at a coyote',
            UUID("381cd5f9-5dc4-472d-9260-f3bb89d31f83"),
            'CAPTION_TO_PHRASE_GROUNDING'
        )
        [
            {'score': 1.0, 'label': 'person', 'bbox': [0.1, 0.11, 0.35, 0.4]},
            {'score': 1.0, 'label': 'coyote', 'bbox': [0.34, 0.21, 0.85, 0.5]},
        ]
    """
    # Refuse to run inference until the fine-tuning job has succeeded.
    landing_api = LandingPublicAPI()
    status = landing_api.check_fine_tuning_job(model_id)
    if status is not JobStatus.SUCCEEDED:
        raise FineTuneModelIsNotReady()

    # Keep the enum in its own variable instead of rebinding the `str` parameter.
    task_input = PromptTask[task]
    if task_input is PromptTask.OBJECT_DETECTION:
        prompt = ""

    data_obj = Florencev2FtRequest(
        image=convert_to_b64(image),
        task=task_input,
        tool="florencev2_fine_tuning",
        prompt=prompt,
        fine_tuning=FineTuning(job_id=model_id),
    )
    data = data_obj.model_dump(by_alias=True)
    metadata_payload = {"function_name": "florencev2_fine_tuned_object_detection"}
    response = send_inference_request(
        data, "tools", v2=False, metadata_payload=metadata_payload
    )

    # The response is keyed by the task's prompt token (e.g. "<OD>").
    detections = response[task_input.value]
    image_size = image.shape[:2]
    return [
        {
            "score": 1.0,
            "label": detections["labels"][i],
            "bbox": normalize_bbox(bbox, image_size),
        }
        for i, bbox in enumerate(detections["bboxes"])
    ]
1411
+
1412
+
1288
1413
  TOOLS = [
1289
1414
  owl_v2,
1290
1415
  grounding_sam,
@@ -1317,6 +1442,7 @@ TOOLS = [
1317
1442
  TOOLS_DF = get_tools_df(TOOLS) # type: ignore
1318
1443
  TOOL_DESCRIPTIONS = get_tool_descriptions(TOOLS) # type: ignore
1319
1444
  TOOL_DOCSTRING = get_tool_documentation(TOOLS) # type: ignore
1445
+ TOOLS_INFO = get_tools_info(TOOLS) # type: ignore
1320
1446
  UTILITIES_DOCSTRING = get_tool_documentation(
1321
1447
  [
1322
1448
  save_json,
@@ -0,0 +1,84 @@
1
+ from uuid import UUID
2
+ from enum import Enum
3
+ from typing import List, Tuple, Optional
4
+
5
+ from pydantic import BaseModel, ConfigDict, Field, field_serializer, SerializationInfo
6
+
7
+
8
# One annotated training image referenced by its location on disk.
class BboxInput(BaseModel):
    # Path of the image file.
    image_path: str
    # Labels for the boxes (presumably one per entry in `bboxes` -- confirm).
    labels: List[str]
    # Boxes as 4-integer tuples; the example in florencev2_fine_tuning suggests
    # pixel coordinates (coordinate order not shown here -- confirm).
    bboxes: List[Tuple[int, int, int, int]]
12
+
13
+
14
# Same annotation record as BboxInput, but carrying the image content itself
# (base64 text, per the class name) together with its file name.
class BboxInputBase64(BaseModel):
    # Encoded image content.
    image: str
    # File name associated with the image.
    filename: str
    # Labels for the boxes (presumably one per entry in `bboxes` -- confirm).
    labels: List[str]
    # Boxes as 4-integer tuples.
    bboxes: List[Tuple[int, int, int, int]]
19
+
20
+
21
class PromptTask(str, Enum):
    """Task prompts accepted by the Florencev2 model.

    Each member's value is the prompt token string sent for that task.
    """

    # Image captioning.
    CAPTION = "<CAPTION>"
    # Grounding the phrases of a caption.
    CAPTION_TO_PHRASE_GROUNDING = "<CAPTION_TO_PHRASE_GROUNDING>"
    # Object detection.
    OBJECT_DETECTION = "<OD>"
32
+
33
+
34
# Reference to a fine-tuning job. The field can be populated either by its
# Python name (`job_id`) or by its alias (`jobId`).
class FineTuning(BaseModel):
    model_config = ConfigDict(populate_by_name=True)

    job_id: UUID = Field(alias="jobId")

    @field_serializer("job_id")
    def serialize_job_id(self, job_id: UUID, _info: SerializationInfo) -> str:
        # Emit the UUID as its plain string form when the model is dumped.
        return str(job_id)
42
+
43
+
44
# Request body for a Florencev2 inference call, optionally pointing at a
# fine-tuned model. Fields may be populated by Python name or by alias.
class Florencev2FtRequest(BaseModel):
    model_config = ConfigDict(populate_by_name=True)

    # Encoded image content.
    image: str
    # Task prompt to run.
    task: PromptTask
    # Name of the tool to invoke.
    tool: str
    # Optional text prompt; defaults to an empty string.
    prompt: Optional[str] = ""
    # Optional fine-tuning job reference, aliased as `fineTuning`.
    fine_tuning: Optional[FineTuning] = Field(None, alias="fineTuning")
52
+
53
+
54
class JobStatus(str, Enum):
    """Lifecycle states of a fine-tuning job.

    - CREATED: the job exists and is waiting to be scheduled.
    - STARTING: the job has started running but has not yet entered the
      training phase.
    - TRAINING: the job is training a model.
    - EVALUATING: the job is evaluating the model and computing metrics.
    - PUBLISHING: the job is exporting the artifact(s) to an external
      directory (s3 or local).
    - SUCCEEDED: the job has finished training, evaluation and publishing of
      the artifact(s).
    - FAILED: the job failed internally, for example because of resource
      issues or the code itself.
    - STOPPED: the job was stopped by the user, locally or in the cloud.
    """

    CREATED = "CREATED"
    STARTING = "STARTING"
    TRAINING = "TRAINING"
    EVALUATING = "EVALUATING"
    PUBLISHING = "PUBLISHING"
    SUCCEEDED = "SUCCEEDED"
    FAILED = "FAILED"
    STOPPED = "STOPPED"
+ STOPPED = "STOPPED"
@@ -49,3 +49,16 @@ class RemoteSandboxClosedError(RemoteSandboxError):
49
49
  """
50
50
 
51
51
  is_retryable = True
52
+
53
+
54
class FineTuneModelIsNotReady(Exception):
    """Raised when a fine-tuned model is requested before it is ready.

    Callers are advised to wait about 5 seconds and then try to use the model
    again.
    """
59
+
60
+
61
class FineTuneModelNotFound(Exception):
    """Raised when the requested fine-tuned model cannot be found.

    Callers are advised to retry with a different model id.
    """
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: vision-agent
3
- Version: 0.2.109
3
+ Version: 0.2.111
4
4
  Summary: Toolset for Vision Agent
5
5
  Author: Landing AI
6
6
  Author-email: dev@landing.ai
@@ -2,32 +2,32 @@ vision_agent/__init__.py,sha256=EAb4-f9iyuEYkBrX4ag1syM8Syx8118_t0R6_C34M9w,57
2
2
  vision_agent/agent/__init__.py,sha256=qpduQ9YufJQfMmG6jwKC2xmlbtR2qK8_1eQC1sGA9Ks,135
3
3
  vision_agent/agent/agent.py,sha256=Bt8yhjCFXuRdZaHxKEesG40V09nWRt45sZluri1R3AA,575
4
4
  vision_agent/agent/agent_utils.py,sha256=JXdl2xz14LKQAmScY-MIW23AD2WBFCsnI0JS6dAyj3Q,1412
5
- vision_agent/agent/vision_agent.py,sha256=4vzKYNoScv_sOZiqefo46iKJNZOtqSFvSJif0zZIdLI,8471
5
+ vision_agent/agent/vision_agent.py,sha256=5rgO-pScVOS3t4sWnLBnGYYkGftGgF4U0FpZzFVrDAY,8447
6
6
  vision_agent/agent/vision_agent_coder.py,sha256=oo3IoRrc-cVdjKq_YsjzkBZNTBtiCTIctGfeC5C7MXM,30926
7
7
  vision_agent/agent/vision_agent_coder_prompts.py,sha256=a3R_vHlT2FW3-DSn4OWgzF9zEAx-uKM4ZaTi9Kn-K54,11116
8
8
  vision_agent/agent/vision_agent_prompts.py,sha256=hjs-m4ZHR7HE1HtOeX_1rOvTQA2FMEAqEkaBbGPBYDo,6072
9
9
  vision_agent/clients/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
10
- vision_agent/clients/http.py,sha256=1WMt29F12YFfPH03AttKxnUNXx5sNOD9ZuH4etbB054,1598
11
- vision_agent/clients/landing_public_api.py,sha256=Tjl8uBZWc3dvrCOKg-PCYjw3RC3X5Y6B50kaKn_QzL0,1050
10
+ vision_agent/clients/http.py,sha256=k883i6M_4nl7zwwHSI-yP5sAgQZIDPM1nrKD6YFJ3Xs,2009
11
+ vision_agent/clients/landing_public_api.py,sha256=ImMzR6qVvkwgiMMmQRGl91E4xktKjoctun0hWn9PxfE,1507
12
12
  vision_agent/fonts/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
13
13
  vision_agent/fonts/default_font_ch_en.ttf,sha256=1YM0Z3XqLDjSNbF7ihQFSAIUdjF9m1rtHiNC_6QosTE,1594400
14
14
  vision_agent/lmm/__init__.py,sha256=YuUZRsMHdn8cMOv6iBU8yUqlIOLrbZQqZl9KPnofsHQ,103
15
15
  vision_agent/lmm/lmm.py,sha256=TgEwrtQqpnWlBYEvsSU6DbkY3Y7MM8wRb4lMQgSiM0k,19435
16
16
  vision_agent/lmm/types.py,sha256=8TSRoTbXyCKVJiH-wHXI2OiGOMSkYv1vLGYeAXtNpOQ,153
17
- vision_agent/tools/__init__.py,sha256=MK0D8NtIChwGHwqsTz3LeV5BGuQecNVrNzUsyaEwuGA,1926
18
- vision_agent/tools/meta_tools.py,sha256=v2FrLl0YwM7JwsVRfgfnryd9qorbPRiObestexbnNBs,15170
19
- vision_agent/tools/meta_tools_types.py,sha256=aU4knXEhm0AnDYW958T6Q6qPwN4yq8pQzQOxqFaOjzg,596
17
+ vision_agent/tools/__init__.py,sha256=ibjymNE7QqtZLgAm3oytYDANNhGLovQsjFqVZZCQWEU,2018
18
+ vision_agent/tools/meta_tools.py,sha256=Bm_sIeorVRW_aWA-htA0G-BbbN4yZm91Tam90s90cnA,13366
20
19
  vision_agent/tools/prompts.py,sha256=V1z4YJLXZuUl_iZ5rY0M5hHc_2tmMEUKr0WocXKGt4E,1430
21
- vision_agent/tools/tool_utils.py,sha256=ZhZ9oEcOvRSuWPy-gV0rx3pvaaXzBW-ZC3YQanXrq1g,4733
22
- vision_agent/tools/tools.py,sha256=6sheMaBfuJUxDboN1GA0L4bTeuKoljrGlGNdwXn_dq8,44805
20
+ vision_agent/tools/tool_utils.py,sha256=FU6DCMB3hk9e8p4nAkAv7mHQDIhH8fssyxAYE1bmGK4,5628
21
+ vision_agent/tools/tools.py,sha256=RSKzMcEUNQwcKnQLSH4Go284QgBl3pXqIqmCGMY4shY,49616
22
+ vision_agent/tools/tools_types.py,sha256=z6_XtUhWgh201yM7Z0CYtiLBEGdHPc_QUydMDHZ84EA,2216
23
23
  vision_agent/utils/__init__.py,sha256=CW84HnhqI6XQVuxf2KifkLnSuO7EOhmuL09-gAymAak,219
24
- vision_agent/utils/exceptions.py,sha256=isVH-SVL4vHj3q5kK4z7cy5_aOapAqHXWkpibfSNbUs,1659
24
+ vision_agent/utils/exceptions.py,sha256=booSPSuoULF7OXRr_YbC4dtKt6gM_HyiFQHBuaW86C4,2052
25
25
  vision_agent/utils/execute.py,sha256=ZRxztUfZwvMvPnFbKx5W_LZzTuKl8Zf5dP3Y8P2-3nk,25093
26
26
  vision_agent/utils/image_utils.py,sha256=c_g5i_cFC0C-Yw9gU_NaVgQdmBlyumw3bLIDtCU42xo,8200
27
27
  vision_agent/utils/sim.py,sha256=7JvtWGN0Ik5ife3qQYWs7Fm3T8AnAXGFd5HnvDC15mQ,4433
28
28
  vision_agent/utils/type_defs.py,sha256=BE12s3JNQy36QvauXHjwyeffVh5enfcvd4vTzSwvEZI,1384
29
29
  vision_agent/utils/video.py,sha256=rNmU9KEIkZB5-EztZNlUiKYN0mm_55A_2VGUM0QpqLA,8779
30
- vision_agent-0.2.109.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
31
- vision_agent-0.2.109.dist-info/METADATA,sha256=ILl3GV0gpeSUgDP5QlGQP4r12nsbyxovYCxn3EtPkx4,10732
32
- vision_agent-0.2.109.dist-info/WHEEL,sha256=7Z8_27uaHI_UZAc4Uox4PpBhQ9Y5_modZXWMxtUi4NU,88
33
- vision_agent-0.2.109.dist-info/RECORD,,
30
+ vision_agent-0.2.111.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
31
+ vision_agent-0.2.111.dist-info/METADATA,sha256=Rqo5Hv-b8GnmZloGTELvU5lzbEZAY6cz96KUGKM7WR8,10732
32
+ vision_agent-0.2.111.dist-info/WHEEL,sha256=7Z8_27uaHI_UZAc4Uox4PpBhQ9Y5_modZXWMxtUi4NU,88
33
+ vision_agent-0.2.111.dist-info/RECORD,,
@@ -1,30 +0,0 @@
1
- from enum import Enum
2
- from typing import List, Tuple
3
-
4
- from pydantic import BaseModel
5
-
6
-
7
- class BboxInput(BaseModel):
8
- image_path: str
9
- labels: List[str]
10
- bboxes: List[Tuple[int, int, int, int]]
11
-
12
-
13
- class BboxInputBase64(BaseModel):
14
- image: str
15
- filename: str
16
- labels: List[str]
17
- bboxes: List[Tuple[int, int, int, int]]
18
-
19
-
20
- class PromptTask(str, Enum):
21
- """
22
- Valid task prompts options for the Florencev2 model.
23
- """
24
-
25
- CAPTION = "<CAPTION>"
26
- """"""
27
- CAPTION_TO_PHRASE_GROUNDING = "<CAPTION_TO_PHRASE_GROUNDING>"
28
- """"""
29
- OBJECT_DETECTION = "<OD>"
30
- """"""