vision-agent 0.2.109__py3-none-any.whl → 0.2.111__py3-none-any.whl

Sign up to get free protection for your applications and to get access to all the features.
@@ -28,7 +28,7 @@ class DefaultImports:
28
28
  code = [
29
29
  "from typing import *",
30
30
  "from vision_agent.utils.execute import CodeInterpreter",
31
- "from vision_agent.tools.meta_tools import generate_vision_code, edit_vision_code, open_file, create_file, scroll_up, scroll_down, edit_file, get_tool_descriptions, florencev2_fine_tuning",
31
+ "from vision_agent.tools.meta_tools import generate_vision_code, edit_vision_code, open_file, create_file, scroll_up, scroll_down, edit_file, get_tool_descriptions",
32
32
  ]
33
33
 
34
34
  @staticmethod
@@ -4,7 +4,6 @@ from typing import Any, Dict, Optional
4
4
 
5
5
  from requests import Session
6
6
  from requests.adapters import HTTPAdapter
7
- from requests.exceptions import ConnectionError, RequestException, Timeout
8
7
 
9
8
  _LOGGER = logging.getLogger(__name__)
10
9
 
@@ -38,9 +37,22 @@ class BaseHTTP:
38
37
  response.raise_for_status()
39
38
  result: Dict[str, Any] = response.json()
40
39
  _LOGGER.info(json.dumps(result))
41
- except (ConnectionError, Timeout, RequestException) as err:
42
- _LOGGER.warning(f"Error: {err}.")
43
40
  except json.JSONDecodeError:
44
41
  resp_text = response.text
45
42
  _LOGGER.warning(f"Response seems incorrect: '{resp_text}'.")
43
+ raise
44
+ return result
45
+
46
def get(self, url: str) -> Dict[str, Any]:
    """Issue a GET request against the configured base endpoint.

    Args:
        url: Path appended to ``self._base_endpoint`` (joined with a ``/``).

    Returns:
        The decoded JSON body of the response.

    Raises:
        json.JSONDecodeError: The body could not be decoded as JSON; the raw
            text is logged as a warning before the error is re-raised.
        Exception: Whatever ``response.raise_for_status()`` or the session
            raises is propagated unchanged.
    """
    endpoint = f"{self._base_endpoint}/{url}"
    _LOGGER.info(f"Sending data to {endpoint}")
    try:
        response = self._session.get(url=endpoint, timeout=self._TIMEOUT)
        # Surface HTTP error statuses before trying to decode the body.
        response.raise_for_status()
        decoded: Dict[str, Any] = response.json()
        _LOGGER.info(json.dumps(decoded))
    except json.JSONDecodeError:
        # Log the undecodable body to help debugging, then let callers see
        # the original decoding error.
        _LOGGER.warning(f"Response seems incorrect: '{response.text}'.")
        raise
    else:
        return decoded
@@ -2,9 +2,12 @@ import os
2
2
  from uuid import UUID
3
3
  from typing import List
4
4
 
5
+ from requests.exceptions import HTTPError
6
+
5
7
  from vision_agent.clients.http import BaseHTTP
6
8
  from vision_agent.utils.type_defs import LandingaiAPIKey
7
- from vision_agent.tools.meta_tools_types import BboxInputBase64, PromptTask
9
+ from vision_agent.utils.exceptions import FineTuneModelNotFound
10
+ from vision_agent.tools.tools_types import BboxInputBase64, PromptTask, JobStatus
8
11
 
9
12
 
10
13
  class LandingPublicAPI(BaseHTTP):
@@ -24,3 +27,12 @@ class LandingPublicAPI(BaseHTTP):
24
27
  }
25
28
  response = self.post(url, payload=data)
26
29
  return UUID(response["jobId"])
30
+
31
def check_fine_tuning_job(self, job_id: UUID) -> JobStatus:
    """Fetch the current status of a fine-tuning job.

    Args:
        job_id: Identifier of the fine-tuning job to look up.

    Returns:
        The ``JobStatus`` built from the ``"status"`` field of the response.

    Raises:
        FineTuneModelNotFound: The service answered with HTTP 404 for this
            job id.
        HTTPError: Any other HTTP error status is propagated unchanged.
    """
    url = f"v1/agent/jobs/fine-tuning/{job_id}/status"
    try:
        get_job = self.get(url)
    except HTTPError as err:
        if err.response is not None and err.response.status_code == 404:
            raise FineTuneModelNotFound() from err
        # Previously any non-404 error was swallowed here and the function
        # then failed with UnboundLocalError on `get_job`; re-raise instead
        # so callers see the real HTTP failure.
        raise
    return JobStatus(get_job["status"])
@@ -1,12 +1,15 @@
1
1
  from typing import Callable, List, Optional
2
2
 
3
- from .meta_tools import META_TOOL_DOCSTRING, florencev2_fine_tuning
3
+ from .meta_tools import (
4
+ META_TOOL_DOCSTRING,
5
+ )
4
6
  from .prompts import CHOOSE_PARAMS, SYSTEM_PROMPT
5
7
  from .tools import (
6
8
  TOOL_DESCRIPTIONS,
7
9
  TOOL_DOCSTRING,
8
10
  TOOLS,
9
11
  TOOLS_DF,
12
+ TOOLS_INFO,
10
13
  UTILITIES_DOCSTRING,
11
14
  blip_image_caption,
12
15
  clip,
@@ -52,15 +55,16 @@ def register_tool(imports: Optional[List] = None) -> Callable:
52
55
  def decorator(tool: Callable) -> Callable:
53
56
  import inspect
54
57
 
55
- from .tools import get_tool_descriptions, get_tools_df
58
+ from .tools import get_tool_descriptions, get_tools_df, get_tools_info
56
59
 
57
- global TOOLS, TOOLS_DF, TOOL_DESCRIPTIONS, TOOL_DOCSTRING
60
+ global TOOLS, TOOLS_DF, TOOL_DESCRIPTIONS, TOOL_DOCSTRING, TOOLS_INFO
58
61
 
59
62
  if tool not in TOOLS:
60
63
  TOOLS.append(tool)
61
64
  TOOLS_DF = get_tools_df(TOOLS) # type: ignore
62
65
  TOOL_DESCRIPTIONS = get_tool_descriptions(TOOLS) # type: ignore
63
66
  TOOL_DOCSTRING = get_tool_documentation(TOOLS) # type: ignore
67
+ TOOLS_INFO = get_tools_info(TOOLS) # type: ignore
64
68
 
65
69
  globals()[tool.__name__] = tool
66
70
  if imports is not None:
@@ -1,6 +1,5 @@
1
1
  import os
2
2
  import subprocess
3
- from uuid import UUID
4
3
  from pathlib import Path
5
4
  from typing import Any, Dict, List, Union
6
5
 
@@ -8,9 +7,7 @@ import vision_agent as va
8
7
  from vision_agent.lmm.types import Message
9
8
  from vision_agent.tools.tool_utils import get_tool_documentation
10
9
  from vision_agent.tools.tools import TOOL_DESCRIPTIONS
11
- from vision_agent.utils.image_utils import convert_to_b64
12
- from vision_agent.clients.landing_public_api import LandingPublicAPI
13
- from vision_agent.tools.meta_tools_types import BboxInput, BboxInputBase64, PromptTask
10
+
14
11
 
15
12
  # These tools are adapted from SWE-Agent https://github.com/princeton-nlp/SWE-agent
16
13
 
@@ -384,51 +381,11 @@ def edit_file(file_path: str, start: int, end: int, content: str) -> str:
384
381
 
385
382
  def get_tool_descriptions() -> str:
386
383
  """Returns a description of all the tools that `generate_vision_code` has access to.
387
- Helpful for answerings questions about what types of vision tasks you can do with
384
+ Helpful for answering questions about what types of vision tasks you can do with
388
385
  `generate_vision_code`."""
389
386
  return TOOL_DESCRIPTIONS
390
387
 
391
388
 
392
- def florencev2_fine_tuning(bboxes: List[Dict[str, Any]], task: str) -> UUID:
393
- """'florencev2_fine_tuning' is a tool that fine-tune florencev2 to be able
394
- to detect objects in an image based on a given dataset. It returns the fine
395
- tuning job id.
396
-
397
- Parameters:
398
- bboxes (List[BboxInput]): A list of BboxInput containing the
399
- image path, labels and bounding boxes.
400
- task (PromptTask): The florencev2 fine-tuning task. The options are
401
- CAPTION, CAPTION_TO_PHRASE_GROUNDING and OBJECT_DETECTION.
402
-
403
- Returns:
404
- UUID: The fine tuning job id, this id will used to retrieve the fine
405
- tuned model.
406
-
407
- Example
408
- -------
409
- >>> fine_tuning_job_id = florencev2_fine_tuning(
410
- [{'image_path': 'filename.png', 'labels': ['screw'], 'bboxes': [[370, 30, 560, 290]]},
411
- {'image_path': 'filename.png', 'labels': ['screw'], 'bboxes': [[120, 0, 300, 170]]}],
412
- "OBJECT_DETECTION"
413
- )
414
- """
415
- bboxes_input = [BboxInput.model_validate(bbox) for bbox in bboxes]
416
- task_input = PromptTask[task]
417
- fine_tuning_request = [
418
- BboxInputBase64(
419
- image=convert_to_b64(bbox_input.image_path),
420
- filename=bbox_input.image_path.split("/")[-1],
421
- labels=bbox_input.labels,
422
- bboxes=bbox_input.bboxes,
423
- )
424
- for bbox_input in bboxes_input
425
- ]
426
- landing_api = LandingPublicAPI()
427
- return landing_api.launch_fine_tuning_job(
428
- "florencev2", task_input, fine_tuning_request
429
- )
430
-
431
-
432
389
  META_TOOL_DOCSTRING = get_tool_documentation(
433
390
  [
434
391
  get_tool_descriptions,
@@ -442,6 +399,5 @@ META_TOOL_DOCSTRING = get_tool_documentation(
442
399
  search_dir,
443
400
  search_file,
444
401
  find_file,
445
- florencev2_fine_tuning,
446
402
  ]
447
403
  )
@@ -15,9 +15,10 @@ from vision_agent.utils.execute import Error, MimeType
15
15
  from vision_agent.utils.type_defs import LandingaiAPIKey
16
16
 
17
17
  _LOGGER = logging.getLogger(__name__)
18
- _LND_API_KEY = LandingaiAPIKey().api_key
19
- _LND_API_URL = "https://api.landing.ai/v1/agent/model"
20
- _LND_API_URL_v2 = "https://api.landing.ai/v1/tools"
18
+ _LND_API_KEY = os.environ.get("LANDINGAI_API_KEY", LandingaiAPIKey().api_key)
19
+ _LND_BASE_URL = os.environ.get("LANDINGAI_URL", "https://api.landing.ai")
20
+ _LND_API_URL = f"{_LND_BASE_URL}/v1/agent/model"
21
+ _LND_API_URL_v2 = f"{_LND_BASE_URL}/v1/tools"
21
22
 
22
23
 
23
24
  class ToolCallTrace(BaseModel):
@@ -28,8 +29,13 @@ class ToolCallTrace(BaseModel):
28
29
 
29
30
 
30
31
  def send_inference_request(
31
- payload: Dict[str, Any], endpoint_name: str, v2: bool = False
32
+ payload: Dict[str, Any],
33
+ endpoint_name: str,
34
+ v2: bool = False,
35
+ metadata_payload: Optional[Dict[str, Any]] = None,
32
36
  ) -> Dict[str, Any]:
37
+ # TODO: runtime_tag and function_name should be metadata_payload and now included
38
+ # in the service payload
33
39
  try:
34
40
  if runtime_tag := os.environ.get("RUNTIME_TAG", ""):
35
41
  payload["runtime_tag"] = runtime_tag
@@ -62,9 +68,13 @@ def send_inference_request(
62
68
  traceback_raw=[],
63
69
  )
64
70
  _LOGGER.error(f"Request failed: {res.status_code} {res.text}")
65
- raise RemoteToolCallFailed(
66
- payload["function_name"], res.status_code, res.text
67
- )
71
+ # TODO: function_name should be in metadata_payload
72
+ function_name = "unknown"
73
+ if "function_name" in payload:
74
+ function_name = payload["function_name"]
75
+ elif metadata_payload is not None and "function_name" in metadata_payload:
76
+ function_name = metadata_payload["function_name"]
77
+ raise RemoteToolCallFailed(function_name, res.status_code, res.text)
68
78
 
69
79
  resp = res.json()
70
80
  tool_call_trace.response = resp
@@ -142,3 +152,16 @@ def get_tools_df(funcs: List[Callable[..., Any]]) -> pd.DataFrame:
142
152
  data["doc"].append(doc)
143
153
 
144
154
  return pd.DataFrame(data) # type: ignore
155
+
156
+
157
def get_tools_info(funcs: List[Callable[..., Any]]) -> Dict[str, str]:
    """Map each tool name to its signature followed by its docstring.

    Args:
        funcs: The tool callables to describe.

    Returns:
        A dict keyed by ``func.__name__`` whose values have the form
        ``"<name><signature>:\\n<docstring>"``. A missing docstring is
        rendered as an empty string.
    """
    return {
        tool.__name__: f"{tool.__name__}{inspect.signature(tool)}:\n{tool.__doc__ or ''}"
        for tool in funcs
    }
@@ -2,6 +2,7 @@ import io
2
2
  import json
3
3
  import logging
4
4
  import tempfile
5
+ from uuid import UUID
5
6
  from pathlib import Path
6
7
  from importlib import resources
7
8
  from typing import Any, Dict, List, Optional, Tuple, Union, cast
@@ -19,7 +20,9 @@ from vision_agent.tools.tool_utils import (
19
20
  get_tool_descriptions,
20
21
  get_tool_documentation,
21
22
  get_tools_df,
23
+ get_tools_info,
22
24
  )
25
+ from vision_agent.utils.exceptions import FineTuneModelIsNotReady
23
26
  from vision_agent.utils import extract_frames_from_video
24
27
  from vision_agent.utils.execute import FileSerializer, MimeType
25
28
  from vision_agent.utils.image_utils import (
@@ -31,6 +34,15 @@ from vision_agent.utils.image_utils import (
31
34
  convert_quad_box_to_bbox,
32
35
  rle_decode,
33
36
  )
37
+ from vision_agent.tools.tools_types import (
38
+ BboxInput,
39
+ BboxInputBase64,
40
+ PromptTask,
41
+ Florencev2FtRequest,
42
+ FineTuning,
43
+ JobStatus,
44
+ )
45
+ from vision_agent.clients.landing_public_api import LandingPublicAPI
34
46
 
35
47
  register_heif_opener()
36
48
 
@@ -1285,6 +1297,119 @@ def overlay_heat_map(
1285
1297
  return np.array(combined)
1286
1298
 
1287
1299
 
1300
# TODO: add this function to the imports so that it is picked up by the agent
def florencev2_fine_tuning(bboxes: List[Dict[str, Any]], task: str) -> UUID:
    """'florencev2_fine_tuning' is a tool that fine-tunes florencev2 to be able
    to detect objects in an image based on a given dataset. It returns the fine
    tuning job id.

    Parameters:
        bboxes (List[Dict[str, Any]]): A list of dictionaries, each of which must
            validate as a BboxInput: it contains the image path, the labels and
            the bounding boxes.
        task (str): The name of the florencev2 fine-tuning task. The options are
            CAPTION, CAPTION_TO_PHRASE_GROUNDING and OBJECT_DETECTION.

    Returns:
        UUID: The fine tuning job id, this id will be used to retrieve the fine
            tuned model.

    Raises:
        KeyError: If `task` is not the name of a PromptTask member.

    Example
    -------
        >>> fine_tuning_job_id = florencev2_fine_tuning(
            [{'image_path': 'filename.png', 'labels': ['screw'], 'bboxes': [[370, 30, 560, 290]]},
             {'image_path': 'filename.png', 'labels': ['screw'], 'bboxes': [[120, 0, 300, 170]]}],
             "OBJECT_DETECTION"
        )
    """
    # Validate every entry before doing any work so malformed input fails early.
    bboxes_input = [BboxInput.model_validate(bbox) for bbox in bboxes]
    # Lookup is by member name (e.g. "OBJECT_DETECTION"), not by prompt value.
    task_input = PromptTask[task]
    fine_tuning_request = [
        BboxInputBase64(
            image=convert_to_b64(bbox_input.image_path),
            # Path.name honours the platform's path separator, unlike a
            # hard-coded split on "/".
            filename=Path(bbox_input.image_path).name,
            labels=bbox_input.labels,
            bboxes=bbox_input.bboxes,
        )
        for bbox_input in bboxes_input
    ]
    landing_api = LandingPublicAPI()
    return landing_api.launch_fine_tuning_job(
        "florencev2", task_input, fine_tuning_request
    )
1339
+
1340
+
1341
# TODO: add this function to the imports so that it is picked up by the agent
def florencev2_fine_tuned_object_detection(
    image: np.ndarray, prompt: str, model_id: UUID, task: str
) -> List[Dict[str, Any]]:
    """'florencev2_fine_tuned_object_detection' is a tool that uses a fine tuned model
    to detect objects given a text prompt such as a phrase or class names separated by
    commas. It returns a list of detected objects as labels and their location as
    bounding boxes with score of 1.0.

    Parameters:
        image (np.ndarray): The image used to detect objects.
        prompt (str): The prompt to help find objects in the image. It is ignored
            (replaced by an empty string) when the task is OBJECT_DETECTION.
        model_id (UUID): The fine-tuned model id.
        task (str): The name of the florencev2 fine-tuning task. The options are
            CAPTION, CAPTION_TO_PHRASE_GROUNDING and OBJECT_DETECTION.

    Returns:
        List[Dict[str, Any]]: A list of dictionaries containing the score, label, and
            bounding box of the detected objects with normalized coordinates between 0
            and 1 (xmin, ymin, xmax, ymax). xmin and ymin are the coordinates of the
            top-left and xmax and ymax are the coordinates of the bottom-right of the
            bounding box. The scores are always 1.0 and cannot be thresholded.

    Raises:
        FineTuneModelIsNotReady: If the fine-tuning job status is not SUCCEEDED.
        FineTuneModelNotFound: If the job status lookup reports the job as missing.
        KeyError: If `task` is not the name of a PromptTask member.

    Example
    -------
        >>> florencev2_fine_tuned_object_detection(
            image,
            'person looking at a coyote',
            UUID("381cd5f9-5dc4-472d-9260-f3bb89d31f83"),
            "CAPTION_TO_PHRASE_GROUNDING"
        )
        [
            {'score': 1.0, 'label': 'person', 'bbox': [0.1, 0.11, 0.35, 0.4]},
            {'score': 1.0, 'label': 'coyote', 'bbox': [0.34, 0.21, 0.85, 0.5]},
        ]
    """
    # check if job succeeded first
    landing_api = LandingPublicAPI()
    status = landing_api.check_fine_tuning_job(model_id)
    if status is not JobStatus.SUCCEEDED:
        raise FineTuneModelIsNotReady()

    # Keep the caller's string untouched and use a separately named enum value
    # instead of rebinding `task` to a different type.
    task_input = PromptTask[task]
    if task_input is PromptTask.OBJECT_DETECTION:
        prompt = ""

    data_obj = Florencev2FtRequest(
        image=convert_to_b64(image),
        task=task_input,
        tool="florencev2_fine_tuning",
        prompt=prompt,
        fine_tuning=FineTuning(job_id=model_id),
    )
    # by_alias=True emits the aliased field names declared on the models.
    data = data_obj.model_dump(by_alias=True)
    metadata_payload = {"function_name": "florencev2_fine_tuned_object_detection"}
    response = send_inference_request(
        data, "tools", v2=False, metadata_payload=metadata_payload
    )

    # The response is keyed by the task's prompt value (e.g. "<OD>").
    detections = response[task_input.value]
    image_size = image.shape[:2]
    return [
        {
            "score": 1.0,
            "label": detections["labels"][i],
            "bbox": normalize_bbox(bbox, image_size),
        }
        for i, bbox in enumerate(detections["bboxes"])
    ]
1411
+
1412
+
1288
1413
  TOOLS = [
1289
1414
  owl_v2,
1290
1415
  grounding_sam,
@@ -1317,6 +1442,7 @@ TOOLS = [
1317
1442
  TOOLS_DF = get_tools_df(TOOLS) # type: ignore
1318
1443
  TOOL_DESCRIPTIONS = get_tool_descriptions(TOOLS) # type: ignore
1319
1444
  TOOL_DOCSTRING = get_tool_documentation(TOOLS) # type: ignore
1445
+ TOOLS_INFO = get_tools_info(TOOLS) # type: ignore
1320
1446
  UTILITIES_DOCSTRING = get_tool_documentation(
1321
1447
  [
1322
1448
  save_json,
@@ -0,0 +1,84 @@
1
+ from uuid import UUID
2
+ from enum import Enum
3
+ from typing import List, Tuple, Optional
4
+
5
+ from pydantic import BaseModel, ConfigDict, Field, field_serializer, SerializationInfo
6
+
7
+
8
class BboxInput(BaseModel):
    """Annotated image referenced by file path, used as fine-tuning input."""

    # Path of the image file on disk.
    image_path: str
    # Labels for the annotated objects.
    labels: List[str]
    # Bounding boxes as 4-integer tuples.
    # NOTE(review): presumably one box per label in (xmin, ymin, xmax, ymax)
    # order — confirm against the service contract.
    bboxes: List[Tuple[int, int, int, int]]
12
+
13
+
14
class BboxInputBase64(BaseModel):
    """Annotated image carried inline as an encoded string instead of a path."""

    # Encoded image content (produced by `convert_to_b64` in the caller).
    image: str
    # File name of the image, without its directory.
    filename: str
    # Labels for the annotated objects.
    labels: List[str]
    # Bounding boxes as 4-integer tuples.
    # NOTE(review): presumably (xmin, ymin, xmax, ymax) — confirm.
    bboxes: List[Tuple[int, int, int, int]]
19
+
20
+
21
class PromptTask(str, Enum):
    """
    Valid task prompts options for the Florencev2 model.

    Members are looked up by name (e.g. ``PromptTask["OBJECT_DETECTION"]``) and
    their values are the prompt strings sent to the model.
    """

    CAPTION = "<CAPTION>"
    """"""
    CAPTION_TO_PHRASE_GROUNDING = "<CAPTION_TO_PHRASE_GROUNDING>"
    """"""
    OBJECT_DETECTION = "<OD>"
    """"""
32
+
33
+
34
class FineTuning(BaseModel):
    """Reference to a fine-tuning job, embedded in inference requests."""

    # Allow construction with the field name (`job_id`) as well as the alias.
    model_config = ConfigDict(populate_by_name=True)

    # Serialized under the alias "jobId" when dumping by alias.
    job_id: UUID = Field(alias="jobId")

    @field_serializer("job_id")
    def serialize_job_id(self, job_id: UUID, _info: SerializationInfo) -> str:
        """Serialize the job id as its string form."""
        return str(job_id)
42
+
43
+
44
class Florencev2FtRequest(BaseModel):
    """Request payload for running inference with a fine-tuned Florencev2 model."""

    # Allow construction with field names as well as aliases.
    model_config = ConfigDict(populate_by_name=True)

    # Encoded image content.
    image: str
    # Task prompt to run.
    task: PromptTask
    # Name of the tool to invoke on the service side.
    tool: str
    # Optional text prompt; defaults to an empty string.
    prompt: Optional[str] = ""
    # Optional fine-tuning job reference; serialized under the alias "fineTuning".
    fine_tuning: Optional[FineTuning] = Field(None, alias="fineTuning")
52
+
53
+
54
class JobStatus(str, Enum):
    """The status of a fine-tuning job.

    CREATED:
        The job has been created and is waiting to be scheduled to run.
    STARTING:
        The job has started running, but has not yet entered the training phase.
    TRAINING:
        The job is training a model.
    EVALUATING:
        The job is evaluating the model and computing metrics.
    PUBLISHING:
        The job is exporting the artifact(s) to an external directory (s3 or local).
    SUCCEEDED:
        The job has finished, including training, evaluation and publishing the
        artifact(s).
    FAILED:
        The job has failed for some reason internally, it can be due to resources
        issues or the code itself.
    STOPPED:
        The job has been stopped by the user locally or in the cloud.
    """

    CREATED = "CREATED"
    STARTING = "STARTING"
    TRAINING = "TRAINING"
    EVALUATING = "EVALUATING"
    PUBLISHING = "PUBLISHING"
    SUCCEEDED = "SUCCEEDED"
    FAILED = "FAILED"
    STOPPED = "STOPPED"
@@ -49,3 +49,16 @@ class RemoteSandboxClosedError(RemoteSandboxError):
49
49
  """
50
50
 
51
51
  is_retryable = True
52
+
53
+
54
class FineTuneModelIsNotReady(Exception):
    """Signals that a fine-tuned model cannot be used yet.

    Callers are advised to wait about 5 seconds and then retry using the
    model.
    """
59
+
60
+
61
class FineTuneModelNotFound(Exception):
    """Signals that no fine-tuned model exists for the requested id.

    Callers are advised to retry with a different model id.
    """
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: vision-agent
3
- Version: 0.2.109
3
+ Version: 0.2.111
4
4
  Summary: Toolset for Vision Agent
5
5
  Author: Landing AI
6
6
  Author-email: dev@landing.ai
@@ -2,32 +2,32 @@ vision_agent/__init__.py,sha256=EAb4-f9iyuEYkBrX4ag1syM8Syx8118_t0R6_C34M9w,57
2
2
  vision_agent/agent/__init__.py,sha256=qpduQ9YufJQfMmG6jwKC2xmlbtR2qK8_1eQC1sGA9Ks,135
3
3
  vision_agent/agent/agent.py,sha256=Bt8yhjCFXuRdZaHxKEesG40V09nWRt45sZluri1R3AA,575
4
4
  vision_agent/agent/agent_utils.py,sha256=JXdl2xz14LKQAmScY-MIW23AD2WBFCsnI0JS6dAyj3Q,1412
5
- vision_agent/agent/vision_agent.py,sha256=4vzKYNoScv_sOZiqefo46iKJNZOtqSFvSJif0zZIdLI,8471
5
+ vision_agent/agent/vision_agent.py,sha256=5rgO-pScVOS3t4sWnLBnGYYkGftGgF4U0FpZzFVrDAY,8447
6
6
  vision_agent/agent/vision_agent_coder.py,sha256=oo3IoRrc-cVdjKq_YsjzkBZNTBtiCTIctGfeC5C7MXM,30926
7
7
  vision_agent/agent/vision_agent_coder_prompts.py,sha256=a3R_vHlT2FW3-DSn4OWgzF9zEAx-uKM4ZaTi9Kn-K54,11116
8
8
  vision_agent/agent/vision_agent_prompts.py,sha256=hjs-m4ZHR7HE1HtOeX_1rOvTQA2FMEAqEkaBbGPBYDo,6072
9
9
  vision_agent/clients/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
10
- vision_agent/clients/http.py,sha256=1WMt29F12YFfPH03AttKxnUNXx5sNOD9ZuH4etbB054,1598
11
- vision_agent/clients/landing_public_api.py,sha256=Tjl8uBZWc3dvrCOKg-PCYjw3RC3X5Y6B50kaKn_QzL0,1050
10
+ vision_agent/clients/http.py,sha256=k883i6M_4nl7zwwHSI-yP5sAgQZIDPM1nrKD6YFJ3Xs,2009
11
+ vision_agent/clients/landing_public_api.py,sha256=ImMzR6qVvkwgiMMmQRGl91E4xktKjoctun0hWn9PxfE,1507
12
12
  vision_agent/fonts/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
13
13
  vision_agent/fonts/default_font_ch_en.ttf,sha256=1YM0Z3XqLDjSNbF7ihQFSAIUdjF9m1rtHiNC_6QosTE,1594400
14
14
  vision_agent/lmm/__init__.py,sha256=YuUZRsMHdn8cMOv6iBU8yUqlIOLrbZQqZl9KPnofsHQ,103
15
15
  vision_agent/lmm/lmm.py,sha256=TgEwrtQqpnWlBYEvsSU6DbkY3Y7MM8wRb4lMQgSiM0k,19435
16
16
  vision_agent/lmm/types.py,sha256=8TSRoTbXyCKVJiH-wHXI2OiGOMSkYv1vLGYeAXtNpOQ,153
17
- vision_agent/tools/__init__.py,sha256=MK0D8NtIChwGHwqsTz3LeV5BGuQecNVrNzUsyaEwuGA,1926
18
- vision_agent/tools/meta_tools.py,sha256=v2FrLl0YwM7JwsVRfgfnryd9qorbPRiObestexbnNBs,15170
19
- vision_agent/tools/meta_tools_types.py,sha256=aU4knXEhm0AnDYW958T6Q6qPwN4yq8pQzQOxqFaOjzg,596
17
+ vision_agent/tools/__init__.py,sha256=ibjymNE7QqtZLgAm3oytYDANNhGLovQsjFqVZZCQWEU,2018
18
+ vision_agent/tools/meta_tools.py,sha256=Bm_sIeorVRW_aWA-htA0G-BbbN4yZm91Tam90s90cnA,13366
20
19
  vision_agent/tools/prompts.py,sha256=V1z4YJLXZuUl_iZ5rY0M5hHc_2tmMEUKr0WocXKGt4E,1430
21
- vision_agent/tools/tool_utils.py,sha256=ZhZ9oEcOvRSuWPy-gV0rx3pvaaXzBW-ZC3YQanXrq1g,4733
22
- vision_agent/tools/tools.py,sha256=6sheMaBfuJUxDboN1GA0L4bTeuKoljrGlGNdwXn_dq8,44805
20
+ vision_agent/tools/tool_utils.py,sha256=FU6DCMB3hk9e8p4nAkAv7mHQDIhH8fssyxAYE1bmGK4,5628
21
+ vision_agent/tools/tools.py,sha256=RSKzMcEUNQwcKnQLSH4Go284QgBl3pXqIqmCGMY4shY,49616
22
+ vision_agent/tools/tools_types.py,sha256=z6_XtUhWgh201yM7Z0CYtiLBEGdHPc_QUydMDHZ84EA,2216
23
23
  vision_agent/utils/__init__.py,sha256=CW84HnhqI6XQVuxf2KifkLnSuO7EOhmuL09-gAymAak,219
24
- vision_agent/utils/exceptions.py,sha256=isVH-SVL4vHj3q5kK4z7cy5_aOapAqHXWkpibfSNbUs,1659
24
+ vision_agent/utils/exceptions.py,sha256=booSPSuoULF7OXRr_YbC4dtKt6gM_HyiFQHBuaW86C4,2052
25
25
  vision_agent/utils/execute.py,sha256=ZRxztUfZwvMvPnFbKx5W_LZzTuKl8Zf5dP3Y8P2-3nk,25093
26
26
  vision_agent/utils/image_utils.py,sha256=c_g5i_cFC0C-Yw9gU_NaVgQdmBlyumw3bLIDtCU42xo,8200
27
27
  vision_agent/utils/sim.py,sha256=7JvtWGN0Ik5ife3qQYWs7Fm3T8AnAXGFd5HnvDC15mQ,4433
28
28
  vision_agent/utils/type_defs.py,sha256=BE12s3JNQy36QvauXHjwyeffVh5enfcvd4vTzSwvEZI,1384
29
29
  vision_agent/utils/video.py,sha256=rNmU9KEIkZB5-EztZNlUiKYN0mm_55A_2VGUM0QpqLA,8779
30
- vision_agent-0.2.109.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
31
- vision_agent-0.2.109.dist-info/METADATA,sha256=ILl3GV0gpeSUgDP5QlGQP4r12nsbyxovYCxn3EtPkx4,10732
32
- vision_agent-0.2.109.dist-info/WHEEL,sha256=7Z8_27uaHI_UZAc4Uox4PpBhQ9Y5_modZXWMxtUi4NU,88
33
- vision_agent-0.2.109.dist-info/RECORD,,
30
+ vision_agent-0.2.111.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
31
+ vision_agent-0.2.111.dist-info/METADATA,sha256=Rqo5Hv-b8GnmZloGTELvU5lzbEZAY6cz96KUGKM7WR8,10732
32
+ vision_agent-0.2.111.dist-info/WHEEL,sha256=7Z8_27uaHI_UZAc4Uox4PpBhQ9Y5_modZXWMxtUi4NU,88
33
+ vision_agent-0.2.111.dist-info/RECORD,,
@@ -1,30 +0,0 @@
1
- from enum import Enum
2
- from typing import List, Tuple
3
-
4
- from pydantic import BaseModel
5
-
6
-
7
- class BboxInput(BaseModel):
8
- image_path: str
9
- labels: List[str]
10
- bboxes: List[Tuple[int, int, int, int]]
11
-
12
-
13
- class BboxInputBase64(BaseModel):
14
- image: str
15
- filename: str
16
- labels: List[str]
17
- bboxes: List[Tuple[int, int, int, int]]
18
-
19
-
20
- class PromptTask(str, Enum):
21
- """
22
- Valid task prompts options for the Florencev2 model.
23
- """
24
-
25
- CAPTION = "<CAPTION>"
26
- """"""
27
- CAPTION_TO_PHRASE_GROUNDING = "<CAPTION_TO_PHRASE_GROUNDING>"
28
- """"""
29
- OBJECT_DETECTION = "<OD>"
30
- """"""