vision-agent 0.2.109__py3-none-any.whl → 0.2.111__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- vision_agent/agent/vision_agent.py +1 -1
- vision_agent/clients/http.py +15 -3
- vision_agent/clients/landing_public_api.py +13 -1
- vision_agent/tools/__init__.py +7 -3
- vision_agent/tools/meta_tools.py +2 -46
- vision_agent/tools/tool_utils.py +30 -7
- vision_agent/tools/tools.py +126 -0
- vision_agent/tools/tools_types.py +84 -0
- vision_agent/utils/exceptions.py +13 -0
- {vision_agent-0.2.109.dist-info → vision_agent-0.2.111.dist-info}/METADATA +1 -1
- {vision_agent-0.2.109.dist-info → vision_agent-0.2.111.dist-info}/RECORD +13 -13
- vision_agent/tools/meta_tools_types.py +0 -30
- {vision_agent-0.2.109.dist-info → vision_agent-0.2.111.dist-info}/LICENSE +0 -0
- {vision_agent-0.2.109.dist-info → vision_agent-0.2.111.dist-info}/WHEEL +0 -0
@@ -28,7 +28,7 @@ class DefaultImports:
|
|
28
28
|
code = [
|
29
29
|
"from typing import *",
|
30
30
|
"from vision_agent.utils.execute import CodeInterpreter",
|
31
|
-
"from vision_agent.tools.meta_tools import generate_vision_code, edit_vision_code, open_file, create_file, scroll_up, scroll_down, edit_file, get_tool_descriptions
|
31
|
+
"from vision_agent.tools.meta_tools import generate_vision_code, edit_vision_code, open_file, create_file, scroll_up, scroll_down, edit_file, get_tool_descriptions",
|
32
32
|
]
|
33
33
|
|
34
34
|
@staticmethod
|
vision_agent/clients/http.py
CHANGED
@@ -4,7 +4,6 @@ from typing import Any, Dict, Optional
|
|
4
4
|
|
5
5
|
from requests import Session
|
6
6
|
from requests.adapters import HTTPAdapter
|
7
|
-
from requests.exceptions import ConnectionError, RequestException, Timeout
|
8
7
|
|
9
8
|
_LOGGER = logging.getLogger(__name__)
|
10
9
|
|
@@ -38,9 +37,22 @@ class BaseHTTP:
|
|
38
37
|
response.raise_for_status()
|
39
38
|
result: Dict[str, Any] = response.json()
|
40
39
|
_LOGGER.info(json.dumps(result))
|
41
|
-
except (ConnectionError, Timeout, RequestException) as err:
|
42
|
-
_LOGGER.warning(f"Error: {err}.")
|
43
40
|
except json.JSONDecodeError:
|
44
41
|
resp_text = response.text
|
45
42
|
_LOGGER.warning(f"Response seems incorrect: '{resp_text}'.")
|
43
|
+
raise
|
44
|
+
return result
|
45
|
+
|
46
|
+
def get(self, url: str) -> Dict[str, Any]:
|
47
|
+
formatted_url = f"{self._base_endpoint}/{url}"
|
48
|
+
_LOGGER.info(f"Sending data to {formatted_url}")
|
49
|
+
try:
|
50
|
+
response = self._session.get(url=formatted_url, timeout=self._TIMEOUT)
|
51
|
+
response.raise_for_status()
|
52
|
+
result: Dict[str, Any] = response.json()
|
53
|
+
_LOGGER.info(json.dumps(result))
|
54
|
+
except json.JSONDecodeError:
|
55
|
+
resp_text = response.text
|
56
|
+
_LOGGER.warning(f"Response seems incorrect: '{resp_text}'.")
|
57
|
+
raise
|
46
58
|
return result
|
@@ -2,9 +2,12 @@ import os
|
|
2
2
|
from uuid import UUID
|
3
3
|
from typing import List
|
4
4
|
|
5
|
+
from requests.exceptions import HTTPError
|
6
|
+
|
5
7
|
from vision_agent.clients.http import BaseHTTP
|
6
8
|
from vision_agent.utils.type_defs import LandingaiAPIKey
|
7
|
-
from vision_agent.
|
9
|
+
from vision_agent.utils.exceptions import FineTuneModelNotFound
|
10
|
+
from vision_agent.tools.tools_types import BboxInputBase64, PromptTask, JobStatus
|
8
11
|
|
9
12
|
|
10
13
|
class LandingPublicAPI(BaseHTTP):
|
@@ -24,3 +27,12 @@ class LandingPublicAPI(BaseHTTP):
|
|
24
27
|
}
|
25
28
|
response = self.post(url, payload=data)
|
26
29
|
return UUID(response["jobId"])
|
30
|
+
|
31
|
+
def check_fine_tuning_job(self, job_id: UUID) -> JobStatus:
|
32
|
+
url = f"v1/agent/jobs/fine-tuning/{job_id}/status"
|
33
|
+
try:
|
34
|
+
get_job = self.get(url)
|
35
|
+
except HTTPError as err:
|
36
|
+
if err.response.status_code == 404:
|
37
|
+
raise FineTuneModelNotFound()
|
38
|
+
return JobStatus(get_job["status"])
|
vision_agent/tools/__init__.py
CHANGED
@@ -1,12 +1,15 @@
|
|
1
1
|
from typing import Callable, List, Optional
|
2
2
|
|
3
|
-
from .meta_tools import
|
3
|
+
from .meta_tools import (
|
4
|
+
META_TOOL_DOCSTRING,
|
5
|
+
)
|
4
6
|
from .prompts import CHOOSE_PARAMS, SYSTEM_PROMPT
|
5
7
|
from .tools import (
|
6
8
|
TOOL_DESCRIPTIONS,
|
7
9
|
TOOL_DOCSTRING,
|
8
10
|
TOOLS,
|
9
11
|
TOOLS_DF,
|
12
|
+
TOOLS_INFO,
|
10
13
|
UTILITIES_DOCSTRING,
|
11
14
|
blip_image_caption,
|
12
15
|
clip,
|
@@ -52,15 +55,16 @@ def register_tool(imports: Optional[List] = None) -> Callable:
|
|
52
55
|
def decorator(tool: Callable) -> Callable:
|
53
56
|
import inspect
|
54
57
|
|
55
|
-
from .tools import get_tool_descriptions, get_tools_df
|
58
|
+
from .tools import get_tool_descriptions, get_tools_df, get_tools_info
|
56
59
|
|
57
|
-
global TOOLS, TOOLS_DF, TOOL_DESCRIPTIONS, TOOL_DOCSTRING
|
60
|
+
global TOOLS, TOOLS_DF, TOOL_DESCRIPTIONS, TOOL_DOCSTRING, TOOLS_INFO
|
58
61
|
|
59
62
|
if tool not in TOOLS:
|
60
63
|
TOOLS.append(tool)
|
61
64
|
TOOLS_DF = get_tools_df(TOOLS) # type: ignore
|
62
65
|
TOOL_DESCRIPTIONS = get_tool_descriptions(TOOLS) # type: ignore
|
63
66
|
TOOL_DOCSTRING = get_tool_documentation(TOOLS) # type: ignore
|
67
|
+
TOOLS_INFO = get_tools_info(TOOLS) # type: ignore
|
64
68
|
|
65
69
|
globals()[tool.__name__] = tool
|
66
70
|
if imports is not None:
|
vision_agent/tools/meta_tools.py
CHANGED
@@ -1,6 +1,5 @@
|
|
1
1
|
import os
|
2
2
|
import subprocess
|
3
|
-
from uuid import UUID
|
4
3
|
from pathlib import Path
|
5
4
|
from typing import Any, Dict, List, Union
|
6
5
|
|
@@ -8,9 +7,7 @@ import vision_agent as va
|
|
8
7
|
from vision_agent.lmm.types import Message
|
9
8
|
from vision_agent.tools.tool_utils import get_tool_documentation
|
10
9
|
from vision_agent.tools.tools import TOOL_DESCRIPTIONS
|
11
|
-
|
12
|
-
from vision_agent.clients.landing_public_api import LandingPublicAPI
|
13
|
-
from vision_agent.tools.meta_tools_types import BboxInput, BboxInputBase64, PromptTask
|
10
|
+
|
14
11
|
|
15
12
|
# These tools are adapted from SWE-Agent https://github.com/princeton-nlp/SWE-agent
|
16
13
|
|
@@ -384,51 +381,11 @@ def edit_file(file_path: str, start: int, end: int, content: str) -> str:
|
|
384
381
|
|
385
382
|
def get_tool_descriptions() -> str:
|
386
383
|
"""Returns a description of all the tools that `generate_vision_code` has access to.
|
387
|
-
Helpful for
|
384
|
+
Helpful for answering questions about what types of vision tasks you can do with
|
388
385
|
`generate_vision_code`."""
|
389
386
|
return TOOL_DESCRIPTIONS
|
390
387
|
|
391
388
|
|
392
|
-
def florencev2_fine_tuning(bboxes: List[Dict[str, Any]], task: str) -> UUID:
|
393
|
-
"""'florencev2_fine_tuning' is a tool that fine-tune florencev2 to be able
|
394
|
-
to detect objects in an image based on a given dataset. It returns the fine
|
395
|
-
tuning job id.
|
396
|
-
|
397
|
-
Parameters:
|
398
|
-
bboxes (List[BboxInput]): A list of BboxInput containing the
|
399
|
-
image path, labels and bounding boxes.
|
400
|
-
task (PromptTask): The florencev2 fine-tuning task. The options are
|
401
|
-
CAPTION, CAPTION_TO_PHRASE_GROUNDING and OBJECT_DETECTION.
|
402
|
-
|
403
|
-
Returns:
|
404
|
-
UUID: The fine tuning job id, this id will used to retrieve the fine
|
405
|
-
tuned model.
|
406
|
-
|
407
|
-
Example
|
408
|
-
-------
|
409
|
-
>>> fine_tuning_job_id = florencev2_fine_tuning(
|
410
|
-
[{'image_path': 'filename.png', 'labels': ['screw'], 'bboxes': [[370, 30, 560, 290]]},
|
411
|
-
{'image_path': 'filename.png', 'labels': ['screw'], 'bboxes': [[120, 0, 300, 170]]}],
|
412
|
-
"OBJECT_DETECTION"
|
413
|
-
)
|
414
|
-
"""
|
415
|
-
bboxes_input = [BboxInput.model_validate(bbox) for bbox in bboxes]
|
416
|
-
task_input = PromptTask[task]
|
417
|
-
fine_tuning_request = [
|
418
|
-
BboxInputBase64(
|
419
|
-
image=convert_to_b64(bbox_input.image_path),
|
420
|
-
filename=bbox_input.image_path.split("/")[-1],
|
421
|
-
labels=bbox_input.labels,
|
422
|
-
bboxes=bbox_input.bboxes,
|
423
|
-
)
|
424
|
-
for bbox_input in bboxes_input
|
425
|
-
]
|
426
|
-
landing_api = LandingPublicAPI()
|
427
|
-
return landing_api.launch_fine_tuning_job(
|
428
|
-
"florencev2", task_input, fine_tuning_request
|
429
|
-
)
|
430
|
-
|
431
|
-
|
432
389
|
META_TOOL_DOCSTRING = get_tool_documentation(
|
433
390
|
[
|
434
391
|
get_tool_descriptions,
|
@@ -442,6 +399,5 @@ META_TOOL_DOCSTRING = get_tool_documentation(
|
|
442
399
|
search_dir,
|
443
400
|
search_file,
|
444
401
|
find_file,
|
445
|
-
florencev2_fine_tuning,
|
446
402
|
]
|
447
403
|
)
|
vision_agent/tools/tool_utils.py
CHANGED
@@ -15,9 +15,10 @@ from vision_agent.utils.execute import Error, MimeType
|
|
15
15
|
from vision_agent.utils.type_defs import LandingaiAPIKey
|
16
16
|
|
17
17
|
_LOGGER = logging.getLogger(__name__)
|
18
|
-
_LND_API_KEY = LandingaiAPIKey().api_key
|
19
|
-
|
20
|
-
|
18
|
+
_LND_API_KEY = os.environ.get("LANDINGAI_API_KEY", LandingaiAPIKey().api_key)
|
19
|
+
_LND_BASE_URL = os.environ.get("LANDINGAI_URL", "https://api.landing.ai")
|
20
|
+
_LND_API_URL = f"{_LND_BASE_URL}/v1/agent/model"
|
21
|
+
_LND_API_URL_v2 = f"{_LND_BASE_URL}/v1/tools"
|
21
22
|
|
22
23
|
|
23
24
|
class ToolCallTrace(BaseModel):
|
@@ -28,8 +29,13 @@ class ToolCallTrace(BaseModel):
|
|
28
29
|
|
29
30
|
|
30
31
|
def send_inference_request(
|
31
|
-
payload: Dict[str, Any],
|
32
|
+
payload: Dict[str, Any],
|
33
|
+
endpoint_name: str,
|
34
|
+
v2: bool = False,
|
35
|
+
metadata_payload: Optional[Dict[str, Any]] = None,
|
32
36
|
) -> Dict[str, Any]:
|
37
|
+
# TODO: runtime_tag and function_name should be in metadata_payload and not included
|
38
|
+
# in the service payload
|
33
39
|
try:
|
34
40
|
if runtime_tag := os.environ.get("RUNTIME_TAG", ""):
|
35
41
|
payload["runtime_tag"] = runtime_tag
|
@@ -62,9 +68,13 @@ def send_inference_request(
|
|
62
68
|
traceback_raw=[],
|
63
69
|
)
|
64
70
|
_LOGGER.error(f"Request failed: {res.status_code} {res.text}")
|
65
|
-
|
66
|
-
|
67
|
-
|
71
|
+
# TODO: function_name should be in metadata_payload
|
72
|
+
function_name = "unknown"
|
73
|
+
if "function_name" in payload:
|
74
|
+
function_name = payload["function_name"]
|
75
|
+
elif metadata_payload is not None and "function_name" in metadata_payload:
|
76
|
+
function_name = metadata_payload["function_name"]
|
77
|
+
raise RemoteToolCallFailed(function_name, res.status_code, res.text)
|
68
78
|
|
69
79
|
resp = res.json()
|
70
80
|
tool_call_trace.response = resp
|
@@ -142,3 +152,16 @@ def get_tools_df(funcs: List[Callable[..., Any]]) -> pd.DataFrame:
|
|
142
152
|
data["doc"].append(doc)
|
143
153
|
|
144
154
|
return pd.DataFrame(data) # type: ignore
|
155
|
+
|
156
|
+
|
157
|
+
def get_tools_info(funcs: List[Callable[..., Any]]) -> Dict[str, str]:
|
158
|
+
data: Dict[str, str] = {}
|
159
|
+
|
160
|
+
for func in funcs:
|
161
|
+
desc = func.__doc__
|
162
|
+
if desc is None:
|
163
|
+
desc = ""
|
164
|
+
|
165
|
+
data[func.__name__] = f"{func.__name__}{inspect.signature(func)}:\n{desc}"
|
166
|
+
|
167
|
+
return data
|
vision_agent/tools/tools.py
CHANGED
@@ -2,6 +2,7 @@ import io
|
|
2
2
|
import json
|
3
3
|
import logging
|
4
4
|
import tempfile
|
5
|
+
from uuid import UUID
|
5
6
|
from pathlib import Path
|
6
7
|
from importlib import resources
|
7
8
|
from typing import Any, Dict, List, Optional, Tuple, Union, cast
|
@@ -19,7 +20,9 @@ from vision_agent.tools.tool_utils import (
|
|
19
20
|
get_tool_descriptions,
|
20
21
|
get_tool_documentation,
|
21
22
|
get_tools_df,
|
23
|
+
get_tools_info,
|
22
24
|
)
|
25
|
+
from vision_agent.utils.exceptions import FineTuneModelIsNotReady
|
23
26
|
from vision_agent.utils import extract_frames_from_video
|
24
27
|
from vision_agent.utils.execute import FileSerializer, MimeType
|
25
28
|
from vision_agent.utils.image_utils import (
|
@@ -31,6 +34,15 @@ from vision_agent.utils.image_utils import (
|
|
31
34
|
convert_quad_box_to_bbox,
|
32
35
|
rle_decode,
|
33
36
|
)
|
37
|
+
from vision_agent.tools.tools_types import (
|
38
|
+
BboxInput,
|
39
|
+
BboxInputBase64,
|
40
|
+
PromptTask,
|
41
|
+
Florencev2FtRequest,
|
42
|
+
FineTuning,
|
43
|
+
JobStatus,
|
44
|
+
)
|
45
|
+
from vision_agent.clients.landing_public_api import LandingPublicAPI
|
34
46
|
|
35
47
|
register_heif_opener()
|
36
48
|
|
@@ -1285,6 +1297,119 @@ def overlay_heat_map(
|
|
1285
1297
|
return np.array(combined)
|
1286
1298
|
|
1287
1299
|
|
1300
|
+
# TODO: add this function to the imports so that it is picked up by the agent
|
1301
|
+
def florencev2_fine_tuning(bboxes: List[Dict[str, Any]], task: str) -> UUID:
|
1302
|
+
"""'florencev2_fine_tuning' is a tool that fine-tune florencev2 to be able
|
1303
|
+
to detect objects in an image based on a given dataset. It returns the fine
|
1304
|
+
tuning job id.
|
1305
|
+
|
1306
|
+
Parameters:
|
1307
|
+
bboxes (List[BboxInput]): A list of BboxInput containing the
|
1308
|
+
image path, labels and bounding boxes.
|
1309
|
+
task (PromptTask): The florencev2 fine-tuning task. The options are
|
1310
|
+
CAPTION, CAPTION_TO_PHRASE_GROUNDING and OBJECT_DETECTION.
|
1311
|
+
|
1312
|
+
Returns:
|
1313
|
+
UUID: The fine tuning job id, this id will used to retrieve the fine
|
1314
|
+
tuned model.
|
1315
|
+
|
1316
|
+
Example
|
1317
|
+
-------
|
1318
|
+
>>> fine_tuning_job_id = florencev2_fine_tuning(
|
1319
|
+
[{'image_path': 'filename.png', 'labels': ['screw'], 'bboxes': [[370, 30, 560, 290]]},
|
1320
|
+
{'image_path': 'filename.png', 'labels': ['screw'], 'bboxes': [[120, 0, 300, 170]]}],
|
1321
|
+
"OBJECT_DETECTION"
|
1322
|
+
)
|
1323
|
+
"""
|
1324
|
+
bboxes_input = [BboxInput.model_validate(bbox) for bbox in bboxes]
|
1325
|
+
task_input = PromptTask[task]
|
1326
|
+
fine_tuning_request = [
|
1327
|
+
BboxInputBase64(
|
1328
|
+
image=convert_to_b64(bbox_input.image_path),
|
1329
|
+
filename=bbox_input.image_path.split("/")[-1],
|
1330
|
+
labels=bbox_input.labels,
|
1331
|
+
bboxes=bbox_input.bboxes,
|
1332
|
+
)
|
1333
|
+
for bbox_input in bboxes_input
|
1334
|
+
]
|
1335
|
+
landing_api = LandingPublicAPI()
|
1336
|
+
return landing_api.launch_fine_tuning_job(
|
1337
|
+
"florencev2", task_input, fine_tuning_request
|
1338
|
+
)
|
1339
|
+
|
1340
|
+
|
1341
|
+
# TODO: add this function to the imports so that it is picked up by the agent
|
1342
|
+
def florencev2_fine_tuned_object_detection(
|
1343
|
+
image: np.ndarray, prompt: str, model_id: UUID, task: str
|
1344
|
+
) -> List[Dict[str, Any]]:
|
1345
|
+
"""'florencev2_fine_tuned_object_detection' is a tool that uses a fine tuned model
|
1346
|
+
to detect objects given a text prompt such as a phrase or class names separated by
|
1347
|
+
commas. It returns a list of detected objects as labels and their location as
|
1348
|
+
bounding boxes with score of 1.0.
|
1349
|
+
|
1350
|
+
Parameters:
|
1351
|
+
image (np.ndarray): The image used to detect objects.
|
1352
|
+
prompt (str): The prompt to help find objects in the image.
|
1353
|
+
model_id (UUID): The fine-tuned model id.
|
1354
|
+
task (PromptTask): The florencev2 fine-tuning task. The options are
|
1355
|
+
CAPTION, CAPTION_TO_PHRASE_GROUNDING and OBJECT_DETECTION.
|
1356
|
+
|
1357
|
+
Returns:
|
1358
|
+
List[Dict[str, Any]]: A list of dictionaries containing the score, label, and
|
1359
|
+
bounding box of the detected objects with normalized coordinates between 0
|
1360
|
+
and 1 (xmin, ymin, xmax, ymax). xmin and ymin are the coordinates of the
|
1361
|
+
top-left and xmax and ymax are the coordinates of the bottom-right of the
|
1362
|
+
bounding box. The scores are always 1.0 and cannot be thresholded
|
1363
|
+
|
1364
|
+
Example
|
1365
|
+
-------
|
1366
|
+
>>> florencev2_fine_tuned_object_detection(
|
1367
|
+
image,
|
1368
|
+
'person looking at a coyote',
|
1369
|
+
UUID("381cd5f9-5dc4-472d-9260-f3bb89d31f83")
|
1370
|
+
)
|
1371
|
+
[
|
1372
|
+
{'score': 1.0, 'label': 'person', 'bbox': [0.1, 0.11, 0.35, 0.4]},
|
1373
|
+
{'score': 1.0, 'label': 'coyote', 'bbox': [0.34, 0.21, 0.85, 0.5]},
|
1374
|
+
]
|
1375
|
+
"""
|
1376
|
+
# check if job succeeded first
|
1377
|
+
landing_api = LandingPublicAPI()
|
1378
|
+
status = landing_api.check_fine_tuning_job(model_id)
|
1379
|
+
if status is not JobStatus.SUCCEEDED:
|
1380
|
+
raise FineTuneModelIsNotReady()
|
1381
|
+
|
1382
|
+
task = PromptTask[task]
|
1383
|
+
if task is PromptTask.OBJECT_DETECTION:
|
1384
|
+
prompt = ""
|
1385
|
+
|
1386
|
+
data_obj = Florencev2FtRequest(
|
1387
|
+
image=convert_to_b64(image),
|
1388
|
+
task=task,
|
1389
|
+
tool="florencev2_fine_tuning",
|
1390
|
+
prompt=prompt,
|
1391
|
+
fine_tuning=FineTuning(job_id=model_id),
|
1392
|
+
)
|
1393
|
+
data = data_obj.model_dump(by_alias=True)
|
1394
|
+
metadata_payload = {"function_name": "florencev2_fine_tuned_object_detection"}
|
1395
|
+
detections = send_inference_request(
|
1396
|
+
data, "tools", v2=False, metadata_payload=metadata_payload
|
1397
|
+
)
|
1398
|
+
|
1399
|
+
detections = detections[task.value]
|
1400
|
+
return_data = []
|
1401
|
+
image_size = image.shape[:2]
|
1402
|
+
for i in range(len(detections["bboxes"])):
|
1403
|
+
return_data.append(
|
1404
|
+
{
|
1405
|
+
"score": 1.0,
|
1406
|
+
"label": detections["labels"][i],
|
1407
|
+
"bbox": normalize_bbox(detections["bboxes"][i], image_size),
|
1408
|
+
}
|
1409
|
+
)
|
1410
|
+
return return_data
|
1411
|
+
|
1412
|
+
|
1288
1413
|
TOOLS = [
|
1289
1414
|
owl_v2,
|
1290
1415
|
grounding_sam,
|
@@ -1317,6 +1442,7 @@ TOOLS = [
|
|
1317
1442
|
TOOLS_DF = get_tools_df(TOOLS) # type: ignore
|
1318
1443
|
TOOL_DESCRIPTIONS = get_tool_descriptions(TOOLS) # type: ignore
|
1319
1444
|
TOOL_DOCSTRING = get_tool_documentation(TOOLS) # type: ignore
|
1445
|
+
TOOLS_INFO = get_tools_info(TOOLS) # type: ignore
|
1320
1446
|
UTILITIES_DOCSTRING = get_tool_documentation(
|
1321
1447
|
[
|
1322
1448
|
save_json,
|
@@ -0,0 +1,84 @@
|
|
1
|
+
from uuid import UUID
|
2
|
+
from enum import Enum
|
3
|
+
from typing import List, Tuple, Optional
|
4
|
+
|
5
|
+
from pydantic import BaseModel, ConfigDict, Field, field_serializer, SerializationInfo
|
6
|
+
|
7
|
+
|
8
|
+
class BboxInput(BaseModel):
|
9
|
+
image_path: str
|
10
|
+
labels: List[str]
|
11
|
+
bboxes: List[Tuple[int, int, int, int]]
|
12
|
+
|
13
|
+
|
14
|
+
class BboxInputBase64(BaseModel):
|
15
|
+
image: str
|
16
|
+
filename: str
|
17
|
+
labels: List[str]
|
18
|
+
bboxes: List[Tuple[int, int, int, int]]
|
19
|
+
|
20
|
+
|
21
|
+
class PromptTask(str, Enum):
|
22
|
+
"""
|
23
|
+
Valid task prompts options for the Florencev2 model.
|
24
|
+
"""
|
25
|
+
|
26
|
+
CAPTION = "<CAPTION>"
|
27
|
+
""""""
|
28
|
+
CAPTION_TO_PHRASE_GROUNDING = "<CAPTION_TO_PHRASE_GROUNDING>"
|
29
|
+
""""""
|
30
|
+
OBJECT_DETECTION = "<OD>"
|
31
|
+
""""""
|
32
|
+
|
33
|
+
|
34
|
+
class FineTuning(BaseModel):
|
35
|
+
model_config = ConfigDict(populate_by_name=True)
|
36
|
+
|
37
|
+
job_id: UUID = Field(alias="jobId")
|
38
|
+
|
39
|
+
@field_serializer("job_id")
|
40
|
+
def serialize_job_id(self, job_id: UUID, _info: SerializationInfo) -> str:
|
41
|
+
return str(job_id)
|
42
|
+
|
43
|
+
|
44
|
+
class Florencev2FtRequest(BaseModel):
|
45
|
+
model_config = ConfigDict(populate_by_name=True)
|
46
|
+
|
47
|
+
image: str
|
48
|
+
task: PromptTask
|
49
|
+
tool: str
|
50
|
+
prompt: Optional[str] = ""
|
51
|
+
fine_tuning: Optional[FineTuning] = Field(None, alias="fineTuning")
|
52
|
+
|
53
|
+
|
54
|
+
class JobStatus(str, Enum):
|
55
|
+
"""The status of a fine-tuning job.
|
56
|
+
|
57
|
+
CREATED:
|
58
|
+
The job has been created and is waiting to be scheduled to run.
|
59
|
+
STARTING:
|
60
|
+
The job has started running, but not entering the training phase.
|
61
|
+
TRAINING:
|
62
|
+
The job is training a model.
|
63
|
+
EVALUATING:
|
64
|
+
The job is evaluating the model and computing metrics.
|
65
|
+
PUBLISHING:
|
66
|
+
The job is exporting the artifact(s) to an external directory (s3 or local).
|
67
|
+
SUCCEEDED:
|
68
|
+
The job has finished, including training, evaluation and publishing the
|
69
|
+
artifact(s).
|
70
|
+
FAILED:
|
71
|
+
The job has failed for some reason internally, it can be due to resources
|
72
|
+
issues or the code itself.
|
73
|
+
STOPPED:
|
74
|
+
The job has been stopped by the user locally or in the cloud.
|
75
|
+
"""
|
76
|
+
|
77
|
+
CREATED = "CREATED"
|
78
|
+
STARTING = "STARTING"
|
79
|
+
TRAINING = "TRAINING"
|
80
|
+
EVALUATING = "EVALUATING"
|
81
|
+
PUBLISHING = "PUBLISHING"
|
82
|
+
SUCCEEDED = "SUCCEEDED"
|
83
|
+
FAILED = "FAILED"
|
84
|
+
STOPPED = "STOPPED"
|
vision_agent/utils/exceptions.py
CHANGED
@@ -49,3 +49,16 @@ class RemoteSandboxClosedError(RemoteSandboxError):
|
|
49
49
|
"""
|
50
50
|
|
51
51
|
is_retryable = True
|
52
|
+
|
53
|
+
|
54
|
+
class FineTuneModelIsNotReady(Exception):
|
55
|
+
"""Exception raised when the fine-tune model is not ready.
|
56
|
+
If this is raised, it's recommended to wait 5 seconds before trying to use
|
57
|
+
the model again.
|
58
|
+
"""
|
59
|
+
|
60
|
+
|
61
|
+
class FineTuneModelNotFound(Exception):
|
62
|
+
"""Exception raised when the fine-tune model is not found.
|
63
|
+
If this is raised, it's recommended to try another model id.
|
64
|
+
"""
|
@@ -2,32 +2,32 @@ vision_agent/__init__.py,sha256=EAb4-f9iyuEYkBrX4ag1syM8Syx8118_t0R6_C34M9w,57
|
|
2
2
|
vision_agent/agent/__init__.py,sha256=qpduQ9YufJQfMmG6jwKC2xmlbtR2qK8_1eQC1sGA9Ks,135
|
3
3
|
vision_agent/agent/agent.py,sha256=Bt8yhjCFXuRdZaHxKEesG40V09nWRt45sZluri1R3AA,575
|
4
4
|
vision_agent/agent/agent_utils.py,sha256=JXdl2xz14LKQAmScY-MIW23AD2WBFCsnI0JS6dAyj3Q,1412
|
5
|
-
vision_agent/agent/vision_agent.py,sha256=
|
5
|
+
vision_agent/agent/vision_agent.py,sha256=5rgO-pScVOS3t4sWnLBnGYYkGftGgF4U0FpZzFVrDAY,8447
|
6
6
|
vision_agent/agent/vision_agent_coder.py,sha256=oo3IoRrc-cVdjKq_YsjzkBZNTBtiCTIctGfeC5C7MXM,30926
|
7
7
|
vision_agent/agent/vision_agent_coder_prompts.py,sha256=a3R_vHlT2FW3-DSn4OWgzF9zEAx-uKM4ZaTi9Kn-K54,11116
|
8
8
|
vision_agent/agent/vision_agent_prompts.py,sha256=hjs-m4ZHR7HE1HtOeX_1rOvTQA2FMEAqEkaBbGPBYDo,6072
|
9
9
|
vision_agent/clients/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
10
|
-
vision_agent/clients/http.py,sha256=
|
11
|
-
vision_agent/clients/landing_public_api.py,sha256=
|
10
|
+
vision_agent/clients/http.py,sha256=k883i6M_4nl7zwwHSI-yP5sAgQZIDPM1nrKD6YFJ3Xs,2009
|
11
|
+
vision_agent/clients/landing_public_api.py,sha256=ImMzR6qVvkwgiMMmQRGl91E4xktKjoctun0hWn9PxfE,1507
|
12
12
|
vision_agent/fonts/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
13
13
|
vision_agent/fonts/default_font_ch_en.ttf,sha256=1YM0Z3XqLDjSNbF7ihQFSAIUdjF9m1rtHiNC_6QosTE,1594400
|
14
14
|
vision_agent/lmm/__init__.py,sha256=YuUZRsMHdn8cMOv6iBU8yUqlIOLrbZQqZl9KPnofsHQ,103
|
15
15
|
vision_agent/lmm/lmm.py,sha256=TgEwrtQqpnWlBYEvsSU6DbkY3Y7MM8wRb4lMQgSiM0k,19435
|
16
16
|
vision_agent/lmm/types.py,sha256=8TSRoTbXyCKVJiH-wHXI2OiGOMSkYv1vLGYeAXtNpOQ,153
|
17
|
-
vision_agent/tools/__init__.py,sha256=
|
18
|
-
vision_agent/tools/meta_tools.py,sha256=
|
19
|
-
vision_agent/tools/meta_tools_types.py,sha256=aU4knXEhm0AnDYW958T6Q6qPwN4yq8pQzQOxqFaOjzg,596
|
17
|
+
vision_agent/tools/__init__.py,sha256=ibjymNE7QqtZLgAm3oytYDANNhGLovQsjFqVZZCQWEU,2018
|
18
|
+
vision_agent/tools/meta_tools.py,sha256=Bm_sIeorVRW_aWA-htA0G-BbbN4yZm91Tam90s90cnA,13366
|
20
19
|
vision_agent/tools/prompts.py,sha256=V1z4YJLXZuUl_iZ5rY0M5hHc_2tmMEUKr0WocXKGt4E,1430
|
21
|
-
vision_agent/tools/tool_utils.py,sha256=
|
22
|
-
vision_agent/tools/tools.py,sha256=
|
20
|
+
vision_agent/tools/tool_utils.py,sha256=FU6DCMB3hk9e8p4nAkAv7mHQDIhH8fssyxAYE1bmGK4,5628
|
21
|
+
vision_agent/tools/tools.py,sha256=RSKzMcEUNQwcKnQLSH4Go284QgBl3pXqIqmCGMY4shY,49616
|
22
|
+
vision_agent/tools/tools_types.py,sha256=z6_XtUhWgh201yM7Z0CYtiLBEGdHPc_QUydMDHZ84EA,2216
|
23
23
|
vision_agent/utils/__init__.py,sha256=CW84HnhqI6XQVuxf2KifkLnSuO7EOhmuL09-gAymAak,219
|
24
|
-
vision_agent/utils/exceptions.py,sha256=
|
24
|
+
vision_agent/utils/exceptions.py,sha256=booSPSuoULF7OXRr_YbC4dtKt6gM_HyiFQHBuaW86C4,2052
|
25
25
|
vision_agent/utils/execute.py,sha256=ZRxztUfZwvMvPnFbKx5W_LZzTuKl8Zf5dP3Y8P2-3nk,25093
|
26
26
|
vision_agent/utils/image_utils.py,sha256=c_g5i_cFC0C-Yw9gU_NaVgQdmBlyumw3bLIDtCU42xo,8200
|
27
27
|
vision_agent/utils/sim.py,sha256=7JvtWGN0Ik5ife3qQYWs7Fm3T8AnAXGFd5HnvDC15mQ,4433
|
28
28
|
vision_agent/utils/type_defs.py,sha256=BE12s3JNQy36QvauXHjwyeffVh5enfcvd4vTzSwvEZI,1384
|
29
29
|
vision_agent/utils/video.py,sha256=rNmU9KEIkZB5-EztZNlUiKYN0mm_55A_2VGUM0QpqLA,8779
|
30
|
-
vision_agent-0.2.
|
31
|
-
vision_agent-0.2.
|
32
|
-
vision_agent-0.2.
|
33
|
-
vision_agent-0.2.
|
30
|
+
vision_agent-0.2.111.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
|
31
|
+
vision_agent-0.2.111.dist-info/METADATA,sha256=Rqo5Hv-b8GnmZloGTELvU5lzbEZAY6cz96KUGKM7WR8,10732
|
32
|
+
vision_agent-0.2.111.dist-info/WHEEL,sha256=7Z8_27uaHI_UZAc4Uox4PpBhQ9Y5_modZXWMxtUi4NU,88
|
33
|
+
vision_agent-0.2.111.dist-info/RECORD,,
|
@@ -1,30 +0,0 @@
|
|
1
|
-
from enum import Enum
|
2
|
-
from typing import List, Tuple
|
3
|
-
|
4
|
-
from pydantic import BaseModel
|
5
|
-
|
6
|
-
|
7
|
-
class BboxInput(BaseModel):
|
8
|
-
image_path: str
|
9
|
-
labels: List[str]
|
10
|
-
bboxes: List[Tuple[int, int, int, int]]
|
11
|
-
|
12
|
-
|
13
|
-
class BboxInputBase64(BaseModel):
|
14
|
-
image: str
|
15
|
-
filename: str
|
16
|
-
labels: List[str]
|
17
|
-
bboxes: List[Tuple[int, int, int, int]]
|
18
|
-
|
19
|
-
|
20
|
-
class PromptTask(str, Enum):
|
21
|
-
"""
|
22
|
-
Valid task prompts options for the Florencev2 model.
|
23
|
-
"""
|
24
|
-
|
25
|
-
CAPTION = "<CAPTION>"
|
26
|
-
""""""
|
27
|
-
CAPTION_TO_PHRASE_GROUNDING = "<CAPTION_TO_PHRASE_GROUNDING>"
|
28
|
-
""""""
|
29
|
-
OBJECT_DETECTION = "<OD>"
|
30
|
-
""""""
|
File without changes
|
File without changes
|