vision-agent 0.2.109__tar.gz → 0.2.111__tar.gz
Sign up to get free protection for your applications and to get access to all the features.
- {vision_agent-0.2.109 → vision_agent-0.2.111}/PKG-INFO +1 -1
- {vision_agent-0.2.109 → vision_agent-0.2.111}/pyproject.toml +3 -1
- {vision_agent-0.2.109 → vision_agent-0.2.111}/vision_agent/agent/vision_agent.py +1 -1
- {vision_agent-0.2.109 → vision_agent-0.2.111}/vision_agent/clients/http.py +15 -3
- {vision_agent-0.2.109 → vision_agent-0.2.111}/vision_agent/clients/landing_public_api.py +13 -1
- {vision_agent-0.2.109 → vision_agent-0.2.111}/vision_agent/tools/__init__.py +7 -3
- {vision_agent-0.2.109 → vision_agent-0.2.111}/vision_agent/tools/meta_tools.py +2 -46
- {vision_agent-0.2.109 → vision_agent-0.2.111}/vision_agent/tools/tool_utils.py +30 -7
- {vision_agent-0.2.109 → vision_agent-0.2.111}/vision_agent/tools/tools.py +126 -0
- vision_agent-0.2.111/vision_agent/tools/tools_types.py +84 -0
- {vision_agent-0.2.109 → vision_agent-0.2.111}/vision_agent/utils/exceptions.py +13 -0
- vision_agent-0.2.109/vision_agent/tools/meta_tools_types.py +0 -30
- {vision_agent-0.2.109 → vision_agent-0.2.111}/LICENSE +0 -0
- {vision_agent-0.2.109 → vision_agent-0.2.111}/README.md +0 -0
- {vision_agent-0.2.109 → vision_agent-0.2.111}/vision_agent/__init__.py +0 -0
- {vision_agent-0.2.109 → vision_agent-0.2.111}/vision_agent/agent/__init__.py +0 -0
- {vision_agent-0.2.109 → vision_agent-0.2.111}/vision_agent/agent/agent.py +0 -0
- {vision_agent-0.2.109 → vision_agent-0.2.111}/vision_agent/agent/agent_utils.py +0 -0
- {vision_agent-0.2.109 → vision_agent-0.2.111}/vision_agent/agent/vision_agent_coder.py +0 -0
- {vision_agent-0.2.109 → vision_agent-0.2.111}/vision_agent/agent/vision_agent_coder_prompts.py +0 -0
- {vision_agent-0.2.109 → vision_agent-0.2.111}/vision_agent/agent/vision_agent_prompts.py +0 -0
- {vision_agent-0.2.109 → vision_agent-0.2.111}/vision_agent/clients/__init__.py +0 -0
- {vision_agent-0.2.109 → vision_agent-0.2.111}/vision_agent/fonts/__init__.py +0 -0
- {vision_agent-0.2.109 → vision_agent-0.2.111}/vision_agent/fonts/default_font_ch_en.ttf +0 -0
- {vision_agent-0.2.109 → vision_agent-0.2.111}/vision_agent/lmm/__init__.py +0 -0
- {vision_agent-0.2.109 → vision_agent-0.2.111}/vision_agent/lmm/lmm.py +0 -0
- {vision_agent-0.2.109 → vision_agent-0.2.111}/vision_agent/lmm/types.py +0 -0
- {vision_agent-0.2.109 → vision_agent-0.2.111}/vision_agent/tools/prompts.py +0 -0
- {vision_agent-0.2.109 → vision_agent-0.2.111}/vision_agent/utils/__init__.py +0 -0
- {vision_agent-0.2.109 → vision_agent-0.2.111}/vision_agent/utils/execute.py +0 -0
- {vision_agent-0.2.109 → vision_agent-0.2.111}/vision_agent/utils/image_utils.py +0 -0
- {vision_agent-0.2.109 → vision_agent-0.2.111}/vision_agent/utils/sim.py +0 -0
- {vision_agent-0.2.109 → vision_agent-0.2.111}/vision_agent/utils/type_defs.py +0 -0
- {vision_agent-0.2.109 → vision_agent-0.2.111}/vision_agent/utils/video.py +0 -0
@@ -4,7 +4,7 @@ build-backend = "poetry.core.masonry.api"
|
|
4
4
|
|
5
5
|
[tool.poetry]
|
6
6
|
name = "vision-agent"
|
7
|
-
version = "0.2.109"
|
7
|
+
version = "0.2.111"
|
8
8
|
description = "Toolset for Vision Agent"
|
9
9
|
authors = ["Landing AI <dev@landing.ai>"]
|
10
10
|
readme = "README.md"
|
@@ -78,6 +78,8 @@ line_length = 88
|
|
78
78
|
profile = "black"
|
79
79
|
|
80
80
|
[tool.mypy]
|
81
|
+
plugins = "pydantic.mypy"
|
82
|
+
|
81
83
|
exclude = "tests"
|
82
84
|
show_error_context = true
|
83
85
|
pretty = true
|
@@ -28,7 +28,7 @@ class DefaultImports:
|
|
28
28
|
code = [
|
29
29
|
"from typing import *",
|
30
30
|
"from vision_agent.utils.execute import CodeInterpreter",
|
31
|
-
"from vision_agent.tools.meta_tools import generate_vision_code, edit_vision_code, open_file, create_file, scroll_up, scroll_down, edit_file, get_tool_descriptions
|
31
|
+
"from vision_agent.tools.meta_tools import generate_vision_code, edit_vision_code, open_file, create_file, scroll_up, scroll_down, edit_file, get_tool_descriptions",
|
32
32
|
]
|
33
33
|
|
34
34
|
@staticmethod
|
@@ -4,7 +4,6 @@ from typing import Any, Dict, Optional
|
|
4
4
|
|
5
5
|
from requests import Session
|
6
6
|
from requests.adapters import HTTPAdapter
|
7
|
-
from requests.exceptions import ConnectionError, RequestException, Timeout
|
8
7
|
|
9
8
|
_LOGGER = logging.getLogger(__name__)
|
10
9
|
|
@@ -38,9 +37,22 @@ class BaseHTTP:
|
|
38
37
|
response.raise_for_status()
|
39
38
|
result: Dict[str, Any] = response.json()
|
40
39
|
_LOGGER.info(json.dumps(result))
|
41
|
-
except (ConnectionError, Timeout, RequestException) as err:
|
42
|
-
_LOGGER.warning(f"Error: {err}.")
|
43
40
|
except json.JSONDecodeError:
|
44
41
|
resp_text = response.text
|
45
42
|
_LOGGER.warning(f"Response seems incorrect: '{resp_text}'.")
|
43
|
+
raise
|
44
|
+
return result
|
45
|
+
|
46
|
+
def get(self, url: str) -> Dict[str, Any]:
|
47
|
+
formatted_url = f"{self._base_endpoint}/{url}"
|
48
|
+
_LOGGER.info(f"Sending data to {formatted_url}")
|
49
|
+
try:
|
50
|
+
response = self._session.get(url=formatted_url, timeout=self._TIMEOUT)
|
51
|
+
response.raise_for_status()
|
52
|
+
result: Dict[str, Any] = response.json()
|
53
|
+
_LOGGER.info(json.dumps(result))
|
54
|
+
except json.JSONDecodeError:
|
55
|
+
resp_text = response.text
|
56
|
+
_LOGGER.warning(f"Response seems incorrect: '{resp_text}'.")
|
57
|
+
raise
|
46
58
|
return result
|
@@ -2,9 +2,12 @@ import os
|
|
2
2
|
from uuid import UUID
|
3
3
|
from typing import List
|
4
4
|
|
5
|
+
from requests.exceptions import HTTPError
|
6
|
+
|
5
7
|
from vision_agent.clients.http import BaseHTTP
|
6
8
|
from vision_agent.utils.type_defs import LandingaiAPIKey
|
7
|
-
from vision_agent.tools.meta_tools_types import BboxInputBase64, PromptTask
|
9
|
+
from vision_agent.utils.exceptions import FineTuneModelNotFound
|
10
|
+
from vision_agent.tools.tools_types import BboxInputBase64, PromptTask, JobStatus
|
8
11
|
|
9
12
|
|
10
13
|
class LandingPublicAPI(BaseHTTP):
|
@@ -24,3 +27,12 @@ class LandingPublicAPI(BaseHTTP):
|
|
24
27
|
}
|
25
28
|
response = self.post(url, payload=data)
|
26
29
|
return UUID(response["jobId"])
|
30
|
+
|
31
|
+
def check_fine_tuning_job(self, job_id: UUID) -> JobStatus:
|
32
|
+
url = f"v1/agent/jobs/fine-tuning/{job_id}/status"
|
33
|
+
try:
|
34
|
+
get_job = self.get(url)
|
35
|
+
except HTTPError as err:
|
36
|
+
if err.response.status_code == 404:
|
37
|
+
raise FineTuneModelNotFound()
|
38
|
+
return JobStatus(get_job["status"])
|
@@ -1,12 +1,15 @@
|
|
1
1
|
from typing import Callable, List, Optional
|
2
2
|
|
3
|
-
from .meta_tools import
|
3
|
+
from .meta_tools import (
|
4
|
+
META_TOOL_DOCSTRING,
|
5
|
+
)
|
4
6
|
from .prompts import CHOOSE_PARAMS, SYSTEM_PROMPT
|
5
7
|
from .tools import (
|
6
8
|
TOOL_DESCRIPTIONS,
|
7
9
|
TOOL_DOCSTRING,
|
8
10
|
TOOLS,
|
9
11
|
TOOLS_DF,
|
12
|
+
TOOLS_INFO,
|
10
13
|
UTILITIES_DOCSTRING,
|
11
14
|
blip_image_caption,
|
12
15
|
clip,
|
@@ -52,15 +55,16 @@ def register_tool(imports: Optional[List] = None) -> Callable:
|
|
52
55
|
def decorator(tool: Callable) -> Callable:
|
53
56
|
import inspect
|
54
57
|
|
55
|
-
from .tools import get_tool_descriptions, get_tools_df
|
58
|
+
from .tools import get_tool_descriptions, get_tools_df, get_tools_info
|
56
59
|
|
57
|
-
global TOOLS, TOOLS_DF, TOOL_DESCRIPTIONS, TOOL_DOCSTRING
|
60
|
+
global TOOLS, TOOLS_DF, TOOL_DESCRIPTIONS, TOOL_DOCSTRING, TOOLS_INFO
|
58
61
|
|
59
62
|
if tool not in TOOLS:
|
60
63
|
TOOLS.append(tool)
|
61
64
|
TOOLS_DF = get_tools_df(TOOLS) # type: ignore
|
62
65
|
TOOL_DESCRIPTIONS = get_tool_descriptions(TOOLS) # type: ignore
|
63
66
|
TOOL_DOCSTRING = get_tool_documentation(TOOLS) # type: ignore
|
67
|
+
TOOLS_INFO = get_tools_info(TOOLS) # type: ignore
|
64
68
|
|
65
69
|
globals()[tool.__name__] = tool
|
66
70
|
if imports is not None:
|
@@ -1,6 +1,5 @@
|
|
1
1
|
import os
|
2
2
|
import subprocess
|
3
|
-
from uuid import UUID
|
4
3
|
from pathlib import Path
|
5
4
|
from typing import Any, Dict, List, Union
|
6
5
|
|
@@ -8,9 +7,7 @@ import vision_agent as va
|
|
8
7
|
from vision_agent.lmm.types import Message
|
9
8
|
from vision_agent.tools.tool_utils import get_tool_documentation
|
10
9
|
from vision_agent.tools.tools import TOOL_DESCRIPTIONS
|
11
|
-
|
12
|
-
from vision_agent.clients.landing_public_api import LandingPublicAPI
|
13
|
-
from vision_agent.tools.meta_tools_types import BboxInput, BboxInputBase64, PromptTask
|
10
|
+
|
14
11
|
|
15
12
|
# These tools are adapted from SWE-Agent https://github.com/princeton-nlp/SWE-agent
|
16
13
|
|
@@ -384,51 +381,11 @@ def edit_file(file_path: str, start: int, end: int, content: str) -> str:
|
|
384
381
|
|
385
382
|
def get_tool_descriptions() -> str:
|
386
383
|
"""Returns a description of all the tools that `generate_vision_code` has access to.
|
387
|
-
Helpful for
|
384
|
+
Helpful for answering questions about what types of vision tasks you can do with
|
388
385
|
`generate_vision_code`."""
|
389
386
|
return TOOL_DESCRIPTIONS
|
390
387
|
|
391
388
|
|
392
|
-
def florencev2_fine_tuning(bboxes: List[Dict[str, Any]], task: str) -> UUID:
|
393
|
-
"""'florencev2_fine_tuning' is a tool that fine-tune florencev2 to be able
|
394
|
-
to detect objects in an image based on a given dataset. It returns the fine
|
395
|
-
tuning job id.
|
396
|
-
|
397
|
-
Parameters:
|
398
|
-
bboxes (List[BboxInput]): A list of BboxInput containing the
|
399
|
-
image path, labels and bounding boxes.
|
400
|
-
task (PromptTask): The florencev2 fine-tuning task. The options are
|
401
|
-
CAPTION, CAPTION_TO_PHRASE_GROUNDING and OBJECT_DETECTION.
|
402
|
-
|
403
|
-
Returns:
|
404
|
-
UUID: The fine tuning job id, this id will be used to retrieve the fine
|
405
|
-
tuned model.
|
406
|
-
|
407
|
-
Example
|
408
|
-
-------
|
409
|
-
>>> fine_tuning_job_id = florencev2_fine_tuning(
|
410
|
-
[{'image_path': 'filename.png', 'labels': ['screw'], 'bboxes': [[370, 30, 560, 290]]},
|
411
|
-
{'image_path': 'filename.png', 'labels': ['screw'], 'bboxes': [[120, 0, 300, 170]]}],
|
412
|
-
"OBJECT_DETECTION"
|
413
|
-
)
|
414
|
-
"""
|
415
|
-
bboxes_input = [BboxInput.model_validate(bbox) for bbox in bboxes]
|
416
|
-
task_input = PromptTask[task]
|
417
|
-
fine_tuning_request = [
|
418
|
-
BboxInputBase64(
|
419
|
-
image=convert_to_b64(bbox_input.image_path),
|
420
|
-
filename=bbox_input.image_path.split("/")[-1],
|
421
|
-
labels=bbox_input.labels,
|
422
|
-
bboxes=bbox_input.bboxes,
|
423
|
-
)
|
424
|
-
for bbox_input in bboxes_input
|
425
|
-
]
|
426
|
-
landing_api = LandingPublicAPI()
|
427
|
-
return landing_api.launch_fine_tuning_job(
|
428
|
-
"florencev2", task_input, fine_tuning_request
|
429
|
-
)
|
430
|
-
|
431
|
-
|
432
389
|
META_TOOL_DOCSTRING = get_tool_documentation(
|
433
390
|
[
|
434
391
|
get_tool_descriptions,
|
@@ -442,6 +399,5 @@ META_TOOL_DOCSTRING = get_tool_documentation(
|
|
442
399
|
search_dir,
|
443
400
|
search_file,
|
444
401
|
find_file,
|
445
|
-
florencev2_fine_tuning,
|
446
402
|
]
|
447
403
|
)
|
@@ -15,9 +15,10 @@ from vision_agent.utils.execute import Error, MimeType
|
|
15
15
|
from vision_agent.utils.type_defs import LandingaiAPIKey
|
16
16
|
|
17
17
|
_LOGGER = logging.getLogger(__name__)
|
18
|
-
_LND_API_KEY = LandingaiAPIKey().api_key
|
19
|
-
|
20
|
-
|
18
|
+
_LND_API_KEY = os.environ.get("LANDINGAI_API_KEY", LandingaiAPIKey().api_key)
|
19
|
+
_LND_BASE_URL = os.environ.get("LANDINGAI_URL", "https://api.landing.ai")
|
20
|
+
_LND_API_URL = f"{_LND_BASE_URL}/v1/agent/model"
|
21
|
+
_LND_API_URL_v2 = f"{_LND_BASE_URL}/v1/tools"
|
21
22
|
|
22
23
|
|
23
24
|
class ToolCallTrace(BaseModel):
|
@@ -28,8 +29,13 @@ class ToolCallTrace(BaseModel):
|
|
28
29
|
|
29
30
|
|
30
31
|
def send_inference_request(
|
31
|
-
payload: Dict[str, Any],
|
32
|
+
payload: Dict[str, Any],
|
33
|
+
endpoint_name: str,
|
34
|
+
v2: bool = False,
|
35
|
+
metadata_payload: Optional[Dict[str, Any]] = None,
|
32
36
|
) -> Dict[str, Any]:
|
37
|
+
# TODO: runtime_tag and function_name should be in metadata_payload, not included
|
38
|
+
# in the service payload
|
33
39
|
try:
|
34
40
|
if runtime_tag := os.environ.get("RUNTIME_TAG", ""):
|
35
41
|
payload["runtime_tag"] = runtime_tag
|
@@ -62,9 +68,13 @@ def send_inference_request(
|
|
62
68
|
traceback_raw=[],
|
63
69
|
)
|
64
70
|
_LOGGER.error(f"Request failed: {res.status_code} {res.text}")
|
65
|
-
|
66
|
-
|
67
|
-
|
71
|
+
# TODO: function_name should be in metadata_payload
|
72
|
+
function_name = "unknown"
|
73
|
+
if "function_name" in payload:
|
74
|
+
function_name = payload["function_name"]
|
75
|
+
elif metadata_payload is not None and "function_name" in metadata_payload:
|
76
|
+
function_name = metadata_payload["function_name"]
|
77
|
+
raise RemoteToolCallFailed(function_name, res.status_code, res.text)
|
68
78
|
|
69
79
|
resp = res.json()
|
70
80
|
tool_call_trace.response = resp
|
@@ -142,3 +152,16 @@ def get_tools_df(funcs: List[Callable[..., Any]]) -> pd.DataFrame:
|
|
142
152
|
data["doc"].append(doc)
|
143
153
|
|
144
154
|
return pd.DataFrame(data) # type: ignore
|
155
|
+
|
156
|
+
|
157
|
+
def get_tools_info(funcs: List[Callable[..., Any]]) -> Dict[str, str]:
|
158
|
+
data: Dict[str, str] = {}
|
159
|
+
|
160
|
+
for func in funcs:
|
161
|
+
desc = func.__doc__
|
162
|
+
if desc is None:
|
163
|
+
desc = ""
|
164
|
+
|
165
|
+
data[func.__name__] = f"{func.__name__}{inspect.signature(func)}:\n{desc}"
|
166
|
+
|
167
|
+
return data
|
@@ -2,6 +2,7 @@ import io
|
|
2
2
|
import json
|
3
3
|
import logging
|
4
4
|
import tempfile
|
5
|
+
from uuid import UUID
|
5
6
|
from pathlib import Path
|
6
7
|
from importlib import resources
|
7
8
|
from typing import Any, Dict, List, Optional, Tuple, Union, cast
|
@@ -19,7 +20,9 @@ from vision_agent.tools.tool_utils import (
|
|
19
20
|
get_tool_descriptions,
|
20
21
|
get_tool_documentation,
|
21
22
|
get_tools_df,
|
23
|
+
get_tools_info,
|
22
24
|
)
|
25
|
+
from vision_agent.utils.exceptions import FineTuneModelIsNotReady
|
23
26
|
from vision_agent.utils import extract_frames_from_video
|
24
27
|
from vision_agent.utils.execute import FileSerializer, MimeType
|
25
28
|
from vision_agent.utils.image_utils import (
|
@@ -31,6 +34,15 @@ from vision_agent.utils.image_utils import (
|
|
31
34
|
convert_quad_box_to_bbox,
|
32
35
|
rle_decode,
|
33
36
|
)
|
37
|
+
from vision_agent.tools.tools_types import (
|
38
|
+
BboxInput,
|
39
|
+
BboxInputBase64,
|
40
|
+
PromptTask,
|
41
|
+
Florencev2FtRequest,
|
42
|
+
FineTuning,
|
43
|
+
JobStatus,
|
44
|
+
)
|
45
|
+
from vision_agent.clients.landing_public_api import LandingPublicAPI
|
34
46
|
|
35
47
|
register_heif_opener()
|
36
48
|
|
@@ -1285,6 +1297,119 @@ def overlay_heat_map(
|
|
1285
1297
|
return np.array(combined)
|
1286
1298
|
|
1287
1299
|
|
1300
|
+
# TODO: add this function to the imports so that it is picked up by the agent
|
1301
|
+
def florencev2_fine_tuning(bboxes: List[Dict[str, Any]], task: str) -> UUID:
|
1302
|
+
"""'florencev2_fine_tuning' is a tool that fine-tune florencev2 to be able
|
1303
|
+
to detect objects in an image based on a given dataset. It returns the fine
|
1304
|
+
tuning job id.
|
1305
|
+
|
1306
|
+
Parameters:
|
1307
|
+
bboxes (List[BboxInput]): A list of BboxInput containing the
|
1308
|
+
image path, labels and bounding boxes.
|
1309
|
+
task (PromptTask): The florencev2 fine-tuning task. The options are
|
1310
|
+
CAPTION, CAPTION_TO_PHRASE_GROUNDING and OBJECT_DETECTION.
|
1311
|
+
|
1312
|
+
Returns:
|
1313
|
+
UUID: The fine tuning job id, this id will be used to retrieve the fine
|
1314
|
+
tuned model.
|
1315
|
+
|
1316
|
+
Example
|
1317
|
+
-------
|
1318
|
+
>>> fine_tuning_job_id = florencev2_fine_tuning(
|
1319
|
+
[{'image_path': 'filename.png', 'labels': ['screw'], 'bboxes': [[370, 30, 560, 290]]},
|
1320
|
+
{'image_path': 'filename.png', 'labels': ['screw'], 'bboxes': [[120, 0, 300, 170]]}],
|
1321
|
+
"OBJECT_DETECTION"
|
1322
|
+
)
|
1323
|
+
"""
|
1324
|
+
bboxes_input = [BboxInput.model_validate(bbox) for bbox in bboxes]
|
1325
|
+
task_input = PromptTask[task]
|
1326
|
+
fine_tuning_request = [
|
1327
|
+
BboxInputBase64(
|
1328
|
+
image=convert_to_b64(bbox_input.image_path),
|
1329
|
+
filename=bbox_input.image_path.split("/")[-1],
|
1330
|
+
labels=bbox_input.labels,
|
1331
|
+
bboxes=bbox_input.bboxes,
|
1332
|
+
)
|
1333
|
+
for bbox_input in bboxes_input
|
1334
|
+
]
|
1335
|
+
landing_api = LandingPublicAPI()
|
1336
|
+
return landing_api.launch_fine_tuning_job(
|
1337
|
+
"florencev2", task_input, fine_tuning_request
|
1338
|
+
)
|
1339
|
+
|
1340
|
+
|
1341
|
+
# TODO: add this function to the imports so that it is picked up by the agent
|
1342
|
+
def florencev2_fine_tuned_object_detection(
|
1343
|
+
image: np.ndarray, prompt: str, model_id: UUID, task: str
|
1344
|
+
) -> List[Dict[str, Any]]:
|
1345
|
+
"""'florencev2_fine_tuned_object_detection' is a tool that uses a fine tuned model
|
1346
|
+
to detect objects given a text prompt such as a phrase or class names separated by
|
1347
|
+
commas. It returns a list of detected objects as labels and their location as
|
1348
|
+
bounding boxes with score of 1.0.
|
1349
|
+
|
1350
|
+
Parameters:
|
1351
|
+
image (np.ndarray): The image used to detect objects.
|
1352
|
+
prompt (str): The prompt to help find objects in the image.
|
1353
|
+
model_id (UUID): The fine-tuned model id.
|
1354
|
+
task (PromptTask): The florencev2 fine-tuning task. The options are
|
1355
|
+
CAPTION, CAPTION_TO_PHRASE_GROUNDING and OBJECT_DETECTION.
|
1356
|
+
|
1357
|
+
Returns:
|
1358
|
+
List[Dict[str, Any]]: A list of dictionaries containing the score, label, and
|
1359
|
+
bounding box of the detected objects with normalized coordinates between 0
|
1360
|
+
and 1 (xmin, ymin, xmax, ymax). xmin and ymin are the coordinates of the
|
1361
|
+
top-left and xmax and ymax are the coordinates of the bottom-right of the
|
1362
|
+
bounding box. The scores are always 1.0 and cannot be thresholded
|
1363
|
+
|
1364
|
+
Example
|
1365
|
+
-------
|
1366
|
+
>>> florencev2_fine_tuned_object_detection(
|
1367
|
+
image,
|
1368
|
+
'person looking at a coyote',
|
1369
|
+
UUID("381cd5f9-5dc4-472d-9260-f3bb89d31f83")
|
1370
|
+
)
|
1371
|
+
[
|
1372
|
+
{'score': 1.0, 'label': 'person', 'bbox': [0.1, 0.11, 0.35, 0.4]},
|
1373
|
+
{'score': 1.0, 'label': 'coyote', 'bbox': [0.34, 0.21, 0.85, 0.5]},
|
1374
|
+
]
|
1375
|
+
"""
|
1376
|
+
# check if job succeeded first
|
1377
|
+
landing_api = LandingPublicAPI()
|
1378
|
+
status = landing_api.check_fine_tuning_job(model_id)
|
1379
|
+
if status is not JobStatus.SUCCEEDED:
|
1380
|
+
raise FineTuneModelIsNotReady()
|
1381
|
+
|
1382
|
+
task = PromptTask[task]
|
1383
|
+
if task is PromptTask.OBJECT_DETECTION:
|
1384
|
+
prompt = ""
|
1385
|
+
|
1386
|
+
data_obj = Florencev2FtRequest(
|
1387
|
+
image=convert_to_b64(image),
|
1388
|
+
task=task,
|
1389
|
+
tool="florencev2_fine_tuning",
|
1390
|
+
prompt=prompt,
|
1391
|
+
fine_tuning=FineTuning(job_id=model_id),
|
1392
|
+
)
|
1393
|
+
data = data_obj.model_dump(by_alias=True)
|
1394
|
+
metadata_payload = {"function_name": "florencev2_fine_tuned_object_detection"}
|
1395
|
+
detections = send_inference_request(
|
1396
|
+
data, "tools", v2=False, metadata_payload=metadata_payload
|
1397
|
+
)
|
1398
|
+
|
1399
|
+
detections = detections[task.value]
|
1400
|
+
return_data = []
|
1401
|
+
image_size = image.shape[:2]
|
1402
|
+
for i in range(len(detections["bboxes"])):
|
1403
|
+
return_data.append(
|
1404
|
+
{
|
1405
|
+
"score": 1.0,
|
1406
|
+
"label": detections["labels"][i],
|
1407
|
+
"bbox": normalize_bbox(detections["bboxes"][i], image_size),
|
1408
|
+
}
|
1409
|
+
)
|
1410
|
+
return return_data
|
1411
|
+
|
1412
|
+
|
1288
1413
|
TOOLS = [
|
1289
1414
|
owl_v2,
|
1290
1415
|
grounding_sam,
|
@@ -1317,6 +1442,7 @@ TOOLS = [
|
|
1317
1442
|
TOOLS_DF = get_tools_df(TOOLS) # type: ignore
|
1318
1443
|
TOOL_DESCRIPTIONS = get_tool_descriptions(TOOLS) # type: ignore
|
1319
1444
|
TOOL_DOCSTRING = get_tool_documentation(TOOLS) # type: ignore
|
1445
|
+
TOOLS_INFO = get_tools_info(TOOLS) # type: ignore
|
1320
1446
|
UTILITIES_DOCSTRING = get_tool_documentation(
|
1321
1447
|
[
|
1322
1448
|
save_json,
|
@@ -0,0 +1,84 @@
|
|
1
|
+
from uuid import UUID
|
2
|
+
from enum import Enum
|
3
|
+
from typing import List, Tuple, Optional
|
4
|
+
|
5
|
+
from pydantic import BaseModel, ConfigDict, Field, field_serializer, SerializationInfo
|
6
|
+
|
7
|
+
|
8
|
+
class BboxInput(BaseModel):
|
9
|
+
image_path: str
|
10
|
+
labels: List[str]
|
11
|
+
bboxes: List[Tuple[int, int, int, int]]
|
12
|
+
|
13
|
+
|
14
|
+
class BboxInputBase64(BaseModel):
|
15
|
+
image: str
|
16
|
+
filename: str
|
17
|
+
labels: List[str]
|
18
|
+
bboxes: List[Tuple[int, int, int, int]]
|
19
|
+
|
20
|
+
|
21
|
+
class PromptTask(str, Enum):
|
22
|
+
"""
|
23
|
+
Valid task prompts options for the Florencev2 model.
|
24
|
+
"""
|
25
|
+
|
26
|
+
CAPTION = "<CAPTION>"
|
27
|
+
""""""
|
28
|
+
CAPTION_TO_PHRASE_GROUNDING = "<CAPTION_TO_PHRASE_GROUNDING>"
|
29
|
+
""""""
|
30
|
+
OBJECT_DETECTION = "<OD>"
|
31
|
+
""""""
|
32
|
+
|
33
|
+
|
34
|
+
class FineTuning(BaseModel):
|
35
|
+
model_config = ConfigDict(populate_by_name=True)
|
36
|
+
|
37
|
+
job_id: UUID = Field(alias="jobId")
|
38
|
+
|
39
|
+
@field_serializer("job_id")
|
40
|
+
def serialize_job_id(self, job_id: UUID, _info: SerializationInfo) -> str:
|
41
|
+
return str(job_id)
|
42
|
+
|
43
|
+
|
44
|
+
class Florencev2FtRequest(BaseModel):
|
45
|
+
model_config = ConfigDict(populate_by_name=True)
|
46
|
+
|
47
|
+
image: str
|
48
|
+
task: PromptTask
|
49
|
+
tool: str
|
50
|
+
prompt: Optional[str] = ""
|
51
|
+
fine_tuning: Optional[FineTuning] = Field(None, alias="fineTuning")
|
52
|
+
|
53
|
+
|
54
|
+
class JobStatus(str, Enum):
|
55
|
+
"""The status of a fine-tuning job.
|
56
|
+
|
57
|
+
CREATED:
|
58
|
+
The job has been created and is waiting to be scheduled to run.
|
59
|
+
STARTING:
|
60
|
+
The job has started running, but has not yet entered the training phase.
|
61
|
+
TRAINING:
|
62
|
+
The job is training a model.
|
63
|
+
EVALUATING:
|
64
|
+
The job is evaluating the model and computing metrics.
|
65
|
+
PUBLISHING:
|
66
|
+
The job is exporting the artifact(s) to an external directory (s3 or local).
|
67
|
+
SUCCEEDED:
|
68
|
+
The job has finished, including training, evaluation and publishing the
|
69
|
+
artifact(s).
|
70
|
+
FAILED:
|
71
|
+
The job has failed for some reason internally, it can be due to resources
|
72
|
+
issues or the code itself.
|
73
|
+
STOPPED:
|
74
|
+
The job has been stopped by the user locally or in the cloud.
|
75
|
+
"""
|
76
|
+
|
77
|
+
CREATED = "CREATED"
|
78
|
+
STARTING = "STARTING"
|
79
|
+
TRAINING = "TRAINING"
|
80
|
+
EVALUATING = "EVALUATING"
|
81
|
+
PUBLISHING = "PUBLISHING"
|
82
|
+
SUCCEEDED = "SUCCEEDED"
|
83
|
+
FAILED = "FAILED"
|
84
|
+
STOPPED = "STOPPED"
|
@@ -49,3 +49,16 @@ class RemoteSandboxClosedError(RemoteSandboxError):
|
|
49
49
|
"""
|
50
50
|
|
51
51
|
is_retryable = True
|
52
|
+
|
53
|
+
|
54
|
+
class FineTuneModelIsNotReady(Exception):
|
55
|
+
"""Exception raised when the fine-tune model is not ready.
|
56
|
+
If this is raised, it's recommended to wait 5 seconds before trying to use
|
57
|
+
the model again.
|
58
|
+
"""
|
59
|
+
|
60
|
+
|
61
|
+
class FineTuneModelNotFound(Exception):
|
62
|
+
"""Exception raised when the fine-tune model is not found.
|
63
|
+
If this is raised, it's recommended to try another model id.
|
64
|
+
"""
|
@@ -1,30 +0,0 @@
|
|
1
|
-
from enum import Enum
|
2
|
-
from typing import List, Tuple
|
3
|
-
|
4
|
-
from pydantic import BaseModel
|
5
|
-
|
6
|
-
|
7
|
-
class BboxInput(BaseModel):
|
8
|
-
image_path: str
|
9
|
-
labels: List[str]
|
10
|
-
bboxes: List[Tuple[int, int, int, int]]
|
11
|
-
|
12
|
-
|
13
|
-
class BboxInputBase64(BaseModel):
|
14
|
-
image: str
|
15
|
-
filename: str
|
16
|
-
labels: List[str]
|
17
|
-
bboxes: List[Tuple[int, int, int, int]]
|
18
|
-
|
19
|
-
|
20
|
-
class PromptTask(str, Enum):
|
21
|
-
"""
|
22
|
-
Valid task prompts options for the Florencev2 model.
|
23
|
-
"""
|
24
|
-
|
25
|
-
CAPTION = "<CAPTION>"
|
26
|
-
""""""
|
27
|
-
CAPTION_TO_PHRASE_GROUNDING = "<CAPTION_TO_PHRASE_GROUNDING>"
|
28
|
-
""""""
|
29
|
-
OBJECT_DETECTION = "<OD>"
|
30
|
-
""""""
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
{vision_agent-0.2.109 → vision_agent-0.2.111}/vision_agent/agent/vision_agent_coder_prompts.py
RENAMED
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|