vision-agent 0.2.152__tar.gz → 0.2.154__tar.gz
- {vision_agent-0.2.152 → vision_agent-0.2.154}/PKG-INFO +1 -1
- {vision_agent-0.2.152 → vision_agent-0.2.154}/pyproject.toml +1 -1
- {vision_agent-0.2.152 → vision_agent-0.2.154}/vision_agent/tools/__init__.py +1 -0
- {vision_agent-0.2.152 → vision_agent-0.2.154}/vision_agent/tools/meta_tools.py +4 -0
- {vision_agent-0.2.152 → vision_agent-0.2.154}/vision_agent/tools/tool_utils.py +9 -4
- {vision_agent-0.2.152 → vision_agent-0.2.154}/vision_agent/tools/tools.py +146 -46
- {vision_agent-0.2.152 → vision_agent-0.2.154}/vision_agent/tools/tools_types.py +8 -13
- {vision_agent-0.2.152 → vision_agent-0.2.154}/LICENSE +0 -0
- {vision_agent-0.2.152 → vision_agent-0.2.154}/README.md +0 -0
- {vision_agent-0.2.152 → vision_agent-0.2.154}/vision_agent/__init__.py +0 -0
- {vision_agent-0.2.152 → vision_agent-0.2.154}/vision_agent/agent/__init__.py +0 -0
- {vision_agent-0.2.152 → vision_agent-0.2.154}/vision_agent/agent/agent.py +0 -0
- {vision_agent-0.2.152 → vision_agent-0.2.154}/vision_agent/agent/agent_utils.py +0 -0
- {vision_agent-0.2.152 → vision_agent-0.2.154}/vision_agent/agent/vision_agent.py +0 -0
- {vision_agent-0.2.152 → vision_agent-0.2.154}/vision_agent/agent/vision_agent_coder.py +0 -0
- {vision_agent-0.2.152 → vision_agent-0.2.154}/vision_agent/agent/vision_agent_coder_prompts.py +0 -0
- {vision_agent-0.2.152 → vision_agent-0.2.154}/vision_agent/agent/vision_agent_prompts.py +0 -0
- {vision_agent-0.2.152 → vision_agent-0.2.154}/vision_agent/clients/__init__.py +0 -0
- {vision_agent-0.2.152 → vision_agent-0.2.154}/vision_agent/clients/http.py +0 -0
- {vision_agent-0.2.152 → vision_agent-0.2.154}/vision_agent/clients/landing_public_api.py +0 -0
- {vision_agent-0.2.152 → vision_agent-0.2.154}/vision_agent/fonts/__init__.py +0 -0
- {vision_agent-0.2.152 → vision_agent-0.2.154}/vision_agent/fonts/default_font_ch_en.ttf +0 -0
- {vision_agent-0.2.152 → vision_agent-0.2.154}/vision_agent/lmm/__init__.py +0 -0
- {vision_agent-0.2.152 → vision_agent-0.2.154}/vision_agent/lmm/lmm.py +0 -0
- {vision_agent-0.2.152 → vision_agent-0.2.154}/vision_agent/lmm/types.py +0 -0
- {vision_agent-0.2.152 → vision_agent-0.2.154}/vision_agent/tools/prompts.py +0 -0
- {vision_agent-0.2.152 → vision_agent-0.2.154}/vision_agent/utils/__init__.py +0 -0
- {vision_agent-0.2.152 → vision_agent-0.2.154}/vision_agent/utils/exceptions.py +0 -0
- {vision_agent-0.2.152 → vision_agent-0.2.154}/vision_agent/utils/execute.py +0 -0
- {vision_agent-0.2.152 → vision_agent-0.2.154}/vision_agent/utils/image_utils.py +0 -0
- {vision_agent-0.2.152 → vision_agent-0.2.154}/vision_agent/utils/sim.py +0 -0
- {vision_agent-0.2.152 → vision_agent-0.2.154}/vision_agent/utils/type_defs.py +0 -0
- {vision_agent-0.2.152 → vision_agent-0.2.154}/vision_agent/utils/video.py +0 -0
{vision_agent-0.2.152 → vision_agent-0.2.154}/vision_agent/tools/meta_tools.py

@@ -671,6 +671,10 @@ def use_object_detection_fine_tuning(
             r'florence2_phrase_grounding\(\s*["\']([^"\']+)["\']\s*,\s*([^,]+)(?:,\s*["\'][^"\']+["\'])?\s*\)',
             lambda match: f'florence2_phrase_grounding("{match.group(1)}", {match.group(2)}, "{fine_tune_id}")',
         ),
+        (
+            r'florence2_phrase_grounding_video\(\s*["\']([^"\']+)["\']\s*,\s*([^,]+)(?:,\s*["\'][^"\']+["\'])?\s*\)',
+            lambda match: f'florence2_phrase_grounding_video("{match.group(1)}", {match.group(2)}, "{fine_tune_id}")',
+        ),
         (
             r'owl_v2_image\(\s*["\']([^"\']+)["\']\s*,\s*([^,]+)(?:,\s*["\'][^"\']+["\'])?\s*\)',
             lambda match: f'owl_v2_image("{match.group(1)}", {match.group(2)}, "{fine_tune_id}")',
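The added pattern/replacement pair mirrors the existing `florence2_phrase_grounding` rule one tuple above it. A minimal standalone sketch of the substitution (the `fine_tune_id` value here is a made-up placeholder):

    import re

    fine_tune_id = "00000000-0000-0000-0000-000000000000"  # placeholder ID
    pattern = r'florence2_phrase_grounding_video\(\s*["\']([^"\']+)["\']\s*,\s*([^,]+)(?:,\s*["\'][^"\']+["\'])?\s*\)'
    repl = lambda m: f'florence2_phrase_grounding_video("{m.group(1)}", {m.group(2)}, "{fine_tune_id}")'

    code = 'dets = florence2_phrase_grounding_video("person", frames)'
    print(re.sub(pattern, repl, code))
    # dets = florence2_phrase_grounding_video("person", frames, "00000000-0000-0000-0000-000000000000")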
{vision_agent-0.2.152 → vision_agent-0.2.154}/vision_agent/tools/tool_utils.py

@@ -1,6 +1,6 @@
+import os
 import inspect
 import logging
-import os
 from base64 import b64encode
 from typing import Any, Callable, Dict, List, MutableMapping, Optional, Tuple
 
@@ -37,8 +37,9 @@ def send_inference_request(
     files: Optional[List[Tuple[Any, ...]]] = None,
     v2: bool = False,
     metadata_payload: Optional[Dict[str, Any]] = None,
+    is_form: bool = False,
 ) -> Any:
-    # TODO: runtime_tag and function_name should be metadata_payload and
+    # TODO: runtime_tag and function_name should be metadata_payload and not included
     # in the service payload
     if runtime_tag := os.environ.get("RUNTIME_TAG", ""):
         payload["runtime_tag"] = runtime_tag
@@ -64,7 +65,7 @@ def send_inference_request(
     elif metadata_payload is not None and "function_name" in metadata_payload:
         function_name = metadata_payload["function_name"]
 
-    response = _call_post(url, payload, session, files, function_name)
+    response = _call_post(url, payload, session, files, function_name, is_form)
 
     # TODO: consider making the response schema the same between below two sources
     return response if "TOOL_ENDPOINT_AUTH" in os.environ else response["data"]
@@ -75,6 +76,7 @@ def send_task_inference_request(
     task_name: str,
     files: Optional[List[Tuple[Any, ...]]] = None,
     metadata: Optional[Dict[str, Any]] = None,
+    is_form: bool = False,
 ) -> Any:
     url = f"{_LND_API_URL_v2}/{task_name}"
     headers = {"apikey": _LND_API_KEY}
@@ -87,7 +89,7 @@ def send_task_inference_request(
     function_name = "unknown"
     if metadata is not None and "function_name" in metadata:
         function_name = metadata["function_name"]
-    response = _call_post(url, payload, session, files, function_name)
+    response = _call_post(url, payload, session, files, function_name, is_form)
     return response["data"]
 
 
@@ -203,6 +205,7 @@ def _call_post(
     session: Session,
     files: Optional[List[Tuple[Any, ...]]] = None,
     function_name: str = "unknown",
+    is_form: bool = False,
 ) -> Any:
     files_in_b64 = None
     if files:
@@ -210,6 +213,8 @@ def _call_post(
     try:
         if files is not None:
             response = session.post(url, data=payload, files=files)
+        elif is_form:
+            response = session.post(url, data=payload)
         else:
             response = session.post(url, json=payload)
 
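The `is_form` flag threaded through `send_inference_request`, `send_task_inference_request`, and `_call_post` adds a third request encoding alongside multipart and JSON. A standalone sketch of the three branches; the `url`, `payload`, and `files` values are placeholders, not from the package:

    import requests

    session = requests.Session()
    url = "https://example.com/inference"           # placeholder endpoint
    payload = {"prompt": "person", "image": "..."}  # placeholder body
    files = None                                    # e.g. [("video", b"...")]
    is_form = True

    if files is not None:
        # multipart/form-data: raw bytes (e.g. video frames) are attached
        response = session.post(url, data=payload, files=files)
    elif is_form:
        # application/x-www-form-urlencoded: the new is_form=True path
        response = session.post(url, data=payload)
    else:
        # application/json: the default path
        response = session.post(url, json=payload)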
{vision_agent-0.2.152 → vision_agent-0.2.154}/vision_agent/tools/tools.py

@@ -1,3 +1,4 @@
+import base64
 import io
 import json
 import logging
@@ -28,7 +29,6 @@ from vision_agent.tools.tool_utils import (
     send_task_inference_request,
 )
 from vision_agent.tools.tools_types import (
-    FineTuning,
     Florence2FtRequest,
     JobStatus,
     ODResponseData,
@@ -194,20 +194,26 @@ def owl_v2_image(
         data_obj = Florence2FtRequest(
             image=image_b64,
             task=PromptTask.PHRASE_GROUNDING,
-            tool="florencev2_fine_tuning",
             prompt=prompt,
-            fine_tuning=FineTuning(job_id=UUID(fine_tune_id)),
+            job_id=UUID(fine_tune_id),
         )
-        data = data_obj.model_dump(by_alias=True)
-        detections = send_inference_request(
-
+        data = data_obj.model_dump(by_alias=True, exclude_none=True)
+        detections = send_inference_request(
+            data,
+            "florence2-ft",
+            v2=True,
+            is_form=True,
+            metadata_payload={"function_name": "owl_v2_image"},
+        )
+        # get the first frame
+        detection = detections[0]
         bboxes_formatted = [
             ODResponseData(
-                label=
-                bbox=normalize_bbox(
+                label=detection["labels"][i],
+                bbox=normalize_bbox(detection["bboxes"][i], image_size),
                 score=1.0,
             )
-            for i in range(len(
+            for i in range(len(detection["bboxes"]))
         ]
         return [bbox.model_dump() for bbox in bboxes_formatted]
 
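The fine-tuned branch now posts a form payload to the `florence2-ft` endpoint, which returns per-frame results even for a single image, hence the `detections[0]` indexing. A sketch of the response handling, with a made-up response whose shape is inferred from this diff:

    # hypothetical florence2-ft response: one entry per frame
    detections = [
        {"labels": ["person"], "bboxes": [[10, 20, 110, 220]]},
    ]
    detection = detections[0]  # single image -> first (only) frame
    rows = [
        {"label": detection["labels"][i], "bbox": detection["bboxes"][i], "score": 1.0}
        for i in range(len(detection["bboxes"]))
    ]
    print(rows)  # [{'label': 'person', 'bbox': [10, 20, 110, 220], 'score': 1.0}]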
@@ -419,25 +425,30 @@ def florence2_sam2_image(
         req_data_obj = Florence2FtRequest(
             image=image_b64,
             task=PromptTask.PHRASE_GROUNDING,
-            tool="florencev2_fine_tuning",
             prompt=prompt,
-            fine_tuning=FineTuning(
-                job_id=UUID(fine_tune_id),
-                postprocessing="sam2",
-            ),
+            postprocessing="sam2",
+            job_id=UUID(fine_tune_id),
+        )
+        req_data = req_data_obj.model_dump(by_alias=True, exclude_none=True)
+        detections_ft = send_inference_request(
+            req_data,
+            "florence2-ft",
+            v2=True,
+            is_form=True,
+            metadata_payload={"function_name": "florence2_sam2_image"},
         )
-
-
-        detections_ft = detections_ft["<CAPTION_TO_PHRASE_GROUNDING>"]
+        # get the first frame
+        detection = detections_ft[0]
         return_data = []
-
-        for i in range(len(detections_ft["bboxes"])):
+        for i in range(len(detection["bboxes"])):
             return_data.append(
                 {
                     "score": 1.0,
-                    "label":
-                    "bbox":
-
+                    "label": detection["labels"][i],
+                    "bbox": normalize_bbox(
+                        detection["bboxes"][i], detection["masks"][i]["size"]
+                    ),
+                    "mask": rle_decode_array(detection["masks"][i]),
                 }
             )
         return return_data
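The mask post-processing relies on `rle_decode_array`, whose implementation is not part of this diff. A plausible sketch, assuming COCO-style uncompressed RLE (`{"counts": [...], "size": [h, w]}`, column-major order); the real helper may differ:

    import numpy as np

    def rle_decode_array(rle: dict) -> np.ndarray:
        """Decode an uncompressed RLE dict into a binary mask (assumption)."""
        h, w = rle["size"]
        flat = np.zeros(h * w, dtype=np.uint8)
        idx, val = 0, 0
        for count in rle["counts"]:  # alternating runs of 0s and 1s
            flat[idx : idx + count] = val
            idx += count
            val = 1 - val
        return flat.reshape((w, h)).T  # column-major layout

    print(rle_decode_array({"counts": [2, 2], "size": [2, 2]}))
    # [[0 1]
    #  [0 1]]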
@@ -451,6 +462,7 @@ def florence2_sam2_image(
     detections: Dict[str, Any] = send_inference_request(
         payload, "florence2-sam2", files=files, v2=True
     )
+
     return_data = []
     for _, data_i in detections["0"].items():
         mask = rle_decode_array(data_i["mask"])
@@ -688,22 +700,18 @@ def countgd_counting(
         {'score': 0.98, 'label': 'flower', 'bbox': [0.44, 0.24, 0.49, 0.58},
     ]
     """
-
-    files = [("image", buffer_bytes)]
+    image_b64 = convert_to_b64(image)
     prompt = prompt.replace(", ", " .")
-    payload = {"
+    payload = {"prompt": prompt, "image": image_b64}
     metadata = {"function_name": "countgd_counting"}
-    resp_data = send_task_inference_request(
-        payload, "text-to-object-detection", files=files, metadata=metadata
-    )
-    bboxes_per_frame = resp_data[0]
+    resp_data = send_task_inference_request(payload, "countgd", metadata=metadata)
     bboxes_formatted = [
         ODResponseData(
             label=bbox["label"],
-            bbox=list(map(lambda x: round(x, 2), bbox["
+            bbox=list(map(lambda x: round(x, 2), bbox["bbox"])),
             score=round(bbox["score"], 2),
         )
-        for bbox in bboxes_per_frame
+        for bbox in resp_data
     ]
     filtered_bboxes = filter_bboxes_by_threshold(bboxes_formatted, box_threshold)
     return [bbox.model_dump() for bbox in filtered_bboxes]
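`countgd_counting` switches from a multipart upload against `text-to-object-detection` to a base64-encoded image in the task payload against `countgd`. A sketch of the new payload construction; `convert_to_b64` is approximated with plain base64 here, and the send call is left commented since it needs API credentials:

    from base64 import b64encode

    prompt = "flower, leaf".replace(", ", " .")  # countgd separator: "flower . leaf"
    image_b64 = b64encode(b"<raw image bytes>").decode("utf-8")  # placeholder bytes
    payload = {"prompt": prompt, "image": image_b64}
    metadata = {"function_name": "countgd_counting"}
    # resp_data = send_task_inference_request(payload, "countgd", metadata=metadata)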
@@ -887,7 +895,10 @@ def ixc25_temporal_localization(prompt: str, frames: List[np.ndarray]) -> List[b
         "function_name": "ixc25_temporal_localization",
     }
     data: List[int] = send_inference_request(
-        payload,
+        payload,
+        "video-temporal-localization?model=internlm-xcomposer",
+        files=files,
+        v2=True,
     )
     chunk_size = round(len(frames) / len(data))
     data_explode = [[elt] * chunk_size for elt in data]
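The context lines show how per-chunk predictions are mapped back to per-frame values. A self-contained sketch with made-up numbers; the final flattening step is an assumption, since only the construction of `data_explode` appears in this hunk:

    frames = list(range(10))                     # stand-in for 10 video frames
    data = [1, 0]                                # per-chunk predictions (made up)
    chunk_size = round(len(frames) / len(data))  # 5 frames per chunk
    data_explode = [[elt] * chunk_size for elt in data]
    print(data_explode)                          # [[1, 1, 1, 1, 1], [0, 0, 0, 0, 0]]
    flat = [bool(e) for chunk in data_explode for e in chunk]
    print(flat)                                  # [True]*5 + [False]*5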
@@ -1135,10 +1146,10 @@ def florence2_image_caption(image: np.ndarray, detail_caption: bool = True) -> s
 def florence2_phrase_grounding(
     prompt: str, image: np.ndarray, fine_tune_id: Optional[str] = None
 ) -> List[Dict[str, Any]]:
-    """'florence2_phrase_grounding'
-    objects given a text prompt which can be object names or caption.
-    can optionally separate the object names in the text with commas. It returns
-    of bounding boxes with normalized coordinates, label names and associated
+    """'florence2_phrase_grounding' will run florence2 on a image. It can
+    detect multiple objects given a text prompt which can be object names or caption.
+    You can optionally separate the object names in the text with commas. It returns
+    a list of bounding boxes with normalized coordinates, label names and associated
     probability scores of 1.0.
 
     Parameters:
@@ -1176,17 +1187,19 @@ def florence2_phrase_grounding(
         data_obj = Florence2FtRequest(
             image=image_b64,
             task=PromptTask.PHRASE_GROUNDING,
-            tool="florencev2_fine_tuning",
             prompt=prompt,
-            fine_tuning=FineTuning(job_id=UUID(fine_tune_id)),
+            job_id=UUID(fine_tune_id),
         )
-        data = data_obj.model_dump(by_alias=True)
+        data = data_obj.model_dump(by_alias=True, exclude_none=True)
         detections = send_inference_request(
             data,
-            "
-            v2=
+            "florence2-ft",
+            v2=True,
+            is_form=True,
             metadata_payload={"function_name": "florence2_phrase_grounding"},
         )
+        # get the first frame
+        detection = detections[0]
     else:
         data = {
             "image": image_b64,
@@ -1195,20 +1208,107 @@ def florence2_phrase_grounding(
             "function_name": "florence2_phrase_grounding",
         }
         detections = send_inference_request(data, "florence2", v2=True)
+        detection = detections["<CAPTION_TO_PHRASE_GROUNDING>"]
 
-    detections = detections["<CAPTION_TO_PHRASE_GROUNDING>"]
     return_data = []
-    for i in range(len(detections["bboxes"])):
+    for i in range(len(detection["bboxes"])):
         return_data.append(
             ODResponseData(
-                label=detections["labels"][i],
-                bbox=normalize_bbox(detections["bboxes"][i], image_size),
+                label=detection["labels"][i],
+                bbox=normalize_bbox(detection["bboxes"][i], image_size),
                 score=1.0,
             )
         )
     return [bbox.model_dump() for bbox in return_data]
 
 
+def florence2_phrase_grounding_video(
+    prompt: str, frames: List[np.ndarray], fine_tune_id: Optional[str] = None
+) -> List[List[Dict[str, Any]]]:
+    """'florence2_phrase_grounding_video' will run florence2 on each frame of a video.
+    It can detect multiple objects given a text prompt which can be object names or
+    caption. You can optionally separate the object names in the text with commas.
+    It returns a list of lists where each inner list contains bounding boxes with
+    normalized coordinates, label names and associated probability scores of 1.0.
+
+    Parameters:
+        prompt (str): The prompt to ground to the video.
+        frames (List[np.ndarray]): The list of frames to detect objects.
+        fine_tune_id (Optional[str]): If you have a fine-tuned model, you can pass the
+            fine-tuned model ID here to use it.
+
+    Returns:
+        List[List[Dict[str, Any]]]: A list of lists of dictionaries containing the score,
+            label, and bounding box of the detected objects with normalized coordinates
+            between 0 and 1 (xmin, ymin, xmax, ymax). xmin and ymin are the coordinates
+            of the top-left and xmax and ymax are the coordinates of the bottom-right of
+            the bounding box. The scores are always 1.0 and cannot be thresholded.
+
+    Example
+    -------
+        >>> florence2_phrase_grounding_video('person looking at a coyote', frames)
+        [
+            [
+                {'score': 1.0, 'label': 'person', 'bbox': [0.1, 0.11, 0.35, 0.4]},
+                {'score': 1.0, 'label': 'coyote', 'bbox': [0.34, 0.21, 0.85, 0.5},
+            ],
+            ...
+        ]
+    """
+    if len(frames) == 0:
+        raise ValueError("No frames provided")
+
+    image_size = frames[0].shape[:2]
+    buffer_bytes = frames_to_bytes(frames)
+    files = [("video", buffer_bytes)]
+
+    if fine_tune_id is not None:
+        landing_api = LandingPublicAPI()
+        status = landing_api.check_fine_tuning_job(UUID(fine_tune_id))
+        if status is not JobStatus.SUCCEEDED:
+            raise FineTuneModelIsNotReady(
+                f"Fine-tuned model {fine_tune_id} is not ready yet"
+            )
+
+        data_obj = Florence2FtRequest(
+            task=PromptTask.PHRASE_GROUNDING,
+            prompt=prompt,
+            job_id=UUID(fine_tune_id),
+        )
+
+        data = data_obj.model_dump(by_alias=True, exclude_none=True, mode="json")
+        detections = send_inference_request(
+            data,
+            "florence2-ft",
+            v2=True,
+            files=files,
+            metadata_payload={"function_name": "florence2_phrase_grounding_video"},
+        )
+    else:
+        data = {
+            "prompt": prompt,
+            "task": "<CAPTION_TO_PHRASE_GROUNDING>",
+            "function_name": "florence2_phrase_grounding_video",
+            "video": base64.b64encode(buffer_bytes).decode("utf-8"),
+        }
+        detections = send_inference_request(data, "florence2", v2=True)
+        detections = [d["<CAPTION_TO_PHRASE_GROUNDING>"] for d in detections]
+
+    bboxes_formatted = []
+    for frame_data in detections:
+        bboxes_formatted_per_frame = []
+        for idx in range(len(frame_data["bboxes"])):
+            bboxes_formatted_per_frame.append(
+                ODResponseData(
+                    label=frame_data["labels"][idx],
+                    bbox=normalize_bbox(frame_data["bboxes"][idx], image_size),
+                    score=1.0,
+                )
+            )
+        bboxes_formatted.append(bboxes_formatted_per_frame)
+    return [[bbox.model_dump() for bbox in frame] for frame in bboxes_formatted]
+
+
 def florence2_ocr(image: np.ndarray) -> List[Dict[str, Any]]:
     """'florence2_ocr' is a tool that can detect text and text regions in an image.
     Each text region contains one line of text. It returns a list of detected text,

@@ -1220,7 +1320,7 @@ def florence2_ocr(image: np.ndarray) -> List[Dict[str, Any]]:
 
     Returns:
         List[Dict[str, Any]]: A list of dictionaries containing the detected text, bbox
-            with
+            with normalized coordinates, and confidence score.
 
     Example
     -------
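Hypothetical usage of the new tool, assuming it is the symbol added by the one-line change to `vision_agent/tools/__init__.py` and that valid API credentials are configured; the frames are dummies:

    import numpy as np
    from vision_agent.tools import florence2_phrase_grounding_video

    frames = [np.zeros((480, 640, 3), dtype=np.uint8) for _ in range(8)]
    detections_per_frame = florence2_phrase_grounding_video("person, coyote", frames)
    for frame_idx, dets in enumerate(detections_per_frame):
        for det in dets:
            print(frame_idx, det["label"], det["bbox"])  # score is always 1.0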
{vision_agent-0.2.152 → vision_agent-0.2.154}/vision_agent/tools/tools_types.py

@@ -1,6 +1,6 @@
 from enum import Enum
-from typing import List, Optional, Tuple, Union
 from uuid import UUID
+from typing import List, Optional, Tuple, Union
 
 from pydantic import BaseModel, ConfigDict, Field, SerializationInfo, field_serializer
 
@@ -24,27 +24,22 @@ class PromptTask(str, Enum):
     PHRASE_GROUNDING = "<CAPTION_TO_PHRASE_GROUNDING>"
 
 
-class FineTuning(BaseModel):
+class Florence2FtRequest(BaseModel):
     model_config = ConfigDict(populate_by_name=True)
 
-    job_id: UUID = Field(alias="jobId")
+    image: Optional[str] = None
+    video: Optional[bytes] = None
+    task: PromptTask
+    prompt: Optional[str] = ""
+    chunk_length_frames: Optional[int] = None
     postprocessing: Optional[str] = None
+    job_id: Optional[UUID] = Field(None, alias="jobId")
 
     @field_serializer("job_id")
     def serialize_job_id(self, job_id: UUID, _info: SerializationInfo) -> str:
         return str(job_id)
 
 
-class Florence2FtRequest(BaseModel):
-    model_config = ConfigDict(populate_by_name=True)
-
-    image: str
-    task: PromptTask
-    tool: str
-    prompt: Optional[str] = ""
-    fine_tuning: Optional[FineTuning] = Field(None, alias="fineTuning")
-
-
 class JobStatus(str, Enum):
     """The status of a fine-tuning job.
 
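The two request models are merged into a single `Florence2FtRequest` whose media fields are all optional, which is why callers now pass `exclude_none=True` when dumping. A sketch of the serialization behavior (field values are made up):

    from typing import Optional
    from uuid import UUID
    from pydantic import BaseModel, ConfigDict, Field

    class Req(BaseModel):
        model_config = ConfigDict(populate_by_name=True)

        image: Optional[str] = None
        video: Optional[bytes] = None
        prompt: Optional[str] = ""
        job_id: Optional[UUID] = Field(None, alias="jobId")

    r = Req(image="abc123", job_id=UUID("12345678-1234-5678-1234-567812345678"))
    print(r.model_dump(by_alias=True, exclude_none=True))
    # {'image': 'abc123', 'prompt': '', 'jobId': UUID('12345678-...')} -- video dropped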