vision-agent 0.2.111__tar.gz → 0.2.113__tar.gz
Sign up to get free protection for your applications and to get access to all the features.
- {vision_agent-0.2.111 → vision_agent-0.2.113}/PKG-INFO +1 -1
- {vision_agent-0.2.111 → vision_agent-0.2.113}/pyproject.toml +1 -1
- {vision_agent-0.2.111 → vision_agent-0.2.113}/vision_agent/agent/agent_utils.py +3 -8
- {vision_agent-0.2.111 → vision_agent-0.2.113}/vision_agent/agent/vision_agent_coder.py +39 -23
- {vision_agent-0.2.111 → vision_agent-0.2.113}/vision_agent/agent/vision_agent_coder_prompts.py +9 -7
- {vision_agent-0.2.111 → vision_agent-0.2.113}/vision_agent/agent/vision_agent_prompts.py +11 -10
- {vision_agent-0.2.111 → vision_agent-0.2.113}/vision_agent/tools/__init__.py +12 -7
- {vision_agent-0.2.111 → vision_agent-0.2.113}/vision_agent/tools/meta_tools.py +0 -1
- {vision_agent-0.2.111 → vision_agent-0.2.113}/vision_agent/tools/tool_utils.py +33 -3
- {vision_agent-0.2.111 → vision_agent-0.2.113}/vision_agent/tools/tools.py +357 -111
- {vision_agent-0.2.111 → vision_agent-0.2.113}/vision_agent/utils/execute.py +0 -1
- {vision_agent-0.2.111 → vision_agent-0.2.113}/vision_agent/utils/image_utils.py +52 -0
- {vision_agent-0.2.111 → vision_agent-0.2.113}/LICENSE +0 -0
- {vision_agent-0.2.111 → vision_agent-0.2.113}/README.md +0 -0
- {vision_agent-0.2.111 → vision_agent-0.2.113}/vision_agent/__init__.py +0 -0
- {vision_agent-0.2.111 → vision_agent-0.2.113}/vision_agent/agent/__init__.py +0 -0
- {vision_agent-0.2.111 → vision_agent-0.2.113}/vision_agent/agent/agent.py +0 -0
- {vision_agent-0.2.111 → vision_agent-0.2.113}/vision_agent/agent/vision_agent.py +0 -0
- {vision_agent-0.2.111 → vision_agent-0.2.113}/vision_agent/clients/__init__.py +0 -0
- {vision_agent-0.2.111 → vision_agent-0.2.113}/vision_agent/clients/http.py +0 -0
- {vision_agent-0.2.111 → vision_agent-0.2.113}/vision_agent/clients/landing_public_api.py +1 -1
- {vision_agent-0.2.111 → vision_agent-0.2.113}/vision_agent/fonts/__init__.py +0 -0
- {vision_agent-0.2.111 → vision_agent-0.2.113}/vision_agent/fonts/default_font_ch_en.ttf +0 -0
- {vision_agent-0.2.111 → vision_agent-0.2.113}/vision_agent/lmm/__init__.py +0 -0
- {vision_agent-0.2.111 → vision_agent-0.2.113}/vision_agent/lmm/lmm.py +0 -0
- {vision_agent-0.2.111 → vision_agent-0.2.113}/vision_agent/lmm/types.py +0 -0
- {vision_agent-0.2.111 → vision_agent-0.2.113}/vision_agent/tools/prompts.py +0 -0
- {vision_agent-0.2.111 → vision_agent-0.2.113}/vision_agent/tools/tools_types.py +0 -0
- {vision_agent-0.2.111 → vision_agent-0.2.113}/vision_agent/utils/__init__.py +0 -0
- {vision_agent-0.2.111 → vision_agent-0.2.113}/vision_agent/utils/exceptions.py +0 -0
- {vision_agent-0.2.111 → vision_agent-0.2.113}/vision_agent/utils/sim.py +0 -0
- {vision_agent-0.2.111 → vision_agent-0.2.113}/vision_agent/utils/type_defs.py +0 -0
- {vision_agent-0.2.111 → vision_agent-0.2.113}/vision_agent/utils/video.py +0 -0
@@ -4,14 +4,13 @@ import sys
|
|
4
4
|
from typing import Any, Dict
|
5
5
|
|
6
6
|
logging.basicConfig(stream=sys.stdout)
|
7
|
-
_LOGGER = logging.getLogger(__name__)
|
8
7
|
|
9
8
|
|
10
9
|
def extract_json(json_str: str) -> Dict[str, Any]:
|
11
10
|
try:
|
11
|
+
json_str = json_str.replace("\n", " ")
|
12
12
|
json_dict = json.loads(json_str)
|
13
13
|
except json.JSONDecodeError:
|
14
|
-
input_json_str = json_str
|
15
14
|
if "```json" in json_str:
|
16
15
|
json_str = json_str[json_str.find("```json") + len("```json") :]
|
17
16
|
json_str = json_str[: json_str.find("```")]
|
@@ -19,12 +18,8 @@ def extract_json(json_str: str) -> Dict[str, Any]:
|
|
19
18
|
json_str = json_str[json_str.find("```") + len("```") :]
|
20
19
|
# get the last ``` not one from an intermediate string
|
21
20
|
json_str = json_str[: json_str.find("}```")]
|
22
|
-
|
23
|
-
|
24
|
-
except json.JSONDecodeError as e:
|
25
|
-
error_msg = f"Could not extract JSON from the given str: {json_str}.\nFunction input:\n{input_json_str}"
|
26
|
-
_LOGGER.exception(error_msg)
|
27
|
-
raise ValueError(error_msg) from e
|
21
|
+
|
22
|
+
json_dict = json.loads(json_str)
|
28
23
|
return json_dict # type: ignore
|
29
24
|
|
30
25
|
|
@@ -4,6 +4,7 @@ import logging
|
|
4
4
|
import os
|
5
5
|
import sys
|
6
6
|
import tempfile
|
7
|
+
from json import JSONDecodeError
|
7
8
|
from pathlib import Path
|
8
9
|
from typing import Any, Callable, Dict, List, Optional, Sequence, Tuple, Union, cast
|
9
10
|
|
@@ -86,8 +87,8 @@ def format_memory(memory: List[Dict[str, str]]) -> str:
|
|
86
87
|
def format_plans(plans: Dict[str, Any]) -> str:
|
87
88
|
plan_str = ""
|
88
89
|
for k, v in plans.items():
|
89
|
-
plan_str += f"{k}
|
90
|
-
plan_str += "-" + "\n-".join([e
|
90
|
+
plan_str += "\n" + f"{k}: {v['thoughts']}\n"
|
91
|
+
plan_str += " -" + "\n -".join([e for e in v["instructions"]])
|
91
92
|
|
92
93
|
return plan_str
|
93
94
|
|
@@ -127,7 +128,11 @@ def write_plans(
|
|
127
128
|
|
128
129
|
user_request = chat[-1]["content"]
|
129
130
|
context = USER_REQ.format(user_request=user_request)
|
130
|
-
prompt = PLAN.format(
|
131
|
+
prompt = PLAN.format(
|
132
|
+
context=context,
|
133
|
+
tool_desc=tool_desc,
|
134
|
+
feedback=working_memory,
|
135
|
+
)
|
131
136
|
chat[-1]["content"] = prompt
|
132
137
|
return extract_json(model(chat, stream=False)) # type: ignore
|
133
138
|
|
@@ -228,13 +233,11 @@ def pick_plan(
|
|
228
233
|
"status": "completed" if tool_output.success else "failed",
|
229
234
|
}
|
230
235
|
)
|
231
|
-
tool_output_str =
|
232
|
-
if len(tool_output.logs.stdout) > 0:
|
233
|
-
tool_output_str = tool_output.logs.stdout[0]
|
236
|
+
tool_output_str = tool_output.text().strip()
|
234
237
|
|
235
238
|
if verbosity == 2:
|
236
239
|
_print_code("Code and test after attempted fix:", code)
|
237
|
-
_LOGGER.info(f"Code execution result after
|
240
|
+
_LOGGER.info(f"Code execution result after attempt {count}")
|
238
241
|
|
239
242
|
count += 1
|
240
243
|
|
@@ -251,7 +254,21 @@ def pick_plan(
|
|
251
254
|
tool_output=tool_output_str[:20_000],
|
252
255
|
)
|
253
256
|
chat[-1]["content"] = prompt
|
254
|
-
|
257
|
+
|
258
|
+
count = 0
|
259
|
+
best_plan = None
|
260
|
+
while best_plan is None and count < max_retries:
|
261
|
+
try:
|
262
|
+
best_plan = extract_json(model(chat, stream=False)) # type: ignore
|
263
|
+
except JSONDecodeError as e:
|
264
|
+
_LOGGER.exception(
|
265
|
+
f"Error while extracting JSON during picking best plan {str(e)}"
|
266
|
+
)
|
267
|
+
pass
|
268
|
+
count += 1
|
269
|
+
|
270
|
+
if best_plan is None:
|
271
|
+
best_plan = {"best_plan": list(plans.keys())[0]}
|
255
272
|
|
256
273
|
if verbosity >= 1:
|
257
274
|
_LOGGER.info(f"Best plan:\n{best_plan}")
|
@@ -525,7 +542,7 @@ def _print_code(title: str, code: str, test: Optional[str] = None) -> None:
|
|
525
542
|
|
526
543
|
|
527
544
|
def retrieve_tools(
|
528
|
-
plans: Dict[str,
|
545
|
+
plans: Dict[str, Dict[str, Any]],
|
529
546
|
tool_recommender: Sim,
|
530
547
|
log_progress: Callable[[Dict[str, Any]], None],
|
531
548
|
verbosity: int = 0,
|
@@ -542,8 +559,8 @@ def retrieve_tools(
|
|
542
559
|
tool_lists: Dict[str, List[Dict[str, str]]] = {}
|
543
560
|
for k, plan in plans.items():
|
544
561
|
tool_lists[k] = []
|
545
|
-
for task in plan:
|
546
|
-
tools = tool_recommender.top_k(task
|
562
|
+
for task in plan["instructions"]:
|
563
|
+
tools = tool_recommender.top_k(task, k=2, thresh=0.3)
|
547
564
|
tool_info.extend([e["doc"] for e in tools])
|
548
565
|
tool_desc.extend([e["desc"] for e in tools])
|
549
566
|
tool_lists[k].extend(
|
@@ -661,6 +678,7 @@ class VisionAgentCoder(Agent):
|
|
661
678
|
chat: List[Message],
|
662
679
|
test_multi_plan: bool = True,
|
663
680
|
display_visualization: bool = False,
|
681
|
+
customized_tool_names: Optional[List[str]] = None,
|
664
682
|
) -> Dict[str, Any]:
|
665
683
|
"""Chat with VisionAgentCoder and return intermediate information regarding the
|
666
684
|
task.
|
@@ -676,6 +694,8 @@ class VisionAgentCoder(Agent):
|
|
676
694
|
with the first plan.
|
677
695
|
display_visualization (bool): If True, it opens a new window locally to
|
678
696
|
show the image(s) created by visualization code (if there is any).
|
697
|
+
customized_tool_names (List[str]): A list of customized tools for agent to pick and use.
|
698
|
+
If not provided, default to full tool set from vision_agent.tools.
|
679
699
|
|
680
700
|
Returns:
|
681
701
|
Dict[str, Any]: A dictionary containing the code, test, test result, plan,
|
@@ -729,7 +749,9 @@ class VisionAgentCoder(Agent):
|
|
729
749
|
)
|
730
750
|
plans = write_plans(
|
731
751
|
int_chat,
|
732
|
-
T.
|
752
|
+
T.get_tool_descriptions_by_names(
|
753
|
+
customized_tool_names, T.FUNCTION_TOOLS, T.UTIL_TOOLS # type: ignore
|
754
|
+
),
|
733
755
|
format_memory(working_memory),
|
734
756
|
self.planner,
|
735
757
|
)
|
@@ -737,18 +759,10 @@ class VisionAgentCoder(Agent):
|
|
737
759
|
if self.verbosity >= 1:
|
738
760
|
for p in plans:
|
739
761
|
# tabulate will fail if the keys are not the same for all elements
|
740
|
-
p_fixed = [
|
741
|
-
{
|
742
|
-
"instructions": (
|
743
|
-
e["instructions"] if "instructions" in e else ""
|
744
|
-
)
|
745
|
-
}
|
746
|
-
for e in plans[p]
|
747
|
-
]
|
762
|
+
p_fixed = [{"instructions": e} for e in plans[p]["instructions"]]
|
748
763
|
_LOGGER.info(
|
749
764
|
f"\n{tabulate(tabular_data=p_fixed, headers='keys', tablefmt='mixed_grid', maxcolwidths=_MAX_TABULATE_COL_WIDTH)}"
|
750
765
|
)
|
751
|
-
|
752
766
|
tool_infos = retrieve_tools(
|
753
767
|
plans,
|
754
768
|
self.tool_recommender,
|
@@ -793,13 +807,15 @@ class VisionAgentCoder(Agent):
|
|
793
807
|
)
|
794
808
|
|
795
809
|
if self.verbosity >= 1:
|
810
|
+
plan_i_fixed = [{"instructions": e} for e in plan_i["instructions"]]
|
796
811
|
_LOGGER.info(
|
797
|
-
f"Picked best plan:\n{tabulate(tabular_data=
|
812
|
+
f"Picked best plan:\n{tabulate(tabular_data=plan_i_fixed, headers='keys', tablefmt='mixed_grid', maxcolwidths=_MAX_TABULATE_COL_WIDTH)}"
|
798
813
|
)
|
799
814
|
|
800
815
|
results = write_and_test_code(
|
801
816
|
chat=[{"role": c["role"], "content": c["content"]} for c in int_chat],
|
802
|
-
plan="\n
|
817
|
+
plan=f"\n{plan_i['thoughts']}\n-"
|
818
|
+
+ "\n-".join([e for e in plan_i["instructions"]]),
|
803
819
|
tool_info=tool_info,
|
804
820
|
tool_output=tool_output_str,
|
805
821
|
tool_utils=T.UTILITIES_DOCSTRING,
|
{vision_agent-0.2.111 → vision_agent-0.2.113}/vision_agent/agent/vision_agent_coder_prompts.py
RENAMED
@@ -30,18 +30,19 @@ PLAN = """
|
|
30
30
|
|
31
31
|
**Instructions**:
|
32
32
|
1. Based on the context and tools you have available, create a plan of subtasks to achieve the user request.
|
33
|
-
2. Output three different plans each utilize a different strategy or
|
33
|
+
2. Output three different plans each utilize a different strategy or set of tools.
|
34
34
|
|
35
35
|
Output a list of jsons in the following format
|
36
36
|
|
37
37
|
```json
|
38
38
|
{{
|
39
39
|
"plan1":
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
40
|
+
{{
|
41
|
+
"thoughts": str # your thought process for choosing this plan
|
42
|
+
"instructions": [
|
43
|
+
str # what you should do in this task associated with a tool
|
44
|
+
]
|
45
|
+
}},
|
45
46
|
"plan2": ...,
|
46
47
|
"plan3": ...
|
47
48
|
}}
|
@@ -127,7 +128,8 @@ PICK_PLAN = """
|
|
127
128
|
|
128
129
|
**Instructions**:
|
129
130
|
1. Given the plans, image, and tool outputs, decide which plan is the best to achieve the user request.
|
130
|
-
2.
|
131
|
+
2. Try solving the problem yourself given the image and pick the plan that matches your solution the best.
|
132
|
+
3. Output a JSON object with the following format:
|
131
133
|
{{
|
132
134
|
"thoughts": str # your thought process for choosing the best plan
|
133
135
|
"best_plan": str # the best plan you have chosen
|
@@ -15,7 +15,7 @@ This is the documentation for the different actions you can take:
|
|
15
15
|
**Examples**:
|
16
16
|
Here is an example of how you can interact with a user and Actions to complete a task:
|
17
17
|
--- START EXAMPLES ---
|
18
|
-
[Current directory: /workspace
|
18
|
+
[Current directory: /example/workspace]
|
19
19
|
{examples}
|
20
20
|
--- END EXAMPLES ---
|
21
21
|
|
@@ -27,16 +27,17 @@ Here is an example of how you can interact with a user and Actions to complete a
|
|
27
27
|
Here is the current conversation so far:
|
28
28
|
--- START CONVERSATION ---
|
29
29
|
[Current directory: {dir}]
|
30
|
+
|
30
31
|
{conversation}
|
31
32
|
"""
|
32
33
|
|
33
34
|
EXAMPLES_CODE1 = """
|
34
35
|
USER: Can you detect the dogs in this image? Media name dog.jpg
|
35
36
|
|
36
|
-
AGENT: {"thoughts": "I will use the generate_vision_code to detect the dogs in the image.", "response": "<execute_python>generate_vision_code('/workspace/
|
37
|
+
AGENT: {"thoughts": "I will use the generate_vision_code to detect the dogs in the image.", "response": "<execute_python>generate_vision_code('/example/workspace/dog_detector.py', 'Can you write code to detect dogs in this image?', media=['/example/workspace/dog.jpg'])</execute_python>", "let_user_respond": false}
|
37
38
|
|
38
39
|
OBSERVATION:
|
39
|
-
[File /workspace/
|
40
|
+
[File /example/workspace/dog_detector.py]
|
40
41
|
0|from vision_agent.tools import load_image, owl_v2
|
41
42
|
1|def detect_dogs(image_path: str):
|
42
43
|
2| image = load_image(image_path)
|
@@ -44,7 +45,7 @@ OBSERVATION:
|
|
44
45
|
4| return dogs
|
45
46
|
[End of file]
|
46
47
|
|
47
|
-
AGENT: {"thoughts": "I have generated the code to detect the dogs in the image, I must now run the code to get the output.", "response": "<execute_python>from dog_detector import detect_dogs\n print(detect_dogs('/workspace/
|
48
|
+
AGENT: {"thoughts": "I have generated the code to detect the dogs in the image, I must now run the code to get the output.", "response": "<execute_python>from dog_detector import detect_dogs\n print(detect_dogs('/example/workspace/dog.jpg'))</execute_python>", "let_user_respond": false}
|
48
49
|
|
49
50
|
OBSERVATION:
|
50
51
|
----- stdout -----
|
@@ -55,10 +56,10 @@ AGENT: {"thoughts": "Two dogs are detected, I will show this to the user and ask
|
|
55
56
|
|
56
57
|
USER: The the image only has one dog, can you fix this?
|
57
58
|
|
58
|
-
AGENT: {"thoughts": "I will use edit_vision_code to fix the code to detect only one dog. The lower score is 0.23 so I will set the threshold above that.", "response": "<execute_python>edit_vision_code('/workspace/
|
59
|
+
AGENT: {"thoughts": "I will use edit_vision_code to fix the code to detect only one dog. The lower score is 0.23 so I will set the threshold above that.", "response": "<execute_python>edit_vision_code('/example/workspace/dog_detector.py', ['Can you write code to detect dogs in this image?', 'Can you increase the threshold to 0.24?'], media=['/example/workspace/dog.jpg'])</execute_python>", "let_user_respond": false}
|
59
60
|
|
60
61
|
OBSERVATION:
|
61
|
-
[File /workspace/
|
62
|
+
[File /example/workspace/dog_detector.py]
|
62
63
|
0|from vision_agent.tools import load_image, owl_v2
|
63
64
|
1|def detect_dogs(image_path: str):
|
64
65
|
2| image = load_image(image_path)
|
@@ -66,7 +67,7 @@ OBSERVATION:
|
|
66
67
|
4| return dogs
|
67
68
|
[End of file]
|
68
69
|
|
69
|
-
AGENT: {"thoughts": "I have edited the code to detect only one dog, I must now run the code to get the output.", "response": "<execute_python>from dog_detector import detect_dogs\n print(detect_dogs('/workspace/
|
70
|
+
AGENT: {"thoughts": "I have edited the code to detect only one dog, I must now run the code to get the output.", "response": "<execute_python>from dog_detector import detect_dogs\n print(detect_dogs('/example/workspace/dog.jpg'))</execute_python>", "let_user_respond": false}
|
70
71
|
|
71
72
|
OBSERVATION:
|
72
73
|
----- stdout -----
|
@@ -82,10 +83,10 @@ AGENT: {"thoughts": "The user has asked to count workers with helmets but has no
|
|
82
83
|
|
83
84
|
USER: Yes you can use workers.png
|
84
85
|
|
85
|
-
AGENT: {"thoughts": "I will use the generate_vision_code to count the workers with helmets in the image.", "response": "<execute_python>generate_vision_code('/workspace/
|
86
|
+
AGENT: {"thoughts": "I will use the generate_vision_code to count the workers with helmets in the image.", "response": "<execute_python>generate_vision_code('/example/workspace/code.py', 'Can you write code to count workers with helmets in this image?', media=['/example/workspace/workers.png'])</execute_python>", "let_user_respond": false}
|
86
87
|
|
87
88
|
OBSERVATION:
|
88
|
-
[File /workspace/
|
89
|
+
[File /example/workspace/code.py]
|
89
90
|
0|from vision_agent.tools import load_image, owl_v2, closest_box_distance
|
90
91
|
1|def count_workers_with_helmets(image_path: str):
|
91
92
|
2| image = load_image(image_path)
|
@@ -104,7 +105,7 @@ OBSERVATION:
|
|
104
105
|
15| return count
|
105
106
|
[End of file]
|
106
107
|
|
107
|
-
AGENT: {"thoughts": "I have generated the code to count the workers with helmets in the image, I must now run the code to get the output.", "response": "<execute_python>from code import count_workers_with_helmets\n print(count_workers_with_helmets('/workspace/
|
108
|
+
AGENT: {"thoughts": "I have generated the code to count the workers with helmets in the image, I must now run the code to get the output.", "response": "<execute_python>from code import count_workers_with_helmets\n print(count_workers_with_helmets('/example/workspace/workers.png'))</execute_python>", "let_user_respond": false}
|
108
109
|
|
109
110
|
OBSERVATION:
|
110
111
|
----- stdout -----
|
@@ -1,15 +1,16 @@
|
|
1
1
|
from typing import Callable, List, Optional
|
2
2
|
|
3
|
-
from .meta_tools import
|
4
|
-
META_TOOL_DOCSTRING,
|
5
|
-
)
|
3
|
+
from .meta_tools import META_TOOL_DOCSTRING
|
6
4
|
from .prompts import CHOOSE_PARAMS, SYSTEM_PROMPT
|
5
|
+
from .tool_utils import get_tool_descriptions_by_names
|
7
6
|
from .tools import (
|
7
|
+
FUNCTION_TOOLS,
|
8
8
|
TOOL_DESCRIPTIONS,
|
9
9
|
TOOL_DOCSTRING,
|
10
10
|
TOOLS,
|
11
11
|
TOOLS_DF,
|
12
12
|
TOOLS_INFO,
|
13
|
+
UTIL_TOOLS,
|
13
14
|
UTILITIES_DOCSTRING,
|
14
15
|
blip_image_caption,
|
15
16
|
clip,
|
@@ -19,16 +20,20 @@ from .tools import (
|
|
19
20
|
detr_segmentation,
|
20
21
|
dpt_hybrid_midas,
|
21
22
|
extract_frames,
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
23
|
+
florence2_image_caption,
|
24
|
+
florence2_object_detection,
|
25
|
+
florence2_ocr,
|
26
|
+
florence2_roberta_vqa,
|
27
|
+
florence2_sam2_image,
|
28
|
+
florence2_sam2_video,
|
26
29
|
generate_pose_image,
|
27
30
|
generate_soft_edge_image,
|
28
31
|
get_tool_documentation,
|
29
32
|
git_vqa_v2,
|
30
33
|
grounding_dino,
|
31
34
|
grounding_sam,
|
35
|
+
ixc25_image_vqa,
|
36
|
+
ixc25_video_vqa,
|
32
37
|
load_image,
|
33
38
|
loca_visual_prompt_counting,
|
34
39
|
loca_zero_shot_counting,
|
@@ -8,7 +8,6 @@ from vision_agent.lmm.types import Message
|
|
8
8
|
from vision_agent.tools.tool_utils import get_tool_documentation
|
9
9
|
from vision_agent.tools.tools import TOOL_DESCRIPTIONS
|
10
10
|
|
11
|
-
|
12
11
|
# These tools are adapted from SWE-Agent https://github.com/princeton-nlp/SWE-agent
|
13
12
|
|
14
13
|
CURRENT_FILE = None
|
@@ -1,7 +1,7 @@
|
|
1
1
|
import inspect
|
2
2
|
import logging
|
3
3
|
import os
|
4
|
-
from typing import Any, Callable, Dict, List, MutableMapping, Optional
|
4
|
+
from typing import Any, Callable, Dict, List, MutableMapping, Optional, Tuple
|
5
5
|
|
6
6
|
import pandas as pd
|
7
7
|
from IPython.display import display
|
@@ -31,6 +31,7 @@ class ToolCallTrace(BaseModel):
|
|
31
31
|
def send_inference_request(
|
32
32
|
payload: Dict[str, Any],
|
33
33
|
endpoint_name: str,
|
34
|
+
files: Optional[List[Tuple[Any, ...]]] = None,
|
34
35
|
v2: bool = False,
|
35
36
|
metadata_payload: Optional[Dict[str, Any]] = None,
|
36
37
|
) -> Dict[str, Any]:
|
@@ -50,7 +51,7 @@ def send_inference_request(
|
|
50
51
|
response={},
|
51
52
|
error=None,
|
52
53
|
)
|
53
|
-
headers = {"
|
54
|
+
headers = {"apikey": _LND_API_KEY}
|
54
55
|
if "TOOL_ENDPOINT_AUTH" in os.environ:
|
55
56
|
headers["Authorization"] = os.environ["TOOL_ENDPOINT_AUTH"]
|
56
57
|
headers.pop("apikey")
|
@@ -60,7 +61,11 @@ def send_inference_request(
|
|
60
61
|
num_retry=3,
|
61
62
|
headers=headers,
|
62
63
|
)
|
63
|
-
|
64
|
+
|
65
|
+
if files is not None:
|
66
|
+
res = session.post(url, data=payload, files=files)
|
67
|
+
else:
|
68
|
+
res = session.post(url, json=payload)
|
64
69
|
if res.status_code != 200:
|
65
70
|
tool_call_trace.error = Error(
|
66
71
|
name="RemoteToolCallFailed",
|
@@ -137,6 +142,31 @@ def get_tool_descriptions(funcs: List[Callable[..., Any]]) -> str:
|
|
137
142
|
return descriptions
|
138
143
|
|
139
144
|
|
145
|
+
def get_tool_descriptions_by_names(
|
146
|
+
tool_name: Optional[List[str]],
|
147
|
+
funcs: List[Callable[..., Any]],
|
148
|
+
util_funcs: List[
|
149
|
+
Callable[..., Any]
|
150
|
+
], # util_funcs will always be added to the list of functions
|
151
|
+
) -> str:
|
152
|
+
if tool_name is None:
|
153
|
+
return get_tool_descriptions(funcs + util_funcs)
|
154
|
+
|
155
|
+
invalid_names = [
|
156
|
+
name for name in tool_name if name not in {func.__name__ for func in funcs}
|
157
|
+
]
|
158
|
+
|
159
|
+
if invalid_names:
|
160
|
+
raise ValueError(f"Invalid customized tool names: {', '.join(invalid_names)}")
|
161
|
+
|
162
|
+
filtered_funcs = (
|
163
|
+
funcs
|
164
|
+
if not tool_name
|
165
|
+
else [func for func in funcs if func.__name__ in tool_name]
|
166
|
+
)
|
167
|
+
return get_tool_descriptions(filtered_funcs + util_funcs)
|
168
|
+
|
169
|
+
|
140
170
|
def get_tools_df(funcs: List[Callable[..., Any]]) -> pd.DataFrame:
|
141
171
|
data: Dict[str, List[str]] = {"desc": [], "doc": []}
|
142
172
|
|