vision-agent 0.2.111__tar.gz → 0.2.113__tar.gz

Files changed (33)
  1. {vision_agent-0.2.111 → vision_agent-0.2.113}/PKG-INFO +1 -1
  2. {vision_agent-0.2.111 → vision_agent-0.2.113}/pyproject.toml +1 -1
  3. {vision_agent-0.2.111 → vision_agent-0.2.113}/vision_agent/agent/agent_utils.py +3 -8
  4. {vision_agent-0.2.111 → vision_agent-0.2.113}/vision_agent/agent/vision_agent_coder.py +39 -23
  5. {vision_agent-0.2.111 → vision_agent-0.2.113}/vision_agent/agent/vision_agent_coder_prompts.py +9 -7
  6. {vision_agent-0.2.111 → vision_agent-0.2.113}/vision_agent/agent/vision_agent_prompts.py +11 -10
  7. {vision_agent-0.2.111 → vision_agent-0.2.113}/vision_agent/tools/__init__.py +12 -7
  8. {vision_agent-0.2.111 → vision_agent-0.2.113}/vision_agent/tools/meta_tools.py +0 -1
  9. {vision_agent-0.2.111 → vision_agent-0.2.113}/vision_agent/tools/tool_utils.py +33 -3
  10. {vision_agent-0.2.111 → vision_agent-0.2.113}/vision_agent/tools/tools.py +357 -111
  11. {vision_agent-0.2.111 → vision_agent-0.2.113}/vision_agent/utils/execute.py +0 -1
  12. {vision_agent-0.2.111 → vision_agent-0.2.113}/vision_agent/utils/image_utils.py +52 -0
  13. {vision_agent-0.2.111 → vision_agent-0.2.113}/LICENSE +0 -0
  14. {vision_agent-0.2.111 → vision_agent-0.2.113}/README.md +0 -0
  15. {vision_agent-0.2.111 → vision_agent-0.2.113}/vision_agent/__init__.py +0 -0
  16. {vision_agent-0.2.111 → vision_agent-0.2.113}/vision_agent/agent/__init__.py +0 -0
  17. {vision_agent-0.2.111 → vision_agent-0.2.113}/vision_agent/agent/agent.py +0 -0
  18. {vision_agent-0.2.111 → vision_agent-0.2.113}/vision_agent/agent/vision_agent.py +0 -0
  19. {vision_agent-0.2.111 → vision_agent-0.2.113}/vision_agent/clients/__init__.py +0 -0
  20. {vision_agent-0.2.111 → vision_agent-0.2.113}/vision_agent/clients/http.py +0 -0
  21. {vision_agent-0.2.111 → vision_agent-0.2.113}/vision_agent/clients/landing_public_api.py +1 -1
  22. {vision_agent-0.2.111 → vision_agent-0.2.113}/vision_agent/fonts/__init__.py +0 -0
  23. {vision_agent-0.2.111 → vision_agent-0.2.113}/vision_agent/fonts/default_font_ch_en.ttf +0 -0
  24. {vision_agent-0.2.111 → vision_agent-0.2.113}/vision_agent/lmm/__init__.py +0 -0
  25. {vision_agent-0.2.111 → vision_agent-0.2.113}/vision_agent/lmm/lmm.py +0 -0
  26. {vision_agent-0.2.111 → vision_agent-0.2.113}/vision_agent/lmm/types.py +0 -0
  27. {vision_agent-0.2.111 → vision_agent-0.2.113}/vision_agent/tools/prompts.py +0 -0
  28. {vision_agent-0.2.111 → vision_agent-0.2.113}/vision_agent/tools/tools_types.py +0 -0
  29. {vision_agent-0.2.111 → vision_agent-0.2.113}/vision_agent/utils/__init__.py +0 -0
  30. {vision_agent-0.2.111 → vision_agent-0.2.113}/vision_agent/utils/exceptions.py +0 -0
  31. {vision_agent-0.2.111 → vision_agent-0.2.113}/vision_agent/utils/sim.py +0 -0
  32. {vision_agent-0.2.111 → vision_agent-0.2.113}/vision_agent/utils/type_defs.py +0 -0
  33. {vision_agent-0.2.111 → vision_agent-0.2.113}/vision_agent/utils/video.py +0 -0
--- vision_agent-0.2.111/PKG-INFO
+++ vision_agent-0.2.113/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: vision-agent
-Version: 0.2.111
+Version: 0.2.113
 Summary: Toolset for Vision Agent
 Author: Landing AI
 Author-email: dev@landing.ai
--- vision_agent-0.2.111/pyproject.toml
+++ vision_agent-0.2.113/pyproject.toml
@@ -4,7 +4,7 @@ build-backend = "poetry.core.masonry.api"
 
 [tool.poetry]
 name = "vision-agent"
-version = "0.2.111"
+version = "0.2.113"
 description = "Toolset for Vision Agent"
 authors = ["Landing AI <dev@landing.ai>"]
 readme = "README.md"
--- vision_agent-0.2.111/vision_agent/agent/agent_utils.py
+++ vision_agent-0.2.113/vision_agent/agent/agent_utils.py
@@ -4,14 +4,13 @@ import sys
 from typing import Any, Dict
 
 logging.basicConfig(stream=sys.stdout)
-_LOGGER = logging.getLogger(__name__)
 
 
 def extract_json(json_str: str) -> Dict[str, Any]:
     try:
+        json_str = json_str.replace("\n", " ")
         json_dict = json.loads(json_str)
     except json.JSONDecodeError:
-        input_json_str = json_str
         if "```json" in json_str:
             json_str = json_str[json_str.find("```json") + len("```json") :]
             json_str = json_str[: json_str.find("```")]
@@ -19,12 +18,8 @@ def extract_json(json_str: str) -> Dict[str, Any]:
             json_str = json_str[json_str.find("```") + len("```") :]
             # get the last ``` not one from an intermediate string
             json_str = json_str[: json_str.find("}```")]
-        try:
-            json_dict = json.loads(json_str)
-        except json.JSONDecodeError as e:
-            error_msg = f"Could not extract JSON from the given str: {json_str}.\nFunction input:\n{input_json_str}"
-            _LOGGER.exception(error_msg)
-            raise ValueError(error_msg) from e
+
+        json_dict = json.loads(json_str)
     return json_dict  # type: ignore
 
 
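The reworked `extract_json` strips newlines before the first parse, falls back to slicing out a fenced block, and now lets `json.JSONDecodeError` propagate instead of logging and raising `ValueError`. A minimal sketch of the new behavior, with hypothetical inputs:

```python
# Minimal sketch (hypothetical inputs): plain JSON and fenced ```json blocks
# both parse; an unparseable string now surfaces as json.JSONDecodeError,
# which callers such as pick_plan retry on (see below).
from vision_agent.agent.agent_utils import extract_json

print(extract_json('{"best_plan": "plan1"}'))         # {'best_plan': 'plan1'}
print(extract_json('Sure!\n```json\n{"a": 1}\n```'))  # {'a': 1}

try:
    extract_json("not json at all")
except Exception as e:
    print(type(e).__name__)  # JSONDecodeError
```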
--- vision_agent-0.2.111/vision_agent/agent/vision_agent_coder.py
+++ vision_agent-0.2.113/vision_agent/agent/vision_agent_coder.py
@@ -4,6 +4,7 @@ import logging
 import os
 import sys
 import tempfile
+from json import JSONDecodeError
 from pathlib import Path
 from typing import Any, Callable, Dict, List, Optional, Sequence, Tuple, Union, cast
 
@@ -86,8 +87,8 @@ def format_memory(memory: List[Dict[str, str]]) -> str:
 def format_plans(plans: Dict[str, Any]) -> str:
     plan_str = ""
     for k, v in plans.items():
-        plan_str += f"{k}:\n"
-        plan_str += "-" + "\n-".join([e["instructions"] for e in v])
+        plan_str += "\n" + f"{k}: {v['thoughts']}\n"
+        plan_str += " -" + "\n -".join([e for e in v["instructions"]])
 
     return plan_str
 
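The plan objects that `format_plans` (and `retrieve_tools` below) consume changed shape in this release: each plan is now a dict carrying `"thoughts"` plus a flat list of instruction strings, rather than a list of `{"instructions": ...}` dicts. A hypothetical instance:

```python
# Hypothetical plan dict in the new schema consumed by format_plans.
plans = {
    "plan1": {
        "thoughts": "owl_v2 is a good fit for open-vocabulary detection.",
        "instructions": [
            "Load the image with load_image",
            "Detect dogs with owl_v2",
        ],
    },
}

print(format_plans(plans))
# plan1: owl_v2 is a good fit for open-vocabulary detection.
#  -Load the image with load_image
#  -Detect dogs with owl_v2
```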
@@ -127,7 +128,11 @@ def write_plans(
 
     user_request = chat[-1]["content"]
     context = USER_REQ.format(user_request=user_request)
-    prompt = PLAN.format(context=context, tool_desc=tool_desc, feedback=working_memory)
+    prompt = PLAN.format(
+        context=context,
+        tool_desc=tool_desc,
+        feedback=working_memory,
+    )
     chat[-1]["content"] = prompt
     return extract_json(model(chat, stream=False))  # type: ignore
 
@@ -228,13 +233,11 @@ def pick_plan(
                 "status": "completed" if tool_output.success else "failed",
             }
         )
-        tool_output_str = ""
-        if len(tool_output.logs.stdout) > 0:
-            tool_output_str = tool_output.logs.stdout[0]
+        tool_output_str = tool_output.text().strip()
 
         if verbosity == 2:
             _print_code("Code and test after attempted fix:", code)
-            _LOGGER.info(f"Code execution result after attempte {count}")
+            _LOGGER.info(f"Code execution result after attempt {count}")
 
         count += 1
 
@@ -251,7 +254,21 @@ def pick_plan(
         tool_output=tool_output_str[:20_000],
     )
     chat[-1]["content"] = prompt
-    best_plan = extract_json(model(chat, stream=False))  # type: ignore
+
+    count = 0
+    best_plan = None
+    while best_plan is None and count < max_retries:
+        try:
+            best_plan = extract_json(model(chat, stream=False))  # type: ignore
+        except JSONDecodeError as e:
+            _LOGGER.exception(
+                f"Error while extracting JSON during picking best plan {str(e)}"
+            )
+            pass
+        count += 1
+
+    if best_plan is None:
+        best_plan = {"best_plan": list(plans.keys())[0]}
 
     if verbosity >= 1:
         _LOGGER.info(f"Best plan:\n{best_plan}")
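Since Python dicts preserve insertion order, the fallback `list(plans.keys())[0]` deterministically picks the first plan the planner emitted. A hypothetical illustration:

```python
# Hypothetical illustration of the fallback: if JSON extraction still fails
# after max_retries attempts, the first plan key wins (dicts preserve
# insertion order in Python 3.7+).
plans = {"plan1": {"thoughts": "...", "instructions": []},
         "plan2": {"thoughts": "...", "instructions": []}}
best_plan = {"best_plan": list(plans.keys())[0]}  # {'best_plan': 'plan1'}
```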
@@ -525,7 +542,7 @@ def _print_code(title: str, code: str, test: Optional[str] = None) -> None:
 
 
 def retrieve_tools(
-    plans: Dict[str, List[Dict[str, str]]],
+    plans: Dict[str, Dict[str, Any]],
     tool_recommender: Sim,
     log_progress: Callable[[Dict[str, Any]], None],
     verbosity: int = 0,
@@ -542,8 +559,8 @@ def retrieve_tools(
     tool_lists: Dict[str, List[Dict[str, str]]] = {}
     for k, plan in plans.items():
         tool_lists[k] = []
-        for task in plan:
-            tools = tool_recommender.top_k(task["instructions"], k=2, thresh=0.3)
+        for task in plan["instructions"]:
+            tools = tool_recommender.top_k(task, k=2, thresh=0.3)
             tool_info.extend([e["doc"] for e in tools])
             tool_desc.extend([e["desc"] for e in tools])
             tool_lists[k].extend(
@@ -661,6 +678,7 @@ class VisionAgentCoder(Agent):
         chat: List[Message],
         test_multi_plan: bool = True,
         display_visualization: bool = False,
+        customized_tool_names: Optional[List[str]] = None,
     ) -> Dict[str, Any]:
         """Chat with VisionAgentCoder and return intermediate information regarding the
         task.
@@ -676,6 +694,8 @@ class VisionAgentCoder(Agent):
                 with the first plan.
             display_visualization (bool): If True, it opens a new window locally to
                 show the image(s) created by visualization code (if there is any).
+            customized_tool_names (List[str]): A list of customized tools for the agent to pick and use.
+                If not provided, it defaults to the full tool set from vision_agent.tools.
 
         Returns:
             Dict[str, Any]: A dictionary containing the code, test, test result, plan,
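For callers, the new parameter restricts which tools the planner may draw on. A hedged usage sketch (the chat method shown above is `chat_with_workflow`; the import path, message format, `owl_v2` tool name, and media path are assumptions not shown in this diff):

```python
# Usage sketch for customized_tool_names; owl_v2 is assumed to be a
# registered function tool, and the media path is hypothetical.
from vision_agent.agent import VisionAgentCoder

agent = VisionAgentCoder()
result = agent.chat_with_workflow(
    [{"role": "user", "content": "Detect the dogs", "media": ["dog.jpg"]}],
    customized_tool_names=["owl_v2"],  # planner sees only owl_v2 + util tools
)
```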
@@ -729,7 +749,9 @@ class VisionAgentCoder(Agent):
             )
             plans = write_plans(
                 int_chat,
-                T.TOOL_DESCRIPTIONS,
+                T.get_tool_descriptions_by_names(
+                    customized_tool_names, T.FUNCTION_TOOLS, T.UTIL_TOOLS  # type: ignore
+                ),
                 format_memory(working_memory),
                 self.planner,
             )
@@ -737,18 +759,10 @@ class VisionAgentCoder(Agent):
             if self.verbosity >= 1:
                 for p in plans:
                     # tabulate will fail if the keys are not the same for all elements
-                    p_fixed = [
-                        {
-                            "instructions": (
-                                e["instructions"] if "instructions" in e else ""
-                            )
-                        }
-                        for e in plans[p]
-                    ]
+                    p_fixed = [{"instructions": e} for e in plans[p]["instructions"]]
                     _LOGGER.info(
                         f"\n{tabulate(tabular_data=p_fixed, headers='keys', tablefmt='mixed_grid', maxcolwidths=_MAX_TABULATE_COL_WIDTH)}"
                     )
-
             tool_infos = retrieve_tools(
                 plans,
                 self.tool_recommender,
@@ -793,13 +807,15 @@ class VisionAgentCoder(Agent):
             )
 
             if self.verbosity >= 1:
+                plan_i_fixed = [{"instructions": e} for e in plan_i["instructions"]]
                 _LOGGER.info(
-                    f"Picked best plan:\n{tabulate(tabular_data=plan_i, headers='keys', tablefmt='mixed_grid', maxcolwidths=_MAX_TABULATE_COL_WIDTH)}"
+                    f"Picked best plan:\n{tabulate(tabular_data=plan_i_fixed, headers='keys', tablefmt='mixed_grid', maxcolwidths=_MAX_TABULATE_COL_WIDTH)}"
                 )
 
             results = write_and_test_code(
                 chat=[{"role": c["role"], "content": c["content"]} for c in int_chat],
-                plan="\n-" + "\n-".join([e["instructions"] for e in plan_i]),
+                plan=f"\n{plan_i['thoughts']}\n-"
+                + "\n-".join([e for e in plan_i["instructions"]]),
                 tool_info=tool_info,
                 tool_output=tool_output_str,
                 tool_utils=T.UTILITIES_DOCSTRING,
--- vision_agent-0.2.111/vision_agent/agent/vision_agent_coder_prompts.py
+++ vision_agent-0.2.113/vision_agent/agent/vision_agent_coder_prompts.py
@@ -30,18 +30,19 @@ PLAN = """
 
 **Instructions**:
 1. Based on the context and tools you have available, create a plan of subtasks to achieve the user request.
-2. Output three different plans each utilize a different strategy or tool.
+2. Output three different plans each utilize a different strategy or set of tools.
 
 Output a list of jsons in the following format
 
 ```json
 {{
     "plan1":
-        [
-            {{
-                "instructions": str # what you should do in this task associated with a tool
-            }}
-        ],
+        {{
+            "thoughts": str # your thought process for choosing this plan
+            "instructions": [
+                str # what you should do in this task associated with a tool
+            ]
+        }},
     "plan2": ...,
     "plan3": ...
 }}
@@ -127,7 +128,8 @@ PICK_PLAN = """
 
 **Instructions**:
 1. Given the plans, image, and tool outputs, decide which plan is the best to achieve the user request.
-2. Output a JSON object with the following format:
+2. Try solving the problem yourself given the image and pick the plan that matches your solution the best.
+3. Output a JSON object with the following format:
 {{
     "thoughts": str # your thought process for choosing the best plan
     "best_plan": str # the best plan you have chosen
--- vision_agent-0.2.111/vision_agent/agent/vision_agent_prompts.py
+++ vision_agent-0.2.113/vision_agent/agent/vision_agent_prompts.py
@@ -15,7 +15,7 @@ This is the documentation for the different actions you can take:
 **Examples**:
 Here is an example of how you can interact with a user and Actions to complete a task:
 --- START EXAMPLES ---
-[Current directory: /workspace/test]
+[Current directory: /example/workspace]
 {examples}
 --- END EXAMPLES ---
 
@@ -27,16 +27,17 @@ Here is an example of how you can interact with a user and Actions to complete a
 Here is the current conversation so far:
 --- START CONVERSATION ---
 [Current directory: {dir}]
+
 {conversation}
 """
 
 EXAMPLES_CODE1 = """
 USER: Can you detect the dogs in this image? Media name dog.jpg
 
-AGENT: {"thoughts": "I will use the generate_vision_code to detect the dogs in the image.", "response": "<execute_python>generate_vision_code('/workspace/test/dog_detector.py', 'Can you write code to detect dogs in this image?', media=['/workspace/test/dog.jpg'])</execute_python>", "let_user_respond": false}
+AGENT: {"thoughts": "I will use the generate_vision_code to detect the dogs in the image.", "response": "<execute_python>generate_vision_code('/example/workspace/dog_detector.py', 'Can you write code to detect dogs in this image?', media=['/example/workspace/dog.jpg'])</execute_python>", "let_user_respond": false}
 
 OBSERVATION:
-[File /workspace/test/dog_detector.py]
+[File /example/workspace/dog_detector.py]
 0|from vision_agent.tools import load_image, owl_v2
 1|def detect_dogs(image_path: str):
 2|    image = load_image(image_path)
@@ -44,7 +45,7 @@ OBSERVATION:
 4|    return dogs
 [End of file]
 
-AGENT: {"thoughts": "I have generated the code to detect the dogs in the image, I must now run the code to get the output.", "response": "<execute_python>from dog_detector import detect_dogs\n print(detect_dogs('/workspace/test/dog.jpg'))</execute_python>", "let_user_respond": false}
+AGENT: {"thoughts": "I have generated the code to detect the dogs in the image, I must now run the code to get the output.", "response": "<execute_python>from dog_detector import detect_dogs\n print(detect_dogs('/example/workspace/dog.jpg'))</execute_python>", "let_user_respond": false}
 
 OBSERVATION:
 ----- stdout -----
@@ -55,10 +56,10 @@ AGENT: {"thoughts": "Two dogs are detected, I will show this to the user and ask
 
 USER: The the image only has one dog, can you fix this?
 
-AGENT: {"thoughts": "I will use edit_vision_code to fix the code to detect only one dog. The lower score is 0.23 so I will set the threshold above that.", "response": "<execute_python>edit_vision_code('/workspace/test/dog_detector.py', ['Can you write code to detect dogs in this image?', 'Can you increase the threshold to 0.24?'], media=['/workspace/test/dog.jpg'])</execute_python>", "let_user_respond": false}
+AGENT: {"thoughts": "I will use edit_vision_code to fix the code to detect only one dog. The lower score is 0.23 so I will set the threshold above that.", "response": "<execute_python>edit_vision_code('/example/workspace/dog_detector.py', ['Can you write code to detect dogs in this image?', 'Can you increase the threshold to 0.24?'], media=['/example/workspace/dog.jpg'])</execute_python>", "let_user_respond": false}
 
 OBSERVATION:
-[File /workspace/test/dog_detector.py]
+[File /example/workspace/dog_detector.py]
 0|from vision_agent.tools import load_image, owl_v2
 1|def detect_dogs(image_path: str):
 2|    image = load_image(image_path)
@@ -66,7 +67,7 @@ OBSERVATION:
 4|    return dogs
 [End of file]
 
-AGENT: {"thoughts": "I have edited the code to detect only one dog, I must now run the code to get the output.", "response": "<execute_python>from dog_detector import detect_dogs\n print(detect_dogs('/workspace/test/dog.jpg'))</execute_python>", "let_user_respond": false}
+AGENT: {"thoughts": "I have edited the code to detect only one dog, I must now run the code to get the output.", "response": "<execute_python>from dog_detector import detect_dogs\n print(detect_dogs('/example/workspace/dog.jpg'))</execute_python>", "let_user_respond": false}
 
 OBSERVATION:
 ----- stdout -----
@@ -82,10 +83,10 @@ AGENT: {"thoughts": "The user has asked to count workers with helmets but has no
 
 USER: Yes you can use workers.png
 
-AGENT: {"thoughts": "I will use the generate_vision_code to count the workers with helmets in the image.", "response": "<execute_python>generate_vision_code('/workspace/test/code.py', 'Can you write code to count workers with helmets in this image?', media=['/workspace/test/workers.png'])</execute_python>", "let_user_respond": false}
+AGENT: {"thoughts": "I will use the generate_vision_code to count the workers with helmets in the image.", "response": "<execute_python>generate_vision_code('/example/workspace/code.py', 'Can you write code to count workers with helmets in this image?', media=['/example/workspace/workers.png'])</execute_python>", "let_user_respond": false}
 
 OBSERVATION:
-[File /workspace/test/code.py]
+[File /example/workspace/code.py]
 0|from vision_agent.tools import load_image, owl_v2, closest_box_distance
 1|def count_workers_with_helmets(image_path: str):
 2|    image = load_image(image_path)
@@ -104,7 +105,7 @@ OBSERVATION:
 15|    return count
 [End of file]
 
-AGENT: {"thoughts": "I have generated the code to count the workers with helmets in the image, I must now run the code to get the output.", "response": "<execute_python>from code import count_workers_with_helmets\n print(count_workers_with_helmets('/workspace/test/workers.png'))</execute_python>", "let_user_respond": false}
+AGENT: {"thoughts": "I have generated the code to count the workers with helmets in the image, I must now run the code to get the output.", "response": "<execute_python>from code import count_workers_with_helmets\n print(count_workers_with_helmets('/example/workspace/workers.png'))</execute_python>", "let_user_respond": false}
 
 OBSERVATION:
 ----- stdout -----
--- vision_agent-0.2.111/vision_agent/tools/__init__.py
+++ vision_agent-0.2.113/vision_agent/tools/__init__.py
@@ -1,15 +1,16 @@
 from typing import Callable, List, Optional
 
-from .meta_tools import (
-    META_TOOL_DOCSTRING,
-)
+from .meta_tools import META_TOOL_DOCSTRING
 from .prompts import CHOOSE_PARAMS, SYSTEM_PROMPT
+from .tool_utils import get_tool_descriptions_by_names
 from .tools import (
+    FUNCTION_TOOLS,
     TOOL_DESCRIPTIONS,
     TOOL_DOCSTRING,
     TOOLS,
     TOOLS_DF,
     TOOLS_INFO,
+    UTIL_TOOLS,
     UTILITIES_DOCSTRING,
     blip_image_caption,
     clip,
@@ -19,16 +20,20 @@ from .tools import (
     detr_segmentation,
     dpt_hybrid_midas,
     extract_frames,
-    florencev2_image_caption,
-    florencev2_object_detection,
-    florencev2_roberta_vqa,
-    florencev2_ocr,
+    florence2_image_caption,
+    florence2_object_detection,
+    florence2_ocr,
+    florence2_roberta_vqa,
+    florence2_sam2_image,
+    florence2_sam2_video,
     generate_pose_image,
     generate_soft_edge_image,
     get_tool_documentation,
     git_vqa_v2,
     grounding_dino,
     grounding_sam,
+    ixc25_image_vqa,
+    ixc25_video_vqa,
     load_image,
     loca_visual_prompt_counting,
     loca_zero_shot_counting,
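Note the breaking rename here: every `florencev2_*` export becomes `florence2_*`, and new tools (`florence2_sam2_image`, `florence2_sam2_video`, `ixc25_image_vqa`, `ixc25_video_vqa`) are exported. A migration sketch for downstream code:

```python
# Migration sketch: imports that worked in 0.2.111 break in 0.2.113 and need
# the new florence2_* names.
# 0.2.111:
#   from vision_agent.tools import florencev2_ocr, florencev2_roberta_vqa
# 0.2.113:
from vision_agent.tools import florence2_ocr, florence2_roberta_vqa
```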
--- vision_agent-0.2.111/vision_agent/tools/meta_tools.py
+++ vision_agent-0.2.113/vision_agent/tools/meta_tools.py
@@ -8,7 +8,6 @@ from vision_agent.lmm.types import Message
 from vision_agent.tools.tool_utils import get_tool_documentation
 from vision_agent.tools.tools import TOOL_DESCRIPTIONS
 
-
 # These tools are adapted from SWE-Agent https://github.com/princeton-nlp/SWE-agent
 
 CURRENT_FILE = None
--- vision_agent-0.2.111/vision_agent/tools/tool_utils.py
+++ vision_agent-0.2.113/vision_agent/tools/tool_utils.py
@@ -1,7 +1,7 @@
 import inspect
 import logging
 import os
-from typing import Any, Callable, Dict, List, MutableMapping, Optional
+from typing import Any, Callable, Dict, List, MutableMapping, Optional, Tuple
 
 import pandas as pd
 from IPython.display import display
@@ -31,6 +31,7 @@ class ToolCallTrace(BaseModel):
 def send_inference_request(
     payload: Dict[str, Any],
     endpoint_name: str,
+    files: Optional[List[Tuple[Any, ...]]] = None,
     v2: bool = False,
     metadata_payload: Optional[Dict[str, Any]] = None,
 ) -> Dict[str, Any]:
@@ -50,7 +51,7 @@ def send_inference_request(
         response={},
         error=None,
     )
-    headers = {"Content-Type": "application/json", "apikey": _LND_API_KEY}
+    headers = {"apikey": _LND_API_KEY}
     if "TOOL_ENDPOINT_AUTH" in os.environ:
         headers["Authorization"] = os.environ["TOOL_ENDPOINT_AUTH"]
         headers.pop("apikey")
@@ -60,7 +61,11 @@ def send_inference_request(
         num_retry=3,
         headers=headers,
     )
-    res = session.post(url, json=payload)
+
+    if files is not None:
+        res = session.post(url, data=payload, files=files)
+    else:
+        res = session.post(url, json=payload)
     if res.status_code != 200:
         tool_call_trace.error = Error(
             name="RemoteToolCallFailed",
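Dropping the hard-coded `Content-Type` header above is what makes the new `files=` path work: with `json=` the underlying `requests` session sets `application/json` itself, while with `files=` it must set its own `multipart/form-data` boundary. A hedged sketch of the two call shapes (endpoint and field names are hypothetical):

```python
# Sketch of the two request paths (hypothetical endpoint and field names).
from vision_agent.tools.tool_utils import send_inference_request

# JSON body, as before:
send_inference_request({"prompt": "dog"}, "text-endpoint", v2=True)

# Multipart body: payload entries become form fields, files are uploaded.
with open("dog.jpg", "rb") as f:
    send_inference_request(
        {"prompt": "dog"},
        "image-endpoint",
        files=[("image", f.read())],
        v2=True,
    )
```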
@@ -137,6 +142,31 @@ def get_tool_descriptions(funcs: List[Callable[..., Any]]) -> str:
     return descriptions
 
 
+def get_tool_descriptions_by_names(
+    tool_name: Optional[List[str]],
+    funcs: List[Callable[..., Any]],
+    util_funcs: List[
+        Callable[..., Any]
+    ],  # util_funcs will always be added to the list of functions
+) -> str:
+    if tool_name is None:
+        return get_tool_descriptions(funcs + util_funcs)
+
+    invalid_names = [
+        name for name in tool_name if name not in {func.__name__ for func in funcs}
+    ]
+
+    if invalid_names:
+        raise ValueError(f"Invalid customized tool names: {', '.join(invalid_names)}")
+
+    filtered_funcs = (
+        funcs
+        if not tool_name
+        else [func for func in funcs if func.__name__ in tool_name]
+    )
+    return get_tool_descriptions(filtered_funcs + util_funcs)
+
+
 def get_tools_df(funcs: List[Callable[..., Any]]) -> pd.DataFrame:
     data: Dict[str, List[str]] = {"desc": [], "doc": []}
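A hedged usage sketch of the new helper (assuming `owl_v2` is among the exported `FUNCTION_TOOLS`): it validates names against `funcs`, always appends the utility tools, and falls back to the full set when the name list is `None` or empty.

```python
# Usage sketch; assumes owl_v2 is registered in FUNCTION_TOOLS.
from vision_agent.tools import FUNCTION_TOOLS, UTIL_TOOLS
from vision_agent.tools.tool_utils import get_tool_descriptions_by_names

# Restrict the planner's view to owl_v2 plus the always-included util tools.
desc = get_tool_descriptions_by_names(["owl_v2"], FUNCTION_TOOLS, UTIL_TOOLS)

# None (or an empty list) means the full tool set:
all_desc = get_tool_descriptions_by_names(None, FUNCTION_TOOLS, UTIL_TOOLS)

# Unknown names fail fast:
# get_tool_descriptions_by_names(["not_a_tool"], FUNCTION_TOOLS, UTIL_TOOLS)
# -> ValueError: Invalid customized tool names: not_a_tool
```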