vision-agent 0.2.10__py3-none-any.whl → 0.2.22__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,396 @@
1
+ import json
2
+ import logging
3
+ from pathlib import Path
4
+ from typing import Any, Callable, Dict, List, Mapping, Optional, Tuple, Union
5
+
6
+ import pandas as pd
7
+ from rich.console import Console
8
+ from rich.syntax import Syntax
9
+ from tabulate import tabulate
10
+
11
+ from vision_agent.agent import Agent
12
+ from vision_agent.agent.vision_agent_v2_prompt import (
13
+ CODE,
14
+ CODE_SYS_MSG,
15
+ DEBUG,
16
+ DEBUG_EXAMPLE,
17
+ DEBUG_SYS_MSG,
18
+ PLAN,
19
+ PREV_CODE_CONTEXT,
20
+ PREV_CODE_CONTEXT_WITH_REFLECTION,
21
+ TEST,
22
+ USER_REQ_CONTEXT,
23
+ USER_REQ_SUBTASK_CONTEXT,
24
+ USER_REQ_SUBTASK_WM_CONTEXT,
25
+ )
26
+ from vision_agent.llm import LLM, OpenAILLM
27
+ from vision_agent.tools.tools_v2 import TOOL_DESCRIPTIONS, TOOLS_DF
28
+ from vision_agent.utils import Execute, Sim
29
+
30
+ logging.basicConfig(level=logging.INFO)
31
+ _LOGGER = logging.getLogger(__name__)
32
+ _MAX_TABULATE_COL_WIDTH = 80
33
+ _CONSOLE = Console()
34
+
35
+
36
def build_working_memory(working_memory: Mapping[str, List[str]]) -> Sim:
    """Turn per-key working memory into a ``Sim`` index.

    Every key of ``working_memory`` becomes one row: the key itself is stored
    in the ``desc`` column and the key's entries, joined with newlines, are
    stored in the ``doc`` column.

    Args:
        working_memory: Mapping from a description string to the list of
            memory entries recorded under it.

    Returns:
        A ``Sim`` built from the resulting DataFrame with ``sim_key="desc"``.
    """
    descriptions = list(working_memory.keys())
    documents = ["\n".join(entries) for entries in working_memory.values()]
    frame = pd.DataFrame({"desc": descriptions, "doc": documents})  # type: ignore
    return Sim(frame, sim_key="desc")
43
+
44
+
45
def extract_code(code: str) -> str:
    """Extract Python source from an LLM response.

    If the response contains a ```` ```python ```` fence, everything after the
    first such fence and before the next ```` ``` ```` is returned. A leading
    ``"python\\n"`` language tag is stripped as well.

    Args:
        code: Raw model output, optionally wrapping the code in a fenced block.

    Returns:
        The extracted code, or the input (minus a leading ``python`` tag) when
        no fence is present.
    """
    fence = "```python"
    start = code.find(fence)
    if start != -1:
        code = code[start + len(fence) :]
        end = code.find("```")
        # A response may omit the closing fence (e.g. when truncated). Slicing
        # with find()'s -1 would silently drop the last character, so only
        # cut when the closing fence is actually present.
        if end != -1:
            code = code[:end]
    if code.startswith("python\n"):
        code = code[len("python\n") :]
    return code
52
+
53
+
54
def extract_json(json_str: str) -> Dict[str, Any]:
    """Parse a JSON object from an LLM response.

    The string is first parsed as-is. If that fails, the JSON is looked for
    inside a ```` ```json ```` fence, or failing that inside a plain
    ```` ``` ```` fence, and the fenced content is parsed instead.

    Args:
        json_str: Raw model output containing JSON, optionally fenced.

    Returns:
        The decoded JSON value.

    Raises:
        json.JSONDecodeError: If no parsable JSON could be extracted.
    """
    try:
        json_dict = json.loads(json_str)
    except json.JSONDecodeError:
        if "```json" in json_str:
            json_str = json_str[json_str.find("```json") + len("```json") :]
            end = json_str.find("```")
            # Tolerate a missing closing fence instead of slicing with -1,
            # which would drop the final character.
            if end != -1:
                json_str = json_str[:end]
        elif "```" in json_str:
            json_str = json_str[json_str.find("```") + len("```") :]
            # Prefer the fence that directly follows a closing brace, not one
            # from an intermediate string; keep that brace in the payload.
            end = json_str.find("}```")
            if end != -1:
                json_str = json_str[: end + 1]
            else:
                # Closing fence on its own line (or absent altogether).
                end = json_str.rfind("```")
                if end != -1:
                    json_str = json_str[:end]
        json_dict = json.loads(json_str)
    return json_dict  # type: ignore
67
+
68
+
69
def write_plan(
    chat: List[Dict[str, str]],
    plan: Optional[List[Dict[str, Any]]],
    tool_desc: str,
    model: LLM,
) -> Tuple[str, List[Dict[str, Any]]]:
    """Ask the planner model for a plan of subtasks.

    The content of the last user message is wrapped in the ``PLAN`` prompt
    (together with the current plan and the tool descriptions) and the chat is
    sent to ``model``. The reply is parsed as JSON.

    Args:
        chat: Conversation so far; the last message must have role ``"user"``.
            The list and its messages are not modified.
        plan: The current plan, if any; it is rendered into the prompt with
            ``str()``.
        tool_desc: Description of the available tools.
        model: LLM used for planning.

    Returns:
        A ``(user_req, plan)`` tuple taken from the ``"user_req"`` and
        ``"plan"`` keys of the model's JSON reply.

    Raises:
        ValueError: If ``chat`` is empty or its last message is not from the
            user.
    """
    # Get last user request
    if not chat or chat[-1]["role"] != "user":
        raise ValueError("Last chat message must be from the user.")
    user_requirements = chat[-1]["content"]

    context = USER_REQ_CONTEXT.format(user_requirement=user_requirements)
    prompt = PLAN.format(context=context, plan=str(plan), tool_desc=tool_desc)

    # Send a copy whose last message carries the planning prompt, so the
    # caller's conversation history keeps the original user text.
    planning_chat = [*chat[:-1], {**chat[-1], "content": prompt}]
    new_plan = extract_json(model.chat(planning_chat))
    return new_plan["user_req"], new_plan["plan"]
85
+
86
+
87
def write_code(
    user_req: str,
    subtask: str,
    working_memory: str,
    tool_info: str,
    code: str,
    model: LLM,
) -> str:
    """Ask ``model`` to write code for a subtask.

    Args:
        user_req: The user requirement.
        subtask: Instruction for the current subtask.
        working_memory: Text placed in the ``working_memory`` slot of the
            context prompt.
        tool_info: Documentation of the tools suggested for this subtask.
        code: Code from the previous task, shown to the model.
        model: LLM used to write the code.

    Returns:
        The code extracted from the model's reply.
    """
    subtask_context = USER_REQ_SUBTASK_WM_CONTEXT.format(
        user_requirement=user_req, working_memory=working_memory, subtask=subtask
    )
    user_prompt = CODE.format(
        context=subtask_context, tool_info=tool_info, code=code
    )
    reply = model.chat(
        [
            {"role": "system", "content": CODE_SYS_MSG},
            {"role": "user", "content": user_prompt},
        ]
    )
    return extract_code(reply)
108
+
109
+
110
def write_test(
    user_req: str, subtask: str, tool_info: str, _: str, code: str, model: LLM
) -> str:
    """Ask ``model`` to write test code for previously written code.

    NOTE(review): the positional order here differs from ``write_code``:
    ``tool_info`` is the third parameter and the fourth is ignored, whereas
    ``write_code`` takes the working memory third and ``tool_info`` fourth.
    Callers that invoke both through one positional call should check which
    value ends up in ``tool_info``.

    Args:
        user_req: The user requirement.
        subtask: Instruction for the current subtask.
        tool_info: Text placed in the ``tool_info`` slot of the ``TEST`` prompt.
        _: Unused.
        code: The code to test.
        model: LLM used to write the test.

    Returns:
        The test code extracted from the model's reply.
    """
    test_context = USER_REQ_SUBTASK_CONTEXT.format(
        user_requirement=user_req, subtask=subtask
    )
    user_prompt = TEST.format(context=test_context, tool_info=tool_info, code=code)
    reply = model.chat(
        [
            {"role": "system", "content": CODE_SYS_MSG},
            {"role": "user", "content": user_prompt},
        ]
    )
    return extract_code(reply)
126
+
127
+
128
def debug_code(
    user_req: str,
    subtask: str,
    retrieved_ltm: str,
    working_memory: str,
    model: LLM,
) -> Tuple[str, str]:
    """Ask ``model`` to reflect on failed code and return an improved version.

    While the request is made, ``model.kwargs["response_format"]`` (when the
    model has a ``kwargs`` attribute) is set to JSON-object mode. Afterwards
    the previous value is restored, even if the request or parsing fails.

    Args:
        user_req: The user requirement.
        subtask: Instruction for the current subtask.
        retrieved_ltm: Text placed in the ``working_memory`` slot of the
            context prompt.
        working_memory: Previous implementations and outputs, placed in the
            ``previous_impl`` slot of the ``DEBUG`` prompt.
        model: LLM used for debugging.

    Returns:
        An ``(improved_code, reflection)`` tuple taken from the
        ``"improved_impl"`` and ``"reflection"`` keys of the JSON reply.
    """
    prompt = DEBUG.format(
        debug_example=DEBUG_EXAMPLE,
        context=USER_REQ_SUBTASK_WM_CONTEXT.format(
            user_requirement=user_req,
            subtask=subtask,
            working_memory=retrieved_ltm,
        ),
        previous_impl=working_memory,
    )
    messages = [
        {"role": "system", "content": DEBUG_SYS_MSG},
        {"role": "user", "content": prompt},
    ]

    # Make debug model output JSON, remembering any existing setting so it can
    # be restored rather than unconditionally deleted.
    missing = object()
    has_kwargs = hasattr(model, "kwargs")
    previous_format: Any = missing
    if has_kwargs:
        previous_format = model.kwargs.get("response_format", missing)
        model.kwargs["response_format"] = {"type": "json_object"}
    try:
        code_and_ref = extract_json(model.chat(messages))
    finally:
        # Restore the model's configuration even when the call or the JSON
        # parsing raises, so a shared model is not left stuck in JSON mode.
        if has_kwargs:
            if previous_format is missing:
                model.kwargs.pop("response_format", None)
            else:
                model.kwargs["response_format"] = previous_format
    return extract_code(code_and_ref["improved_impl"]), code_and_ref["reflection"]
155
+
156
+
157
def write_and_exec_code(
    user_req: str,
    subtask: str,
    orig_code: str,
    code_writer_call: Callable[..., str],
    model: LLM,
    tool_info: str,
    exec: Execute,
    retrieved_ltm: str,
    max_retry: int = 3,
    verbosity: int = 0,
) -> Tuple[bool, str, str, Dict[str, List[str]]]:
    """Write code for a subtask, run it, and debug it until it succeeds.

    The code is produced by ``code_writer_call`` and executed with
    ``exec.run_isolation``. While execution fails and fewer than ``max_retry``
    debugging rounds have been made, the failed attempt is recorded in the
    working memory and ``debug_code`` is asked for an improved version.

    Args:
        user_req: The user requirement.
        subtask: Instruction for the current subtask; also the working-memory
            key.
        orig_code: Previously written code handed to the writer.
        code_writer_call: Called positionally as ``(user_req, subtask,
            retrieved_ltm, tool_info, orig_code, model)``.
        model: LLM used for writing and debugging.
        tool_info: Documentation of the tools suggested for this subtask.
        exec: Executor whose ``run_isolation`` returns ``(success, result)``.
        retrieved_ltm: Retrieved long-term memory text.
        max_retry: Maximum number of debugging rounds.
        verbosity: When ``2``, the code and results are printed/logged.

    Returns:
        ``(success, code, result, working_memory)`` for the last attempt.
    """

    def _show(source: str) -> None:
        # Pretty-print the code at the most verbose level only.
        if verbosity == 2:
            _CONSOLE.print(
                Syntax(source, "python", theme="gruvbox-dark", line_numbers=True)
            )

    reflection = ""
    working_memory: Dict[str, List[str]] = {}

    code = code_writer_call(
        user_req, subtask, retrieved_ltm, tool_info, orig_code, model
    )
    success, result = exec.run_isolation(code)
    _show(code)
    # NOTE(review): indentation was not recoverable from the diff view; the log
    # calls are assumed to sit inside the ``verbosity == 2`` checks - confirm.
    if verbosity == 2:
        _LOGGER.info(f"\tCode success: {success}, result: {str(result)}")

    attempts = 0
    while attempts < max_retry and not success:
        history = working_memory.setdefault(subtask, [])

        # Record the failed attempt, with the reflection that led to it if any.
        if reflection:
            entry = PREV_CODE_CONTEXT_WITH_REFLECTION.format(
                code=code, result=result, reflection=reflection
            )
        else:
            entry = PREV_CODE_CONTEXT.format(code=code, result=result)
        history.append(entry)

        code, reflection = debug_code(
            user_req, subtask, retrieved_ltm, "\n".join(history), model
        )
        success, result = exec.run_isolation(code)
        attempts += 1
        _show(code)
        if verbosity == 2:
            _LOGGER.info(f"\tDebugging reflection: {reflection}, result: {result}")

        # NOTE(review): assumed to be inside the retry loop - confirm.
        if success:
            history.append(
                PREV_CODE_CONTEXT_WITH_REFLECTION.format(
                    reflection=reflection, code=code, result=result
                )
            )

    return success, code, result, working_memory
215
+
216
+
217
def run_plan(
    user_req: str,
    plan: List[Dict[str, Any]],
    coder: LLM,
    exec: Execute,
    code: str,
    tool_recommender: Sim,
    long_term_memory: Optional[Sim] = None,
    verbosity: int = 0,
) -> Tuple[str, str, List[Dict[str, Any]], Dict[str, List[str]]]:
    """Execute the not-yet-successful tasks of a plan in order.

    For each task the recommended tool docs (and, if available, one long-term
    memory entry) are retrieved, code or test code is written and executed,
    and the outcome is stored on the task under ``"success"``, ``"result"``
    and ``"code"``. Processing stops at the first task that fails.

    Args:
        user_req: The user requirement.
        plan: List of task dicts; each needs ``"instruction"`` and ``"type"``.
            Tasks are updated in place.
        coder: LLM used to write and debug code.
        exec: Code executor.
        code: Latest working code to build on.
        tool_recommender: ``Sim`` used to look up tool docs per instruction.
        long_term_memory: Optional ``Sim`` queried for one entry per task.
        verbosity: When ``1``, each task's code and outcome are printed/logged.

    Returns:
        ``(current_code, current_test, plan, working_memory)``.
    """

    def _write_test_adapter(
        user_req_: str,
        subtask_: str,
        retrieved_ltm_: str,
        tool_info_: str,
        code_: str,
        model_: LLM,
    ) -> str:
        # write_and_exec_code calls its writer positionally in write_code's
        # order (..., retrieved_ltm, tool_info, ...), but write_test expects
        # tool_info third and ignores the fourth argument. Reorder so the tool
        # documentation actually reaches the test prompt.
        return write_test(
            user_req_, subtask_, tool_info_, retrieved_ltm_, code_, model_
        )

    active_plan = [e for e in plan if "success" not in e or not e["success"]]
    current_code = code
    current_test = ""
    retrieved_ltm = ""
    working_memory: Dict[str, List[str]] = {}

    for task in active_plan:
        _LOGGER.info(
            f"""
{tabulate(tabular_data=[task], headers="keys", tablefmt="mixed_grid", maxcolwidths=_MAX_TABULATE_COL_WIDTH)}"""
        )
        tool_info = "\n".join(
            [e["doc"] for e in tool_recommender.top_k(task["instruction"])]
        )

        if long_term_memory is not None:
            retrieved_ltm = "\n".join(
                [e["doc"] for e in long_term_memory.top_k(task["instruction"], 1)]
            )

        is_code_task = task["type"] == "code"
        success, code, result, working_memory_i = write_and_exec_code(
            user_req,
            task["instruction"],
            current_code,
            write_code if is_code_task else _write_test_adapter,
            coder,
            tool_info,
            exec,
            retrieved_ltm,
            verbosity=verbosity,
        )
        if is_code_task:
            current_code = code
        else:
            current_test = code

        working_memory.update(working_memory_i)

        # NOTE(review): indentation was not recoverable from the diff view; the
        # log call is assumed to sit inside the ``verbosity == 1`` check.
        if verbosity == 1:
            _CONSOLE.print(
                Syntax(code, "python", theme="gruvbox-dark", line_numbers=True)
            )
            _LOGGER.info(f"\tCode success: {success} result: {str(result)}")

        task["success"] = success
        task["result"] = result
        task["code"] = code

        # Later tasks build on this one, so stop at the first failure.
        if not success:
            break

    return current_code, current_test, plan, working_memory
279
+
280
+
281
class VisionAgentV2(Agent):
    """Vision Agent is an AI agentic framework geared towards outputting Python code to
    solve vision tasks. It is inspired by MetaGPT's Data Interpreter
    https://arxiv.org/abs/2402.18679. Vision Agent has several key features to help it
    generate code:
    - A planner to generate a plan of tasks to solve a user requirement. The planner
    can output code tasks or test tasks, where test tasks are used to verify the code.
    - Automatic debugging, if a task fails, the agent will attempt to debug the code
    using the failed output to fix it.
    - A tool recommender to recommend tools to use for a given task. LLM performance
    on tool retrieval starts to decrease as you add more tools, tool retrieval helps
    keep the number of tools to choose from low.
    - Memory retrieval, the agent can remember previous iterations on tasks to help it
    with new tasks.
    - Dynamic replanning, the agent can ask for feedback and replan remaining tasks
    based off of that feedback.
    """

    def __init__(
        self,
        timeout: int = 600,
        tool_recommender: Optional[Sim] = None,
        long_term_memory: Optional[Sim] = None,
        verbosity: int = 0,
    ) -> None:
        """Create the agent.

        Args:
            timeout: Timeout passed to the code executor.
            tool_recommender: ``Sim`` used to recommend tools; defaults to one
                built from ``TOOLS_DF`` keyed on ``"desc"``.
            long_term_memory: Optional ``Sim`` of past experience; its
                DataFrame must have a ``"doc"`` column.
            verbosity: Verbosity level forwarded to plan execution.

        Raises:
            ValueError: If ``long_term_memory`` lacks a ``"doc"`` column.
        """
        self.planner = OpenAILLM(temperature=0.0, json_mode=True)
        self.coder = OpenAILLM(temperature=0.0)
        self.exec = Execute(timeout=timeout)
        if tool_recommender is None:
            self.tool_recommender = Sim(TOOLS_DF, sim_key="desc")
        else:
            self.tool_recommender = tool_recommender
        self.verbosity = verbosity
        self._working_memory: Dict[str, List[str]] = {}
        if long_term_memory is not None:
            if "doc" not in long_term_memory.df.columns:
                raise ValueError("Long term memory must have a 'doc' column.")
        # Stored even when None: chat_with_workflow always reads this attribute.
        self.long_term_memory = long_term_memory
        self.max_retries = 3
        if self.verbosity:
            _LOGGER.setLevel(logging.INFO)

    def __call__(
        self,
        input: Union[List[Dict[str, str]], str],
        image: Optional[Union[str, Path]] = None,
        plan: Optional[List[Dict[str, Any]]] = None,
    ) -> str:
        """Run the workflow and return only the generated code.

        Args:
            input: Either a chat history or a single user request string.
            image: Optional image path mentioned to the model.
            plan: Optional previous plan to continue from.

        Returns:
            The ``"code"`` entry of ``chat_with_workflow``'s result.
        """
        if isinstance(input, str):
            input = [{"role": "user", "content": input}]
        results = self.chat_with_workflow(input, image, plan)
        return results["code"]  # type: ignore

    def chat_with_workflow(
        self,
        chat: List[Dict[str, str]],
        image: Optional[Union[str, Path]] = None,
        plan: Optional[List[Dict[str, Any]]] = None,
    ) -> Dict[str, Any]:
        """Plan, write and execute code for the user's request.

        Args:
            chat: Chat history; must be non-empty. The caller's list and
                message dicts are not modified.
            image: Optional image path; its name is appended to every user
                message sent to the planner.
            plan: Optional previous plan; the code of its last successful task
                is used as the starting code.

        Returns:
            A dict with keys ``"code"``, ``"test"``, ``"success"``,
            ``"working_memory"`` and ``"plan"``.

        Raises:
            ValueError: If ``chat`` is empty.
        """
        if len(chat) == 0:
            raise ValueError("Input cannot be empty.")

        # Work on copies of the messages: the image name is appended below and
        # must not accumulate in the caller's history across repeated calls.
        chat = [dict(message) for message in chat]

        if image is not None:
            # append file names to all user messages
            for chat_i in chat:
                if chat_i["role"] == "user":
                    chat_i["content"] += f" Image name {image}"

        working_code = ""
        if plan is not None:
            # grab the latest working code from a previous plan
            for task in plan:
                if "success" in task and "code" in task and task["success"]:
                    working_code = task["code"]

        user_req, plan = write_plan(chat, plan, TOOL_DESCRIPTIONS, self.planner)
        _LOGGER.info(
            f"""Plan:
{tabulate(tabular_data=plan, headers="keys", tablefmt="mixed_grid", maxcolwidths=_MAX_TABULATE_COL_WIDTH)}"""
        )

        working_test = ""
        working_memory: Dict[str, List[str]] = {}
        success = False
        retries = 0

        # As written, the plan runs at most once: a failure breaks out to ask
        # the user for feedback, and a success ends the loop condition.
        while not success and retries < self.max_retries:
            working_code, working_test, plan, working_memory_i = run_plan(
                user_req,
                plan,
                self.coder,
                self.exec,
                working_code,
                self.tool_recommender,
                self.long_term_memory,
                self.verbosity,
            )
            success = all(task["success"] for task in plan)
            working_memory.update(working_memory_i)

            if not success:
                # return to user and request feedback
                break

            retries += 1

        return {
            "code": working_code,
            "test": working_test,
            "success": success,
            "working_memory": build_working_memory(working_memory),
            "plan": plan,
        }

    def log_progress(self, description: str) -> None:
        """No-op progress hook."""
        pass
@@ -0,0 +1,185 @@
1
+ USER_REQ_CONTEXT = """
2
+ ## User Requirement
3
+ {user_requirement}
4
+ """
5
+
6
+ USER_REQ_SUBTASK_CONTEXT = """
7
+ ## User Requirement
8
+ {user_requirement}
9
+
10
+ ## Current Subtask
11
+ {subtask}
12
+ """
13
+
14
+ USER_REQ_SUBTASK_WM_CONTEXT = """
15
+ ## User Requirement
16
+ {user_requirement}
17
+
18
+ ## Current Subtask
19
+ {subtask}
20
+
21
+ ## Previous Task
22
+ {working_memory}
23
+ """
24
+
25
+ PLAN = """
26
+ # Context
27
+ {context}
28
+
29
+ # Current Plan
30
+ {plan}
31
+
32
+ # Tools Available
33
+ {tool_desc}
34
+
35
+ # Task:
36
+ Based on the context and the tools you have available, write a plan of subtasks to achieve the user request that adhere to the following requirements:
37
+ - For each subtask, you should provide a short instruction on what to do. Ensure the subtasks are large enough to be meaningful, encompassing multiple lines of code.
38
+ - You do not need to have the agent rewrite any tool functionality you already have, you should instead instruct it to utilize one or more of those tools in each subtask.
39
+ - You can have agents either write coding tasks, to code some functionality or testing tasks to test previous functionality.
40
+ - If a current plan exists, examine each item in the plan to determine if it was successful. If there was an item that failed, i.e. 'success': False, then you should rewrite that item and all subsequent items to ensure that the rewritten plan is successful.
41
+
42
+ Output a list of jsons in the following format:
43
+
44
+ ```json
45
+ {{
46
+ "user_req": str, # "a summarized version of the user requirement"
47
+ "plan":
48
+ [
49
+ {{
50
+ "task_id": int, # "unique identifier for a task in plan, can be an ordinal"
51
+ "dependent_task_ids": list[int], # "ids of tasks prerequisite to this task"
52
+ "instruction": str, # "what you should do in this task, one short phrase or sentence"
53
+ "type": str, # "the type of the task, tasks can either be 'code' for coding tasks or 'test' for testing tasks"
54
+ }},
55
+ ...
56
+ ]
57
+ }}
58
+ ```
59
+ """
60
+
61
+
62
+ CODE_SYS_MSG = """You are an AI Python assistant. You need to help user to achieve their goal by implementing a function. Your code will be run in a jupyter notebook environment so don't use asyncio.run. Instead, use await if you need to call an async function. Do not use 'display' for showing images, instead use matplotlib or PIL."""
63
+
64
+
65
# Prompt for writing code for a subtask. Placeholders: {context}, {tool_info}
# and {code}. The example import line is shown to the model verbatim, so it
# must be valid Python.
CODE = """
# Context
{context}

# Tool Info for Current Subtask
{tool_info}

# Previous Code
{code}

# Constraints
- Write a function that accomplishes the 'User Requirement'. You are supplied code from a previous task under 'Previous Code', feel free to copy over that code into your own implementation if you need it.
- Always prioritize using pre-defined tools or code for the same functionality from 'Tool Info for Current Subtask'. You have access to all these tools through the `from vision_agent.tools.tools_v2 import *` import.
- You may receive previous trials and errors under 'Previous Task', this is code, output and reflections from previous tasks. You can use these to avoid running in to the same issues when writing your code.
- Write clean, readable, and well-documented code.

# Output
While some concise thoughts are helpful, code is absolutely required. If possible, execute your defined functions in the code output. Output code in the following format:
```python
from vision_agent.tools.tools_v2 import *

# your code goes here
```
"""
89
+
90
+
91
+ DEBUG_SYS_MSG = """You are an AI Python assistant. You will be given your previous implementation code of a task, runtime error results, and a hint to change the implementation appropriately. Your code will be run in a jupyter notebook environment. Write your full implementation."""
92
+
93
+
94
+ DEBUG_EXAMPLE = '''
95
+ [previous impl]:
96
+ ```python
97
+ def add(a: int, b: int) -> int:
98
+ """Given integers a and b, return the total value of a and b."""
99
+ return a - b
100
+ ```
101
+
102
+ [previous output]
103
+ Tests failed:
104
+ assert add(1, 2) == 3 # output: -1
105
+ assert add(1, 3) == 4 # output: -2
106
+
107
+ [reflection on previous impl]:
108
+ The implementation failed the test cases where the input integers are 1 and 2. The issue arises because the code does not add the two integers together, but instead subtracts the second integer from the first. To fix this issue, we should change the operator from `-` to `+` in the return statement. This will ensure that the function returns the correct output for the given input.
109
+
110
+ [improved impl]:
111
+ def add(a: int, b: int) -> int:
112
+ """Given integers a and b, return the total value of a and b."""
113
+ return a + b
114
+ '''
115
+
116
+
117
+ PREV_CODE_CONTEXT = """
118
+ [previous impl]
119
+ ```python
120
+ {code}
121
+ ```
122
+
123
+ [previous output]
124
+ {result}
125
+ """
126
+
127
+
128
+ PREV_CODE_CONTEXT_WITH_REFLECTION = """
129
+ [reflection on previous impl]
130
+ {reflection}
131
+
132
+ [new impl]
133
+ ```python
134
+ {code}
135
+ ```
136
+
137
+ [new output]
138
+ {result}
139
+
140
+ """
141
+
142
+ # don't need [previous impl] because it will come from PREV_CODE_CONTEXT or PREV_CODE_CONTEXT_WITH_REFLECTION
143
+ DEBUG = """
144
+ [example]
145
+ Here is an example of debugging with reflection.
146
+ {debug_example}
147
+ [/example]
148
+
149
+ [context]
150
+ {context}
151
+
152
+ {previous_impl}
153
+
154
+ [instruction]
155
+ Analyze your previous code and error in [context] step by step, provide me with improved method and code. Remember to follow [context] requirement. Because you are writing code in a jupyter notebook, you can run `!pip install` to install missing packages. Output a json following the format:
156
+ ```json
157
+ {{
158
+ "reflection": str = "Reflection on previous implementation",
159
+ "improved_impl": str = "Refined code after reflection.",
160
+ }}
161
+ ```
162
+ """
163
+
164
+
165
+ TEST = """
166
+ # Context
167
+ {context}
168
+
169
+ # Tool Info for Current Subtask
170
+ {tool_info}
171
+
172
+ # Code to Test
173
+ {code}
174
+
175
+ # Constraints
176
+ - Write code to test the functionality of the provided code according to the 'Current Subtask'. If you cannot test the code, then write code to visualize the result by calling the code.
177
+ - Always prioritize using pre-defined tools for the same functionality.
178
+ - Write clean, readable, and well-documented code.
179
+
180
+ # Output
181
+ While some concise thoughts are helpful, code is absolutely required. Always output one and only one code block in your response. Output code in the following format:
182
+ ```python
183
+ your code
184
+ ```
185
+ """
vision_agent/llm/llm.py CHANGED
@@ -34,9 +34,10 @@ class OpenAILLM(LLM):
34
34
 
35
35
  def __init__(
36
36
  self,
37
- model_name: str = "gpt-4-turbo",
37
+ model_name: str = "gpt-4o",
38
38
  api_key: Optional[str] = None,
39
39
  json_mode: bool = False,
40
+ system_prompt: Optional[str] = None,
40
41
  **kwargs: Any
41
42
  ):
42
43
  if not api_key:
@@ -45,22 +46,29 @@ class OpenAILLM(LLM):
45
46
  self.client = OpenAI(api_key=api_key)
46
47
 
47
48
  self.model_name = model_name
49
+ self.system_prompt = system_prompt
48
50
  self.kwargs = kwargs
49
51
  if json_mode:
50
52
  self.kwargs["response_format"] = {"type": "json_object"}
51
53
 
52
54
  def generate(self, prompt: str) -> str:
55
+ messages = []
56
+ if self.system_prompt:
57
+ messages.append({"role": "system", "content": self.system_prompt})
58
+ messages.append({"role": "user", "content": prompt})
59
+
53
60
  response = self.client.chat.completions.create(
54
61
  model=self.model_name,
55
- messages=[
56
- {"role": "user", "content": prompt},
57
- ],
62
+ messages=messages, # type: ignore
58
63
  **self.kwargs,
59
64
  )
60
65
 
61
66
  return cast(str, response.choices[0].message.content)
62
67
 
63
68
  def chat(self, chat: List[Dict[str, str]]) -> str:
69
+ if self.system_prompt and not any(msg["role"] == "system" for msg in chat):
70
+ chat.insert(0, {"role": "system", "content": self.system_prompt})
71
+
64
72
  response = self.client.chat.completions.create(
65
73
  model=self.model_name,
66
74
  messages=chat, # type: ignore
@@ -3,7 +3,7 @@ from .tools import ( # Counter,
3
3
  CLIP,
4
4
  OCR,
5
5
  TOOLS,
6
- BboxArea,
6
+ BboxStats,
7
7
  BboxIoU,
8
8
  BoxDistance,
9
9
  Crop,
@@ -13,6 +13,8 @@ from .tools import ( # Counter,
13
13
  GroundingSAM,
14
14
  ImageCaption,
15
15
  ImageQuestionAnswering,
16
+ MaskDistance,
17
+ ObjectDistance,
16
18
  SegArea,
17
19
  SegIoU,
18
20
  Tool,
@@ -0,0 +1,30 @@
1
+ import logging
2
+ import os
3
+ from typing import Any, Dict
4
+
5
+ import requests
6
+
7
+ from vision_agent.utils.type_defs import LandingaiAPIKey
8
+
9
+ _LOGGER = logging.getLogger(__name__)
10
+ _LND_API_KEY = LandingaiAPIKey().api_key
11
+ _LND_API_URL = "https://api.dev.landing.ai/v1/agent"
12
+
13
+
14
def _send_inference_request(
    payload: Dict[str, Any], endpoint_name: str
) -> Dict[str, Any]:
    """POST an inference request to the agent API and return its data.

    If the ``RUNTIME_TAG`` environment variable is set to a non-empty value it
    is sent along as ``"runtime_tag"``.

    Args:
        payload: JSON-serializable request body. It is not modified.
        endpoint_name: Model endpoint name appended to the API URL.

    Returns:
        The ``"data"`` entry of the JSON response.

    Raises:
        ValueError: If the response status code is not 200.
    """
    # Copy so the runtime tag does not leak into the caller's dict.
    body = dict(payload)
    if runtime_tag := os.environ.get("RUNTIME_TAG", ""):
        body["runtime_tag"] = runtime_tag
    # NOTE(review): no timeout is set, so this call can block indefinitely if
    # the service stalls; consider passing one once an acceptable bound is known.
    res = requests.post(
        f"{_LND_API_URL}/model/{endpoint_name}",
        headers={
            "Content-Type": "application/json",
            "apikey": _LND_API_KEY,
        },
        json=body,
    )
    if res.status_code != 200:
        _LOGGER.error(f"Request failed: {res.text}")
        raise ValueError(f"Request failed: {res.text}")
    return res.json()["data"]  # type: ignore