vision-agent 0.2.85__tar.gz → 0.2.87__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (25)
  1. {vision_agent-0.2.85 → vision_agent-0.2.87}/PKG-INFO +1 -1
  2. {vision_agent-0.2.85 → vision_agent-0.2.87}/pyproject.toml +1 -1
  3. {vision_agent-0.2.85 → vision_agent-0.2.87}/vision_agent/agent/vision_agent.py +49 -40
  4. vision_agent-0.2.87/vision_agent/tools/tool_utils.py +95 -0
  5. {vision_agent-0.2.85 → vision_agent-0.2.87}/vision_agent/utils/exceptions.py +9 -0
  6. {vision_agent-0.2.85 → vision_agent-0.2.87}/vision_agent/utils/execute.py +11 -0
  7. vision_agent-0.2.85/vision_agent/tools/tool_utils.py +0 -67
  8. {vision_agent-0.2.85 → vision_agent-0.2.87}/LICENSE +0 -0
  9. {vision_agent-0.2.85 → vision_agent-0.2.87}/README.md +0 -0
  10. {vision_agent-0.2.85 → vision_agent-0.2.87}/vision_agent/__init__.py +0 -0
  11. {vision_agent-0.2.85 → vision_agent-0.2.87}/vision_agent/agent/__init__.py +0 -0
  12. {vision_agent-0.2.85 → vision_agent-0.2.87}/vision_agent/agent/agent.py +0 -0
  13. {vision_agent-0.2.85 → vision_agent-0.2.87}/vision_agent/agent/vision_agent_prompts.py +0 -0
  14. {vision_agent-0.2.85 → vision_agent-0.2.87}/vision_agent/fonts/__init__.py +0 -0
  15. {vision_agent-0.2.85 → vision_agent-0.2.87}/vision_agent/fonts/default_font_ch_en.ttf +0 -0
  16. {vision_agent-0.2.85 → vision_agent-0.2.87}/vision_agent/lmm/__init__.py +0 -0
  17. {vision_agent-0.2.85 → vision_agent-0.2.87}/vision_agent/lmm/lmm.py +0 -0
  18. {vision_agent-0.2.85 → vision_agent-0.2.87}/vision_agent/tools/__init__.py +0 -0
  19. {vision_agent-0.2.85 → vision_agent-0.2.87}/vision_agent/tools/prompts.py +0 -0
  20. {vision_agent-0.2.85 → vision_agent-0.2.87}/vision_agent/tools/tools.py +0 -0
  21. {vision_agent-0.2.85 → vision_agent-0.2.87}/vision_agent/utils/__init__.py +0 -0
  22. {vision_agent-0.2.85 → vision_agent-0.2.87}/vision_agent/utils/image_utils.py +0 -0
  23. {vision_agent-0.2.85 → vision_agent-0.2.87}/vision_agent/utils/sim.py +0 -0
  24. {vision_agent-0.2.85 → vision_agent-0.2.87}/vision_agent/utils/type_defs.py +0 -0
  25. {vision_agent-0.2.85 → vision_agent-0.2.87}/vision_agent/utils/video.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: vision-agent
3
- Version: 0.2.85
3
+ Version: 0.2.87
4
4
  Summary: Toolset for Vision Agent
5
5
  Author: Landing AI
6
6
  Author-email: dev@landing.ai
@@ -4,7 +4,7 @@ build-backend = "poetry.core.masonry.api"
4
4
 
5
5
  [tool.poetry]
6
6
  name = "vision-agent"
7
- version = "0.2.85"
7
+ version = "0.2.87"
8
8
  description = "Toolset for Vision Agent"
9
9
  authors = ["Landing AI <dev@landing.ai>"]
10
10
  readme = "README.md"
@@ -172,19 +172,25 @@ def write_plans(
172
172
  def pick_plan(
173
173
  chat: List[Message],
174
174
  plans: Dict[str, Any],
175
- tool_info: str,
175
+ tool_infos: Dict[str, str],
176
176
  model: LMM,
177
177
  code_interpreter: CodeInterpreter,
178
+ test_multi_plan: bool,
178
179
  verbosity: int = 0,
179
180
  max_retries: int = 3,
180
- ) -> Tuple[str, str]:
181
+ ) -> Tuple[Any, str, str]:
182
+ if not test_multi_plan:
183
+ k = list(plans.keys())[0]
184
+ return plans[k], tool_infos[k], ""
185
+
186
+ all_tool_info = tool_infos["all"]
181
187
  chat = copy.deepcopy(chat)
182
188
  if chat[-1]["role"] != "user":
183
189
  raise ValueError("Last chat message must be from the user.")
184
190
 
185
191
  plan_str = format_plans(plans)
186
192
  prompt = TEST_PLANS.format(
187
- docstring=tool_info, plans=plan_str, previous_attempts=""
193
+ docstring=all_tool_info, plans=plan_str, previous_attempts=""
188
194
  )
189
195
 
190
196
  code = extract_code(model(prompt))
@@ -201,7 +207,7 @@ def pick_plan(
201
207
  count = 0
202
208
  while (not tool_output.success or tool_output_str == "") and count < max_retries:
203
209
  prompt = TEST_PLANS.format(
204
- docstring=tool_info,
210
+ docstring=all_tool_info,
205
211
  plans=plan_str,
206
212
  previous_attempts=PREVIOUS_FAILED.format(
207
213
  code=code, error=tool_output.text()
@@ -237,7 +243,17 @@ def pick_plan(
237
243
  best_plan = extract_json(model(chat))
238
244
  if verbosity >= 1:
239
245
  _LOGGER.info(f"Best plan:\n{best_plan}")
240
- return best_plan["best_plan"], tool_output_str
246
+
247
+ plan = best_plan["best_plan"]
248
+ if plan in plans and plan in tool_infos:
249
+ return plans[plan], tool_infos[plan], tool_output_str
250
+ else:
251
+ if verbosity >= 1:
252
+ _LOGGER.warning(
253
+ f"Best plan {plan} not found in plans or tool_infos. Using the first plan and tool info."
254
+ )
255
+ k = list(plans.keys())[0]
256
+ return plans[k], tool_infos[k], tool_output_str
241
257
 
242
258
 
243
259
  @traceable
@@ -524,6 +540,13 @@ def retrieve_tools(
524
540
  )
525
541
  all_tools = "\n\n".join(set(tool_info))
526
542
  tool_lists_unique["all"] = all_tools
543
+ log_progress(
544
+ {
545
+ "type": "tools",
546
+ "status": "completed",
547
+ "payload": tool_lists[list(plans.keys())[0]],
548
+ }
549
+ )
527
550
  return tool_lists_unique
528
551
 
529
552
 
@@ -692,6 +715,14 @@ class VisionAgent(Agent):
692
715
  self.planner,
693
716
  )
694
717
 
718
+ self.log_progress(
719
+ {
720
+ "type": "plans",
721
+ "status": "completed",
722
+ "payload": plans[list(plans.keys())[0]],
723
+ }
724
+ )
725
+
695
726
  if self.verbosity >= 1 and test_multi_plan:
696
727
  for p in plans:
697
728
  _LOGGER.info(
@@ -705,47 +736,25 @@ class VisionAgent(Agent):
705
736
  self.verbosity,
706
737
  )
707
738
 
708
- if test_multi_plan:
709
- best_plan, tool_output_str = pick_plan(
710
- int_chat,
711
- plans,
712
- tool_infos["all"],
713
- self.coder,
714
- code_interpreter,
715
- verbosity=self.verbosity,
716
- )
717
- else:
718
- best_plan = list(plans.keys())[0]
719
- tool_output_str = ""
720
-
721
- if best_plan in plans and best_plan in tool_infos:
722
- plan_i = plans[best_plan]
723
- tool_info = tool_infos[best_plan]
724
- else:
725
- if self.verbosity >= 1:
726
- _LOGGER.warning(
727
- f"Best plan {best_plan} not found in plans or tool_infos. Using the first plan and tool info."
728
- )
729
- k = list(plans.keys())[0]
730
- plan_i = plans[k]
731
- tool_info = tool_infos[k]
732
-
733
- self.log_progress(
734
- {
735
- "type": "plans",
736
- "status": "completed",
737
- "payload": plan_i,
738
- }
739
+ best_plan, best_tool_info, tool_output_str = pick_plan(
740
+ int_chat,
741
+ plans,
742
+ tool_infos,
743
+ self.coder,
744
+ code_interpreter,
745
+ test_multi_plan,
746
+ verbosity=self.verbosity,
739
747
  )
748
+
740
749
  if self.verbosity >= 1:
741
750
  _LOGGER.info(
742
- f"Picked best plan:\n{tabulate(tabular_data=plan_i, headers='keys', tablefmt='mixed_grid', maxcolwidths=_MAX_TABULATE_COL_WIDTH)}"
751
+ f"Picked best plan:\n{tabulate(tabular_data=best_plan, headers='keys', tablefmt='mixed_grid', maxcolwidths=_MAX_TABULATE_COL_WIDTH)}"
743
752
  )
744
753
 
745
754
  results = write_and_test_code(
746
755
  chat=[{"role": c["role"], "content": c["content"]} for c in int_chat],
747
- plan="\n-" + "\n-".join([e["instructions"] for e in plan_i]),
748
- tool_info=tool_info,
756
+ plan="\n-" + "\n-".join([e["instructions"] for e in best_plan]),
757
+ tool_info=best_tool_info,
749
758
  tool_output=tool_output_str,
750
759
  tool_utils=T.UTILITIES_DOCSTRING,
751
760
  working_memory=working_memory,
@@ -761,7 +770,7 @@ class VisionAgent(Agent):
761
770
  code = cast(str, results["code"])
762
771
  test = cast(str, results["test"])
763
772
  working_memory.extend(results["working_memory"]) # type: ignore
764
- plan.append({"code": code, "test": test, "plan": plan_i})
773
+ plan.append({"code": code, "test": test, "plan": best_plan})
765
774
 
766
775
  execution_result = cast(Execution, results["test_result"])
767
776
  self.log_progress(
@@ -0,0 +1,95 @@
1
+ import logging
2
+ import os
3
+ from typing import Any, Dict, MutableMapping, Optional
4
+
5
+ from IPython.display import display
6
+ from pydantic import BaseModel
7
+ from requests import Session
8
+ from requests.adapters import HTTPAdapter
9
+ from urllib3.util.retry import Retry
10
+
11
+ from vision_agent.utils.exceptions import RemoteToolCallFailed
12
+ from vision_agent.utils.execute import Error, MimeType
13
+ from vision_agent.utils.type_defs import LandingaiAPIKey
14
+
15
+ _LOGGER = logging.getLogger(__name__)
16
+ _LND_API_KEY = LandingaiAPIKey().api_key
17
+ _LND_API_URL = "https://api.staging.landing.ai/v1/agent"
18
+
19
+
20
+ class ToolCallTrace(BaseModel):
21
+ endpoint_url: str
22
+ request: MutableMapping[str, Any]
23
+ response: MutableMapping[str, Any]
24
+ error: Optional[Error]
25
+
26
+
27
+ def send_inference_request(
28
+ payload: Dict[str, Any], endpoint_name: str
29
+ ) -> Dict[str, Any]:
30
+ try:
31
+ if runtime_tag := os.environ.get("RUNTIME_TAG", ""):
32
+ payload["runtime_tag"] = runtime_tag
33
+
34
+ url = f"{_LND_API_URL}/model/{endpoint_name}"
35
+ if "TOOL_ENDPOINT_URL" in os.environ:
36
+ url = os.environ["TOOL_ENDPOINT_URL"]
37
+
38
+ tool_call_trace = ToolCallTrace(
39
+ endpoint_url=url,
40
+ request=payload,
41
+ response={},
42
+ error=None,
43
+ )
44
+ headers = {"Content-Type": "application/json", "apikey": _LND_API_KEY}
45
+ if "TOOL_ENDPOINT_AUTH" in os.environ:
46
+ headers["Authorization"] = os.environ["TOOL_ENDPOINT_AUTH"]
47
+ headers.pop("apikey")
48
+
49
+ session = _create_requests_session(
50
+ url=url,
51
+ num_retry=3,
52
+ headers=headers,
53
+ )
54
+ res = session.post(url, json=payload)
55
+ if res.status_code != 200:
56
+ tool_call_trace.error = Error(
57
+ name="RemoteToolCallFailed",
58
+ value=f"{res.status_code} - {res.text}",
59
+ traceback_raw=[],
60
+ )
61
+ _LOGGER.error(f"Request failed: {res.status_code} {res.text}")
62
+ raise RemoteToolCallFailed(payload["tool"], res.status_code, res.text)
63
+
64
+ resp = res.json()
65
+ tool_call_trace.response = resp
66
+ # TODO: consider making the response schema the same between below two sources
67
+ return resp if "TOOL_ENDPOINT_AUTH" in os.environ else resp["data"] # type: ignore
68
+ finally:
69
+ trace = tool_call_trace.model_dump()
70
+ trace["type"] = "tool_call"
71
+ display({MimeType.APPLICATION_JSON: trace}, raw=True)
72
+
73
+
74
+ def _create_requests_session(
75
+ url: str, num_retry: int, headers: Dict[str, str]
76
+ ) -> Session:
77
+ """Create a requests session with retry"""
78
+ session = Session()
79
+ retries = Retry(
80
+ total=num_retry,
81
+ backoff_factor=2,
82
+ raise_on_redirect=True,
83
+ raise_on_status=False,
84
+ allowed_methods=["GET", "POST", "PUT"],
85
+ status_forcelist=[
86
+ 408, # Request Timeout
87
+ 429, # Too Many Requests (ie. rate limiter).
88
+ 502, # Bad Gateway
89
+ 503, # Service Unavailable (include cloud circuit breaker)
90
+ 504, # Gateway Timeout
91
+ ],
92
+ )
93
+ session.mount(url, HTTPAdapter(max_retries=retries if num_retry > 0 else 0))
94
+ session.headers.update(headers)
95
+ return session
@@ -13,6 +13,15 @@ For more information, see https://landing-ai.github.io/landingai-python/landinga
13
13
  return self.message
14
14
 
15
15
 
16
+ class RemoteToolCallFailed(Exception):
17
+ """Exception raised when an error occurs during a tool call."""
18
+
19
+ def __init__(self, tool_name: str, status_code: int, message: str):
20
+ self.message = (
21
+ f"""Tool call ({tool_name}) failed due to {status_code} - {message}"""
22
+ )
23
+
24
+
16
25
  class RemoteSandboxError(Exception):
17
26
  """Exception related to remote sandbox."""
18
27
 
@@ -277,6 +277,17 @@ class Error(BaseModel):
277
277
  text = "\n".join(self.traceback_raw)
278
278
  return _remove_escape_and_color_codes(text) if return_clean_text else text
279
279
 
280
+ @staticmethod
281
+ def from_exception(e: Exception) -> "Error":
282
+ """
283
+ Creates an Error object from an exception.
284
+ """
285
+ return Error(
286
+ name=e.__class__.__name__,
287
+ value=str(e),
288
+ traceback_raw=traceback.format_exception(type(e), e, e.__traceback__),
289
+ )
290
+
280
291
 
281
292
  class Execution(BaseModel):
282
293
  """
@@ -1,67 +0,0 @@
1
- import logging
2
- import os
3
- from typing import Any, Dict
4
-
5
- from requests import Session
6
- from requests.adapters import HTTPAdapter
7
- from urllib3.util.retry import Retry
8
-
9
- from vision_agent.utils.type_defs import LandingaiAPIKey
10
-
11
- _LOGGER = logging.getLogger(__name__)
12
- _LND_API_KEY = LandingaiAPIKey().api_key
13
- _LND_API_URL = "https://api.staging.landing.ai/v1/agent"
14
-
15
-
16
- def send_inference_request(
17
- payload: Dict[str, Any], endpoint_name: str
18
- ) -> Dict[str, Any]:
19
- if runtime_tag := os.environ.get("RUNTIME_TAG", ""):
20
- payload["runtime_tag"] = runtime_tag
21
-
22
- url = f"{_LND_API_URL}/model/{endpoint_name}"
23
- if "TOOL_ENDPOINT_URL" in os.environ:
24
- url = os.environ["TOOL_ENDPOINT_URL"]
25
-
26
- headers = {"Content-Type": "application/json", "apikey": _LND_API_KEY}
27
- if "TOOL_ENDPOINT_AUTH" in os.environ:
28
- headers["Authorization"] = os.environ["TOOL_ENDPOINT_AUTH"]
29
- headers.pop("apikey")
30
-
31
- session = _create_requests_session(
32
- url=url,
33
- num_retry=3,
34
- headers=headers,
35
- )
36
- res = session.post(url, json=payload)
37
- if res.status_code != 200:
38
- _LOGGER.error(f"Request failed: {res.status_code} {res.text}")
39
- raise ValueError(f"Request failed: {res.status_code} {res.text}")
40
-
41
- resp = res.json()
42
- # TODO: consider making the response schema the same between below two sources
43
- return resp if "TOOL_ENDPOINT_AUTH" in os.environ else resp["data"] # type: ignore
44
-
45
-
46
- def _create_requests_session(
47
- url: str, num_retry: int, headers: Dict[str, str]
48
- ) -> Session:
49
- """Create a requests session with retry"""
50
- session = Session()
51
- retries = Retry(
52
- total=num_retry,
53
- backoff_factor=2,
54
- raise_on_redirect=True,
55
- raise_on_status=False,
56
- allowed_methods=["GET", "POST", "PUT"],
57
- status_forcelist=[
58
- 408, # Request Timeout
59
- 429, # Too Many Requests (ie. rate limiter).
60
- 502, # Bad Gateway
61
- 503, # Service Unavailable (include cloud circuit breaker)
62
- 504, # Gateway Timeout
63
- ],
64
- )
65
- session.mount(url, HTTPAdapter(max_retries=retries if num_retry > 0 else 0))
66
- session.headers.update(headers)
67
- return session
File without changes
File without changes