vision-agent 0.2.23__tar.gz → 0.2.24__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (34)
  1. {vision_agent-0.2.23 → vision_agent-0.2.24}/PKG-INFO +4 -2
  2. {vision_agent-0.2.23 → vision_agent-0.2.24}/pyproject.toml +4 -2
  3. {vision_agent-0.2.23 → vision_agent-0.2.24}/vision_agent/agent/agent_coder.py +19 -5
  4. {vision_agent-0.2.23 → vision_agent-0.2.24}/vision_agent/agent/vision_agent_v2.py +9 -2
  5. {vision_agent-0.2.23 → vision_agent-0.2.24}/vision_agent/agent/vision_agent_v2_prompt.py +4 -3
  6. {vision_agent-0.2.23 → vision_agent-0.2.24}/vision_agent/llm/llm.py +7 -4
  7. {vision_agent-0.2.23 → vision_agent-0.2.24}/vision_agent/tools/tools_v2.py +28 -1
  8. {vision_agent-0.2.23 → vision_agent-0.2.24}/vision_agent/utils/sim.py +6 -1
  9. {vision_agent-0.2.23 → vision_agent-0.2.24}/vision_agent/utils/type_defs.py +1 -1
  10. {vision_agent-0.2.23 → vision_agent-0.2.24}/LICENSE +0 -0
  11. {vision_agent-0.2.23 → vision_agent-0.2.24}/README.md +0 -0
  12. {vision_agent-0.2.23 → vision_agent-0.2.24}/vision_agent/__init__.py +0 -0
  13. {vision_agent-0.2.23 → vision_agent-0.2.24}/vision_agent/agent/__init__.py +0 -0
  14. {vision_agent-0.2.23 → vision_agent-0.2.24}/vision_agent/agent/agent.py +0 -0
  15. {vision_agent-0.2.23 → vision_agent-0.2.24}/vision_agent/agent/agent_coder_prompts.py +0 -0
  16. {vision_agent-0.2.23 → vision_agent-0.2.24}/vision_agent/agent/easytool.py +0 -0
  17. {vision_agent-0.2.23 → vision_agent-0.2.24}/vision_agent/agent/easytool_prompts.py +0 -0
  18. {vision_agent-0.2.23 → vision_agent-0.2.24}/vision_agent/agent/reflexion.py +0 -0
  19. {vision_agent-0.2.23 → vision_agent-0.2.24}/vision_agent/agent/reflexion_prompts.py +0 -0
  20. {vision_agent-0.2.23 → vision_agent-0.2.24}/vision_agent/agent/vision_agent.py +0 -0
  21. {vision_agent-0.2.23 → vision_agent-0.2.24}/vision_agent/agent/vision_agent_prompts.py +0 -0
  22. {vision_agent-0.2.23 → vision_agent-0.2.24}/vision_agent/fonts/__init__.py +0 -0
  23. {vision_agent-0.2.23 → vision_agent-0.2.24}/vision_agent/fonts/default_font_ch_en.ttf +0 -0
  24. {vision_agent-0.2.23 → vision_agent-0.2.24}/vision_agent/llm/__init__.py +0 -0
  25. {vision_agent-0.2.23 → vision_agent-0.2.24}/vision_agent/lmm/__init__.py +0 -0
  26. {vision_agent-0.2.23 → vision_agent-0.2.24}/vision_agent/lmm/lmm.py +0 -0
  27. {vision_agent-0.2.23 → vision_agent-0.2.24}/vision_agent/tools/__init__.py +0 -0
  28. {vision_agent-0.2.23 → vision_agent-0.2.24}/vision_agent/tools/prompts.py +0 -0
  29. {vision_agent-0.2.23 → vision_agent-0.2.24}/vision_agent/tools/tool_utils.py +0 -0
  30. {vision_agent-0.2.23 → vision_agent-0.2.24}/vision_agent/tools/tools.py +0 -0
  31. {vision_agent-0.2.23 → vision_agent-0.2.24}/vision_agent/utils/__init__.py +0 -0
  32. {vision_agent-0.2.23 → vision_agent-0.2.24}/vision_agent/utils/execute.py +0 -0
  33. {vision_agent-0.2.23 → vision_agent-0.2.24}/vision_agent/utils/image_utils.py +0 -0
  34. {vision_agent-0.2.23 → vision_agent-0.2.24}/vision_agent/utils/video.py +0 -0
--- vision_agent-0.2.23/PKG-INFO
+++ vision_agent-0.2.24/PKG-INFO
@@ -1,14 +1,16 @@
 Metadata-Version: 2.1
 Name: vision-agent
-Version: 0.2.23
+Version: 0.2.24
 Summary: Toolset for Vision Agent
 Author: Landing AI
 Author-email: dev@landing.ai
-Requires-Python: >=3.9
+Requires-Python: >=3.9,<4.0
 Classifier: Programming Language :: Python :: 3
 Classifier: Programming Language :: Python :: 3.9
 Classifier: Programming Language :: Python :: 3.10
 Classifier: Programming Language :: Python :: 3.11
+Requires-Dist: ipykernel (>=6.29.4,<7.0.0)
+Requires-Dist: langsmith (>=0.1.58,<0.2.0)
 Requires-Dist: moviepy (>=1.0.0,<2.0.0)
 Requires-Dist: nbclient (>=0.10.0,<0.11.0)
 Requires-Dist: nbformat (>=5.10.4,<6.0.0)
--- vision_agent-0.2.23/pyproject.toml
+++ vision_agent-0.2.24/pyproject.toml
@@ -4,7 +4,7 @@ build-backend = "poetry.core.masonry.api"
 
 [tool.poetry]
 name = "vision-agent"
-version = "0.2.23"
+version = "0.2.24"
 description = "Toolset for Vision Agent"
 authors = ["Landing AI <dev@landing.ai>"]
 readme = "README.md"
@@ -16,7 +16,7 @@ packages = [{include = "vision_agent"}]
 "documentation" = "https://github.com/landing-ai/vision-agent"
 
 [tool.poetry.dependencies] # main dependency group
-python = ">=3.9"
+python = ">=3.9,<4.0"
 numpy = ">=1.21.0,<2.0.0"
 pillow = "10.*"
 requests = "2.*"
@@ -32,6 +32,8 @@ scipy = "1.13.*"
 nbclient = "^0.10.0"
 nbformat = "^5.10.4"
 rich = "^13.7.1"
+langsmith = "^0.1.58"
+ipykernel = "^6.29.4"
 
 [tool.poetry.group.dev.dependencies]
 autoflake = "1.*"
--- vision_agent-0.2.23/vision_agent/agent/agent_coder.py
+++ vision_agent-0.2.24/vision_agent/agent/agent_coder.py
@@ -5,6 +5,9 @@ import sys
 from pathlib import Path
 from typing import Dict, List, Optional, Union
 
+from rich.console import Console
+from rich.syntax import Syntax
+
 from vision_agent.agent import Agent
 from vision_agent.agent.agent_coder_prompts import (
     DEBUG,
@@ -40,6 +43,7 @@ from vision_agent.tools.tools_v2 import *
 logging.basicConfig(stream=sys.stdout)
 _LOGGER = logging.getLogger(__name__)
 _EXECUTE = Execute()
+_CONSOLE = Console()
 
 
 def write_tests(question: str, code: str, model: LLM) -> str:
@@ -103,7 +107,7 @@ def run_visual_tests(
 
 
 def fix_bugs(code: str, tests: str, result: str, feedback: str, model: LLM) -> str:
-    prompt = FIX_BUG.format(completion=code, test_case=tests, result=result)
+    prompt = FIX_BUG.format(code=code, tests=tests, result=result, feedback=feedback)
     completion = model(prompt)
     return preprocess_data(completion)
 
@@ -139,7 +143,8 @@ class AgentCoder(Agent):
             else visual_tester_agent
         )
         self.max_turns = 3
-        if verbose:
+        self.verbose = verbose
+        if self.verbose:
             _LOGGER.setLevel(logging.INFO)
 
     def __call__(
@@ -164,9 +169,15 @@ class AgentCoder(Agent):
         feedback = ""
         for _ in range(self.max_turns):
             code = write_program(question, feedback, self.coder_agent)
-            _LOGGER.info(f"code:\n{code}")
+            if self.verbose:
+                _CONSOLE.print(
+                    Syntax(code, "python", theme="gruvbox-dark", line_numbers=True)
+                )
             debug = write_debug(question, code, feedback, self.tester_agent)
-            _LOGGER.info(f"debug:\n{debug}")
+            if self.verbose:
+                _CONSOLE.print(
+                    Syntax(debug, "python", theme="gruvbox-dark", line_numbers=True)
+                )
             results = execute_tests(code, debug)
             _LOGGER.info(
                 f"execution results: passed: {results['passed']}\n{results['result']}"
@@ -176,7 +187,10 @@ class AgentCoder(Agent):
                 code = fix_bugs(
                     code, debug, results["result"].strip(), feedback, self.coder_agent  # type: ignore
                 )
-                _LOGGER.info(f"fixed code:\n{code}")
+                if self.verbose:
+                    _CONSOLE.print(
+                        Syntax(code, "python", theme="gruvbox-dark", line_numbers=True)
+                    )
             else:
                 # TODO: Sometimes it prints nothing, so we need to handle that case
                 # TODO: The visual agent reflection does not work very well, needs more testing
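The agent_coder.py changes above replace plain `_LOGGER.info` dumps of generated code with syntax-highlighted output when verbose mode is on. A minimal sketch of that rich pattern, independent of AgentCoder (the sample code string is made up for illustration):

# Minimal sketch of the rich-based verbose output adopted above; the sample
# code string is illustrative, not taken from the package.
from rich.console import Console
from rich.syntax import Syntax

_CONSOLE = Console()
generated_code = "def add(a: int, b: int) -> int:\n    return a + b\n"

# Renders the snippet with highlighting and line numbers, mirroring the
# Syntax(...) calls added to AgentCoder.
_CONSOLE.print(Syntax(generated_code, "python", theme="gruvbox-dark", line_numbers=True))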
--- vision_agent-0.2.23/vision_agent/agent/vision_agent_v2.py
+++ vision_agent-0.2.24/vision_agent/agent/vision_agent_v2.py
@@ -4,6 +4,7 @@ from pathlib import Path
 from typing import Any, Callable, Dict, List, Mapping, Optional, Tuple, Union
 
 import pandas as pd
+from langsmith import traceable
 from rich.console import Console
 from rich.syntax import Syntax
 from tabulate import tabulate
@@ -66,6 +67,7 @@ def extract_json(json_str: str) -> Dict[str, Any]:
     return json_dict  # type: ignore
 
 
+@traceable(name="planning")
 def write_plan(
     chat: List[Dict[str, str]],
     plan: Optional[List[Dict[str, Any]]],
@@ -214,6 +216,7 @@ def write_and_exec_code(
     return success, code, result, working_memory
 
 
+@traceable(name="plan execution")
 def run_plan(
     user_req: str,
     plan: List[Dict[str, Any]],
@@ -235,7 +238,7 @@ def run_plan(
             f"""
 {tabulate(tabular_data=[task], headers="keys", tablefmt="mixed_grid", maxcolwidths=_MAX_TABULATE_COL_WIDTH)}"""
         )
-        tools = tool_recommender.top_k(task["instruction"])
+        tools = tool_recommender.top_k(task["instruction"], thresh=0.3)
        tool_info = "\n".join([e["doc"] for e in tools])
 
         if verbosity == 2:
@@ -285,6 +288,7 @@ class VisionAgentV2(Agent):
     solve vision tasks. It is inspired by MetaGPT's Data Interpreter
     https://arxiv.org/abs/2402.18679. Vision Agent has several key features to help it
     generate code:
+
     - A planner to generate a plan of tasks to solve a user requirement. The planner
     can output code tasks or test tasks, where test tasks are used to verify the code.
     - Automatic debugging, if a task fails, the agent will attempt to debug the code
@@ -333,6 +337,7 @@ class VisionAgentV2(Agent):
         results = self.chat_with_workflow(input, image, plan)
         return results["code"]  # type: ignore
 
+    @traceable
     def chat_with_workflow(
         self,
         chat: List[Dict[str, str]],
@@ -377,7 +382,9 @@ class VisionAgentV2(Agent):
             self.long_term_memory,
             self.verbosity,
         )
-        success = all(task["success"] for task in plan)
+        success = all(
+            task["success"] if "success" in task else False for task in plan
+        )
         working_memory.update(working_memory_i)
 
         if not success:
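vision_agent_v2.py now decorates the planning and plan-execution steps with LangSmith's `@traceable` so each run can be recorded as a trace. A hedged sketch of the decorator on a stand-in function (the function, its name, and the commented environment variables are illustrative assumptions, not package code):

# Sketch of langsmith's @traceable decorator as used above; `demo_plan_step`
# is a hypothetical stand-in, not part of vision-agent.
from langsmith import traceable

# Tracing is typically enabled via environment variables, e.g. (an assumption
# about the deployment setup, not something this diff configures):
#   export LANGCHAIN_TRACING_V2=true
#   export LANGCHAIN_API_KEY=<your LangSmith key>

@traceable(name="planning")
def demo_plan_step(user_request: str) -> list:
    # Each call is logged as a run named "planning" when tracing is enabled;
    # without credentials the function still executes normally.
    return [{"instruction": user_request, "type": "code"}]

print(demo_plan_step("Count the cars in image.jpg"))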
--- vision_agent-0.2.23/vision_agent/agent/vision_agent_v2_prompt.py
+++ vision_agent-0.2.24/vision_agent/agent/vision_agent_v2_prompt.py
@@ -34,7 +34,7 @@ PLAN = """
 
 # Task:
 Based on the context and the tools you have available, write a plan of subtasks to achieve the user request that adhere to the following requirements:
-- For each subtask, you should provide a short instruction on what to do. Ensure the subtasks are large enough to be meaningful, encompassing multiple lines of code.
+- For each subtask, you should provide instructions on what to do. Write detailed subtasks, ensure they are large enough to be meaningful, encompassing multiple lines of code.
 - You do not need to have the agent rewrite any tool functionality you already have, you should instead instruct it to utilize one or more of those tools in each subtask.
 - You can have agents either write coding tasks, to code some functionality or testing tasks to test previous functionality.
 - If a current plan exists, examine each item in the plan to determine if it was successful. If there was an item that failed, i.e. 'success': False, then you should rewrite that item and all subsequent items to ensure that the rewritten plan is successful.
@@ -73,9 +73,10 @@ CODE = """
 {code}
 
 # Constraints
-- Write a function that accomplishes the 'User Requirement'. You are supplied code from a previous task under 'Previous Code', feel free to copy over that code into your own implementation if you need it.
-- Always prioritize using pre-defined tools or code for the same functionality from 'Tool Info for Current Subtask'. You have access to all these tools through the `from vision_agent.tools.tools_v2 import *` import.
+- Write a function that accomplishes the 'Current Subtask'. You are supplied code from a previous task under 'Previous Code', do not delete or change previous code unless it contains a bug or it is necessary to complete the 'Current Subtask'.
+- Always prioritize using pre-defined tools or code for the same functionality from 'Tool Info' when working on 'Current Subtask'. You have access to all these tools through the `from vision_agent.tools.tools_v2 import *` import.
 - You may recieve previous trials and errors under 'Previous Task', this is code, output and reflections from previous tasks. You can use these to avoid running in to the same issues when writing your code.
+- Use the `save_json` function from `vision_agent.tools.tools_v2` to save your output as a json file.
 - Write clean, readable, and well-documented code.
 
 # Output
--- vision_agent-0.2.23/vision_agent/llm/llm.py
+++ vision_agent-0.2.24/vision_agent/llm/llm.py
@@ -3,6 +3,7 @@ import os
 from abc import ABC, abstractmethod
 from typing import Any, Callable, Dict, List, Mapping, Optional, Union, cast
 
+from langsmith.wrappers import wrap_openai
 from openai import AzureOpenAI, OpenAI
 
 from vision_agent.tools import (
@@ -41,9 +42,9 @@ class OpenAILLM(LLM):
         **kwargs: Any
     ):
         if not api_key:
-            self.client = OpenAI()
+            self.client = wrap_openai(OpenAI())
         else:
-            self.client = OpenAI(api_key=api_key)
+            self.client = wrap_openai(OpenAI(api_key=api_key))
 
         self.model_name = model_name
         self.system_prompt = system_prompt
@@ -165,8 +166,10 @@ class AzureOpenAILLM(OpenAILLM):
         if not azure_endpoint:
             raise ValueError("Azure OpenAI endpoint is required.")
 
-        self.client = AzureOpenAI(
-            api_key=api_key, api_version=api_version, azure_endpoint=azure_endpoint
+        self.client = wrap_openai(
+            AzureOpenAI(
+                api_key=api_key, api_version=api_version, azure_endpoint=azure_endpoint
+            )
         )
         self.model_name = model_name
         self.kwargs = kwargs
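In llm.py the OpenAI and Azure OpenAI clients are now wrapped with `wrap_openai`, which logs chat-completion calls to LangSmith while keeping the client interface unchanged. A hedged standalone sketch (the model name and prompt are illustrative; it assumes OPENAI_API_KEY is set, plus LangSmith credentials if tracing is on):

# Sketch of the wrap_openai pattern adopted in llm.py; the model name and
# prompt are illustrative, not taken from the package.
from langsmith.wrappers import wrap_openai
from openai import OpenAI

# The wrapped client exposes the same API; calls through it are additionally
# recorded as LLM runs when LangSmith tracing is configured.
client = wrap_openai(OpenAI())  # reads OPENAI_API_KEY from the environment

response = client.chat.completions.create(
    model="gpt-4o",
    messages=[{"role": "user", "content": "Say hello in one word."}],
)
print(response.choices[0].message.content)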
--- vision_agent-0.2.23/vision_agent/tools/tools_v2.py
+++ vision_agent-0.2.24/vision_agent/tools/tools_v2.py
@@ -1,5 +1,6 @@
 import inspect
 import io
+import json
 import logging
 import tempfile
 from importlib import resources
@@ -285,6 +286,31 @@ def closest_box_distance(box1: List[float], box2: List[float]) -> float:
 # Utility and visualization functions
 
 
+def save_json(data: Any, file_path: str) -> None:
+    """'save_json' is a utility function that saves data as a JSON file. It is helpful
+    for saving data that contains NumPy arrays which are not JSON serializable.
+
+    Parameters:
+        data (Any): The data to save.
+        file_path (str): The path to save the JSON file.
+
+    Example
+    -------
+    >>> save_json(data, "path/to/file.json")
+    """
+
+    class NumpyEncoder(json.JSONEncoder):
+        def default(self, obj: Any):  # type: ignore
+            if isinstance(obj, np.ndarray):
+                return obj.tolist()
+            elif isinstance(obj, np.bool_):
+                return bool(obj)
+            return json.JSONEncoder.default(self, obj)
+
+    with open(file_path, "w") as f:
+        json.dump(data, f, cls=NumpyEncoder)
+
+
 def load_image(image_path: str) -> np.ndarray:
     """'load_image' is a utility function that loads an image from the given path.
 
@@ -480,6 +506,7 @@ TOOLS = [
     ocr,
     closest_mask_distance,
     closest_box_distance,
+    save_json,
     load_image,
     save_image,
     overlay_bounding_boxes,
@@ -489,5 +516,5 @@ TOOLS_DF = get_tools_df(TOOLS)  # type: ignore
 TOOL_DESCRIPTIONS = get_tool_descriptions(TOOLS)  # type: ignore
 TOOL_DOCSTRING = get_tool_documentation(TOOLS)  # type: ignore
 UTILITIES_DOCSTRING = get_tool_documentation(
-    [load_image, save_image, overlay_bounding_boxes]
+    [save_json, load_image, save_image, overlay_bounding_boxes]
 )
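The new `save_json` tool exists because plain `json.dump` raises TypeError on NumPy types; its custom encoder converts `np.ndarray` to lists and `np.bool_` to plain bools. A short usage sketch (the data and file name are made up):

# Usage sketch for the new save_json tool; data and file name are illustrative.
import numpy as np

from vision_agent.tools.tools_v2 import save_json

detections = {
    "boxes": np.array([[0.1, 0.2, 0.4, 0.5]]),  # ndarray: not JSON serializable by default
    "valid": np.bool_(True),                    # np.bool_: also rejected by plain json.dump
}
save_json(detections, "detections.json")  # writes {"boxes": [[0.1, 0.2, 0.4, 0.5]], "valid": true}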
--- vision_agent-0.2.23/vision_agent/utils/sim.py
+++ vision_agent-0.2.24/vision_agent/utils/sim.py
@@ -56,12 +56,15 @@ class Sim:
        df = df.drop("embs", axis=1)
        df.to_csv(sim_file / "df.csv", index=False)
 
-    def top_k(self, query: str, k: int = 5) -> Sequence[Dict]:
+    def top_k(
+        self, query: str, k: int = 5, thresh: Optional[float] = None
+    ) -> Sequence[Dict]:
         """Returns the top k most similar items to the query.
 
         Parameters:
             query: str: The query to compare to.
             k: int: The number of items to return.
+            thresh: Optional[float]: The minimum similarity threshold.
 
         Returns:
             Sequence[Dict]: The top k most similar items.
@@ -70,6 +73,8 @@
         embedding = get_embedding(self.client, query, model=self.model)
         self.df["sim"] = self.df.embs.apply(lambda x: 1 - cosine(x, embedding))
         res = self.df.sort_values("sim", ascending=False).head(k)
+        if thresh is not None:
+            res = res[res.sim > thresh]
         return res[[c for c in res.columns if c != "embs"]].to_dict(orient="records")
 
 
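Sim.top_k above gains an optional `thresh` that drops matches at or below the given cosine similarity; run_plan in vision_agent_v2.py uses it with `thresh=0.3` to filter tool recommendations. A minimal pandas sketch of just that filtering step (the similarity scores are made up; the real values come from OpenAI embeddings):

# Sketch of the new threshold filter in Sim.top_k; the scores are made up.
import pandas as pd

res = pd.DataFrame(
    {"doc": ["grounding_dino", "ocr", "save_json"], "sim": [0.82, 0.31, 0.12]}
)
res = res.sort_values("sim", ascending=False).head(5)

thresh = 0.3
if thresh is not None:
    res = res[res.sim > thresh]  # strictly greater than thresh, as in the diff

print(res.to_dict(orient="records"))
# -> [{'doc': 'grounding_dino', 'sim': 0.82}, {'doc': 'ocr', 'sim': 0.31}]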
--- vision_agent-0.2.23/vision_agent/utils/type_defs.py
+++ vision_agent-0.2.24/vision_agent/utils/type_defs.py
@@ -12,7 +12,7 @@ class LandingaiAPIKey(BaseSettings):
     """
 
     api_key: str = Field(
-        default="land_sk_hw34v3tyEc35OAhP8F7hnGnrDv2C8hD2ycMyq0aMkVS1H40D22",
+        default="land_sk_PCRPYKqB3cq0JWGY83hjEk33SWSDOwdNoyUjTgCDMZO4NxeCXW",
         alias="LANDINGAI_API_KEY",
         description="The API key of LandingAI.",
     )