vision-agent 1.0.5__tar.gz → 1.0.7__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (54) hide show
  1. {vision_agent-1.0.5 → vision_agent-1.0.7}/PKG-INFO +1 -1
  2. {vision_agent-1.0.5 → vision_agent-1.0.7}/pyproject.toml +1 -1
  3. vision_agent-1.0.7/vision_agent/agent/__init__.py +4 -0
  4. {vision_agent-1.0.5 → vision_agent-1.0.7}/vision_agent/tools/__init__.py +0 -6
  5. vision_agent-1.0.7/vision_agent/tools/meta_tools.py +200 -0
  6. vision_agent-1.0.5/vision_agent/agent/__init__.py +0 -20
  7. vision_agent-1.0.5/vision_agent/agent/vision_agent.py +0 -605
  8. vision_agent-1.0.5/vision_agent/agent/vision_agent_coder.py +0 -742
  9. vision_agent-1.0.5/vision_agent/agent/vision_agent_coder_prompts.py +0 -290
  10. vision_agent-1.0.5/vision_agent/agent/vision_agent_planner.py +0 -564
  11. vision_agent-1.0.5/vision_agent/agent/vision_agent_planner_prompts.py +0 -199
  12. vision_agent-1.0.5/vision_agent/agent/vision_agent_prompts.py +0 -312
  13. vision_agent-1.0.5/vision_agent/tools/meta_tools.py +0 -691
  14. {vision_agent-1.0.5 → vision_agent-1.0.7}/LICENSE +0 -0
  15. {vision_agent-1.0.5 → vision_agent-1.0.7}/README.md +0 -0
  16. {vision_agent-1.0.5 → vision_agent-1.0.7}/vision_agent/.sim_tools/df.csv +0 -0
  17. {vision_agent-1.0.5 → vision_agent-1.0.7}/vision_agent/.sim_tools/embs.npy +0 -0
  18. {vision_agent-1.0.5 → vision_agent-1.0.7}/vision_agent/__init__.py +0 -0
  19. {vision_agent-1.0.5 → vision_agent-1.0.7}/vision_agent/agent/README.md +0 -0
  20. {vision_agent-1.0.5 → vision_agent-1.0.7}/vision_agent/agent/agent.py +0 -0
  21. {vision_agent-1.0.5 → vision_agent-1.0.7}/vision_agent/agent/vision_agent_coder_prompts_v2.py +0 -0
  22. {vision_agent-1.0.5 → vision_agent-1.0.7}/vision_agent/agent/vision_agent_coder_v2.py +0 -0
  23. {vision_agent-1.0.5 → vision_agent-1.0.7}/vision_agent/agent/vision_agent_planner_prompts_v2.py +0 -0
  24. {vision_agent-1.0.5 → vision_agent-1.0.7}/vision_agent/agent/vision_agent_planner_v2.py +0 -0
  25. {vision_agent-1.0.5 → vision_agent-1.0.7}/vision_agent/agent/vision_agent_prompts_v2.py +0 -0
  26. {vision_agent-1.0.5 → vision_agent-1.0.7}/vision_agent/agent/vision_agent_v2.py +0 -0
  27. {vision_agent-1.0.5 → vision_agent-1.0.7}/vision_agent/clients/__init__.py +0 -0
  28. {vision_agent-1.0.5 → vision_agent-1.0.7}/vision_agent/clients/http.py +0 -0
  29. {vision_agent-1.0.5 → vision_agent-1.0.7}/vision_agent/configs/__init__.py +0 -0
  30. {vision_agent-1.0.5 → vision_agent-1.0.7}/vision_agent/configs/anthropic_config.py +0 -0
  31. {vision_agent-1.0.5 → vision_agent-1.0.7}/vision_agent/configs/config.py +0 -0
  32. {vision_agent-1.0.5 → vision_agent-1.0.7}/vision_agent/configs/openai_config.py +0 -0
  33. {vision_agent-1.0.5 → vision_agent-1.0.7}/vision_agent/fonts/__init__.py +0 -0
  34. {vision_agent-1.0.5 → vision_agent-1.0.7}/vision_agent/fonts/default_font_ch_en.ttf +0 -0
  35. {vision_agent-1.0.5 → vision_agent-1.0.7}/vision_agent/lmm/__init__.py +0 -0
  36. {vision_agent-1.0.5 → vision_agent-1.0.7}/vision_agent/lmm/lmm.py +0 -0
  37. {vision_agent-1.0.5 → vision_agent-1.0.7}/vision_agent/models/__init__.py +0 -0
  38. {vision_agent-1.0.5 → vision_agent-1.0.7}/vision_agent/models/agent_types.py +0 -0
  39. {vision_agent-1.0.5 → vision_agent-1.0.7}/vision_agent/models/lmm_types.py +0 -0
  40. {vision_agent-1.0.5 → vision_agent-1.0.7}/vision_agent/models/tools_types.py +0 -0
  41. {vision_agent-1.0.5 → vision_agent-1.0.7}/vision_agent/sim/__init__.py +0 -0
  42. {vision_agent-1.0.5 → vision_agent-1.0.7}/vision_agent/sim/sim.py +0 -0
  43. {vision_agent-1.0.5 → vision_agent-1.0.7}/vision_agent/tools/planner_tools.py +0 -0
  44. {vision_agent-1.0.5 → vision_agent-1.0.7}/vision_agent/tools/prompts.py +0 -0
  45. {vision_agent-1.0.5 → vision_agent-1.0.7}/vision_agent/tools/tools.py +0 -0
  46. {vision_agent-1.0.5 → vision_agent-1.0.7}/vision_agent/utils/__init__.py +0 -0
  47. {vision_agent-1.0.5 → vision_agent-1.0.7}/vision_agent/utils/agent.py +0 -0
  48. {vision_agent-1.0.5 → vision_agent-1.0.7}/vision_agent/utils/exceptions.py +0 -0
  49. {vision_agent-1.0.5 → vision_agent-1.0.7}/vision_agent/utils/execute.py +0 -0
  50. {vision_agent-1.0.5 → vision_agent-1.0.7}/vision_agent/utils/image_utils.py +0 -0
  51. {vision_agent-1.0.5 → vision_agent-1.0.7}/vision_agent/utils/tools.py +0 -0
  52. {vision_agent-1.0.5 → vision_agent-1.0.7}/vision_agent/utils/tools_doc.py +0 -0
  53. {vision_agent-1.0.5 → vision_agent-1.0.7}/vision_agent/utils/video.py +0 -0
  54. {vision_agent-1.0.5 → vision_agent-1.0.7}/vision_agent/utils/video_tracking.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: vision-agent
3
- Version: 1.0.5
3
+ Version: 1.0.7
4
4
  Summary: Toolset for Vision Agent
5
5
  Author: Landing AI
6
6
  Author-email: dev@landing.ai
@@ -4,7 +4,7 @@ build-backend = "poetry.core.masonry.api"
4
4
 
5
5
  [tool.poetry]
6
6
  name = "vision-agent"
7
- version = "1.0.5"
7
+ version = "1.0.7"
8
8
  description = "Toolset for Vision Agent"
9
9
  authors = ["Landing AI <dev@landing.ai>"]
10
10
  readme = "README.md"
@@ -0,0 +1,4 @@
1
+ from .agent import Agent, AgentCoder, AgentPlanner
2
+ from .vision_agent_coder_v2 import VisionAgentCoderV2
3
+ from .vision_agent_planner_v2 import VisionAgentPlannerV2
4
+ from .vision_agent_v2 import VisionAgentV2
@@ -1,13 +1,7 @@
1
1
  from typing import Callable, List, Optional
2
2
 
3
3
  from .meta_tools import (
4
- create_code_artifact,
5
- edit_code_artifact,
6
- edit_vision_code,
7
- generate_vision_code,
8
4
  get_tool_descriptions,
9
- list_artifacts,
10
- open_code_artifact,
11
5
  view_media_artifact,
12
6
  )
13
7
  from .planner_tools import judge_od_results
@@ -0,0 +1,200 @@
1
+ import difflib
2
+ import os
3
+ import re
4
+ from pathlib import Path
5
+ from typing import Any, Dict, List, Tuple, Union
6
+
7
+ from IPython.display import display
8
+
9
+ from vision_agent.tools.tools import get_tools_descriptions as _get_tool_descriptions
10
+ from vision_agent.utils.execute import Execution, MimeType
11
+ from vision_agent.utils.tools_doc import get_tool_documentation
12
+
13
# Module-level state and defaults. Nothing in the visible part of this module
# reads CURRENT_FILE, CURRENT_LINE or DEFAULT_WINDOW_SIZE.
# NOTE(review): presumably kept for importers of this module -- confirm before removing.
CURRENT_FILE = None
CURRENT_LINE = 0
DEFAULT_WINDOW_SIZE = 100
# Value of the ZMQ_PORT environment variable (a string), or None when it is unset.
ZMQ_PORT = os.environ.get("ZMQ_PORT")
17
+
18
+
19
def report_progress_callback(port: int, inp: Dict[str, Any]) -> None:
    """Send a progress payload as JSON over a ZeroMQ PUSH socket.

    A new context and PUSH socket are created on every call and connected to
    ``tcp://localhost:<port>``. Neither is explicitly closed here.

    Parameters:
        port (int): The localhost TCP port to connect to.
        inp (Dict[str, Any]): The payload handed to ``send_json``.
    """
    # zmq is imported inside the function, so importing this module does not
    # require it.
    import zmq

    ctx = zmq.Context()
    push_socket = ctx.socket(zmq.PUSH)
    endpoint = f"tcp://localhost:{port}"
    push_socket.connect(endpoint)
    push_socket.send_json(inp)
26
+
27
+
28
def redisplay_results(execution: Execution) -> None:
    """Re-emit the results of a previous execution into the current output.

    Useful for nested notebook environments: when an inner notebook produced
    ``execution``, calling this from the outer notebook makes the inner results
    part of the outer notebook's own output.

    Parameters:
        execution (Execution): The execution whose ``results`` are displayed
            again, in order.
    """
    # Result field -> mime type used as the key of the raw display bundle.
    # The order matches the order in which the representations are displayed.
    mime_by_field = (
        ("text", MimeType.TEXT_PLAIN),
        ("html", MimeType.TEXT_HTML),
        ("markdown", MimeType.TEXT_MARKDOWN),
        ("svg", MimeType.IMAGE_SVG),
        ("png", MimeType.IMAGE_PNG),
        ("jpeg", MimeType.IMAGE_JPEG),
        ("mp4", MimeType.VIDEO_MP4_B64),
        ("latex", MimeType.TEXT_LATEX),
        ("json", MimeType.APPLICATION_JSON),
        ("artifact", MimeType.APPLICATION_ARTIFACT),
    )
    for item in execution.results:
        for field, mime in mime_by_field:
            payload = getattr(item, field)
            if payload is not None:
                display({mime: payload}, raw=True)
        # `extra` is passed to display as-is rather than wrapped under a mime key.
        if item.extra is not None:
            display(item.extra, raw=True)
57
+
58
+
59
class Artifacts:
    """Dictionary-like access to the files stored in one working directory.

    Artifact names are the entry names directly inside ``cwd``. Reading an item
    returns the file's text, assigning an item writes text to the file, and
    iteration / ``in`` operate on the directory listing. The directory is meant
    to hold the chat related files (images, data files, code) that have to stay
    in sync with the environment the VisionAgent executes code in.
    """

    def __init__(self, cwd: Union[str, Path]) -> None:
        """Initializes the Artifacts object with its working directory.

        Parameters:
            cwd (Union[str, Path]): The path to save all the chat related files. For example "/home/user/chat_abc/".
        """
        self.cwd = Path(cwd)

    def show(self) -> str:
        """Prints out all the entries in the current working directory.

        Returns:
            str: The same listing that was printed, one line per entry between
                the "[Artifacts loaded]" and "[End of artifacts]" markers.
        """
        entries = "".join(
            f"Artifact name: {k}, loaded to path: {str(self.cwd / k)}\n" for k in self
        )
        output_str = f"[Artifacts loaded]\n{entries}[End of artifacts]\n"
        print(output_str)
        return output_str

    def __iter__(self) -> Any:
        """Iterates over the entry names found in the working directory."""
        return iter(os.listdir(self.cwd))

    def __getitem__(self, name: str) -> Any:
        """Returns the text content of the artifact called ``name``.

        Raises:
            KeyError: If no such file exists in the working directory.
        """
        file_path = self.cwd / name
        # Open first and translate the failure instead of testing exists()
        # beforehand: a file removed between the check and the open would
        # otherwise surface as FileNotFoundError rather than KeyError.
        try:
            file = open(file_path, "r")
        except (FileNotFoundError, NotADirectoryError):
            raise KeyError(f"File '{name}' not found in artifacts") from None
        # Reading stays outside the try so that read/decoding errors are not
        # mistaken for a missing artifact.
        with file:
            return file.read()

    def __setitem__(self, name: str, value: Any) -> None:
        """Writes ``value`` to the artifact called ``name``, replacing any existing content."""
        file_path = self.cwd / name
        with open(file_path, "w") as file:
            file.write(value)

    def __contains__(self, name: str) -> bool:
        """Returns True if ``name`` is an entry of the working directory."""
        return name in os.listdir(self.cwd)
101
+
102
+
103
def filter_file(file_name: Union[str, Path]) -> Tuple[bool, bool]:
    """Classify a path by whether it is an accepted file and whether it is media.

    Parameters:
        file_name (Union[str, Path]): The path to inspect.

    Returns:
        Tuple[bool, bool]: The first flag is True when the path is an existing
            regular file, has no "__pycache__" anywhere in its path, does not
            have a name starting with "." and carries one of the accepted
            suffixes. The second flag is True when the suffix is an image or
            video suffix; it depends on the suffix alone, not on the first flag.
    """
    media_suffixes = [".png", ".jpeg", ".jpg", ".mp4"]
    accepted_suffixes = media_suffixes + [".txt", ".json", ".csv"]

    path = Path(file_name)
    suffix = path.suffix
    is_media = suffix in media_suffixes

    if not path.is_file():
        return False, is_media
    if "__pycache__" in str(path) or path.name.startswith("."):
        return False, is_media
    return suffix in accepted_suffixes, is_media
112
+
113
+
114
+ # These tools are adapted from SWE-Agent https://github.com/princeton-nlp/SWE-agent
115
+
116
+
117
def format_lines(lines: List[str], start_idx: int) -> str:
    """Prefix each line with its number and concatenate the result.

    Parameters:
        lines (List[str]): The lines to format. They are joined as-is, so each
            one should carry its own line ending.
        start_idx (int): The number given to the first line; later lines count
            up from it.

    Returns:
        str: The concatenation of "<number>|<line>" for every line.
    """
    return "".join(
        f"{number}|{text}" for number, text in enumerate(lines, start_idx)
    )
122
+
123
+
124
def view_lines(
    lines: List[str],
    line_num: int,
    window_size: int,
    name: str,
    total_lines: int,
    print_output: bool = True,
) -> str:
    """Render a numbered window of an artifact's lines around ``line_num``.

    The window covers ``window_size`` lines on each side of ``line_num``,
    clipped to the bounds of ``lines``, and is numbered from the window's
    starting index.

    Parameters:
        lines (List[str]): All lines of the artifact.
        line_num (int): The index the window is centred on.
        window_size (int): How many lines to include on each side.
        name (str): The artifact name shown in the header.
        total_lines (int): The line count shown in the header.
        print_output (bool): Whether to also print the rendered text.

    Returns:
        str: The header, the numbered window and a footer that either marks the
            end of the artifact or says how many lines follow the window.
    """
    first = max(0, line_num - window_size)
    last = min(len(lines), line_num + window_size)

    remaining = len(lines) - last
    if remaining == 0:
        footer = "\n[End of artifact]"
    else:
        footer = f"\n[{remaining} more lines]"

    header = f"[Artifact: {name} ({total_lines} lines total)]\n"
    return_str = header + format_lines(lines[first:last], first) + footer

    if print_output:
        print(return_str)
    return return_str
147
+
148
+
149
def check_and_load_image(code: str) -> List[str]:
    """Collect the artifact names passed to ``view_media_artifact`` calls in code.

    Only calls whose second argument is a quoted string literal are matched.

    Parameters:
        code (str): The source code to scan.

    Returns:
        List[str]: The quoted second argument of every matching call, in order
            of appearance. Empty when ``code`` is blank or has no such call.
    """
    if code.strip() == "":
        return []

    call_pattern = r"view_media_artifact\(\s*([^\)]+),\s*['\"]([^\)]+)['\"]\s*\)"
    # Group 1 is the first argument, group 2 the quoted artifact name.
    return [found.group(2) for found in re.finditer(call_pattern, code)]
156
+
157
+
158
def view_media_artifact(artifacts: Artifacts, name: str) -> str:
    """Allows only the agent to view the media artifact with the given name. DO NOT use
    this to show media to the user, the user can already see all media saved in the
    artifacts.

    Parameters:
        artifacts (Artifacts): The artifacts object to show the image from.
        name (str): The name of the image artifact to show.
    """
    # NOTE(review): this function is passed to get_tool_documentation to build
    # META_TOOL_DOCSTRING, so the docstring above is presumably surfaced as tool
    # documentation at runtime -- treat its wording as program text.
    output_str = (
        f"[Image {name} displayed]"
        if name in artifacts
        else f"[Artifact {name} does not exist]"
    )
    # Only a status marker is printed and returned; no media is loaded here.
    print(output_str)
    return output_str
173
+
174
+
175
def get_tool_descriptions() -> str:
    """Returns a description of all the tools that `generate_vision_code` has access to.
    Helpful for answering questions about what types of vision tasks you can do with
    `generate_vision_code`."""
    # NOTE(review): this function is passed to get_tool_documentation to build
    # META_TOOL_DOCSTRING, so the docstring above is presumably surfaced as tool
    # documentation at runtime and is kept verbatim. It still names
    # `generate_vision_code` -- confirm that tool still exists.
    descriptions: str = _get_tool_descriptions()
    return descriptions
180
+
181
+
182
def get_diff(before: str, after: str) -> str:
    """Return a unified diff between two versions of a text.

    Parameters:
        before (str): The original text.
        after (str): The modified text.

    Returns:
        str: The unified diff with empty file names in the header, or an empty
            string when both texts are identical. Every diff line ends with a
            newline. A line that had no line ending in its source text is
            followed by "\\ No newline at end of file", as GNU diff does.
    """
    pieces = []
    for line in difflib.unified_diff(
        before.splitlines(keepends=True), after.splitlines(keepends=True)
    ):
        pieces.append(line)
        # Input lines keep their own endings, so only the final line of an
        # input can arrive unterminated. Left as-is it would fuse with the next
        # diff line (e.g. "-a+b"), so terminate it and say why.
        if line.splitlines() == [line]:
            pieces.append("\n\\ No newline at end of file\n")
    return "".join(pieces)
188
+
189
+
190
def get_diff_with_prompts(name: str, before: str, after: str) -> str:
    """Wrap the diff between two versions of an artifact in edit markers.

    Parameters:
        name (str): The artifact name shown in the opening marker.
        before (str): The artifact text before the edit.
        after (str): The artifact text after the edit.

    Returns:
        str: The output of ``get_diff(before, after)`` placed between an
            "[Artifact <name> edits]" line and an "[End of edits]" line.
    """
    return f"[Artifact {name} edits]\n{get_diff(before, after)}\n[End of edits]"
193
+
194
+
195
# Result of get_tool_documentation for the two meta tools defined in this
# module; it is computed once, when the module is imported.
META_TOOL_DOCSTRING = get_tool_documentation(
    [get_tool_descriptions, view_media_artifact]
)
@@ -1,20 +0,0 @@
1
- from .agent import Agent, AgentCoder, AgentPlanner
2
- from .vision_agent import VisionAgent
3
- from .vision_agent_coder import (
4
- AnthropicVisionAgentCoder,
5
- AzureVisionAgentCoder,
6
- OllamaVisionAgentCoder,
7
- OpenAIVisionAgentCoder,
8
- VisionAgentCoder,
9
- )
10
- from .vision_agent_coder_v2 import VisionAgentCoderV2
11
- from .vision_agent_planner import (
12
- AnthropicVisionAgentPlanner,
13
- AzureVisionAgentPlanner,
14
- OllamaVisionAgentPlanner,
15
- OpenAIVisionAgentPlanner,
16
- PlanContext,
17
- VisionAgentPlanner,
18
- )
19
- from .vision_agent_planner_v2 import VisionAgentPlannerV2
20
- from .vision_agent_v2 import VisionAgentV2