vision-agent 0.2.193__py3-none-any.whl → 0.2.196__py3-none-any.whl

Sign up to get free protection for your applications and to get access to all the features.
@@ -1,14 +1,22 @@
1
+ import copy
1
2
  import json
2
3
  import logging
3
4
  import re
4
5
  import sys
5
- from typing import Any, Dict, List, Optional
6
+ import tempfile
7
+ from typing import Any, Dict, List, Optional, Tuple, cast
6
8
 
9
+ import libcst as cst
10
+ from pydantic import BaseModel
7
11
  from rich.console import Console
8
12
  from rich.style import Style
9
13
  from rich.syntax import Syntax
14
+ from rich.table import Table
10
15
 
11
16
  import vision_agent.tools as T
17
+ from vision_agent.lmm.types import Message
18
+ from vision_agent.utils.execute import CodeInterpreter, Execution
19
+ from vision_agent.utils.image_utils import b64_to_pil, convert_to_b64
12
20
 
13
21
  logging.basicConfig(stream=sys.stdout)
14
22
  _LOGGER = logging.getLogger(__name__)
@@ -16,6 +24,19 @@ _CONSOLE = Console()
16
24
  _MAX_TABULATE_COL_WIDTH = 80
17
25
 
18
26
 
27
class PlanContext(BaseModel):
    """Pydantic model bundling a plan string, its instructions and code.

    ``format_plan_v2`` renders the three fields, in declaration order, into a
    single string.
    """

    # Plan text; format_plan_v2 emits it first, followed by a newline.
    plan: str
    # Instruction strings; format_plan_v2 lists them one per line.
    instructions: List[str]
    # Code text; format_plan_v2 appends it after a "Code:" heading.
    code: str
31
+
32
+
33
class CodeContext(BaseModel):
    """Pydantic model grouping code, test code, a success flag and an Execution.

    NOTE(review): no producer or consumer of this model is visible here, so
    the field notes below are inferred from the names only -- confirm against
    callers.
    """

    # Presumably the generated solution code -- TODO confirm.
    code: str
    # Presumably the test code run against ``code`` -- TODO confirm.
    test: str
    # Presumably whether the test run succeeded -- TODO confirm.
    success: bool
    # Execution object imported from vision_agent.utils.execute.
    test_result: Execution
38
+
39
+
19
40
  def _extract_sub_json(json_str: str) -> Optional[Dict[str, Any]]:
20
41
  json_pattern = r"\{.*\}"
21
42
  match = re.search(json_pattern, json_str, re.DOTALL)
@@ -121,7 +142,7 @@ def remove_installs_from_code(code: str) -> str:
121
142
  return code
122
143
 
123
144
 
124
- def format_memory(memory: List[Dict[str, str]]) -> str:
145
+ def format_feedback(memory: List[Dict[str, str]]) -> str:
125
146
  output_str = ""
126
147
  for i, m in enumerate(memory):
127
148
  output_str += f"### Feedback {i}:\n"
@@ -134,6 +155,16 @@ def format_memory(memory: List[Dict[str, str]]) -> str:
134
155
  return output_str
135
156
 
136
157
 
158
def format_plan_v2(plan: PlanContext) -> str:
    """Render a ``PlanContext`` as one text block.

    Layout: the plan text on its own line, an "Instructions:" heading with one
    " - <instruction>" line per entry, then a "Code:" heading followed by the
    plan's code (no trailing newline is added after the code).
    """
    instruction_lines = "".join(f" - {step}\n" for step in plan.instructions)
    return f"{plan.plan}\nInstructions:\n{instruction_lines}Code:\n{plan.code}"
166
+
167
+
137
168
  def format_plans(plans: Dict[str, Any]) -> str:
138
169
  plan_str = ""
139
170
  for k, v in plans.items():
@@ -172,12 +203,189 @@ def print_code(title: str, code: str, test: Optional[str] = None) -> None:
172
203
  _CONSOLE.print("=" * 30 + " Code " + "=" * 30)
173
204
  _CONSOLE.print(
174
205
  Syntax(
175
- DefaultImports.prepend_imports(code),
206
+ code,
176
207
  "python",
177
208
  theme="gruvbox-dark",
178
209
  line_numbers=True,
210
+ word_wrap=True,
179
211
  )
180
212
  )
181
213
  if test:
182
214
  _CONSOLE.print("=" * 30 + " Test " + "=" * 30)
183
215
  _CONSOLE.print(Syntax(test, "python", theme="gruvbox-dark", line_numbers=True))
216
+
217
+
218
def print_table(title: str, columns: List[str], rows: List[List[str]]) -> None:
    """Print ``rows`` under ``columns`` as a rich table on the module console.

    After every data row except the last, an extra row of dashes is inserted;
    each dash run is as long as the cell directly above it.
    """
    grid = Table(title=title, show_header=True, header_style="bold magenta")
    for heading in columns:
        grid.add_column(heading, style="cyan", no_wrap=True)

    last_index = len(rows) - 1
    for index, cells in enumerate(rows):
        grid.add_row(*cells)
        if index == last_index:
            # No divider after the final data row.
            continue
        divider = ["-" * len(cell) for cell in cells]
        grid.add_row(*divider)
    _CONSOLE.print(grid)
228
+
229
+
230
+ def add_media_to_chat(
231
+ chat: List[Message], code_interpreter: CodeInterpreter
232
+ ) -> Tuple[List[Message], List[Message], List[str]]:
233
+ orig_chat = copy.deepcopy(chat)
234
+ int_chat = copy.deepcopy(chat)
235
+ media_list = []
236
+ for chat_i in int_chat:
237
+ if "media" in chat_i:
238
+ media_list_i = []
239
+ for media in chat_i["media"]:
240
+ if isinstance(media, str) and media.startswith("data:image/"):
241
+ media_pil = b64_to_pil(media)
242
+ with tempfile.NamedTemporaryFile(
243
+ mode="wb", suffix=".png", delete=False
244
+ ) as temp_file:
245
+ media_pil.save(temp_file, format="PNG")
246
+ media = str(temp_file.name)
247
+ media = str(code_interpreter.upload_file(media)) # type: ignore
248
+ media_list_i.append(media)
249
+ # don't duplicate appending media name
250
+ if not str(chat_i["content"]).endswith(f" Media name {media}"):
251
+ chat_i["content"] += f" Media name {media}" # type: ignore
252
+ chat_i["media"] = media_list_i
253
+ media_list.extend(media_list_i)
254
+
255
+ int_chat = cast(
256
+ List[Message],
257
+ [
258
+ (
259
+ {
260
+ "role": c["role"],
261
+ "content": c["content"],
262
+ "media": c["media"],
263
+ }
264
+ if "media" in c
265
+ else {"role": c["role"], "content": c["content"]}
266
+ )
267
+ for c in int_chat
268
+ ],
269
+ )
270
+ return int_chat, orig_chat, media_list
271
+
272
+
273
def capture_media_from_exec(execution: Execution) -> List[str]:
    """Collect the png/jpeg outputs of ``execution`` as PNG data-URI strings.

    Every result in ``execution.results`` is scanned; for each of its formats
    that is "png" or "jpeg", the payload is passed through ``b64_to_pil`` and
    ``convert_to_b64`` and prefixed with "data:image/png;base64,".
    """
    image_formats = ["png", "jpeg"]
    return [
        # converts the image to png and then to base64
        "data:image/png;base64," + convert_to_b64(b64_to_pil(result[fmt]))
        for result in execution.results
        for fmt in result.formats()
        if fmt in image_formats
    ]
284
+
285
+
286
def strip_function_calls(  # noqa: C901
    code: str, exclusions: Optional[List[str]] = None
) -> str:
    """Strip call statements that sit outside any function or class body.

    Expression statements and assignments whose value is a function call are
    removed unless they are located inside a ``def``/``class`` body, or the
    call target is a plain name listed in ``exclusions``. Definitions
    themselves are always kept.

    Args:
        code: Python source to rewrite; it is parsed with
            ``cst.parse_module``.
        exclusions: Plain function names whose calls must be kept. Only calls
            of the form ``name(...)`` match; attribute calls such as
            ``obj.name(...)`` never match. ``None`` means no exclusions.

    Returns:
        The source code of the transformed module.
    """
    if exclusions is None:
        exclusions = []

    def check_and_remove_node(node: cst.CSTNode, exclusions: List[str]) -> cst.CSTNode:
        # Only nodes whose ``value`` attribute is a call are removal candidates.
        value = getattr(node, "value", None)
        if not isinstance(value, cst.Call):
            return node
        if isinstance(value.func, cst.Name) and value.func.value in exclusions:
            return node
        return cst.RemoveFromParent()  # type: ignore

    class StripFunctionCallsTransformer(cst.CSTTransformer):
        def __init__(self, exclusions: List[str]):
            super().__init__()
            # Store exclusions to skip removing certain function calls
            self.exclusions = exclusions
            # Nesting depth of enclosing def/class bodies. A counter is used
            # instead of a single flag so that leaving an inner definition
            # (a nested function or a method) does not make the remainder of
            # the enclosing body look like top-level code.
            self.scope_depth = 0

        @property
        def in_function_or_class(self) -> bool:
            return self.scope_depth > 0

        def visit_FunctionDef(self, node: cst.FunctionDef) -> Optional[bool]:
            self.scope_depth += 1
            return True

        def leave_FunctionDef(
            self, original_node: cst.FunctionDef, updated_node: cst.FunctionDef
        ) -> cst.BaseStatement:
            self.scope_depth -= 1
            return updated_node

        def visit_ClassDef(self, node: cst.ClassDef) -> Optional[bool]:
            self.scope_depth += 1
            return True

        def leave_ClassDef(
            self, node: cst.ClassDef, updated_node: cst.ClassDef
        ) -> cst.BaseStatement:
            self.scope_depth -= 1
            return updated_node

        def _strip_outside_definitions(self, updated_node: cst.CSTNode) -> cst.CSTNode:
            # Statements inside any def/class body are always kept.
            if self.in_function_or_class:
                return updated_node
            return check_and_remove_node(updated_node, self.exclusions)

        def leave_Expr(
            self, original_node: cst.Expr, updated_node: cst.Expr
        ) -> cst.Expr:
            return cast(cst.Expr, self._strip_outside_definitions(updated_node))

        def leave_Assign(
            self, original_node: cst.Assign, updated_node: cst.Assign
        ) -> cst.Assign:
            return cast(cst.Assign, self._strip_outside_definitions(updated_node))

        # NOTE(review): the compound-statement hooks below run the same check,
        # which only acts on nodes exposing a ``value`` attribute; they look
        # like pass-throughs for If/For/While/With/Try -- confirm against the
        # libcst node definitions before removing them.
        def leave_If(self, original_node: cst.If, updated_node: cst.If) -> cst.If:
            return cast(cst.If, self._strip_outside_definitions(updated_node))

        def leave_For(self, original_node: cst.For, updated_node: cst.For) -> cst.For:
            return cast(cst.For, self._strip_outside_definitions(updated_node))

        def leave_While(
            self, original_node: cst.While, updated_node: cst.While
        ) -> cst.While:
            return cast(cst.While, self._strip_outside_definitions(updated_node))

        def leave_With(
            self, original_node: cst.With, updated_node: cst.With
        ) -> cst.With:
            return cast(cst.With, self._strip_outside_definitions(updated_node))

        def leave_Try(self, original_node: cst.Try, updated_node: cst.Try) -> cst.Try:
            return cast(cst.Try, self._strip_outside_definitions(updated_node))

    tree = cst.parse_module(code)
    transformer = StripFunctionCallsTransformer(exclusions)
    modified_tree = tree.visit(transformer)
    return modified_tree.code
@@ -5,7 +5,6 @@ import sys
5
5
  from pathlib import Path
6
6
  from typing import Any, Callable, Dict, List, Optional, Sequence, Union, cast
7
7
 
8
- import libcst as cst
9
8
  from tabulate import tabulate
10
9
 
11
10
  import vision_agent.tools as T
@@ -15,9 +14,10 @@ from vision_agent.agent.agent_utils import (
15
14
  DefaultImports,
16
15
  extract_code,
17
16
  extract_tag,
18
- format_memory,
17
+ format_feedback,
19
18
  print_code,
20
19
  remove_installs_from_code,
20
+ strip_function_calls,
21
21
  )
22
22
  from vision_agent.agent.vision_agent_coder_prompts import (
23
23
  CODE,
@@ -49,114 +49,6 @@ WORKSPACE = Path(os.getenv("WORKSPACE", ""))
49
49
  _LOGGER = logging.getLogger(__name__)
50
50
 
51
51
 
52
- def strip_function_calls( # noqa: C901
53
- code: str, exclusions: Optional[List[str]] = None
54
- ) -> str:
55
- """This will strip out all code that calls functions except for functions included
56
- in exclusions.
57
- """
58
- if exclusions is None:
59
- exclusions = []
60
-
61
- def check_and_remove_node(node: cst.CSTNode, exclusions: List[str]) -> cst.CSTNode:
62
- if hasattr(node, "value") and isinstance(node.value, cst.Call):
63
- if (
64
- isinstance(node.value.func, cst.Name)
65
- and node.value.func.value in exclusions
66
- ):
67
- return node
68
- return cst.RemoveFromParent() # type: ignore
69
- return node
70
-
71
- class StripFunctionCallsTransformer(cst.CSTTransformer):
72
- def __init__(self, exclusions: List[str]):
73
- # Store exclusions to skip removing certain function calls
74
- self.exclusions = exclusions
75
- self.in_function_or_class = False
76
-
77
- def visit_FunctionDef(self, node: cst.FunctionDef) -> Optional[bool]:
78
- self.in_function_or_class = True
79
- return True
80
-
81
- def leave_FunctionDef(
82
- self, original_node: cst.FunctionDef, updated_node: cst.FunctionDef
83
- ) -> cst.BaseStatement:
84
- self.in_function_or_class = False
85
- return updated_node
86
-
87
- def visit_ClassDef(self, node: cst.ClassDef) -> Optional[bool]:
88
- self.in_function_or_class = True
89
- return True
90
-
91
- def leave_ClassDef(
92
- self, node: cst.ClassDef, updated_node: cst.ClassDef
93
- ) -> cst.BaseStatement:
94
- self.in_function_or_class = False
95
- return updated_node
96
-
97
- def leave_Expr(
98
- self, original_node: cst.Expr, updated_node: cst.Expr
99
- ) -> cst.Expr:
100
- if not self.in_function_or_class:
101
- return cast(
102
- cst.Expr, check_and_remove_node(updated_node, self.exclusions)
103
- )
104
- return updated_node
105
-
106
- def leave_Assign(
107
- self, original_node: cst.Assign, updated_node: cst.Assign
108
- ) -> cst.Assign:
109
- if not self.in_function_or_class:
110
- return cast(
111
- cst.Assign, check_and_remove_node(updated_node, self.exclusions)
112
- )
113
- return updated_node
114
-
115
- def leave_If(self, original_node: cst.If, updated_node: cst.If) -> cst.If:
116
- if not self.in_function_or_class:
117
- return cast(
118
- cst.If, check_and_remove_node(updated_node, self.exclusions)
119
- )
120
- return updated_node
121
-
122
- def leave_For(self, original_node: cst.For, updated_node: cst.For) -> cst.For:
123
- if not self.in_function_or_class:
124
- return cast(
125
- cst.For, check_and_remove_node(updated_node, self.exclusions)
126
- )
127
- return updated_node
128
-
129
- def leave_While(
130
- self, original_node: cst.While, updated_node: cst.While
131
- ) -> cst.While:
132
- if not self.in_function_or_class:
133
- return cast(
134
- cst.While, check_and_remove_node(updated_node, self.exclusions)
135
- )
136
- return updated_node
137
-
138
- def leave_With(
139
- self, original_node: cst.With, updated_node: cst.With
140
- ) -> cst.With:
141
- if not self.in_function_or_class:
142
- return cast(
143
- cst.With, check_and_remove_node(updated_node, self.exclusions)
144
- )
145
- return updated_node
146
-
147
- def leave_Try(self, original_node: cst.Try, updated_node: cst.Try) -> cst.Try:
148
- if not self.in_function_or_class:
149
- return cast(
150
- cst.Try, check_and_remove_node(updated_node, self.exclusions)
151
- )
152
- return updated_node
153
-
154
- tree = cst.parse_module(code)
155
- transformer = StripFunctionCallsTransformer(exclusions)
156
- modified_tree = tree.visit(transformer)
157
- return modified_tree.code
158
-
159
-
160
52
  def write_code(
161
53
  coder: LMM,
162
54
  chat: List[Message],
@@ -237,11 +129,11 @@ def write_and_test_code(
237
129
  tool_info,
238
130
  tool_output,
239
131
  plan_thoughts,
240
- format_memory(working_memory),
132
+ format_feedback(working_memory),
241
133
  )
242
134
  code = strip_function_calls(code)
243
135
  test = write_test(
244
- tester, chat, tool_utils, code, format_memory(working_memory), media
136
+ tester, chat, tool_utils, code, format_feedback(working_memory), media
245
137
  )
246
138
 
247
139
  log_progress(
@@ -350,7 +242,7 @@ def debug_code(
350
242
  result="\n".join(
351
243
  result.text(include_results=False).splitlines()[-50:]
352
244
  ),
353
- feedback=format_memory(working_memory + new_working_memory),
245
+ feedback=format_feedback(working_memory + new_working_memory),
354
246
  ),
355
247
  stream=False,
356
248
  )
@@ -0,0 +1,119 @@
1
+ FEEDBACK = """
2
+ ## This contains code and feedback from previous runs and is used for providing context so you do not make the same mistake again.
3
+
4
+ {feedback}
5
+ """
6
+
7
+
8
+ CODE = """
9
+ **Role**: You are an expert software programmer.
10
+
11
+ **Task**: You are given a plan by a planning agent that solves a vision problem posed by the user. You are also given code snippets that the planning agent used to solve the task. Your job is to organize the code so that it can be easily called by the user to solve the task.
12
+
13
+ **Documentation**:
14
+ This is the documentation for the functions you have access to. You may call any of these functions to help you complete the task. They are available through importing `from vision_agent.tools import *`.
15
+
16
+ {docstring}
17
+
18
+ **User Instructions**:
19
+ {question}
20
+
21
+ **Plan**:
22
+ --- START PLAN ---
23
+ {plan}
24
+ --- END PLAN ---
25
+
26
+ **Instructions**:
27
+ 1. Reread the plan and all code and understand the task.
28
+ 2. Organize the code snippets into a single function that can be called by the user.
29
+ 3. DO NOT alter the code logic and ensure you utilize all the code provided as is without changing it.
30
+ 4. DO NOT create dummy input or functions, the code must be usable if the user provides new media.
31
+ 5. DO NOT hardcode the output, the function must work for any media provided by the user.
32
+ 6. Ensure the function is well-documented and follows the best practices and returns the expected output from the user.
33
+ 7. Output your code using <code> tags:
34
+
35
+ <code>
36
+ # your code here
37
+ </code>
38
+ """
39
+
40
+
41
+ TEST = """
42
+ **Role**: As a tester, your task is to create a simple test case for the provided code. This test case should verify the fundamental functionality under normal conditions.
43
+
44
+ **Documentation**:
45
+ This is the documentation for the functions you have access to. You may call any of these functions to help you complete the task. They are available through importing `from vision_agent.tools import *`. You do not need to test these functions, only the code provided by the user.
46
+
47
+ {docstring}
48
+
49
+ **User Instructions**:
50
+ {question}
51
+
52
+ **Input Code Snippet**:
53
+ <code>
54
+ ### Please decide how would you want to generate test cases. Based on incomplete code or completed version.
55
+ {code}
56
+ </code>
57
+
58
+ **Instructions**:
59
+ 1. Verify the fundamental functionality under normal conditions.
60
+ 2. Ensure each test case is well-documented with comments explaining the scenario it covers.
61
+ 3. Your test case MUST run only on the given images which are {media}
62
+ 4. Your test case MUST run only with the given values which are available in the question - {question}
63
+ 5. DO NOT use any non-existent or dummy image or video files that are not provided by the user's instructions.
64
+ 6. DO NOT mock any functions, you must test their functionality as is.
65
+ 7. DO NOT assert the output value, run the code and assert only the output format or data structure.
66
+ 8. DO NOT use try except block to handle the error, let the error be raised if the code is incorrect.
67
+ 9. DO NOT import the testing function as it will be available in the testing environment.
68
+ 10. Print the output of the function that is being tested.
69
+ 11. Use the output of the function that is being tested as the return value of the testing function.
70
+ 12. Run the testing function in the end and don't assign a variable to its output.
71
+ 13. Output your test code using <code> tags:
72
+
73
+ <code>
74
+ # your test code here
75
+ </code>
76
+ """
77
+
78
+
79
+ FIX_BUG = """
80
+ **Role**: As a coder, your job is to find the error in the code and fix it. You are running in a notebook setting so you can run !pip install to install missing packages.
81
+
82
+ **Task**: A previous agent has written some code and some testing code according to a plan given to it. It has introduced a bug into its code while trying to implement the plan. You are given the plan, code, test code and error. Your job is to fix the error in the code or test code.
83
+
84
+ **Documentation**:
85
+ This is the documentation for the functions you have access to. You may call any of these functions to help you complete the task. They are available through importing `from vision_agent.tools import *`.
86
+
87
+ {docstring}
88
+
89
+
90
+ **Plan**:
91
+ --- START PLAN ---
92
+ {plan}
93
+ --- END PLAN ---
94
+
95
+ **Instructions**:
96
+ Please re-complete the code to fix the error message. Here is the current version of the CODE:
97
+ <code>
98
+ {code}
99
+ </code>
100
+
101
+ When we run the TEST code:
102
+ <test>
103
+ {tests}
104
+ </test>
105
+
106
+ It raises this error, if the error is empty it means the code and tests were not run:
107
+ <error>
108
+ {result}
109
+ </error>
110
+
111
+ This is from your previous attempt to fix the bug, if it is empty no previous attempt has been made:
112
+ {debug}
113
+
114
+ Please fix the bug by correcting the error. ONLY change the code logic if it is necessary to fix the bug. Do not change the code logic for any other reason. Output your fixed code using <code> tags and fixed test using <test> tags:
115
+
116
+ <thoughts>Your thoughts here...</thoughts>
117
+ <code># your fixed code here</code>
118
+ <test># your fixed test here</test>
119
+ """