vision-agent 0.2.48__py3-none-any.whl → 0.2.50__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -36,11 +36,25 @@ logging.basicConfig(stream=sys.stdout)
36
36
  _LOGGER = logging.getLogger(__name__)
37
37
  _MAX_TABULATE_COL_WIDTH = 80
38
38
  _CONSOLE = Console()
39
- _DEFAULT_IMPORT = "\n".join(T.__new_tools__) + "\n".join(
40
- [
39
+
40
+
41
+ class DefaultImports:
42
+ """Container for default imports used in the code execution."""
43
+
44
+ common_imports = [
41
45
  "from typing import *",
42
46
  ]
43
- )
47
+
48
+ @staticmethod
49
+ def to_code_string() -> str:
50
+ return "\n".join(DefaultImports.common_imports + T.__new_tools__)
51
+
52
+ @staticmethod
53
+ def prepend_imports(code: str) -> str:
54
+ """Run this method to prepend the default imports to the code.
55
+ NOTE: be sure to run this method after the custom tools have been registered.
56
+ """
57
+ return DefaultImports.to_code_string() + "\n\n" + code
44
58
 
45
59
 
46
60
  def get_diff(before: str, after: str) -> str:
@@ -202,18 +216,20 @@ def write_and_test_code(
202
216
  "type": "code",
203
217
  "status": "running",
204
218
  "payload": {
205
- "code": code,
219
+ "code": DefaultImports.prepend_imports(code),
206
220
  "test": test,
207
221
  },
208
222
  }
209
223
  )
210
- result = code_interpreter.exec_isolation(f"{_DEFAULT_IMPORT}\n{code}\n{test}")
224
+ result = code_interpreter.exec_isolation(
225
+ f"{DefaultImports.to_code_string()}\n{code}\n{test}"
226
+ )
211
227
  log_progress(
212
228
  {
213
229
  "type": "code",
214
230
  "status": "completed" if result.success else "failed",
215
231
  "payload": {
216
- "code": code,
232
+ "code": DefaultImports.prepend_imports(code),
217
233
  "test": test,
218
234
  "result": result.to_json(),
219
235
  },
@@ -264,19 +280,21 @@ def write_and_test_code(
264
280
  "type": "code",
265
281
  "status": "running",
266
282
  "payload": {
267
- "code": code,
283
+ "code": DefaultImports.prepend_imports(code),
268
284
  "test": test,
269
285
  },
270
286
  }
271
287
  )
272
288
 
273
- result = code_interpreter.exec_isolation(f"{_DEFAULT_IMPORT}\n{code}\n{test}")
289
+ result = code_interpreter.exec_isolation(
290
+ f"{DefaultImports.to_code_string()}\n{code}\n{test}"
291
+ )
274
292
  log_progress(
275
293
  {
276
294
  "type": "code",
277
295
  "status": "completed" if result.success else "failed",
278
296
  "payload": {
279
- "code": code,
297
+ "code": DefaultImports.prepend_imports(code),
280
298
  "test": test,
281
299
  "result": result.to_json(),
282
300
  },
@@ -307,7 +325,14 @@ def write_and_test_code(
307
325
  def _print_code(title: str, code: str, test: Optional[str] = None) -> None:
308
326
  _CONSOLE.print(title, style=Style(bgcolor="dark_orange3", bold=True))
309
327
  _CONSOLE.print("=" * 30 + " Code " + "=" * 30)
310
- _CONSOLE.print(Syntax(code, "python", theme="gruvbox-dark", line_numbers=True))
328
+ _CONSOLE.print(
329
+ Syntax(
330
+ DefaultImports.prepend_imports(code),
331
+ "python",
332
+ theme="gruvbox-dark",
333
+ line_numbers=True,
334
+ )
335
+ )
311
336
  if test:
312
337
  _CONSOLE.print("=" * 30 + " Test " + "=" * 30)
313
338
  _CONSOLE.print(Syntax(test, "python", theme="gruvbox-dark", line_numbers=True))
@@ -464,10 +489,6 @@ class VisionAgent(Agent):
464
489
  if chat_i["role"] == "user":
465
490
  chat_i["content"] += f" Image name {media}"
466
491
 
467
- # re-grab custom tools
468
- global _DEFAULT_IMPORT
469
- _DEFAULT_IMPORT = "\n".join(T.__new_tools__)
470
-
471
492
  code = ""
472
493
  test = ""
473
494
  working_memory: List[Dict[str, str]] = []
@@ -531,38 +552,35 @@ class VisionAgent(Agent):
531
552
  working_memory.extend(results["working_memory"]) # type: ignore
532
553
  plan.append({"code": code, "test": test, "plan": plan_i})
533
554
 
534
- if self_reflection:
535
- self.log_progress(
536
- {
537
- "type": "self_reflection",
538
- "status": "started",
539
- }
540
- )
541
- reflection = reflect(
542
- chat,
543
- FULL_TASK.format(
544
- user_request=chat[0]["content"], subtasks=plan_i_str
545
- ),
546
- code,
547
- self.planner,
548
- )
549
- if self.verbosity > 0:
550
- _LOGGER.info(f"Reflection: {reflection}")
551
- feedback = cast(str, reflection["feedback"])
552
- success = cast(bool, reflection["success"])
553
- self.log_progress(
554
- {
555
- "type": "self_reflection",
556
- "status": "completed" if success else "failed",
557
- "payload": reflection,
558
- }
559
- )
560
- working_memory.append(
561
- {"code": f"{code}\n{test}", "feedback": feedback}
562
- )
563
- else:
555
+ if not self_reflection:
564
556
  break
565
557
 
558
+ self.log_progress(
559
+ {
560
+ "type": "self_reflection",
561
+ "status": "started",
562
+ }
563
+ )
564
+ reflection = reflect(
565
+ chat,
566
+ FULL_TASK.format(
567
+ user_request=chat[0]["content"], subtasks=plan_i_str
568
+ ),
569
+ code,
570
+ self.planner,
571
+ )
572
+ if self.verbosity > 0:
573
+ _LOGGER.info(f"Reflection: {reflection}")
574
+ feedback = cast(str, reflection["feedback"])
575
+ success = cast(bool, reflection["success"])
576
+ self.log_progress(
577
+ {
578
+ "type": "self_reflection",
579
+ "status": "completed" if success else "failed",
580
+ "payload": reflection,
581
+ }
582
+ )
583
+ working_memory.append({"code": f"{code}\n{test}", "feedback": feedback})
566
584
  retries += 1
567
585
 
568
586
  execution_result = cast(Execution, results["test_result"])
@@ -571,7 +589,7 @@ class VisionAgent(Agent):
571
589
  "type": "final_code",
572
590
  "status": "completed" if success else "failed",
573
591
  "payload": {
574
- "code": code,
592
+ "code": DefaultImports.prepend_imports(code),
575
593
  "test": test,
576
594
  "result": execution_result.to_json(),
577
595
  },
@@ -586,7 +604,7 @@ class VisionAgent(Agent):
586
604
  play_video(res.mp4)
587
605
 
588
606
  return {
589
- "code": code,
607
+ "code": DefaultImports.prepend_imports(code),
590
608
  "test": test,
591
609
  "test_result": execution_result,
592
610
  "plan": plan,
@@ -187,7 +187,7 @@ def extract_frames(
187
187
 
188
188
  Returns:
189
189
  List[Tuple[np.ndarray, float]]: A list of tuples containing the extracted frame
190
- and the timestamp in seconds.
190
+ as a numpy array and the timestamp in seconds.
191
191
 
192
192
  Example
193
193
  -------
@@ -515,7 +515,7 @@ def save_json(data: Any, file_path: str) -> None:
515
515
 
516
516
 
517
517
  def load_image(image_path: str) -> np.ndarray:
518
- """'load_image' is a utility function that loads an image from the given path.
518
+ """'load_image' is a utility function that loads an image from the given file path string.
519
519
 
520
520
  Parameters:
521
521
  image_path (str): The path to the image.
@@ -527,7 +527,9 @@ def load_image(image_path: str) -> np.ndarray:
527
527
  -------
528
528
  >>> load_image("path/to/image.jpg")
529
529
  """
530
-
530
+ # NOTE: sometimes the generated code passes in a NumPy array
531
+ if isinstance(image_path, np.ndarray):
532
+ return image_path
531
533
  image = Image.open(image_path).convert("RGB")
532
534
  return np.array(image)
533
535
 
@@ -647,12 +649,9 @@ def overlay_bounding_boxes(
647
649
  box = elt["bbox"]
648
650
  scores = elt["score"]
649
651
 
650
- box = [
651
- int(box[0] * width),
652
- int(box[1] * height),
653
- int(box[2] * width),
654
- int(box[3] * height),
655
- ]
652
+ # denormalize the box if it is normalized
653
+ box = denormalize_bbox(box, (height, width))
654
+
656
655
  draw.rectangle(box, outline=color[label], width=4)
657
656
  text = f"{label}: {scores:.2f}"
658
657
  text_box = draw.textbbox((box[0], box[1]), text=text, font=font)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: vision-agent
3
- Version: 0.2.48
3
+ Version: 0.2.50
4
4
  Summary: Toolset for Vision Agent
5
5
  Author: Landing AI
6
6
  Author-email: dev@landing.ai
@@ -11,7 +11,7 @@ vision_agent/agent/easytool_v2.py,sha256=CjY-sSj3abxnSq3ZHZMt-7YvRWDXEZsC6RN8FFI
11
11
  vision_agent/agent/easytool_v2_prompts.py,sha256=MZSIwovYgB-f-kdJ6btaNDVXptJn47bfOL3-Zn6NiC0,8573
12
12
  vision_agent/agent/reflexion.py,sha256=AlM5AvBJvCslXlYQdZiadq4oVHsNBm3IF_03DglTxRo,10506
13
13
  vision_agent/agent/reflexion_prompts.py,sha256=G7UAeNz_g2qCb2yN6OaIC7bQVUkda4m3z42EG8wAyfE,9342
14
- vision_agent/agent/vision_agent.py,sha256=X_LF2wRXVYAr8xMuJs3Omi8n06uVgLNgtF25sidKtfM,20424
14
+ vision_agent/agent/vision_agent.py,sha256=0EqpLyyzpRGmT7fhS2XvLeUlktgCXTE5k1KGMQ8z3_s,20963
15
15
  vision_agent/agent/vision_agent_prompts.py,sha256=hgnTlaYp2HMBHLi3e4faPb-DI5jQL9jfhKq9jyEUEgY,8370
16
16
  vision_agent/fonts/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
17
17
  vision_agent/fonts/default_font_ch_en.ttf,sha256=1YM0Z3XqLDjSNbF7ihQFSAIUdjF9m1rtHiNC_6QosTE,1594400
@@ -23,14 +23,14 @@ vision_agent/tools/__init__.py,sha256=Sng6dChynJJCYWjraXXM0tep_VPdnYl3L9vb0HMy_P
23
23
  vision_agent/tools/easytool_tools.py,sha256=pZc5dQlYINlV4nYbbzsDi3-wauA-fCeD2iGmJUMoUfE,47373
24
24
  vision_agent/tools/prompts.py,sha256=V1z4YJLXZuUl_iZ5rY0M5hHc_2tmMEUKr0WocXKGt4E,1430
25
25
  vision_agent/tools/tool_utils.py,sha256=wzRacbUpqk9hhfX_Y08rL8qP0XCN2w-8IZoYLi3Upn4,869
26
- vision_agent/tools/tools.py,sha256=Vpn2SxtjEcnztovat6qMiH52gFsDHo3ikEPrAT4e5yc,26639
26
+ vision_agent/tools/tools.py,sha256=Sc6tAYbH03TbrPKAT8XIj1YZIwhd9j2k4ia8iKHhxzM,26743
27
27
  vision_agent/utils/__init__.py,sha256=Ce4yPhoWanRsnTy3X7YzZNBYYRJsrJeT7N59WUf8GZM,209
28
28
  vision_agent/utils/execute.py,sha256=GqoAodxtwTPBr1nujPTsWiZO2rBGvWVXTe8lgxY4d_g,20603
29
29
  vision_agent/utils/image_utils.py,sha256=_cdiS5YrLzqkq_ZgFUO897m5M4_SCIThwUy4lOklfB8,7700
30
30
  vision_agent/utils/sim.py,sha256=oUZ-6eu8Io-UNt9GXJ0XRKtP-Wc0sPWVzYGVpB2yDFk,3001
31
31
  vision_agent/utils/type_defs.py,sha256=BlI8ywWHAplC7kYWLvt4AOdnKpEW3qWEFm-GEOSkrFQ,1792
32
32
  vision_agent/utils/video.py,sha256=BJ9fomy2giAl038JThQP1WQZ-u4J4J_nsZB7QEWvlcQ,8767
33
- vision_agent-0.2.48.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
34
- vision_agent-0.2.48.dist-info/METADATA,sha256=sJSWNAHN2-JMNb5hi4iA-HTzKNskLioIse9sdrMDuy4,6817
35
- vision_agent-0.2.48.dist-info/WHEEL,sha256=7Z8_27uaHI_UZAc4Uox4PpBhQ9Y5_modZXWMxtUi4NU,88
36
- vision_agent-0.2.48.dist-info/RECORD,,
33
+ vision_agent-0.2.50.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
34
+ vision_agent-0.2.50.dist-info/METADATA,sha256=nLyeSFYnn4Bv_RyKzrP5iqnCRRkwCZT_d3euN1zgBOA,6817
35
+ vision_agent-0.2.50.dist-info/WHEEL,sha256=7Z8_27uaHI_UZAc4Uox4PpBhQ9Y5_modZXWMxtUi4NU,88
36
+ vision_agent-0.2.50.dist-info/RECORD,,