vision-agent 0.2.23__py3-none-any.whl → 0.2.25__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
vision_agent/agent/agent_coder.py CHANGED
@@ -5,6 +5,9 @@ import sys
 from pathlib import Path
 from typing import Dict, List, Optional, Union
 
+from rich.console import Console
+from rich.syntax import Syntax
+
 from vision_agent.agent import Agent
 from vision_agent.agent.agent_coder_prompts import (
     DEBUG,
@@ -40,6 +43,7 @@ from vision_agent.tools.tools_v2 import *
 logging.basicConfig(stream=sys.stdout)
 _LOGGER = logging.getLogger(__name__)
 _EXECUTE = Execute()
+_CONSOLE = Console()
 
 
 def write_tests(question: str, code: str, model: LLM) -> str:
@@ -103,7 +107,7 @@ def run_visual_tests(
 
 
 def fix_bugs(code: str, tests: str, result: str, feedback: str, model: LLM) -> str:
-    prompt = FIX_BUG.format(completion=code, test_case=tests, result=result)
+    prompt = FIX_BUG.format(code=code, tests=tests, result=result, feedback=feedback)
     completion = model(prompt)
     return preprocess_data(completion)
 
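The `fix_bugs` prompt now receives the model feedback and uses keyword names that match the new template placeholders. A minimal sketch of the relationship (the real template lives in agent_coder_prompts.py; the stand-in below only assumes it uses {code}, {tests}, {result} and {feedback} placeholders):

    # Hypothetical stand-in for agent_coder_prompts.FIX_BUG; the real prompt is longer.
    FIX_BUG = (
        "Here is code with a bug:\n{code}\n\n"
        "Here are the tests:\n{tests}\n\n"
        "Here is the failing output:\n{result}\n\n"
        "Prior feedback:\n{feedback}\n\n"
        "Return the corrected code."
    )

    # Keyword arguments must match the placeholder names, which is why the call
    # changed from completion=/test_case= to code=/tests=/result=/feedback=.
    prompt = FIX_BUG.format(
        code="def f():\n    return 2",
        tests="assert f() == 1",
        result="AssertionError",
        feedback="",
    )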
@@ -139,7 +143,8 @@ class AgentCoder(Agent):
             else visual_tester_agent
         )
         self.max_turns = 3
-        if verbose:
+        self.verbose = verbose
+        if self.verbose:
             _LOGGER.setLevel(logging.INFO)
 
     def __call__(
@@ -164,9 +169,15 @@ class AgentCoder(Agent):
         feedback = ""
         for _ in range(self.max_turns):
             code = write_program(question, feedback, self.coder_agent)
-            _LOGGER.info(f"code:\n{code}")
+            if self.verbose:
+                _CONSOLE.print(
+                    Syntax(code, "python", theme="gruvbox-dark", line_numbers=True)
+                )
             debug = write_debug(question, code, feedback, self.tester_agent)
-            _LOGGER.info(f"debug:\n{debug}")
+            if self.verbose:
+                _CONSOLE.print(
+                    Syntax(debug, "python", theme="gruvbox-dark", line_numbers=True)
+                )
             results = execute_tests(code, debug)
             _LOGGER.info(
                 f"execution results: passed: {results['passed']}\n{results['result']}"
@@ -176,7 +187,10 @@ class AgentCoder(Agent):
                 code = fix_bugs(
                     code, debug, results["result"].strip(), feedback, self.coder_agent  # type: ignore
                 )
-                _LOGGER.info(f"fixed code:\n{code}")
+                if self.verbose:
+                    _CONSOLE.print(
+                        Syntax(code, "python", theme="gruvbox-dark", line_numbers=True)
+                    )
             else:
                 # TODO: Sometimes it prints nothing, so we need to handle that case
                 # TODO: The visual agent reflection does not work very well, needs more testing
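When `verbose=True`, AgentCoder now renders generated code with rich's syntax highlighting instead of plain `_LOGGER.info` dumps. The pattern in isolation:

    from rich.console import Console
    from rich.syntax import Syntax

    _CONSOLE = Console()
    code = "def add(a, b):\n    return a + b"

    # Pretty-print the code with highlighting and line numbers, as AgentCoder
    # does for the generated program, the tests, and each bug fix.
    _CONSOLE.print(Syntax(code, "python", theme="gruvbox-dark", line_numbers=True))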
vision_agent/agent/vision_agent_v2.py CHANGED
@@ -4,6 +4,7 @@ from pathlib import Path
 from typing import Any, Callable, Dict, List, Mapping, Optional, Tuple, Union
 
 import pandas as pd
+from langsmith import traceable
 from rich.console import Console
 from rich.syntax import Syntax
 from tabulate import tabulate
@@ -66,6 +67,7 @@ def extract_json(json_str: str) -> Dict[str, Any]:
     return json_dict  # type: ignore
 
 
+@traceable(name="planning")
 def write_plan(
     chat: List[Dict[str, str]],
     plan: Optional[List[Dict[str, Any]]],
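`write_plan` (and, further down, `run_plan` and `chat_with_workflow`) are now decorated with LangSmith's `@traceable`, so each call is recorded as a named run when tracing is enabled. A minimal sketch of the decorator in use; the environment variables are LangSmith's standard tracing configuration, not something this package sets for you:

    import os
    from langsmith import traceable

    # LangSmith reads these from the environment when tracing is enabled.
    os.environ["LANGCHAIN_TRACING_V2"] = "true"
    os.environ["LANGCHAIN_API_KEY"] = "<your-langsmith-api-key>"

    @traceable(name="planning")
    def write_plan(user_request: str) -> list:
        # A real implementation would call an LLM; this stub returns a single task.
        return [{"instruction": f"Write code to: {user_request}", "type": "code"}]

    write_plan("count the dogs in image.jpg")  # appears in LangSmith as a "planning" run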
@@ -214,6 +216,7 @@ def write_and_exec_code(
     return success, code, result, working_memory
 
 
+@traceable(name="plan execution")
 def run_plan(
     user_req: str,
     plan: List[Dict[str, Any]],
@@ -235,7 +238,7 @@ def run_plan(
             f"""
 {tabulate(tabular_data=[task], headers="keys", tablefmt="mixed_grid", maxcolwidths=_MAX_TABULATE_COL_WIDTH)}"""
         )
-        tools = tool_recommender.top_k(task["instruction"])
+        tools = tool_recommender.top_k(task["instruction"], thresh=0.3)
         tool_info = "\n".join([e["doc"] for e in tools])
 
         if verbosity == 2:
@@ -285,6 +288,7 @@ class VisionAgentV2(Agent):
     solve vision tasks. It is inspired by MetaGPT's Data Interpreter
     https://arxiv.org/abs/2402.18679. Vision Agent has several key features to help it
     generate code:
+
     - A planner to generate a plan of tasks to solve a user requirement. The planner
     can output code tasks or test tasks, where test tasks are used to verify the code.
     - Automatic debugging, if a task fails, the agent will attempt to debug the code
@@ -333,6 +337,7 @@ class VisionAgentV2(Agent):
         results = self.chat_with_workflow(input, image, plan)
         return results["code"]  # type: ignore
 
+    @traceable
     def chat_with_workflow(
         self,
         chat: List[Dict[str, str]],
@@ -377,7 +382,9 @@ class VisionAgentV2(Agent):
                 self.long_term_memory,
                 self.verbosity,
             )
-            success = all(task["success"] for task in plan)
+            success = all(
+                task["success"] if "success" in task else False for task in plan
+            )
             working_memory.update(working_memory_i)
 
             if not success:
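The reworked `success` check treats plan items that never received a 'success' key (for example, tasks skipped after an earlier failure) as failures instead of raising a KeyError. The same behaviour, illustrated with `dict.get`:

    plan = [
        {"instruction": "load the image", "success": True},
        {"instruction": "detect the dogs"},  # never executed, no "success" key
    ]

    # Old behaviour: plan[1]["success"] raises KeyError.
    # New behaviour: a missing key counts as failure.
    success = all(task["success"] if "success" in task else False for task in plan)
    assert success is False

    # Equivalent spelling:
    assert all(task.get("success", False) for task in plan) is False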
vision_agent/agent/vision_agent_v2_prompt.py CHANGED
@@ -34,7 +34,7 @@ PLAN = """
 
 # Task:
 Based on the context and the tools you have available, write a plan of subtasks to achieve the user request that adhere to the following requirements:
-- For each subtask, you should provide a short instruction on what to do. Ensure the subtasks are large enough to be meaningful, encompassing multiple lines of code.
+- For each subtask, you should provide instructions on what to do. Write detailed subtasks, ensure they are large enough to be meaningful, encompassing multiple lines of code.
 - You do not need to have the agent rewrite any tool functionality you already have, you should instead instruct it to utilize one or more of those tools in each subtask.
 - You can have agents either write coding tasks, to code some functionality or testing tasks to test previous functionality.
 - If a current plan exists, examine each item in the plan to determine if it was successful. If there was an item that failed, i.e. 'success': False, then you should rewrite that item and all subsequent items to ensure that the rewritten plan is successful.
@@ -73,9 +73,10 @@ CODE = """
 {code}
 
 # Constraints
-- Write a function that accomplishes the 'User Requirement'. You are supplied code from a previous task under 'Previous Code', feel free to copy over that code into your own implementation if you need it.
-- Always prioritize using pre-defined tools or code for the same functionality from 'Tool Info for Current Subtask'. You have access to all these tools through the `from vision_agent.tools.tools_v2 import *` import.
+- Write a function that accomplishes the 'Current Subtask'. You are supplied code from a previous task under 'Previous Code', do not delete or change previous code unless it contains a bug or it is necessary to complete the 'Current Subtask'.
+- Always prioritize using pre-defined tools or code for the same functionality from 'Tool Info' when working on 'Current Subtask'. You have access to all these tools through the `from vision_agent.tools.tools_v2 import *` import.
 - You may recieve previous trials and errors under 'Previous Task', this is code, output and reflections from previous tasks. You can use these to avoid running in to the same issues when writing your code.
+- Use the `save_json` function from `vision_agent.tools.tools_v2` to save your output as a json file.
 - Write clean, readable, and well-documented code.
 
 # Output
vision_agent/llm/llm.py CHANGED
@@ -3,6 +3,7 @@ import os
 from abc import ABC, abstractmethod
 from typing import Any, Callable, Dict, List, Mapping, Optional, Union, cast
 
+from langsmith.wrappers import wrap_openai
 from openai import AzureOpenAI, OpenAI
 
 from vision_agent.tools import (
@@ -41,9 +42,9 @@ class OpenAILLM(LLM):
         **kwargs: Any
     ):
         if not api_key:
-            self.client = OpenAI()
+            self.client = wrap_openai(OpenAI())
         else:
-            self.client = OpenAI(api_key=api_key)
+            self.client = wrap_openai(OpenAI(api_key=api_key))
 
         self.model_name = model_name
         self.system_prompt = system_prompt
@@ -165,8 +166,10 @@ class AzureOpenAILLM(OpenAILLM):
         if not azure_endpoint:
             raise ValueError("Azure OpenAI endpoint is required.")
 
-        self.client = AzureOpenAI(
-            api_key=api_key, api_version=api_version, azure_endpoint=azure_endpoint
+        self.client = wrap_openai(
+            AzureOpenAI(
+                api_key=api_key, api_version=api_version, azure_endpoint=azure_endpoint
+            )
         )
         self.model_name = model_name
         self.kwargs = kwargs
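Both `OpenAILLM` and `AzureOpenAILLM` now pass their clients through `langsmith.wrappers.wrap_openai`, which instruments the client so completions made through it can be traced in LangSmith. A standalone sketch, assuming `OPENAI_API_KEY` and the LangSmith tracing variables are set in the environment; the model name is just an example:

    from langsmith.wrappers import wrap_openai
    from openai import OpenAI

    # wrap_openai returns the same client with its request methods instrumented;
    # without tracing enabled it behaves like a plain OpenAI client.
    client = wrap_openai(OpenAI())

    response = client.chat.completions.create(
        model="gpt-4-turbo",
        messages=[{"role": "user", "content": "Say hello"}],
    )
    print(response.choices[0].message.content)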
vision_agent/tools/tool_utils.py CHANGED
@@ -8,7 +8,7 @@ from vision_agent.utils.type_defs import LandingaiAPIKey
 
 _LOGGER = logging.getLogger(__name__)
 _LND_API_KEY = LandingaiAPIKey().api_key
-_LND_API_URL = "https://api.dev.landing.ai/v1/agent"
+_LND_API_URL = "https://api.staging.landing.ai/v1/agent"
 
 
 def _send_inference_request(
vision_agent/tools/tools.py CHANGED
@@ -53,7 +53,7 @@ class NoOp(Tool):
 
 
 class CLIP(Tool):
-    r"""CLIP is a tool that can classify or tag any image given a set if input classes
+    r"""CLIP is a tool that can classify or tag any image given a set of input classes
     or tags.
 
     Example
vision_agent/tools/tools_v2.py CHANGED
@@ -1,5 +1,6 @@
 import inspect
 import io
+import json
 import logging
 import tempfile
 from importlib import resources
@@ -14,7 +15,14 @@ from scipy.spatial import distance  # type: ignore
 
 from vision_agent.tools.tool_utils import _send_inference_request
 from vision_agent.utils import extract_frames_from_video
-from vision_agent.utils.image_utils import convert_to_b64, normalize_bbox, rle_decode
+from vision_agent.utils.image_utils import (
+    convert_to_b64,
+    normalize_bbox,
+    rle_decode,
+    b64_to_pil,
+    get_image_size,
+    denormalize_bbox,
+)
 
 COLORS = [
     (158, 218, 229),
@@ -48,7 +56,7 @@ def grounding_dino(
     prompt: str,
     image: np.ndarray,
     box_threshold: float = 0.20,
-    iou_threshold: float = 0.75,
+    iou_threshold: float = 0.20,
 ) -> List[Dict[str, Any]]:
     """'grounding_dino' is a tool that can detect and count objects given a text prompt
     such as category names or referring expressions. It returns a list and count of
@@ -60,12 +68,13 @@ def grounding_dino(
         box_threshold (float, optional): The threshold for the box detection. Defaults
            to 0.20.
         iou_threshold (float, optional): The threshold for the Intersection over Union
-            (IoU). Defaults to 0.75.
+            (IoU). Defaults to 0.20.
 
     Returns:
         List[Dict[str, Any]]: A list of dictionaries containing the score, label, and
             bounding box of the detected objects with normalized coordinates
-            (x1, y1, x2, y2).
+            (xmin, ymin, xmax, ymax). xmin and ymin are the coordinates of the top-left and
+            xmax and ymax are the coordinates of the bottom-right of the bounding box.
 
     Example
     -------
@@ -76,7 +85,7 @@ def grounding_dino(
         ]
     """
     image_size = image.shape[:2]
-    image_b64 = convert_to_b64(Image.fromarray(image))
+    image_b64 = convert_to_b64(image)
     request_data = {
         "prompt": prompt,
         "image": image_b64,
@@ -100,7 +109,7 @@ def grounding_sam(
     prompt: str,
     image: np.ndarray,
     box_threshold: float = 0.20,
-    iou_threshold: float = 0.75,
+    iou_threshold: float = 0.20,
 ) -> List[Dict[str, Any]]:
     """'grounding_sam' is a tool that can detect and segment objects given a text
     prompt such as category names or referring expressions. It returns a list of
@@ -112,12 +121,15 @@ def grounding_sam(
         box_threshold (float, optional): The threshold for the box detection. Defaults
            to 0.20.
         iou_threshold (float, optional): The threshold for the Intersection over Union
-            (IoU). Defaults to 0.75.
+            (IoU). Defaults to 0.20.
 
     Returns:
         List[Dict[str, Any]]: A list of dictionaries containing the score, label,
             bounding box, and mask of the detected objects with normalized coordinates
-            (x1, y1, x2, y2).
+            (xmin, ymin, xmax, ymax). xmin and ymin are the coordinates of the top-left and
+            xmax and ymax are the coordinates of the bottom-right of the bounding box.
+            The mask is binary 2D numpy array where 1 indicates the object and 0 indicates
+            the background.
 
     Example
     -------
@@ -136,7 +148,7 @@ def grounding_sam(
         ]
     """
     image_size = image.shape[:2]
-    image_b64 = convert_to_b64(Image.fromarray(image))
+    image_b64 = convert_to_b64(image)
     request_data = {
         "prompt": prompt,
         "image": image_b64,
@@ -234,6 +246,152 @@ def ocr(image: np.ndarray) -> List[Dict[str, Any]]:
     return output
 
 
+def zero_shot_counting(image: np.ndarray) -> Dict[str, Any]:
+    """'zero_shot_counting' is a tool that counts the dominant foreground object given an image and no other information about the content.
+    It returns only the count of the objects in the image.
+
+    Parameters:
+        image (np.ndarray): The image that contains lot of instances of a single object
+
+    Returns:
+        Dict[str, Any]: A dictionary containing the key 'count' and the count as a value. E.g. {count: 12}.
+
+    Example
+    -------
+    >>> zero_shot_counting(image)
+    {'count': 45},
+
+    """
+
+    image_b64 = convert_to_b64(image)
+    data = {
+        "image": image_b64,
+        "tool": "zero_shot_counting",
+    }
+    resp_data = _send_inference_request(data, "tools")
+    resp_data["heat_map"] = np.array(b64_to_pil(resp_data["heat_map"][0]))
+    return resp_data
+
+
+def visual_prompt_counting(
+    image: np.ndarray, visual_prompt: Dict[str, List[float]]
+) -> Dict[str, Any]:
+    """'visual_prompt_counting' is a tool that counts the dominant foreground object given an image and a visual prompt which is a bounding box describing the object.
+    It returns only the count of the objects in the image.
+
+    Parameters:
+        image (np.ndarray): The image that contains lot of instances of a single object
+
+    Returns:
+        Dict[str, Any]: A dictionary containing the key 'count' and the count as a value. E.g. {count: 12}.
+
+    Example
+    -------
+    >>> visual_prompt_counting(image, {"bbox": [0.1, 0.1, 0.4, 0.42]})
+    {'count': 45},
+
+    """
+
+    image_size = get_image_size(image)
+    bbox = visual_prompt["bbox"]
+    bbox_str = ", ".join(map(str, denormalize_bbox(bbox, image_size)))
+    image_b64 = convert_to_b64(image)
+
+    data = {
+        "image": image_b64,
+        "prompt": bbox_str,
+        "tool": "few_shot_counting",
+    }
+    resp_data = _send_inference_request(data, "tools")
+    resp_data["heat_map"] = np.array(b64_to_pil(resp_data["heat_map"][0]))
+    return resp_data
+
+
+def image_question_answering(image: np.ndarray, prompt: str) -> str:
+    """'image_question_answering_' is a tool that can answer questions about the visual contents of an image given a question and an image.
+    It returns an answer to the question
+
+    Parameters:
+        image (np.ndarray): The reference image used for the question
+        prompt (str): The question about the image
+
+    Returns:
+        str: A string which is the answer to the given prompt. E.g. {'text': 'This image contains a cat sitting on a table with a bowl of milk.'}.
+
+    Example
+    -------
+    >>> image_question_answering(image, 'What is the cat doing ?')
+    'drinking milk'
+
+    """
+
+    image_b64 = convert_to_b64(image)
+    data = {
+        "image": image_b64,
+        "prompt": prompt,
+        "tool": "image_question_answering",
+    }
+
+    answer = _send_inference_request(data, "tools")
+    return answer["text"][0]  # type: ignore
+
+
+def clip(image: np.ndarray, classes: List[str]) -> Dict[str, Any]:
+    """'clip' is a tool that can classify an image given a list of input classes or tags.
+    It returns the same list of the input classes along with their probability scores based on image content.
+
+    Parameters:
+        image (np.ndarray): The image to classify or tag
+        classes (List[str]): The list of classes or tags that is associated with the image
+
+    Returns:
+        Dict[str, Any]: A dictionary containing the labels and scores. One dictionary contains a list of given labels and other a list of scores.
+
+    Example
+    -------
+    >>> clip(image, ['dog', 'cat', 'bird'])
+    {"labels": ["dog", "cat", "bird"], "scores": [0.68, 0.30, 0.02]},
+
+    """
+
+    image_b64 = convert_to_b64(image)
+    data = {
+        "prompt": ",".join(classes),
+        "image": image_b64,
+        "tool": "closed_set_image_classification",
+    }
+    resp_data = _send_inference_request(data, "tools")
+    resp_data["scores"] = [round(prob, 4) for prob in resp_data["scores"]]
+    return resp_data
+
+
+def image_caption(image: np.ndarray) -> str:
+    """'image_caption' is a tool that can caption an image based on its contents.
+    It returns a text describing the image.
+
+    Parameters:
+        image (np.ndarray): The image to caption
+
+    Returns:
+        str: A string which is the caption for the given image.
+
+    Example
+    -------
+    >>> image_caption(image)
+    'This image contains a cat sitting on a table with a bowl of milk.'
+
+    """
+
+    image_b64 = convert_to_b64(image)
+    data = {
+        "image": image_b64,
+        "tool": "image_captioning",
+    }
+
+    answer = _send_inference_request(data, "tools")
+    return answer["text"][0]  # type: ignore
+
+
 def closest_mask_distance(mask1: np.ndarray, mask2: np.ndarray) -> float:
     """'closest_mask_distance' calculates the closest distance between two masks.
 
@@ -285,6 +443,31 @@ def closest_box_distance(box1: List[float], box2: List[float]) -> float:
 # Utility and visualization functions
 
 
+def save_json(data: Any, file_path: str) -> None:
+    """'save_json' is a utility function that saves data as a JSON file. It is helpful
+    for saving data that contains NumPy arrays which are not JSON serializable.
+
+    Parameters:
+        data (Any): The data to save.
+        file_path (str): The path to save the JSON file.
+
+    Example
+    -------
+    >>> save_json(data, "path/to/file.json")
+    """
+
+    class NumpyEncoder(json.JSONEncoder):
+        def default(self, obj: Any):  # type: ignore
+            if isinstance(obj, np.ndarray):
+                return obj.tolist()
+            elif isinstance(obj, np.bool_):
+                return bool(obj)
+            return json.JSONEncoder.default(self, obj)
+
+    with open(file_path, "w") as f:
+        json.dump(data, f, cls=NumpyEncoder)
+
+
 def load_image(image_path: str) -> np.ndarray:
     """'load_image' is a utility function that loads an image from the given path.
 
@@ -478,8 +661,14 @@ TOOLS = [
     grounding_sam,
     extract_frames,
     ocr,
+    clip,
+    zero_shot_counting,
+    visual_prompt_counting,
+    image_question_answering,
+    image_caption,
     closest_mask_distance,
     closest_box_distance,
+    save_json,
     load_image,
     save_image,
     overlay_bounding_boxes,
@@ -489,5 +678,5 @@ TOOLS_DF = get_tools_df(TOOLS)  # type: ignore
 TOOL_DESCRIPTIONS = get_tool_descriptions(TOOLS)  # type: ignore
 TOOL_DOCSTRING = get_tool_documentation(TOOLS)  # type: ignore
 UTILITIES_DOCSTRING = get_tool_documentation(
-    [load_image, save_image, overlay_bounding_boxes]
+    [save_json, load_image, save_image, overlay_bounding_boxes]
 )
vision_agent/utils/image_utils.py CHANGED
@@ -104,15 +104,20 @@ def convert_to_b64(data: Union[str, Path, np.ndarray, ImageType]) -> str:
     """
     if data is None:
         raise ValueError(f"Invalid input image: {data}. Input image can't be None.")
+
     if isinstance(data, (str, Path)):
         data = Image.open(data)
+    elif isinstance(data, np.ndarray):
+        data = Image.fromarray(data)
+
     if isinstance(data, Image.Image):
         buffer = BytesIO()
         data.convert("RGB").save(buffer, format="PNG")
         return base64.b64encode(buffer.getvalue()).decode("utf-8")
     else:
-        arr_bytes = data.tobytes()
-        return base64.b64encode(arr_bytes).decode("utf-8")
+        raise ValueError(
+            f"Invalid input image: {data}. Input image must be a PIL Image or a numpy array."
+        )
 
 
 def denormalize_bbox(
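`convert_to_b64` previously fell through to `data.tobytes()` for NumPy inputs, producing base64 of the raw array buffer rather than an encoded image. It now routes arrays through PIL and rejects anything else. A small sketch of the accepted inputs:

    import numpy as np
    from PIL import Image
    from vision_agent.utils.image_utils import convert_to_b64

    arr = np.zeros((32, 32, 3), dtype=np.uint8)

    b64_from_array = convert_to_b64(arr)                 # ndarray -> PIL -> PNG -> base64
    b64_from_pil = convert_to_b64(Image.fromarray(arr))  # PIL images work as before
    # convert_to_b64("photo.png") also accepts a path (placeholder filename).

    try:
        convert_to_b64(b"not an image")  # unsupported type now raises
    except ValueError as err:
        print(err)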
vision_agent/utils/sim.py CHANGED
@@ -56,12 +56,15 @@ class Sim:
             df = df.drop("embs", axis=1)
             df.to_csv(sim_file / "df.csv", index=False)
 
-    def top_k(self, query: str, k: int = 5) -> Sequence[Dict]:
+    def top_k(
+        self, query: str, k: int = 5, thresh: Optional[float] = None
+    ) -> Sequence[Dict]:
         """Returns the top k most similar items to the query.
 
         Parameters:
             query: str: The query to compare to.
             k: int: The number of items to return.
+            thresh: Optional[float]: The minimum similarity threshold.
 
         Returns:
             Sequence[Dict]: The top k most similar items.
@@ -70,6 +73,8 @@ class Sim:
         embedding = get_embedding(self.client, query, model=self.model)
         self.df["sim"] = self.df.embs.apply(lambda x: 1 - cosine(x, embedding))
         res = self.df.sort_values("sim", ascending=False).head(k)
+        if thresh is not None:
+            res = res[res.sim > thresh]
         return res[[c for c in res.columns if c != "embs"]].to_dict(orient="records")
 
 
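The new `thresh` parameter drops weak matches after the cosine-similarity ranking; this is what `run_plan` now relies on with `tool_recommender.top_k(..., thresh=0.3)`. A usage sketch; building a `Sim` index calls the OpenAI embeddings API, and the constructor arguments below (a data frame plus the column name to embed) are assumptions about how the agent sets up its tool recommender:

    import pandas as pd
    from vision_agent.utils.sim import Sim

    df = pd.DataFrame({"doc": ["'grounding_dino' detects objects ...", "'ocr' reads text ..."]})
    sim = Sim(df, sim_key="doc")  # assumed signature; embeds the "doc" column

    # Top 5 matches, keeping only those with similarity above 0.3.
    tools = sim.top_k("count the dogs in the image", k=5, thresh=0.3)
    for t in tools:
        print(t["doc"])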
vision_agent/utils/type_defs.py CHANGED
@@ -12,7 +12,7 @@ class LandingaiAPIKey(BaseSettings):
     """
 
     api_key: str = Field(
-        default="land_sk_hw34v3tyEc35OAhP8F7hnGnrDv2C8hD2ycMyq0aMkVS1H40D22",
+        default="land_sk_IJrojHarPXRjqDj1Fng76mX7yCbzVm1s5rZYxaNXu5v0cNLn0w",
        alias="LANDINGAI_API_KEY",
        description="The API key of LandingAI.",
     )
@@ -1,14 +1,16 @@
 Metadata-Version: 2.1
 Name: vision-agent
-Version: 0.2.23
+Version: 0.2.25
 Summary: Toolset for Vision Agent
 Author: Landing AI
 Author-email: dev@landing.ai
-Requires-Python: >=3.9
+Requires-Python: >=3.9,<4.0
 Classifier: Programming Language :: Python :: 3
 Classifier: Programming Language :: Python :: 3.9
 Classifier: Programming Language :: Python :: 3.10
 Classifier: Programming Language :: Python :: 3.11
+Requires-Dist: ipykernel (>=6.29.4,<7.0.0)
+Requires-Dist: langsmith (>=0.1.58,<0.2.0)
 Requires-Dist: moviepy (>=1.0.0,<2.0.0)
 Requires-Dist: nbclient (>=0.10.0,<0.11.0)
 Requires-Dist: nbformat (>=5.10.4,<6.0.0)
@@ -1,7 +1,7 @@
 vision_agent/__init__.py,sha256=GVLHCeK_R-zgldpbcPmOzJat-BkadvkuRCMxDvTIcXs,108
 vision_agent/agent/__init__.py,sha256=Zv8lc91mPy0iDySId38_vc4mo56JQ9mCMvUWdAKQjh0,206
 vision_agent/agent/agent.py,sha256=X7kON-g9ePUKumCDaYfQNBX_MEFE-ax5PnRp7-Cc5Wo,529
-vision_agent/agent/agent_coder.py,sha256=e3mQn1xenahYk_uGflvuQ10s6dSHHM6p0jZN9UT1ZpE,6508
+vision_agent/agent/agent_coder.py,sha256=4iB732bX4wDnPAuyYBk6HWlf4aFq2l9EcL695qfDIXw,7004
 vision_agent/agent/agent_coder_prompts.py,sha256=CJe3v7xvHQ32u3RQAXQga_Tk_4UgU64RBAMHZ3S70KY,5538
 vision_agent/agent/easytool.py,sha256=oMHnBg7YBtIPgqQUNcZgq7uMgpPThs99_UnO7ERkMVg,11511
 vision_agent/agent/easytool_prompts.py,sha256=Bikw-PPLkm78dwywTlnv32Y1Tw6JMeC-R7oCnXWLcTk,4656
@@ -9,26 +9,26 @@ vision_agent/agent/reflexion.py,sha256=4gz30BuFMeGxSsTzoDV4p91yE0R8LISXp28IaOI6w
 vision_agent/agent/reflexion_prompts.py,sha256=G7UAeNz_g2qCb2yN6OaIC7bQVUkda4m3z42EG8wAyfE,9342
 vision_agent/agent/vision_agent.py,sha256=pnx7gtTPazR7Dck5_kfZC3S3QWKu4e28YVigzOicOX0,27130
 vision_agent/agent/vision_agent_prompts.py,sha256=MZSIwovYgB-f-kdJ6btaNDVXptJn47bfOL3-Zn6NiC0,8573
-vision_agent/agent/vision_agent_v2.py,sha256=0-bJH_KiYB9fdfN5rbutnyJgQr1XYeszNYqmR69IxZc,13045
-vision_agent/agent/vision_agent_v2_prompt.py,sha256=dd9m9Vqp91r4dpsKMDwXr54jG_GTBdJNDzpgR115S8Q,5997
+vision_agent/agent/vision_agent_v2.py,sha256=3qjvaj-yyrXmoY_cecUsiuY4Rn6MmJanFZeoXFJRK2c,13229
+vision_agent/agent/vision_agent_v2_prompt.py,sha256=b_0BMq6GrbGfl09MHrv4mj-mqyE1FxMl3Xq44qD4S1E,6161
 vision_agent/fonts/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 vision_agent/fonts/default_font_ch_en.ttf,sha256=1YM0Z3XqLDjSNbF7ihQFSAIUdjF9m1rtHiNC_6QosTE,1594400
 vision_agent/llm/__init__.py,sha256=BoUm_zSAKnLlE8s-gKTSQugXDqVZKPqYlWwlTLdhcz4,48
-vision_agent/llm/llm.py,sha256=A-gN0vMb79fSxhSK1qBs6PTu1fba9Gvy6pitOyjW2gM,5779
+vision_agent/llm/llm.py,sha256=_Klwngc35JdRuzezWe1P5BMBRkfRQSGJqNOtS44rM9s,5891
 vision_agent/lmm/__init__.py,sha256=nnNeKD1k7q_4vLb1x51O_EUTYaBgGfeiCx5F433gr3M,67
 vision_agent/lmm/lmm.py,sha256=gK90vMxh0OcGSuIZQikBkDXm4pfkdFk1R2y7rtWDl84,10539
 vision_agent/tools/__init__.py,sha256=dRHXGpjhItXZRQs0r_l3Z3bQIreaZaYP0CJrl8mOJxM,452
 vision_agent/tools/prompts.py,sha256=V1z4YJLXZuUl_iZ5rY0M5hHc_2tmMEUKr0WocXKGt4E,1430
-vision_agent/tools/tool_utils.py,sha256=mK6QfbYr6oo9ci979-_6R1DrxU2i8HGhwosADyvciI0,865
-vision_agent/tools/tools.py,sha256=sVxN7SpDkz_XTc_SKwkoRF4EwaMTuHvTsCHwtR942Fc,47373
-vision_agent/tools/tools_v2.py,sha256=Dh5Rs1iaEs5ijRDwVI3Na9ylC7eOjtrIqtYOZSredH8,15364
+vision_agent/tools/tool_utils.py,sha256=wzRacbUpqk9hhfX_Y08rL8qP0XCN2w-8IZoYLi3Upn4,869
+vision_agent/tools/tools.py,sha256=pZc5dQlYINlV4nYbbzsDi3-wauA-fCeD2iGmJUMoUfE,47373
+vision_agent/tools/tools_v2.py,sha256=Tdam-cWBI4ipXWwGyxim-SK07zP97_hcdUtYd1a4CnI,21404
 vision_agent/utils/__init__.py,sha256=xsHFyJSDbLdonB9Dh74cwZnVTiT__2OQF3Brd3Nmglc,116
 vision_agent/utils/execute.py,sha256=RC_jKrm2kOWwzNe9xKuA2xJcbsNcD0Hb95_o3_Le0_E,3820
-vision_agent/utils/image_utils.py,sha256=1dggPBhW8_hUXDItCRLa23h-hdBwS50cjL4v1hsoUbg,7586
-vision_agent/utils/sim.py,sha256=SO4-pj2Fjs3yr-KT8S0nuUd66lf7m7XvMAp7_ecvKuQ,2813
-vision_agent/utils/type_defs.py,sha256=4LTnTL4HNsfYqCrDn9Ppjg9bSG2ZGcoKSSd9YeQf4Bw,1792
+vision_agent/utils/image_utils.py,sha256=_cdiS5YrLzqkq_ZgFUO897m5M4_SCIThwUy4lOklfB8,7700
+vision_agent/utils/sim.py,sha256=oUZ-6eu8Io-UNt9GXJ0XRKtP-Wc0sPWVzYGVpB2yDFk,3001
+vision_agent/utils/type_defs.py,sha256=BlI8ywWHAplC7kYWLvt4AOdnKpEW3qWEFm-GEOSkrFQ,1792
 vision_agent/utils/video.py,sha256=xTElFSFp1Jw4ulOMnk81Vxsh-9dTxcWUO6P9fzEi3AM,7653
-vision_agent-0.2.23.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
-vision_agent-0.2.23.dist-info/METADATA,sha256=r3JWwYu2mKPjViXrm50ZS_9juGciOrYfEyz2YhPeczQ,9121
-vision_agent-0.2.23.dist-info/WHEEL,sha256=7Z8_27uaHI_UZAc4Uox4PpBhQ9Y5_modZXWMxtUi4NU,88
-vision_agent-0.2.23.dist-info/RECORD,,
+vision_agent-0.2.25.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
+vision_agent-0.2.25.dist-info/METADATA,sha256=5bycdwOp0pnRpUBQo_JM1c1Abq2fmWJcVYE_7YgtoUY,9212
+vision_agent-0.2.25.dist-info/WHEEL,sha256=7Z8_27uaHI_UZAc4Uox4PpBhQ9Y5_modZXWMxtUi4NU,88
+vision_agent-0.2.25.dist-info/RECORD,,