vision-agent 0.2.230__tar.gz → 0.2.231__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (52)
  1. {vision_agent-0.2.230 → vision_agent-0.2.231}/PKG-INFO +3 -11
  2. {vision_agent-0.2.230 → vision_agent-0.2.231}/README.md +2 -10
  3. {vision_agent-0.2.230 → vision_agent-0.2.231}/pyproject.toml +1 -1
  4. {vision_agent-0.2.230 → vision_agent-0.2.231}/vision_agent/agent/types.py +1 -0
  5. {vision_agent-0.2.230 → vision_agent-0.2.231}/vision_agent/agent/vision_agent_planner_v2.py +1 -0
  6. {vision_agent-0.2.230 → vision_agent-0.2.231}/vision_agent/configs/anthropic_config.py +2 -2
  7. {vision_agent-0.2.230 → vision_agent-0.2.231}/vision_agent/configs/openai_config.py +2 -2
  8. {vision_agent-0.2.230 → vision_agent-0.2.231}/vision_agent/tools/planner_tools.py +14 -8
  9. {vision_agent-0.2.230 → vision_agent-0.2.231}/vision_agent/tools/tool_utils.py +3 -0
  10. {vision_agent-0.2.230 → vision_agent-0.2.231}/vision_agent/tools/tools.py +39 -23
  11. {vision_agent-0.2.230 → vision_agent-0.2.231}/vision_agent/utils/sim.py +33 -12
  12. {vision_agent-0.2.230 → vision_agent-0.2.231}/LICENSE +0 -0
  13. {vision_agent-0.2.230 → vision_agent-0.2.231}/vision_agent/.sim_tools/df.csv +0 -0
  14. {vision_agent-0.2.230 → vision_agent-0.2.231}/vision_agent/.sim_tools/embs.npy +0 -0
  15. {vision_agent-0.2.230 → vision_agent-0.2.231}/vision_agent/__init__.py +0 -0
  16. {vision_agent-0.2.230 → vision_agent-0.2.231}/vision_agent/agent/README.md +0 -0
  17. {vision_agent-0.2.230 → vision_agent-0.2.231}/vision_agent/agent/__init__.py +0 -0
  18. {vision_agent-0.2.230 → vision_agent-0.2.231}/vision_agent/agent/agent.py +0 -0
  19. {vision_agent-0.2.230 → vision_agent-0.2.231}/vision_agent/agent/agent_utils.py +0 -0
  20. {vision_agent-0.2.230 → vision_agent-0.2.231}/vision_agent/agent/vision_agent.py +0 -0
  21. {vision_agent-0.2.230 → vision_agent-0.2.231}/vision_agent/agent/vision_agent_coder.py +0 -0
  22. {vision_agent-0.2.230 → vision_agent-0.2.231}/vision_agent/agent/vision_agent_coder_prompts.py +0 -0
  23. {vision_agent-0.2.230 → vision_agent-0.2.231}/vision_agent/agent/vision_agent_coder_prompts_v2.py +0 -0
  24. {vision_agent-0.2.230 → vision_agent-0.2.231}/vision_agent/agent/vision_agent_coder_v2.py +0 -0
  25. {vision_agent-0.2.230 → vision_agent-0.2.231}/vision_agent/agent/vision_agent_planner.py +0 -0
  26. {vision_agent-0.2.230 → vision_agent-0.2.231}/vision_agent/agent/vision_agent_planner_prompts.py +0 -0
  27. {vision_agent-0.2.230 → vision_agent-0.2.231}/vision_agent/agent/vision_agent_planner_prompts_v2.py +0 -0
  28. {vision_agent-0.2.230 → vision_agent-0.2.231}/vision_agent/agent/vision_agent_prompts.py +0 -0
  29. {vision_agent-0.2.230 → vision_agent-0.2.231}/vision_agent/agent/vision_agent_prompts_v2.py +0 -0
  30. {vision_agent-0.2.230 → vision_agent-0.2.231}/vision_agent/agent/vision_agent_v2.py +0 -0
  31. {vision_agent-0.2.230 → vision_agent-0.2.231}/vision_agent/clients/__init__.py +0 -0
  32. {vision_agent-0.2.230 → vision_agent-0.2.231}/vision_agent/clients/http.py +0 -0
  33. {vision_agent-0.2.230 → vision_agent-0.2.231}/vision_agent/clients/landing_public_api.py +0 -0
  34. {vision_agent-0.2.230 → vision_agent-0.2.231}/vision_agent/configs/__init__.py +0 -0
  35. {vision_agent-0.2.230 → vision_agent-0.2.231}/vision_agent/configs/anthropic_openai_config.py +0 -0
  36. {vision_agent-0.2.230 → vision_agent-0.2.231}/vision_agent/configs/config.py +0 -0
  37. {vision_agent-0.2.230 → vision_agent-0.2.231}/vision_agent/fonts/__init__.py +0 -0
  38. {vision_agent-0.2.230 → vision_agent-0.2.231}/vision_agent/fonts/default_font_ch_en.ttf +0 -0
  39. {vision_agent-0.2.230 → vision_agent-0.2.231}/vision_agent/lmm/__init__.py +0 -0
  40. {vision_agent-0.2.230 → vision_agent-0.2.231}/vision_agent/lmm/lmm.py +0 -0
  41. {vision_agent-0.2.230 → vision_agent-0.2.231}/vision_agent/lmm/types.py +0 -0
  42. {vision_agent-0.2.230 → vision_agent-0.2.231}/vision_agent/tools/__init__.py +4 -4
  43. {vision_agent-0.2.230 → vision_agent-0.2.231}/vision_agent/tools/meta_tools.py +0 -0
  44. {vision_agent-0.2.230 → vision_agent-0.2.231}/vision_agent/tools/prompts.py +0 -0
  45. {vision_agent-0.2.230 → vision_agent-0.2.231}/vision_agent/tools/tools_types.py +0 -0
  46. {vision_agent-0.2.230 → vision_agent-0.2.231}/vision_agent/utils/__init__.py +0 -0
  47. {vision_agent-0.2.230 → vision_agent-0.2.231}/vision_agent/utils/exceptions.py +0 -0
  48. {vision_agent-0.2.230 → vision_agent-0.2.231}/vision_agent/utils/execute.py +0 -0
  49. {vision_agent-0.2.230 → vision_agent-0.2.231}/vision_agent/utils/image_utils.py +0 -0
  50. {vision_agent-0.2.230 → vision_agent-0.2.231}/vision_agent/utils/type_defs.py +0 -0
  51. {vision_agent-0.2.230 → vision_agent-0.2.231}/vision_agent/utils/video.py +0 -0
  52. {vision_agent-0.2.230 → vision_agent-0.2.231}/vision_agent/utils/video_tracking.py +0 -0
--- a/PKG-INFO
+++ b/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: vision-agent
-Version: 0.2.230
+Version: 0.2.231
 Summary: Toolset for Vision Agent
 Author: Landing AI
 Author-email: dev@landing.ai
@@ -71,12 +71,7 @@ export ANTHROPIC_API_KEY="your-api-key"
 export OPENAI_API_KEY="your-api-key"
 ```
 
----
-**NOTE**
-We found using both Anthropic Claude-3.5 and OpenAI o1 to be provide the best performance
-for VisionAgent. If you want to use a different LLM provider or only one, see
-'Using Other LLM Providers' below.
----
+> **_NOTE:_** We found using both Anthropic Claude-3.5 and OpenAI o1 to be provide the best performance for VisionAgent. If you want to use a different LLM provider or only one, see 'Using Other LLM Providers' below.
 
 ## Documentation
 
@@ -149,8 +144,5 @@ directory. For example to change to Anthropic simply just run:
 cp vision_agent/configs/anthropic_config.py vision_agent/configs/config.py
 ```
 
-**NOTE**
-VisionAgent moves fast and we are constantly updating and changing the library. If you
-have any questions or need help, please reach out to us on our discord channel.
----
+> **_NOTE:_** VisionAgent moves fast and we are constantly updating and changing the library. If you have any questions or need help, please reach out to us on our discord channel.
 
--- a/README.md
+++ b/README.md
@@ -26,12 +26,7 @@ export ANTHROPIC_API_KEY="your-api-key"
 export OPENAI_API_KEY="your-api-key"
 ```
 
----
-**NOTE**
-We found using both Anthropic Claude-3.5 and OpenAI o1 to be provide the best performance
-for VisionAgent. If you want to use a different LLM provider or only one, see
-'Using Other LLM Providers' below.
----
+> **_NOTE:_** We found using both Anthropic Claude-3.5 and OpenAI o1 to be provide the best performance for VisionAgent. If you want to use a different LLM provider or only one, see 'Using Other LLM Providers' below.
 
 ## Documentation
 
@@ -104,7 +99,4 @@ directory. For example to change to Anthropic simply just run:
 cp vision_agent/configs/anthropic_config.py vision_agent/configs/config.py
 ```
 
-**NOTE**
-VisionAgent moves fast and we are constantly updating and changing the library. If you
-have any questions or need help, please reach out to us on our discord channel.
----
+> **_NOTE:_** VisionAgent moves fast and we are constantly updating and changing the library. If you have any questions or need help, please reach out to us on our discord channel.
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -4,7 +4,7 @@ build-backend = "poetry.core.masonry.api"
 
 [tool.poetry]
 name = "vision-agent"
-version = "0.2.230"
+version = "0.2.231"
 description = "Toolset for Vision Agent"
 authors = ["Landing AI <dev@landing.ai>"]
 readme = "README.md"
--- a/vision_agent/agent/types.py
+++ b/vision_agent/agent/types.py
@@ -33,6 +33,7 @@ class AgentMessage(BaseModel):
         Literal["interaction_response"],
         Literal["conversation"],
         Literal["planner"],
+        Literal["planner_update"],
         Literal["coder"],
     ]
     content: str
--- a/vision_agent/agent/vision_agent_planner_v2.py
+++ b/vision_agent/agent/vision_agent_planner_v2.py
@@ -513,6 +513,7 @@ class VisionAgentPlannerV2(AgentPlanner):
         code = extract_tag(response, "execute_python")
         finalize_plan = extract_tag(response, "finalize_plan")
         finished = finalize_plan is not None
+        self.update_callback({"role": "planner_update", "content": response})
 
         if self.verbose:
             _CONSOLE.print(
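The two hunks above add a `planner_update` message role and emit it on every planner iteration, so callers can observe intermediate planner output instead of waiting for the final plan. A minimal sketch of a consumer, assuming `VisionAgentPlannerV2` accepts an `update_callback` argument that it stores as the `self.update_callback` invoked in the diff (the handler below is illustrative):

```python
# Hypothetical consumer of the new "planner_update" role; not part of the diff.
from vision_agent.agent.vision_agent_planner_v2 import VisionAgentPlannerV2

def on_update(message: dict) -> None:
    if message["role"] == "planner_update":
        # message["content"] is the planner's raw LMM response for this step.
        print("planner step:", message["content"][:120])

planner = VisionAgentPlannerV2(update_callback=on_update)
# Running the planner would now trigger on_update once per iteration.
```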
--- a/vision_agent/configs/anthropic_config.py
+++ b/vision_agent/configs/anthropic_config.py
@@ -81,7 +81,7 @@ class Config(BaseModel):
     tool_tester_kwargs: dict = Field(
         default_factory=lambda: {
             "model_name": "claude-3-5-sonnet-20241022",
-            "temperature": 1.0,
+            "temperature": 0.0,
             "image_size": 768,
         }
     )
@@ -111,7 +111,7 @@ class Config(BaseModel):
     vqa_kwargs: dict = Field(
         default_factory=lambda: {
            "model_name": "claude-3-5-sonnet-20241022",
-            "temperature": 1.0,
+            "temperature": 0.0,
             "image_size": 768,
         }
     )
--- a/vision_agent/configs/openai_config.py
+++ b/vision_agent/configs/openai_config.py
@@ -98,7 +98,7 @@ class Config(BaseModel):
     tool_chooser_kwargs: dict = Field(
         default_factory=lambda: {
             "model_name": "gpt-4o-2024-08-06",
-            "temperature": 0.0,
+            "temperature": 1.0,
             "image_size": 768,
             "image_detail": "low",
         }
@@ -109,7 +109,7 @@ class Config(BaseModel):
     suggester_kwargs: dict = Field(
         default_factory=lambda: {
             "model_name": "gpt-4o-2024-08-06",
-            "temperature": 0.0,
+            "temperature": 1.0,
             "image_size": 768,
             "image_detail": "low",
         }
--- a/vision_agent/tools/planner_tools.py
+++ b/vision_agent/tools/planner_tools.py
@@ -10,12 +10,7 @@ from IPython.display import display
 from PIL import Image
 
 import vision_agent.tools as T
-from vision_agent.agent.agent_utils import (
-    DefaultImports,
-    extract_code,
-    extract_json,
-    extract_tag,
-)
+from vision_agent.agent.agent_utils import DefaultImports, extract_json, extract_tag
 from vision_agent.agent.vision_agent_planner_prompts_v2 import (
     CATEGORIZE_TOOL_REQUEST,
     FINALIZE_PLAN,
@@ -36,6 +31,9 @@ from vision_agent.utils.image_utils import convert_to_b64
 from vision_agent.utils.sim import get_tool_recommender
 
 TOOL_FUNCTIONS = {tool.__name__: tool for tool in T.TOOLS}
+LOAD_TOOLS_DOCSTRING = T.get_tool_documentation(
+    [T.load_image, T.extract_frames_and_timestamps]
+)
 
 CONFIG = Config()
 _LOGGER = logging.getLogger(__name__)
@@ -179,6 +177,7 @@ def run_tool_testing(
         cleaned_tool_docs.append(tool_doc)
     tool_docs = cleaned_tool_docs
     tool_docs_str = "\n".join([e["doc"] for e in tool_docs])
+    tool_docs_str += "\n" + LOAD_TOOLS_DOCSTRING
 
     prompt = TEST_TOOLS.format(
         tool_docs=tool_docs_str,
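The new `LOAD_TOOLS_DOCSTRING` constant bakes documentation for the two media loaders into every tool-testing prompt, so generated test code can always read its input images or videos even when those utilities were not among the retrieved tools. A small sketch of what it evaluates to (output indicative only):

```python
# Sketch: the module-level constant is just the rendered documentation of the
# two loader tools, appended to tool_docs_str before prompting the LMM.
import vision_agent.tools as T

LOAD_TOOLS_DOCSTRING = T.get_tool_documentation(
    [T.load_image, T.extract_frames_and_timestamps]
)
print(LOAD_TOOLS_DOCSTRING)  # docstrings for load_image and
                             # extract_frames_and_timestamps
```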
@@ -217,8 +216,15 @@ def run_tool_testing(
         examples=EXAMPLES,
         media=str(image_paths),
     )
-    code = extract_code(lmm.generate(prompt, media=image_paths))  # type: ignore
-    code = process_code(code)
+    response = cast(str, lmm.generate(prompt, media=image_paths))
+    code = extract_tag(response, "code")
+    if code is None:
+        code = response
+
+    try:
+        code = process_code(code)
+    except Exception as e:
+        _LOGGER.error(f"Error processing code: {e}")
     tool_output = code_interpreter.exec_isolation(
         DefaultImports.prepend_imports(code)
     )
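The hunk above switches tool testing from `extract_code` to tag-based extraction with two fallbacks: use the `<code>` tag if present, otherwise the raw response, and treat `process_code` as best-effort rather than fatal. A self-contained sketch of the pattern, where `extract_tag` below is a simplified stand-in for `vision_agent.agent.agent_utils.extract_tag`:

```python
import re
from typing import Optional

def extract_tag(text: str, tag: str) -> Optional[str]:
    # Return the content between <tag>...</tag>, or None if absent.
    match = re.search(rf"<{tag}>(.*?)</{tag}>", text, re.DOTALL)
    return match.group(1).strip() if match else None

response = "Here you go:\n<code>\nprint('hello')\n</code>"
code = extract_tag(response, "code")
if code is None:
    code = response  # fall back to the raw LMM output

print(code)  # print('hello')
```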
--- a/vision_agent/tools/tool_utils.py
+++ b/vision_agent/tools/tool_utils.py
@@ -318,6 +318,9 @@ def single_nms(
 def nms(
     all_preds: List[List[Dict[str, Any]]], iou_threshold: float
 ) -> List[List[Dict[str, Any]]]:
+    if not isinstance(all_preds[0], List):
+        all_preds = [all_preds]
+
     return_preds = []
     for frame_preds in all_preds:
         frame_preds = single_nms(frame_preds, iou_threshold)
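The added guard makes `nms` tolerant of a flat, single-frame input: if the first element is not itself a list, the whole input is wrapped as one frame. Illustrative call shapes; the detection dict fields mirror the label/bbox/score shape used in the tools.py hunks below:

```python
# Illustrative only: nms lives in vision_agent/tools/tool_utils.py per the
# file list above.
from vision_agent.tools.tool_utils import nms

frame_preds = [
    {"label": "car", "bbox": [0.10, 0.10, 0.40, 0.40], "score": 0.9},
    {"label": "car", "bbox": [0.12, 0.11, 0.41, 0.42], "score": 0.5},
]

per_frame = nms([frame_preds], iou_threshold=0.5)  # list-of-frames input
flat = nms(frame_preds, iou_threshold=0.5)         # now auto-wrapped as one frame
```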
--- a/vision_agent/tools/tools.py
+++ b/vision_agent/tools/tools.py
@@ -222,7 +222,7 @@ def sam2(
     ret = _sam2(image, detections, image_size)
     _display_tool_trace(
         sam2.__name__,
-        {},
+        {"detections": detections},
         ret["display_data"],
         ret["files"],
     )
@@ -314,18 +314,29 @@ def od_sam2_video_tracking(
 
     # Process each segment and collect detections
     detections_per_segment: List[Any] = []
-    for segment_index, segment in enumerate(segments):
-        segment_detections = process_segment(
-            segment_frames=segment,
-            od_model=od_model,
-            prompt=prompt,
-            fine_tune_id=fine_tune_id,
-            chunk_length=chunk_length,
-            image_size=image_size,
-            segment_index=segment_index,
-            object_detection_tool=_apply_object_detection,
-        )
-        detections_per_segment.append(segment_detections)
+    with ThreadPoolExecutor() as executor:
+        futures = {
+            executor.submit(
+                process_segment,
+                segment_frames=segment,
+                od_model=od_model,
+                prompt=prompt,
+                fine_tune_id=fine_tune_id,
+                chunk_length=chunk_length,
+                image_size=image_size,
+                segment_index=segment_index,
+                object_detection_tool=_apply_object_detection,
+            ): segment_index
+            for segment_index, segment in enumerate(segments)
+        }
+
+        for future in as_completed(futures):
+            segment_index = futures[future]
+            detections_per_segment.append((segment_index, future.result()))
+
+    detections_per_segment = [
+        x[1] for x in sorted(detections_per_segment, key=lambda x: x[0])
+    ]
 
     merged_detections = merge_segments(detections_per_segment)
     post_processed = post_process(merged_detections, image_size)
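The hunk above parallelizes per-segment detection. Because `as_completed` yields futures in completion order, each result is tagged with its segment index and re-sorted afterwards so downstream merging still sees segments in their original order. A self-contained sketch of that fan-out pattern (the toy `process_segment` stands in for the real per-segment detection):

```python
from concurrent.futures import ThreadPoolExecutor, as_completed

def process_segment(segment: str) -> str:
    return segment.upper()  # stand-in for the real OD + tracking work

segments = ["seg-a", "seg-b", "seg-c"]
results: list[tuple[int, str]] = []

with ThreadPoolExecutor() as executor:
    # Key each future by its segment index so order can be restored later.
    futures = {
        executor.submit(process_segment, segment): i
        for i, segment in enumerate(segments)
    }
    for future in as_completed(futures):
        results.append((futures[future], future.result()))

ordered = [r for _, r in sorted(results, key=lambda x: x[0])]
print(ordered)  # ['SEG-A', 'SEG-B', 'SEG-C'] regardless of completion order
```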
@@ -390,7 +401,7 @@ def _owlv2_object_detection(
         {
             "label": bbox["label"],
             "bbox": normalize_bbox(bbox["bounding_box"], image_size),
-            "score": bbox["score"],
+            "score": round(bbox["score"], 2),
         }
         for bbox in bboxes
     ]
@@ -398,7 +409,7 @@ def _owlv2_object_detection(
         {
             "label": bbox["label"],
             "bbox": bbox["bounding_box"],
-            "score": bbox["score"],
+            "score": round(bbox["score"], 2),
         }
         for bbox in bboxes
     ]
@@ -582,7 +593,7 @@ def owlv2_sam2_video_tracking(
     )
     _display_tool_trace(
         owlv2_sam2_video_tracking.__name__,
-        {},
+        {"prompt": prompt, "chunk_length": chunk_length},
         ret["display_data"],
         ret["files"],
     )
@@ -1681,7 +1692,7 @@ def video_temporal_localization(
     prompt: str,
     frames: List[np.ndarray],
     model: str = "qwen2vl",
-    chunk_length_frames: Optional[int] = 2,
+    chunk_length_frames: int = 2,
 ) -> List[float]:
     """'video_temporal_localization' will run qwen2vl on each chunk_length_frames
     value selected for the video. It can detect multiple objects independently per
@@ -1695,7 +1706,7 @@ def video_temporal_localization(
         frames (List[np.ndarray]): The reference frames used for the question
         model (str): The model to use for the inference. Valid values are
             'qwen2vl', 'gpt4o'.
-        chunk_length_frames (Optional[int]): length of each chunk in frames
+        chunk_length_frames (int): length of each chunk in frames
 
     Returns:
         List[float]: A list of floats with a value of 1.0 if the objects to be found
@@ -1714,8 +1725,7 @@ def video_temporal_localization(
         "model": model,
         "function_name": "video_temporal_localization",
     }
-    if chunk_length_frames is not None:
-        payload["chunk_length_frames"] = chunk_length_frames
+    payload["chunk_length_frames"] = chunk_length_frames
 
     data = send_inference_request(
         payload, "video-temporal-localization", files=files, v2=True
@@ -1726,7 +1736,13 @@ def video_temporal_localization(
         data,
         files,
     )
-    return [cast(float, value) for value in data]
+    chunked_data = [cast(float, value) for value in data]
+
+    full_data = []
+    for value in chunked_data:
+        full_data.extend([value] * chunk_length_frames)
+
+    return full_data[: len(frames)]
 
 
 def vit_image_classification(image: np.ndarray) -> Dict[str, Any]:
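With this change `video_temporal_localization` returns one score per input frame instead of one per chunk: each chunk score is repeated `chunk_length_frames` times and the result is trimmed to `len(frames)`. A worked example of the expansion logic from the hunk above:

```python
# Worked example: with chunk_length_frames=2 and 5 frames, per-chunk scores
# [1.0, 0.0, 1.0] expand to one score per frame.
chunk_length_frames = 2
num_frames = 5
chunked_data = [1.0, 0.0, 1.0]  # one score per chunk, as returned by the API

full_data: list[float] = []
for value in chunked_data:
    full_data.extend([value] * chunk_length_frames)

print(full_data[:num_frames])  # [1.0, 1.0, 0.0, 0.0, 1.0]
```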
@@ -2150,7 +2166,7 @@ def siglip_classification(image: np.ndarray, labels: List[str]) -> Dict[str, Any
     return response
 
 
-# agentic od tools
+# Agentic OD Tools
 
 
 def _agentic_object_detection(
@@ -2646,7 +2662,7 @@ def save_image(image: np.ndarray, file_path: str) -> None:
 
 
 def save_video(
-    frames: List[np.ndarray], output_video_path: Optional[str] = None, fps: float = 1
+    frames: List[np.ndarray], output_video_path: Optional[str] = None, fps: float = 5
 ) -> str:
     """'save_video' is a utility function that saves a list of frames as a mp4 video file on disk.
 
--- a/vision_agent/utils/sim.py
+++ b/vision_agent/utils/sim.py
@@ -98,10 +98,12 @@ class Sim:
             raise ValueError("key is required if no column 'embs' is present.")
 
         if sim_key is not None:
-            self.df["embs"] = self.df[sim_key].apply(
-                lambda x: get_embedding(
-                    self.emb_call,
-                    x,
+            self.df = self.df.assign(
+                embs=self.df[sim_key].apply(
+                    lambda x: get_embedding(
+                        self.emb_call,
+                        x,
+                    )
                 )
             )
 
@@ -141,7 +143,9 @@ class Sim:
 
         df_load = pd.read_csv(load_dir / "df.csv")
         if platform.system() == "Windows":
-            df_load["doc"] = df_load["doc"].apply(lambda x: x.replace("\r", ""))
+            df_load = df_load.assign(
+                doc=df_load.doc.apply(lambda x: x.replace("\r", ""))
+            )
         return df.equals(df_load)  # type: ignore
 
     @lru_cache(maxsize=256)
@@ -166,7 +170,9 @@ class Sim:
             self.emb_call,
             query,
         )
-        self.df["sim"] = self.df.embs.apply(lambda x: 1 - cosine(x, embedding))
+        self.df = self.df.assign(
+            sim=self.df.embs.apply(lambda x: 1 - cosine(x, embedding))
+        )
         res = self.df.sort_values("sim", ascending=False).head(k)
         if thresh is not None:
             res = res[res.sim > thresh]
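The sim.py hunks above and below all replace in-place column writes (`df["embs"] = ...`) with `DataFrame.assign`, which returns a new frame with the computed column rather than mutating the existing one; this avoids pandas' SettingWithCopyWarning when the frame is a view of another frame. A minimal illustration of the pattern change, with a stand-in for the real embedding call:

```python
import pandas as pd

df = pd.DataFrame({"doc": ["detect cars", "segment people"]})

def fake_embedding(text: str) -> list[float]:
    return [float(len(text)), 0.0]  # stand-in for get_embedding(...)

# Old style: in-place column mutation.
# df["embs"] = df["doc"].apply(fake_embedding)

# New style: assign returns a fresh frame that is rebound to df.
df = df.assign(embs=df["doc"].apply(fake_embedding))
print(df)
```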
@@ -214,8 +220,13 @@ class AzureSim(Sim):
             raise ValueError("key is required if no column 'embs' is present.")
 
         if sim_key is not None:
-            self.df["embs"] = self.df[sim_key].apply(
-                lambda x: get_embedding(self.emb_call, x)
+            self.df = self.df.assign(
+                embs=self.df[sim_key].apply(
+                    lambda x: get_embedding(
+                        self.emb_call,
+                        x,
+                    )
+                )
             )
 
 
@@ -245,8 +256,13 @@ class OllamaSim(Sim):
             raise ValueError("key is required if no column 'embs' is present.")
 
         if sim_key is not None:
-            self.df["embs"] = self.df[sim_key].apply(
-                lambda x: get_embedding(emb_call, x)
+            self.df = self.df.assign(
+                embs=self.df[sim_key].apply(
+                    lambda x: get_embedding(
+                        self.emb_call,
+                        x,
+                    )
+                )
             )
 
 
@@ -267,8 +283,13 @@ class StellaSim(Sim):
             raise ValueError("key is required if no column 'embs' is present.")
 
         if sim_key is not None:
-            self.df["embs"] = self.df[sim_key].apply(
-                lambda x: get_embedding(emb_call, x)
+            self.df = self.df.assign(
+                embs=self.df[sim_key].apply(
+                    lambda x: get_embedding(
+                        self.emb_call,
+                        x,
+                    )
+                )
             )
 
     @staticmethod
--- a/vision_agent/tools/__init__.py
+++ b/vision_agent/tools/__init__.py
@@ -23,6 +23,9 @@ from .tools import (
     TOOLS_INFO,
     UTIL_TOOLS,
     UTILITIES_DOCSTRING,
+    agentic_object_detection,
+    agentic_sam2_instance_segmentation,
+    agentic_sam2_video_tracking,
     claude35_text_extraction,
     closest_box_distance,
     closest_mask_distance,
@@ -30,6 +33,7 @@ from .tools import (
     countgd_sam2_instance_segmentation,
     countgd_sam2_video_tracking,
     countgd_visual_prompt_object_detection,
+    custom_object_detection,
     depth_anything_v2,
     detr_segmentation,
     document_extraction,
@@ -63,10 +67,6 @@ from .tools import (
     video_temporal_localization,
     vit_image_classification,
     vit_nsfw_classification,
-    custom_object_detection,
-    agentic_object_detection,
-    agentic_sam2_instance_segmentation,
-    agentic_sam2_video_tracking,
 )
 
 __new_tools__ = [