vision-agent 1.1.12.tar.gz → 1.1.14.tar.gz
This diff compares the contents of publicly available package versions released to a supported registry. It is provided for informational purposes only and reflects the changes between the two versions as published in the public registry.
- {vision_agent-1.1.12 → vision_agent-1.1.14}/PKG-INFO +1 -1
- {vision_agent-1.1.12 → vision_agent-1.1.14}/pyproject.toml +1 -2
- {vision_agent-1.1.12 → vision_agent-1.1.14}/vision_agent/.sim_tools/df.csv +1 -1
- {vision_agent-1.1.12 → vision_agent-1.1.14}/vision_agent/lmm/lmm.py +139 -11
- {vision_agent-1.1.12 → vision_agent-1.1.14}/vision_agent/tools/tools.py +4 -7
- {vision_agent-1.1.12 → vision_agent-1.1.14}/.gitignore +0 -0
- {vision_agent-1.1.12 → vision_agent-1.1.14}/LICENSE +0 -0
- {vision_agent-1.1.12 → vision_agent-1.1.14}/README.md +0 -0
- {vision_agent-1.1.12 → vision_agent-1.1.14}/vision_agent/.sim_tools/embs.npy +0 -0
- {vision_agent-1.1.12 → vision_agent-1.1.14}/vision_agent/__init__.py +0 -0
- {vision_agent-1.1.12 → vision_agent-1.1.14}/vision_agent/agent/README.md +0 -0
- {vision_agent-1.1.12 → vision_agent-1.1.14}/vision_agent/agent/__init__.py +0 -0
- {vision_agent-1.1.12 → vision_agent-1.1.14}/vision_agent/agent/agent.py +0 -0
- {vision_agent-1.1.12 → vision_agent-1.1.14}/vision_agent/agent/vision_agent_coder_prompts_v2.py +0 -0
- {vision_agent-1.1.12 → vision_agent-1.1.14}/vision_agent/agent/vision_agent_coder_v2.py +0 -0
- {vision_agent-1.1.12 → vision_agent-1.1.14}/vision_agent/agent/vision_agent_planner_prompts_v2.py +0 -0
- {vision_agent-1.1.12 → vision_agent-1.1.14}/vision_agent/agent/vision_agent_planner_v2.py +0 -0
- {vision_agent-1.1.12 → vision_agent-1.1.14}/vision_agent/agent/vision_agent_prompts_v2.py +0 -0
- {vision_agent-1.1.12 → vision_agent-1.1.14}/vision_agent/agent/vision_agent_v2.py +0 -0
- {vision_agent-1.1.12 → vision_agent-1.1.14}/vision_agent/clients/__init__.py +0 -0
- {vision_agent-1.1.12 → vision_agent-1.1.14}/vision_agent/clients/http.py +0 -0
- {vision_agent-1.1.12 → vision_agent-1.1.14}/vision_agent/configs/__init__.py +0 -0
- {vision_agent-1.1.12 → vision_agent-1.1.14}/vision_agent/configs/anthropic_config.py +0 -0
- {vision_agent-1.1.12 → vision_agent-1.1.14}/vision_agent/configs/config.py +0 -0
- {vision_agent-1.1.12 → vision_agent-1.1.14}/vision_agent/configs/openai_config.py +0 -0
- {vision_agent-1.1.12 → vision_agent-1.1.14}/vision_agent/fonts/__init__.py +0 -0
- {vision_agent-1.1.12 → vision_agent-1.1.14}/vision_agent/fonts/default_font_ch_en.ttf +0 -0
- {vision_agent-1.1.12 → vision_agent-1.1.14}/vision_agent/lmm/__init__.py +0 -0
- {vision_agent-1.1.12 → vision_agent-1.1.14}/vision_agent/models/__init__.py +0 -0
- {vision_agent-1.1.12 → vision_agent-1.1.14}/vision_agent/models/agent_types.py +0 -0
- {vision_agent-1.1.12 → vision_agent-1.1.14}/vision_agent/models/lmm_types.py +0 -0
- {vision_agent-1.1.12 → vision_agent-1.1.14}/vision_agent/models/tools_types.py +0 -0
- {vision_agent-1.1.12 → vision_agent-1.1.14}/vision_agent/sim/__init__.py +0 -0
- {vision_agent-1.1.12 → vision_agent-1.1.14}/vision_agent/sim/sim.py +0 -0
- {vision_agent-1.1.12 → vision_agent-1.1.14}/vision_agent/tools/__init__.py +0 -0
- {vision_agent-1.1.12 → vision_agent-1.1.14}/vision_agent/tools/meta_tools.py +0 -0
- {vision_agent-1.1.12 → vision_agent-1.1.14}/vision_agent/tools/planner_tools.py +0 -0
- {vision_agent-1.1.12 → vision_agent-1.1.14}/vision_agent/tools/prompts.py +0 -0
- {vision_agent-1.1.12 → vision_agent-1.1.14}/vision_agent/utils/__init__.py +0 -0
- {vision_agent-1.1.12 → vision_agent-1.1.14}/vision_agent/utils/agent.py +0 -0
- {vision_agent-1.1.12 → vision_agent-1.1.14}/vision_agent/utils/exceptions.py +0 -0
- {vision_agent-1.1.12 → vision_agent-1.1.14}/vision_agent/utils/execute.py +0 -0
- {vision_agent-1.1.12 → vision_agent-1.1.14}/vision_agent/utils/image_utils.py +0 -0
- {vision_agent-1.1.12 → vision_agent-1.1.14}/vision_agent/utils/tools.py +0 -0
- {vision_agent-1.1.12 → vision_agent-1.1.14}/vision_agent/utils/tools_doc.py +0 -0
- {vision_agent-1.1.12 → vision_agent-1.1.14}/vision_agent/utils/video.py +0 -0
- {vision_agent-1.1.12 → vision_agent-1.1.14}/vision_agent/utils/video_tracking.py +0 -0
pyproject.toml

```diff
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
 
 [project]
 name = "vision-agent"
-version = "1.1.12"
+version = "1.1.14"
 description = "Toolset for Vision Agent"
 authors = [{ name = "Landing AI", email = "dev@landing.ai" }]
 requires-python = ">=3.9,<4.0"
@@ -58,7 +58,6 @@ dev = [
     "types-pillow>=9.5.0.4,<10",
     "data-science-types>=0.2.23,<0.3",
     "types-tqdm>=4.65.0.1,<5",
-    "setuptools>=68.0.0,<69",
     "griffe>=0.45.3,<0.46",
     "mkdocs>=1.5.3,<2",
     "mkdocstrings[python]>=0.23.0,<0.24",
```
vision_agent/lmm/lmm.py

```diff
@@ -3,12 +3,16 @@ import os
 from abc import ABC, abstractmethod
 from pathlib import Path
 from typing import Any, Dict, Iterator, List, Optional, Sequence, Union, cast
+import base64
 
 import anthropic
 import requests
 from anthropic.types import ImageBlockParam, MessageParam, TextBlockParam
 from openai import AzureOpenAI, OpenAI
 
+from google import genai  # type: ignore
+from google.genai import types  # type: ignore
+
 from vision_agent.models import Message
 from vision_agent.utils.image_utils import encode_media
 
```
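These import changes swap the OpenAI-compatible endpoint for the google-genai SDK. Below is a minimal sketch of the client pattern the new code builds on, assuming a valid Google API key; the model name and prompt are illustrative and not taken from the package:

```python
# Minimal google-genai sketch (key, model, and prompt are placeholders).
from google import genai
from google.genai import types

client = genai.Client(api_key="YOUR_GOOGLE_API_KEY")

response = client.models.generate_content(
    model="gemini-2.0-flash",  # assumed model name for this sketch
    contents=[{"text": "Say hello in one word."}],
    config=types.GenerateContentConfig(max_output_tokens=32, temperature=0.0),
)
print(response.text)
```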
vision_agent/lmm/lmm.py (continued)

```diff
@@ -516,28 +520,152 @@ class AnthropicLMM(LMM):
         return cast(str, response.content[0].text)
 
 
-class GoogleLMM(
+class GoogleLMM(LMM):
     r"""An LMM class for the Google LMMs."""
 
     def __init__(
         self,
+        model_name: str = "gemini-2.5-pro-preview-03-25",
         api_key: Optional[str] = None,
-        model_name: str = "gemini-2.0-flash-exp",
-        max_tokens: int = 4096,
-        image_detail: str = "low",
         image_size: int = 768,
+        image_detail: str = "low",
         **kwargs: Any,
     ):
-        base_url = "https://generativelanguage.googleapis.com/v1beta/openai/"
         if not api_key:
-            api_key = os.environ.get("
-
-        self.client = OpenAI(api_key=api_key, base_url=base_url)
+            api_key = os.environ.get("GOOGLE_API_KEY")
 
+        # Create the client using the Google Genai client
+        self.client = genai.Client(api_key=api_key)
         self.model_name = model_name
         self.image_size = image_size
         self.image_detail = image_detail
-
-        if "max_tokens" not in kwargs:
-            kwargs["max_tokens"] = max_tokens
         self.kwargs = kwargs
+
+    def __call__(
+        self,
+        input: Union[str, Sequence[Dict[str, Any]]],
+        **kwargs: Any,
+    ) -> Union[str, Iterator[Optional[str]]]:
+        if isinstance(input, str):
+            return self.generate(input, **kwargs)
+        return self.chat(input, **kwargs)
+
+    def chat(
+        self,
+        chat: Sequence[Dict[str, Any]],
+        **kwargs: Any,
+    ) -> Union[str, Iterator[Optional[str]]]:
+        prompt_parts = []
+        for message in chat:
+            if message["role"] != "user":
+                continue  # Gemini expects only user input
+            prompt_parts.extend(self._convert_message_parts(message, **kwargs))
+
+        tmp_kwargs = self.kwargs | kwargs
+        generation_config = self._create_generation_config(tmp_kwargs)
+
+        if tmp_kwargs.get("stream"):
+
+            def f() -> Iterator[Optional[str]]:
+                # Use the client to stream content
+                response_stream = self.client.models.generate_content_stream(
+                    model=self.model_name,
+                    contents=prompt_parts,
+                    config=generation_config,
+                )
+                for chunk in response_stream:
+                    if chunk.text:
+                        yield chunk.text
+
+            return f()
+        else:
+            # Use the client for non-streaming
+            response = self.client.models.generate_content(
+                model=self.model_name,
+                contents=prompt_parts,
+                config=generation_config,
+            )
+            return cast(str, response.text)
+
+    def generate(
+        self,
+        prompt: str,
+        media: Optional[Sequence[Union[str, Path]]] = None,
+        **kwargs: Any,
+    ) -> Union[str, Iterator[Optional[str]]]:
+        prompt_parts = [{"text": prompt}]
+        if media:
+            for m in media:
+                prompt_parts.append(self._convert_media_part(m, **kwargs))
+
+        tmp_kwargs = self.kwargs | kwargs
+        generation_config = self._create_generation_config(tmp_kwargs)
+
+        if tmp_kwargs.get("stream"):
+
+            def f() -> Iterator[Optional[str]]:
+                response_stream = self.client.models.generate_content_stream(
+                    model=self.model_name,
+                    contents=prompt_parts,
+                    config=generation_config,
+                )
+                for chunk in response_stream:
+                    if chunk.text:
+                        yield chunk.text
+
+            return f()
+        else:
+            response = self.client.models.generate_content(
+                model=self.model_name,
+                contents=prompt_parts,
+                config=generation_config,
+            )
+            return cast(str, response.text)
+
+    def _convert_message_parts(
+        self, message: Dict[str, Any], **kwargs: Any
+    ) -> List[Any]:
+        parts = [{"text": message["content"]}]
+        if "media" in message:
+            for media_path in message["media"]:
+                parts.append(self._convert_media_part(media_path, **kwargs))
+        return parts
+
+    def _convert_media_part(self, media: Union[str, Path], **kwargs: Any) -> types.Part:
+        resize = kwargs.get("resize", self.image_size)
+        encoded_media = encode_media(str(media), resize=resize)
+
+        if encoded_media.startswith("data:image/"):
+            encoded_media = encoded_media.split(",", 1)[-1]
+
+        binary_data = base64.b64decode(encoded_media)
+
+        return types.Part.from_bytes(
+            data=binary_data,
+            mime_type="image/png",
+        )
+
+    def _create_generation_config(
+        self, kwargs: Dict[str, Any]
+    ) -> types.GenerateContentConfig:
+        # Extract generation-specific parameters
+        config_params = {}
+
+        # Handle known parameters
+        for param in [
+            "max_output_tokens",
+            "temperature",
+            "top_p",
+            "top_k",
+            "response_mime_type",
+            "stop_sequences",
+            "candidate_count",
+            "seed",
+            "safety_settings",
+            "system_instruction",
+        ]:
+            if param in kwargs:
+                config_params[param] = kwargs[param]
+
+        # Create a GenerateContentConfig object
+        return types.GenerateContentConfig(**config_params)
```
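Taking the rewritten class at face value, here is a hedged usage sketch; the import path points at the module shown in this diff, and the API key, media file name, and generation parameters are placeholders:

```python
# Usage sketch for the new GoogleLMM (key and media path are placeholders).
from vision_agent.lmm.lmm import GoogleLMM

lmm = GoogleLMM(api_key="YOUR_GOOGLE_API_KEY")  # defaults to gemini-2.5-pro-preview-03-25

# A plain string is routed through generate().
print(lmm("Describe what a vision agent does in one sentence."))

# Chat-style input: only "user" messages are forwarded, and "media" paths are
# resized, decoded, and attached as image/png parts.
chat = [
    {
        "role": "user",
        "content": "What objects are visible in this image?",
        "media": ["example.png"],  # hypothetical local file
    }
]
print(lmm.chat(chat, temperature=0.2, max_output_tokens=512))

# stream=True returns an iterator of text chunks instead of a single string.
for chunk in lmm.chat(chat, stream=True):
    if chunk:
        print(chunk, end="", flush=True)
```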
vision_agent/tools/tools.py

```diff
@@ -2959,13 +2959,10 @@ def gemini_image_generation(
         return image
     else:
         try:
-
-
-
-
-                num_retries=1,
-            )
-
+            current_dir = os.path.dirname(os.path.abspath(__file__))
+            img_path = os.path.join(current_dir, "../../assets/gemini.png")
+            with open(img_path, "rb") as img_file:
+                output_image_bytes = img_file.read()
         except Exception as e:
             raise ValueError(f"Fallback generation failed: {str(e)}")
 
```
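The fallback in gemini_image_generation now returns a bundled placeholder image rather than retrying the generation call. A short sketch of that path resolution, with an added validity check that is illustrative and not part of the package:

```python
# Sketch of the new fallback path; the PIL check is an added, illustrative step.
import os
from io import BytesIO

from PIL import Image

current_dir = os.path.dirname(os.path.abspath(__file__))
img_path = os.path.join(current_dir, "../../assets/gemini.png")
with open(img_path, "rb") as img_file:
    output_image_bytes = img_file.read()

# Confirm the placeholder bytes decode as an image before handing them downstream.
Image.open(BytesIO(output_image_bytes)).verify()
```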