vision-agent 0.2.56__py3-none-any.whl → 0.2.58__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- vision_agent/__init__.py +1 -2
- vision_agent/agent/agent.py +3 -1
- vision_agent/agent/vision_agent.py +110 -81
- vision_agent/agent/vision_agent_prompts.py +1 -1
- vision_agent/lmm/__init__.py +1 -1
- vision_agent/lmm/lmm.py +54 -116
- vision_agent/tools/__init__.py +2 -1
- vision_agent/tools/tools.py +3 -3
- {vision_agent-0.2.56.dist-info → vision_agent-0.2.58.dist-info}/METADATA +36 -7
- vision_agent-0.2.58.dist-info/RECORD +23 -0
- vision_agent/agent/agent_coder.py +0 -216
- vision_agent/agent/agent_coder_prompts.py +0 -135
- vision_agent/agent/data_interpreter.py +0 -475
- vision_agent/agent/data_interpreter_prompts.py +0 -186
- vision_agent/agent/easytool.py +0 -346
- vision_agent/agent/easytool_prompts.py +0 -89
- vision_agent/agent/easytool_v2.py +0 -781
- vision_agent/agent/easytool_v2_prompts.py +0 -152
- vision_agent/agent/reflexion.py +0 -299
- vision_agent/agent/reflexion_prompts.py +0 -100
- vision_agent/llm/__init__.py +0 -1
- vision_agent/llm/llm.py +0 -176
- vision_agent/tools/easytool_tools.py +0 -1242
- vision_agent-0.2.56.dist-info/RECORD +0 -36
- {vision_agent-0.2.56.dist-info → vision_agent-0.2.58.dist-info}/LICENSE +0 -0
- {vision_agent-0.2.56.dist-info → vision_agent-0.2.58.dist-info}/WHEEL +0 -0
vision_agent/llm/llm.py
DELETED
@@ -1,176 +0,0 @@
-import json
-import os
-from abc import ABC, abstractmethod
-from typing import Any, Callable, Dict, List, Mapping, Optional, Union, cast
-
-from langsmith.wrappers import wrap_openai
-from openai import AzureOpenAI, OpenAI
-
-from vision_agent.tools.easytool_tools import (
-    CLIP,
-    GroundingDINO,
-    GroundingSAM,
-    ZeroShotCounting,
-)
-from vision_agent.tools.prompts import CHOOSE_PARAMS, SYSTEM_PROMPT
-
-
-class LLM(ABC):
-    @abstractmethod
-    def generate(self, prompt: str) -> str:
-        pass
-
-    @abstractmethod
-    def chat(self, chat: List[Dict[str, str]]) -> str:
-        pass
-
-    @abstractmethod
-    def __call__(self, input: Union[str, List[Dict[str, str]]]) -> str:
-        pass
-
-
-class OpenAILLM(LLM):
-    r"""An LLM class for any OpenAI LLM model."""
-
-    def __init__(
-        self,
-        model_name: str = "gpt-4o",
-        api_key: Optional[str] = None,
-        json_mode: bool = False,
-        system_prompt: Optional[str] = None,
-        **kwargs: Any
-    ):
-        if not api_key:
-            self.client = wrap_openai(OpenAI())
-        else:
-            self.client = wrap_openai(OpenAI(api_key=api_key))
-
-        self.model_name = model_name
-        self.system_prompt = system_prompt
-        self.kwargs = kwargs
-        if json_mode:
-            self.kwargs["response_format"] = {"type": "json_object"}
-
-    def generate(self, prompt: str) -> str:
-        messages = []
-        if self.system_prompt:
-            messages.append({"role": "system", "content": self.system_prompt})
-        messages.append({"role": "user", "content": prompt})
-
-        response = self.client.chat.completions.create(
-            model=self.model_name,
-            messages=messages,  # type: ignore
-            **self.kwargs,
-        )
-
-        return cast(str, response.choices[0].message.content)
-
-    def chat(self, chat: List[Dict[str, str]]) -> str:
-        if self.system_prompt and not any(msg["role"] == "system" for msg in chat):
-            chat.insert(0, {"role": "system", "content": self.system_prompt})
-
-        response = self.client.chat.completions.create(
-            model=self.model_name,
-            messages=chat,  # type: ignore
-            **self.kwargs,
-        )
-
-        return cast(str, response.choices[0].message.content)
-
-    def __call__(self, input: Union[str, List[Dict[str, str]]]) -> str:
-        if isinstance(input, str):
-            return self.generate(input)
-        return self.chat(input)
-
-    def generate_classifier(self, question: str) -> Callable:
-        api_doc = CLIP.description + "\n" + str(CLIP.usage)
-        prompt = CHOOSE_PARAMS.format(api_doc=api_doc, question=question)
-        response = self.client.chat.completions.create(
-            model=self.model_name,
-            response_format={"type": "json_object"},
-            messages=[
-                {"role": "system", "content": SYSTEM_PROMPT},
-                {"role": "user", "content": prompt},
-            ],
-        )
-
-        params = json.loads(cast(str, response.choices[0].message.content))[
-            "Parameters"
-        ]
-
-        return lambda x: CLIP()(**{"prompt": params["prompt"], "image": x})
-
-    def generate_detector(self, question: str) -> Callable:
-        api_doc = GroundingDINO.description + "\n" + str(GroundingDINO.usage)
-        prompt = CHOOSE_PARAMS.format(api_doc=api_doc, question=question)
-        response = self.client.chat.completions.create(
-            model=self.model_name,
-            response_format={"type": "json_object"},
-            messages=[
-                {"role": "system", "content": SYSTEM_PROMPT},
-                {"role": "user", "content": prompt},
-            ],
-        )
-
-        params: Mapping = json.loads(cast(str, response.choices[0].message.content))[
-            "Parameters"
-        ]
-
-        return lambda x: GroundingDINO()(**{"prompt": params["prompt"], "image": x})
-
-    def generate_segmentor(self, question: str) -> Callable:
-        api_doc = GroundingSAM.description + "\n" + str(GroundingSAM.usage)
-        prompt = CHOOSE_PARAMS.format(api_doc=api_doc, question=question)
-        response = self.client.chat.completions.create(
-            model=self.model_name,
-            response_format={"type": "json_object"},
-            messages=[
-                {"role": "system", "content": SYSTEM_PROMPT},
-                {"role": "user", "content": prompt},
-            ],
-        )
-
-        params: Mapping = json.loads(cast(str, response.choices[0].message.content))[
-            "Parameters"
-        ]
-
-        return lambda x: GroundingSAM()(**{"prompt": params["prompt"], "image": x})
-
-    def generate_zero_shot_counter(self, question: str) -> Callable:
-        return lambda x: ZeroShotCounting()(**{"image": x})
-
-    def generate_image_qa_tool(self, question: str) -> Callable:
-        from vision_agent.tools.easytool_tools import ImageQuestionAnswering
-
-        return lambda x: ImageQuestionAnswering()(**{"prompt": question, "image": x})
-
-
-class AzureOpenAILLM(OpenAILLM):
-    def __init__(
-        self,
-        model_name: str = "gpt-4o",
-        api_key: Optional[str] = None,
-        api_version: str = "2024-02-01",
-        azure_endpoint: Optional[str] = None,
-        json_mode: bool = False,
-        **kwargs: Any
-    ):
-        if not api_key:
-            api_key = os.getenv("AZURE_OPENAI_API_KEY")
-        if not azure_endpoint:
-            azure_endpoint = os.getenv("AZURE_OPENAI_ENDPOINT")
-
-        if not api_key:
-            raise ValueError("Azure OpenAI API key is required.")
-        if not azure_endpoint:
-            raise ValueError("Azure OpenAI endpoint is required.")
-
-        self.client = wrap_openai(
-            AzureOpenAI(
-                api_key=api_key, api_version=api_version, azure_endpoint=azure_endpoint
-            )
-        )
-        self.model_name = model_name
-        self.kwargs = kwargs
-        if json_mode:
-            self.kwargs["response_format"] = {"type": "json_object"}
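For context, here is a minimal usage sketch of the removed OpenAILLM class, reconstructed from the deleted source above; it is illustrative and not part of the diff. The import path follows the file path shown in this diff, OPENAI_API_KEY is assumed to be exported, "image.jpg" is a stand-in filename, and generate_detector additionally depends on the (also removed) vision_agent/tools/easytool_tools module and its backing services.

# Usage sketch for vision-agent 0.2.56 (illustrative, not from the diff).
# Assumes OPENAI_API_KEY is set and "image.jpg" exists; both are stand-ins.
from vision_agent.llm.llm import OpenAILLM

llm = OpenAILLM(model_name="gpt-4o")

# __call__ dispatches on input type: a str goes to generate(),
# a list of message dicts goes to chat().
print(llm("Name one common object detection model."))
print(llm([{"role": "user", "content": "Answer in one word."}]))

# generate_detector() has the LLM pick GroundingDINO parameters for the
# question, then returns a closure that runs the tool on an image.
detect = llm.generate_detector("Find all the cars in the image.")
detections = detect("image.jpg")

AzureOpenAILLM behaved the same way, except that it read AZURE_OPENAI_API_KEY and AZURE_OPENAI_ENDPOINT from the environment when not passed explicitly. Per the file list above, 0.2.58 removes the vision_agent/llm module entirely while keeping vision_agent/lmm.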