vision-agent 0.2.56__py3-none-any.whl → 0.2.58__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
vision_agent/llm/llm.py DELETED
@@ -1,176 +0,0 @@
1
- import json
2
- import os
3
- from abc import ABC, abstractmethod
4
- from typing import Any, Callable, Dict, List, Mapping, Optional, Union, cast
5
-
6
- from langsmith.wrappers import wrap_openai
7
- from openai import AzureOpenAI, OpenAI
8
-
9
- from vision_agent.tools.easytool_tools import (
10
- CLIP,
11
- GroundingDINO,
12
- GroundingSAM,
13
- ZeroShotCounting,
14
- )
15
- from vision_agent.tools.prompts import CHOOSE_PARAMS, SYSTEM_PROMPT
16
-
17
-
18
class LLM(ABC):
    """Abstract interface for text-in / text-out language-model wrappers.

    Concrete subclasses must implement single-prompt generation, multi-turn
    chat, and a ``__call__`` entry point that accepts either input form.
    """

    @abstractmethod
    def generate(self, prompt: str) -> str:
        """Return the model's reply to a single prompt string."""
        ...

    @abstractmethod
    def chat(self, chat: List[Dict[str, str]]) -> str:
        """Return the model's reply to a list of chat message dicts."""
        ...

    @abstractmethod
    def __call__(self, input: Union[str, List[Dict[str, str]]]) -> str:
        """Return the model's reply to either a prompt string or a chat list."""
        ...
30
-
31
-
32
class OpenAILLM(LLM):
    r"""An LLM class for any OpenAI LLM model."""

    def __init__(
        self,
        model_name: str = "gpt-4o",
        api_key: Optional[str] = None,
        json_mode: bool = False,
        system_prompt: Optional[str] = None,
        **kwargs: Any
    ):
        """Create the wrapped OpenAI client and store request settings.

        Parameters:
            model_name: Model identifier sent with every completion request.
            api_key: Explicit API key. When falsy, the ``OpenAI`` client is
                built without one and resolves credentials on its own.
            json_mode: When true, ``response_format`` is forced to
                ``{"type": "json_object"}`` for ``generate`` and ``chat``.
            system_prompt: Optional system message used by ``generate`` and
                by ``chat`` when the conversation has no system message.
            **kwargs: Extra keyword arguments forwarded to
                ``chat.completions.create`` by ``generate`` and ``chat``.
        """
        if not api_key:
            self.client = wrap_openai(OpenAI())
        else:
            self.client = wrap_openai(OpenAI(api_key=api_key))

        self.model_name = model_name
        self.system_prompt = system_prompt
        self.kwargs = kwargs
        if json_mode:
            self.kwargs["response_format"] = {"type": "json_object"}

    def generate(self, prompt: str) -> str:
        """Send ``prompt`` as a single user message and return the reply text."""
        messages = []
        if self.system_prompt:
            messages.append({"role": "system", "content": self.system_prompt})
        messages.append({"role": "user", "content": prompt})

        response = self.client.chat.completions.create(
            model=self.model_name,
            messages=messages,  # type: ignore
            **self.kwargs,
        )

        return cast(str, response.choices[0].message.content)

    def chat(self, chat: List[Dict[str, str]]) -> str:
        """Send a conversation and return the reply text.

        The configured system prompt is prepended when the conversation does
        not already contain a system message. The caller's list is left
        untouched.
        """
        # Work on a shallow copy: inserting into ``chat`` itself would leak the
        # system message into the caller's conversation history.
        messages = list(chat)
        if self.system_prompt and not any(
            msg["role"] == "system" for msg in messages
        ):
            messages.insert(0, {"role": "system", "content": self.system_prompt})

        response = self.client.chat.completions.create(
            model=self.model_name,
            messages=messages,  # type: ignore
            **self.kwargs,
        )

        return cast(str, response.choices[0].message.content)

    def __call__(self, input: Union[str, List[Dict[str, str]]]) -> str:
        """Dispatch a string to ``generate`` and anything else to ``chat``."""
        if isinstance(input, str):
            return self.generate(input)
        return self.chat(input)

    def _choose_params(self, tool: Any, question: str) -> Mapping:
        """Ask the model for ``tool``'s parameters for ``question``.

        Builds the ``CHOOSE_PARAMS`` prompt from the tool's ``description`` and
        ``usage``, requests a JSON-object response, and returns the value under
        the ``"Parameters"`` key of the decoded reply.
        """
        api_doc = tool.description + "\n" + str(tool.usage)
        prompt = CHOOSE_PARAMS.format(api_doc=api_doc, question=question)
        # ``self.kwargs`` is deliberately not forwarded here, matching the
        # original per-tool methods which always set ``response_format``
        # explicitly.
        response = self.client.chat.completions.create(
            model=self.model_name,
            response_format={"type": "json_object"},
            messages=[
                {"role": "system", "content": SYSTEM_PROMPT},
                {"role": "user", "content": prompt},
            ],
        )
        decoded = json.loads(cast(str, response.choices[0].message.content))
        return cast(Mapping, decoded["Parameters"])

    def _build_prompted_tool(self, tool: Any, question: str) -> Callable:
        """Return a callable running ``tool`` with the model-chosen prompt."""
        params = self._choose_params(tool, question)
        # The tool is instantiated and ``params["prompt"]`` is looked up on each
        # call of the returned function, exactly as the original lambdas did.
        return lambda x: tool()(**{"prompt": params["prompt"], "image": x})

    def generate_classifier(self, question: str) -> Callable:
        """Return a function applying ``CLIP`` to an image for ``question``."""
        return self._build_prompted_tool(CLIP, question)

    def generate_detector(self, question: str) -> Callable:
        """Return a function applying ``GroundingDINO`` to an image for ``question``."""
        return self._build_prompted_tool(GroundingDINO, question)

    def generate_segmentor(self, question: str) -> Callable:
        """Return a function applying ``GroundingSAM`` to an image for ``question``."""
        return self._build_prompted_tool(GroundingSAM, question)

    def generate_zero_shot_counter(self, question: str) -> Callable:
        """Return a function applying ``ZeroShotCounting`` to an image.

        ``question`` is accepted for signature parity with the other
        ``generate_*`` methods but is not used.
        """
        return lambda x: ZeroShotCounting()(**{"image": x})

    def generate_image_qa_tool(self, question: str) -> Callable:
        """Return a function answering ``question`` about an image."""
        # Imported locally, as in the original code.
        from vision_agent.tools.easytool_tools import ImageQuestionAnswering

        return lambda x: ImageQuestionAnswering()(**{"prompt": question, "image": x})
146
-
147
-
148
class AzureOpenAILLM(OpenAILLM):
    """An ``OpenAILLM`` variant that talks to an Azure OpenAI deployment."""

    def __init__(
        self,
        model_name: str = "gpt-4o",
        api_key: Optional[str] = None,
        api_version: str = "2024-02-01",
        azure_endpoint: Optional[str] = None,
        json_mode: bool = False,
        system_prompt: Optional[str] = None,
        **kwargs: Any
    ):
        """Create the wrapped Azure OpenAI client and store request settings.

        Parameters:
            model_name: Model identifier sent with every completion request.
            api_key: Azure OpenAI key; falls back to the
                ``AZURE_OPENAI_API_KEY`` environment variable when falsy.
            api_version: API version passed to the ``AzureOpenAI`` client.
            azure_endpoint: Azure endpoint; falls back to the
                ``AZURE_OPENAI_ENDPOINT`` environment variable when falsy.
            json_mode: When true, ``response_format`` is forced to
                ``{"type": "json_object"}`` in the stored request kwargs.
            system_prompt: Optional system message used by the inherited
                ``generate`` and ``chat`` methods.
            **kwargs: Extra keyword arguments stored for completion requests.

        Raises:
            ValueError: If no API key or no endpoint can be resolved.
        """
        if not api_key:
            api_key = os.getenv("AZURE_OPENAI_API_KEY")
        if not azure_endpoint:
            azure_endpoint = os.getenv("AZURE_OPENAI_ENDPOINT")

        if not api_key:
            raise ValueError("Azure OpenAI API key is required.")
        if not azure_endpoint:
            raise ValueError("Azure OpenAI endpoint is required.")

        # The parent initializer is intentionally not called: it would build a
        # plain ``OpenAI`` client. All attributes the inherited methods read
        # are therefore set here.
        self.client = wrap_openai(
            AzureOpenAI(
                api_key=api_key, api_version=api_version, azure_endpoint=azure_endpoint
            )
        )
        self.model_name = model_name
        # Inherited ``generate``/``chat`` read ``self.system_prompt``; leaving it
        # unset made every call fail with AttributeError.
        self.system_prompt = system_prompt
        self.kwargs = kwargs
        if json_mode:
            self.kwargs["response_format"] = {"type": "json_object"}