vision-agent 0.2.103__tar.gz → 0.2.104__tar.gz

Files changed (33)
  1. {vision_agent-0.2.103 → vision_agent-0.2.104}/PKG-INFO +1 -1
  2. {vision_agent-0.2.103 → vision_agent-0.2.104}/pyproject.toml +1 -1
  3. {vision_agent-0.2.103 → vision_agent-0.2.104}/vision_agent/agent/vision_agent.py +1 -1
  4. {vision_agent-0.2.103 → vision_agent-0.2.104}/vision_agent/agent/vision_agent_coder.py +9 -8
  5. {vision_agent-0.2.103 → vision_agent-0.2.104}/vision_agent/lmm/lmm.py +157 -51
  6. {vision_agent-0.2.103 → vision_agent-0.2.104}/LICENSE +0 -0
  7. {vision_agent-0.2.103 → vision_agent-0.2.104}/README.md +0 -0
  8. {vision_agent-0.2.103 → vision_agent-0.2.104}/vision_agent/__init__.py +0 -0
  9. {vision_agent-0.2.103 → vision_agent-0.2.104}/vision_agent/agent/__init__.py +0 -0
  10. {vision_agent-0.2.103 → vision_agent-0.2.104}/vision_agent/agent/agent.py +0 -0
  11. {vision_agent-0.2.103 → vision_agent-0.2.104}/vision_agent/agent/agent_utils.py +0 -0
  12. {vision_agent-0.2.103 → vision_agent-0.2.104}/vision_agent/agent/vision_agent_coder_prompts.py +0 -0
  13. {vision_agent-0.2.103 → vision_agent-0.2.104}/vision_agent/agent/vision_agent_prompts.py +0 -0
  14. {vision_agent-0.2.103 → vision_agent-0.2.104}/vision_agent/clients/__init__.py +0 -0
  15. {vision_agent-0.2.103 → vision_agent-0.2.104}/vision_agent/clients/http.py +0 -0
  16. {vision_agent-0.2.103 → vision_agent-0.2.104}/vision_agent/clients/landing_public_api.py +0 -0
  17. {vision_agent-0.2.103 → vision_agent-0.2.104}/vision_agent/fonts/__init__.py +0 -0
  18. {vision_agent-0.2.103 → vision_agent-0.2.104}/vision_agent/fonts/default_font_ch_en.ttf +0 -0
  19. {vision_agent-0.2.103 → vision_agent-0.2.104}/vision_agent/lmm/__init__.py +0 -0
  20. {vision_agent-0.2.103 → vision_agent-0.2.104}/vision_agent/lmm/types.py +0 -0
  21. {vision_agent-0.2.103 → vision_agent-0.2.104}/vision_agent/tools/__init__.py +0 -0
  22. {vision_agent-0.2.103 → vision_agent-0.2.104}/vision_agent/tools/meta_tools.py +0 -0
  23. {vision_agent-0.2.103 → vision_agent-0.2.104}/vision_agent/tools/meta_tools_types.py +0 -0
  24. {vision_agent-0.2.103 → vision_agent-0.2.104}/vision_agent/tools/prompts.py +0 -0
  25. {vision_agent-0.2.103 → vision_agent-0.2.104}/vision_agent/tools/tool_utils.py +0 -0
  26. {vision_agent-0.2.103 → vision_agent-0.2.104}/vision_agent/tools/tools.py +0 -0
  27. {vision_agent-0.2.103 → vision_agent-0.2.104}/vision_agent/utils/__init__.py +0 -0
  28. {vision_agent-0.2.103 → vision_agent-0.2.104}/vision_agent/utils/exceptions.py +0 -0
  29. {vision_agent-0.2.103 → vision_agent-0.2.104}/vision_agent/utils/execute.py +0 -0
  30. {vision_agent-0.2.103 → vision_agent-0.2.104}/vision_agent/utils/image_utils.py +0 -0
  31. {vision_agent-0.2.103 → vision_agent-0.2.104}/vision_agent/utils/sim.py +0 -0
  32. {vision_agent-0.2.103 → vision_agent-0.2.104}/vision_agent/utils/type_defs.py +0 -0
  33. {vision_agent-0.2.103 → vision_agent-0.2.104}/vision_agent/utils/video.py +0 -0
{vision_agent-0.2.103 → vision_agent-0.2.104}/PKG-INFO

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: vision-agent
-Version: 0.2.103
+Version: 0.2.104
 Summary: Toolset for Vision Agent
 Author: Landing AI
 Author-email: dev@landing.ai
{vision_agent-0.2.103 → vision_agent-0.2.104}/pyproject.toml

@@ -4,7 +4,7 @@ build-backend = "poetry.core.masonry.api"
 
 [tool.poetry]
 name = "vision-agent"
-version = "0.2.103"
+version = "0.2.104"
 description = "Toolset for Vision Agent"
 authors = ["Landing AI <dev@landing.ai>"]
 readme = "README.md"
{vision_agent-0.2.103 → vision_agent-0.2.104}/vision_agent/agent/vision_agent.py

@@ -63,7 +63,7 @@ def run_conversation(orch: LMM, chat: List[Message]) -> Dict[str, Any]:
         dir=WORKSPACE,
         conversation=conversation,
     )
-    return extract_json(orch([{"role": "user", "content": prompt}]))
+    return extract_json(orch([{"role": "user", "content": prompt}], stream=False))  # type: ignore
 
 
 def run_code_action(code: str, code_interpreter: CodeInterpreter) -> str:
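Note: this pattern repeats across the vision_agent_coder.py hunks below. Because `LMM.__call__` can now return either a `str` or a streaming iterator (see the lmm.py changes), each orchestration call site pins `stream=False` to get a plain string back, and silences mypy with `# type: ignore` since the declared return type stays the union. A minimal sketch of the invariant these call sites rely on; `narrow_to_str` is a hypothetical helper, not part of the package:

from typing import Iterator, Optional, Union


def narrow_to_str(result: Union[str, Iterator[Optional[str]]]) -> str:
    # With stream=False every concrete LMM returns a plain str; this
    # hypothetical helper states that invariant explicitly instead of
    # sprinkling `# type: ignore` at each call site.
    assert isinstance(result, str), "expected a str when stream=False"
    return result


# e.g.: plan = extract_json(narrow_to_str(model(chat, stream=False)))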
{vision_agent-0.2.103 → vision_agent-0.2.104}/vision_agent/agent/vision_agent_coder.py

@@ -129,7 +129,7 @@ def write_plans(
     context = USER_REQ.format(user_request=user_request)
     prompt = PLAN.format(context=context, tool_desc=tool_desc, feedback=working_memory)
     chat[-1]["content"] = prompt
-    return extract_json(model.chat(chat))
+    return extract_json(model(chat, stream=False))  # type: ignore
 
 
 def pick_plan(
@@ -160,7 +160,7 @@ def pick_plan(
         docstring=tool_info, plans=plan_str, previous_attempts="", media=media
     )
 
-    code = extract_code(model(prompt))
+    code = extract_code(model(prompt, stream=False))  # type: ignore
     log_progress(
         {
             "type": "log",
@@ -211,7 +211,7 @@ def pick_plan(
                 "code": DefaultImports.prepend_imports(code),
             }
         )
-        code = extract_code(model(prompt))
+        code = extract_code(model(prompt, stream=False))  # type: ignore
        tool_output = code_interpreter.exec_isolation(
             DefaultImports.prepend_imports(code)
         )
@@ -251,7 +251,7 @@ def pick_plan(
         tool_output=tool_output_str[:20_000],
     )
     chat[-1]["content"] = prompt
-    best_plan = extract_json(model(chat))
+    best_plan = extract_json(model(chat, stream=False))  # type: ignore
 
     if verbosity >= 1:
         _LOGGER.info(f"Best plan:\n{best_plan}")
@@ -286,7 +286,7 @@ def write_code(
         feedback=feedback,
     )
     chat[-1]["content"] = prompt
-    return extract_code(coder(chat))
+    return extract_code(coder(chat, stream=False))  # type: ignore
 
 
 def write_test(
@@ -310,7 +310,7 @@ def write_test(
         media=media,
     )
     chat[-1]["content"] = prompt
-    return extract_code(tester(chat))
+    return extract_code(tester(chat, stream=False))  # type: ignore
 
 
 def write_and_test_code(
@@ -439,13 +439,14 @@ def debug_code(
     while not success and count < 3:
         try:
             fixed_code_and_test = extract_json(
-                debugger(
+                debugger(  # type: ignore
                     FIX_BUG.format(
                         code=code,
                         tests=test,
                         result="\n".join(result.text().splitlines()[-50:]),
                         feedback=format_memory(working_memory + new_working_memory),
-                    )
+                    ),
+                    stream=False,
                 )
             )
             success = True
{vision_agent-0.2.103 → vision_agent-0.2.104}/vision_agent/lmm/lmm.py

@@ -5,7 +5,7 @@ import logging
 import os
 from abc import ABC, abstractmethod
 from pathlib import Path
-from typing import Any, Callable, Dict, List, Optional, Union, cast
+from typing import Any, Callable, Dict, Iterator, List, Optional, Union, cast
 
 import anthropic
 import requests
@@ -58,22 +58,24 @@ def encode_media(media: Union[str, Path]) -> str:
 class LMM(ABC):
     @abstractmethod
     def generate(
-        self, prompt: str, media: Optional[List[Union[str, Path]]] = None
-    ) -> str:
+        self, prompt: str, media: Optional[List[Union[str, Path]]] = None, **kwargs: Any
+    ) -> Union[str, Iterator[Optional[str]]]:
         pass
 
     @abstractmethod
     def chat(
         self,
         chat: List[Message],
-    ) -> str:
+        **kwargs: Any,
+    ) -> Union[str, Iterator[Optional[str]]]:
         pass
 
     @abstractmethod
     def __call__(
         self,
         input: Union[str, List[Message]],
-    ) -> str:
+        **kwargs: Any,
+    ) -> Union[str, Iterator[Optional[str]]]:
         pass
 
 
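The abstract methods now accept `**kwargs` and may return either a complete string or an iterator of text deltas. A consumption sketch under that contract, assuming `lmm` is any concrete `LMM` instance and `photo.jpg` is a placeholder path; note the generators below can yield `None` either as an end-of-stream sentinel (Ollama, Claude) or as an empty OpenAI delta, so skipping `None` and letting iteration end naturally is the safe pattern:

# Blocking call: stream=False returns the full reply as one str.
reply = lmm.generate("Describe this image.", media=["photo.jpg"], stream=False)
print(reply)

# Streaming call: stream=True returns an Iterator[Optional[str]] of deltas.
for chunk in lmm.generate("Describe this image.", media=["photo.jpg"], stream=True):
    if chunk is not None:  # None marks end-of-stream or an empty delta
        print(chunk, end="", flush=True)
print()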
@@ -104,15 +106,17 @@ class OpenAILMM(LMM):
     def __call__(
         self,
         input: Union[str, List[Message]],
-    ) -> str:
+        **kwargs: Any,
+    ) -> Union[str, Iterator[Optional[str]]]:
         if isinstance(input, str):
-            return self.generate(input)
-        return self.chat(input)
+            return self.generate(input, **kwargs)
+        return self.chat(input, **kwargs)
 
     def chat(
         self,
         chat: List[Message],
-    ) -> str:
+        **kwargs: Any,
+    ) -> Union[str, Iterator[Optional[str]]]:
         """Chat with the LMM model.
 
         Parameters:
@@ -141,17 +145,28 @@ class OpenAILMM(LMM):
             )
             fixed_chat.append(fixed_c)
 
+        # prefers kwargs from second dictionary over first
+        tmp_kwargs = self.kwargs | kwargs
         response = self.client.chat.completions.create(
-            model=self.model_name, messages=fixed_chat, **self.kwargs  # type: ignore
+            model=self.model_name, messages=fixed_chat, **tmp_kwargs  # type: ignore
         )
+        if "stream" in tmp_kwargs and tmp_kwargs["stream"]:
+
+            def f() -> Iterator[Optional[str]]:
+                for chunk in response:
+                    chunk_message = chunk.choices[0].delta.content  # type: ignore
+                    yield chunk_message
 
-        return cast(str, response.choices[0].message.content)
+            return f()
+        else:
+            return cast(str, response.choices[0].message.content)
 
     def generate(
         self,
         prompt: str,
         media: Optional[List[Union[str, Path]]] = None,
-    ) -> str:
+        **kwargs: Any,
+    ) -> Union[str, Iterator[Optional[str]]]:
         message: List[Dict[str, Any]] = [
             {
                 "role": "user",
@@ -173,10 +188,21 @@ class OpenAILMM(LMM):
             },
         )
 
+        # prefers kwargs from second dictionary over first
+        tmp_kwargs = self.kwargs | kwargs
         response = self.client.chat.completions.create(
-            model=self.model_name, messages=message, **self.kwargs  # type: ignore
+            model=self.model_name, messages=message, **tmp_kwargs  # type: ignore
         )
-        return cast(str, response.choices[0].message.content)
+        if "stream" in tmp_kwargs and tmp_kwargs["stream"]:
+
+            def f() -> Iterator[Optional[str]]:
+                for chunk in response:
+                    chunk_message = chunk.choices[0].delta.content  # type: ignore
+                    yield chunk_message
+
+            return f()
+        else:
+            return cast(str, response.choices[0].message.content)
 
     def generate_classifier(self, question: str) -> Callable:
         api_doc = T.get_tool_documentation([T.clip])
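When `stream=True` is forwarded to the OpenAI SDK, `chat.completions.create` returns an iterable of chunks whose `choices[0].delta.content` can be `None` (role-only and finish chunks); the inner generator forwards those values as-is. A usage sketch, assuming `OpenAILMM()` can be default-constructed with an API key in the environment:

lmm = OpenAILMM()  # assumes an OpenAI API key is configured

for chunk in lmm.generate("Summarize the scene in one sentence.", stream=True):
    if chunk is not None:  # skip role-only/finish chunks
        print(chunk, end="", flush=True)
print()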
@@ -309,20 +335,22 @@ class OllamaLMM(LMM):
         self.url = base_url
         self.model_name = model_name
         self.json_mode = json_mode
-        self.stream = False
+        self.kwargs = kwargs
 
     def __call__(
         self,
         input: Union[str, List[Message]],
-    ) -> str:
+        **kwargs: Any,
+    ) -> Union[str, Iterator[Optional[str]]]:
         if isinstance(input, str):
-            return self.generate(input)
-        return self.chat(input)
+            return self.generate(input, **kwargs)
+        return self.chat(input, **kwargs)
 
     def chat(
         self,
         chat: List[Message],
-    ) -> str:
+        **kwargs: Any,
+    ) -> Union[str, Iterator[Optional[str]]]:
         """Chat with the LMM model.
 
         Parameters:
@@ -341,40 +369,85 @@ class OllamaLMM(LMM):
         url = f"{self.url}/chat"
         model = self.model_name
         messages = fixed_chat
-        data = {"model": model, "messages": messages, "stream": self.stream}
+        data = {"model": model, "messages": messages}
+
+        tmp_kwargs = self.kwargs | kwargs
+        data.update(tmp_kwargs)
         json_data = json.dumps(data)
-        response = requests.post(url, data=json_data)
-        if response.status_code != 200:
-            raise ValueError(f"Request failed with status code {response.status_code}")
-        response = response.json()
-        return response["message"]["content"]  # type: ignore
+        if "stream" in tmp_kwargs and tmp_kwargs["stream"]:
+
+            def f() -> Iterator[Optional[str]]:
+                with requests.post(url, data=json_data, stream=True) as stream:
+                    if stream.status_code != 200:
+                        raise ValueError(
+                            f"Request failed with status code {stream.status_code}"
+                        )
+
+                    for chunk in stream.iter_content(chunk_size=None):
+                        chunk_data = json.loads(chunk)
+                        if chunk_data["done"]:
+                            yield None
+                        else:
+                            yield chunk_data["message"]["content"]
+
+            return f()
+        else:
+            stream = requests.post(url, data=json_data)
+            if stream.status_code != 200:
+                raise ValueError(
+                    f"Request failed with status code {stream.status_code}"
+                )
+            stream = stream.json()
+            return stream["message"]["content"]  # type: ignore
 
     def generate(
         self,
         prompt: str,
         media: Optional[List[Union[str, Path]]] = None,
-    ) -> str:
+        **kwargs: Any,
+    ) -> Union[str, Iterator[Optional[str]]]:
 
         url = f"{self.url}/generate"
         data = {
             "model": self.model_name,
             "prompt": prompt,
             "images": [],
-            "stream": self.stream,
         }
 
-        json_data = json.dumps(data)
         if media and len(media) > 0:
             for m in media:
                 data["images"].append(encode_media(m))  # type: ignore
 
-        response = requests.post(url, data=json_data)
+        tmp_kwargs = self.kwargs | kwargs
+        data.update(tmp_kwargs)
+        json_data = json.dumps(data)
+        if "stream" in tmp_kwargs and tmp_kwargs["stream"]:
+
+            def f() -> Iterator[Optional[str]]:
+                with requests.post(url, data=json_data, stream=True) as stream:
+                    if stream.status_code != 200:
+                        raise ValueError(
+                            f"Request failed with status code {stream.status_code}"
+                        )
+
+                    for chunk in stream.iter_content(chunk_size=None):
+                        chunk_data = json.loads(chunk)
+                        if chunk_data["done"]:
+                            yield None
+                        else:
+                            yield chunk_data["response"]
 
-        if response.status_code != 200:
-            raise ValueError(f"Request failed with status code {response.status_code}")
+            return f()
+        else:
+            stream = requests.post(url, data=json_data)
+
+            if stream.status_code != 200:
+                raise ValueError(
+                    f"Request failed with status code {stream.status_code}"
+                )
 
-        response = response.json()
-        return response["response"]  # type: ignore
+            stream = stream.json()
+            return stream["response"]  # type: ignore
 
 
 class ClaudeSonnetLMM(LMM):
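Both Ollama branches parse the server's streaming format: newline-delimited JSON objects carrying a `done` flag, with the text under `message.content` for `/api/chat` and under `response` for `/api/generate`. Note that `iter_content(chunk_size=None)` assumes each received chunk is exactly one JSON object; a split or coalesced chunk would make `json.loads` raise. A sketch of the wire format being parsed, with illustrative payloads rather than captured output:

import json

# Shape of two /api/generate stream chunks when "stream" is true:
raw_chunks = [
    b'{"model": "llava", "response": "A dog on grass", "done": false}',
    b'{"model": "llava", "response": "", "done": true}',
]

for raw in raw_chunks:
    chunk_data = json.loads(raw)
    if chunk_data["done"]:
        break  # the generator yields None here and then stops
    print(chunk_data["response"], end="")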
@@ -385,27 +458,28 @@ class ClaudeSonnetLMM(LMM):
         api_key: Optional[str] = None,
         model_name: str = "claude-3-sonnet-20240229",
         max_tokens: int = 4096,
-        temperature: float = 0.7,
         **kwargs: Any,
     ):
         self.client = anthropic.Anthropic(api_key=api_key)
         self.model_name = model_name
-        self.max_tokens = max_tokens
-        self.temperature = temperature
+        if "max_tokens" not in kwargs:
+            kwargs["max_tokens"] = max_tokens
         self.kwargs = kwargs
 
     def __call__(
         self,
         input: Union[str, List[Dict[str, Any]]],
-    ) -> str:
+        **kwargs: Any,
+    ) -> Union[str, Iterator[Optional[str]]]:
         if isinstance(input, str):
-            return self.generate(input)
-        return self.chat(input)
+            return self.generate(input, **kwargs)
+        return self.chat(input, **kwargs)
 
     def chat(
         self,
         chat: List[Dict[str, Any]],
-    ) -> str:
+        **kwargs: Any,
+    ) -> Union[str, Iterator[Optional[str]]]:
         messages: List[MessageParam] = []
         for msg in chat:
             content: List[Union[TextBlockParam, ImageBlockParam]] = [
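With the dedicated `temperature` field gone, sampling options now travel through `**kwargs` straight into `messages.create`, and `max_tokens` is only defaulted when the caller leaves it out. A construction sketch with hypothetical values:

# temperature is no longer a named constructor parameter; it rides in
# **kwargs and is forwarded verbatim to client.messages.create(...)
lmm = ClaudeSonnetLMM(temperature=0.2)  # kwargs -> {"temperature": 0.2, "max_tokens": 4096}
custom = ClaudeSonnetLMM(max_tokens=1024)  # caller's max_tokens kept, not overwritten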
@@ -426,20 +500,35 @@ class ClaudeSonnetLMM(LMM):
             )
             messages.append({"role": msg["role"], "content": content})
 
+        # prefers kwargs from second dictionary over first
+        tmp_kwargs = self.kwargs | kwargs
         response = self.client.messages.create(
-            model=self.model_name,
-            max_tokens=self.max_tokens,
-            temperature=self.temperature,
-            messages=messages,
-            **self.kwargs,
+            model=self.model_name, messages=messages, **tmp_kwargs
         )
-        return cast(str, response.content[0].text)
+        if "stream" in tmp_kwargs and tmp_kwargs["stream"]:
+
+            def f() -> Iterator[Optional[str]]:
+                for chunk in response:
+                    if (
+                        chunk.type == "message_start"
+                        or chunk.type == "content_block_start"
+                    ):
+                        continue
+                    elif chunk.type == "content_block_delta":
+                        yield chunk.delta.text
+                    elif chunk.type == "message_stop":
+                        yield None
+
+            return f()
+        else:
+            return cast(str, response.content[0].text)
 
     def generate(
         self,
         prompt: str,
         media: Optional[List[Union[str, Path]]] = None,
-    ) -> str:
+        **kwargs: Any,
+    ) -> Union[str, Iterator[Optional[str]]]:
         content: List[Union[TextBlockParam, ImageBlockParam]] = [
             TextBlockParam(type="text", text=prompt)
         ]
@@ -456,11 +545,28 @@ class ClaudeSonnetLMM(LMM):
                 },
             )
         )
+
+        # prefers kwargs from second dictionary over first
+        tmp_kwargs = self.kwargs | kwargs
         response = self.client.messages.create(
             model=self.model_name,
-            max_tokens=self.max_tokens,
-            temperature=self.temperature,
             messages=[{"role": "user", "content": content}],
-            **self.kwargs,
+            **tmp_kwargs,
         )
-        return cast(str, response.content[0].text)
+        if "stream" in tmp_kwargs and tmp_kwargs["stream"]:
+
+            def f() -> Iterator[Optional[str]]:
+                for chunk in response:
+                    if (
+                        chunk.type == "message_start"
+                        or chunk.type == "content_block_start"
+                    ):
+                        continue
+                    elif chunk.type == "content_block_delta":
+                        yield chunk.delta.text
+                    elif chunk.type == "message_stop":
+                        yield None
+
+            return f()
+        else:
+            return cast(str, response.content[0].text)
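The Claude stream is an iterator of typed events: `message_start` and `content_block_start` are skipped, each `content_block_delta` contributes its text, and `message_stop` becomes the `None` sentinel (other event types such as `content_block_stop` and `message_delta` fall through silently). A consumer sketch, assuming a `ClaudeSonnetLMM` instance can be built from the environment:

lmm = ClaudeSonnetLMM()  # assumes ANTHROPIC_API_KEY is set

for chunk in lmm("Describe the image in one sentence.", stream=True):
    if chunk is None:  # message_stop mapped to the end-of-stream sentinel
        break
    print(chunk, end="", flush=True)
print()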