vision-agent 0.2.103__py3-none-any.whl → 0.2.104__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
vision_agent/agent/vision_agent.py CHANGED
@@ -63,7 +63,7 @@ def run_conversation(orch: LMM, chat: List[Message]) -> Dict[str, Any]:
         dir=WORKSPACE,
         conversation=conversation,
     )
-    return extract_json(orch([{"role": "user", "content": prompt}]))
+    return extract_json(orch([{"role": "user", "content": prompt}], stream=False))  # type: ignore


 def run_code_action(code: str, code_interpreter: CodeInterpreter) -> str:
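Note: `stream=False` is needed here because `LMM.__call__` (changed in vision_agent/lmm/lmm.py below) now returns `Union[str, Iterator[Optional[str]]]`, while `extract_json` needs the complete response text; the `# type: ignore` silences mypy about the widened return type. A minimal caller-side sketch of the contract, reusing the `orch` and `prompt` names from the hunk above:

    # With stream=False the call returns the whole completion as one str,
    # which JSON-extraction helpers such as extract_json expect.
    reply = orch([{"role": "user", "content": prompt}], stream=False)
    assert isinstance(reply, str)  # statically still the Union, hence the ignore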
vision_agent/agent/vision_agent_coder.py CHANGED
@@ -129,7 +129,7 @@ def write_plans(
     context = USER_REQ.format(user_request=user_request)
     prompt = PLAN.format(context=context, tool_desc=tool_desc, feedback=working_memory)
     chat[-1]["content"] = prompt
-    return extract_json(model.chat(chat))
+    return extract_json(model(chat, stream=False))  # type: ignore


 def pick_plan(
@@ -160,7 +160,7 @@ def pick_plan(
         docstring=tool_info, plans=plan_str, previous_attempts="", media=media
     )

-    code = extract_code(model(prompt))
+    code = extract_code(model(prompt, stream=False))  # type: ignore
     log_progress(
         {
             "type": "log",
@@ -211,7 +211,7 @@ def pick_plan(
                 "code": DefaultImports.prepend_imports(code),
             }
         )
-        code = extract_code(model(prompt))
+        code = extract_code(model(prompt, stream=False))  # type: ignore
         tool_output = code_interpreter.exec_isolation(
             DefaultImports.prepend_imports(code)
         )
@@ -251,7 +251,7 @@ def pick_plan(
         tool_output=tool_output_str[:20_000],
     )
     chat[-1]["content"] = prompt
-    best_plan = extract_json(model(chat))
+    best_plan = extract_json(model(chat, stream=False))  # type: ignore

     if verbosity >= 1:
         _LOGGER.info(f"Best plan:\n{best_plan}")
@@ -286,7 +286,7 @@ def write_code(
         feedback=feedback,
     )
     chat[-1]["content"] = prompt
-    return extract_code(coder(chat))
+    return extract_code(coder(chat, stream=False))  # type: ignore


 def write_test(
@@ -310,7 +310,7 @@ def write_test(
         media=media,
     )
     chat[-1]["content"] = prompt
-    return extract_code(tester(chat))
+    return extract_code(tester(chat, stream=False))  # type: ignore


 def write_and_test_code(
@@ -439,13 +439,14 @@ def debug_code(
     while not success and count < 3:
         try:
             fixed_code_and_test = extract_json(
-                debugger(
+                debugger(  # type: ignore
                     FIX_BUG.format(
                         code=code,
                         tests=test,
                         result="\n".join(result.text().splitlines()[-50:]),
                         feedback=format_memory(working_memory + new_working_memory),
-                    )
+                    ),
+                    stream=False,
                 )
             )
             success = True
vision_agent/lmm/lmm.py CHANGED
@@ -5,7 +5,7 @@ import logging
 import os
 from abc import ABC, abstractmethod
 from pathlib import Path
-from typing import Any, Callable, Dict, List, Optional, Union, cast
+from typing import Any, Callable, Dict, Iterator, List, Optional, Union, cast

 import anthropic
 import requests
@@ -58,22 +58,24 @@ def encode_media(media: Union[str, Path]) -> str:
 class LMM(ABC):
     @abstractmethod
     def generate(
-        self, prompt: str, media: Optional[List[Union[str, Path]]] = None
-    ) -> str:
+        self, prompt: str, media: Optional[List[Union[str, Path]]] = None, **kwargs: Any
+    ) -> Union[str, Iterator[Optional[str]]]:
         pass

     @abstractmethod
     def chat(
         self,
         chat: List[Message],
-    ) -> str:
+        **kwargs: Any,
+    ) -> Union[str, Iterator[Optional[str]]]:
         pass

     @abstractmethod
     def __call__(
         self,
         input: Union[str, List[Message]],
-    ) -> str:
+        **kwargs: Any,
+    ) -> Union[str, Iterator[Optional[str]]]:
         pass

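Every backend below implements this widened contract, so callers that want plain text must either pass `stream=False` or collapse the iterator themselves. A hedged caller-side sketch (not part of the package) that normalizes both return shapes:

    from typing import Iterator, Optional, Union

    def to_text(out: Union[str, Iterator[Optional[str]]]) -> str:
        # Non-streaming calls already return the full string.
        if isinstance(out, str):
            return out
        # Streaming calls yield Optional[str] chunks; None marks end-of-stream.
        return "".join(chunk for chunk in out if chunk is not None)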
@@ -104,15 +106,17 @@ class OpenAILMM(LMM):
     def __call__(
         self,
         input: Union[str, List[Message]],
-    ) -> str:
+        **kwargs: Any,
+    ) -> Union[str, Iterator[Optional[str]]]:
         if isinstance(input, str):
-            return self.generate(input)
-        return self.chat(input)
+            return self.generate(input, **kwargs)
+        return self.chat(input, **kwargs)

     def chat(
         self,
         chat: List[Message],
-    ) -> str:
+        **kwargs: Any,
+    ) -> Union[str, Iterator[Optional[str]]]:
         """Chat with the LMM model.

         Parameters:
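Threading `**kwargs` through `__call__` into `chat` and `generate` is what enables per-call options like `stream=False`; the next hunk merges them with the constructor defaults via `self.kwargs | kwargs`, the PEP 584 dict-union operator (Python 3.9+), in which keys from the right operand win. An illustration with made-up values:

    defaults = {"temperature": 0.7, "stream": False}  # captured at construction
    per_call = {"stream": True}                       # passed to a single call
    assert (defaults | per_call) == {"temperature": 0.7, "stream": True}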
@@ -141,17 +145,28 @@ class OpenAILMM(LMM):
             )
             fixed_chat.append(fixed_c)

+        # prefers kwargs from second dictionary over first
+        tmp_kwargs = self.kwargs | kwargs
         response = self.client.chat.completions.create(
-            model=self.model_name, messages=fixed_chat, **self.kwargs  # type: ignore
+            model=self.model_name, messages=fixed_chat, **tmp_kwargs  # type: ignore
         )
+        if "stream" in tmp_kwargs and tmp_kwargs["stream"]:
+
+            def f() -> Iterator[Optional[str]]:
+                for chunk in response:
+                    chunk_message = chunk.choices[0].delta.content  # type: ignore
+                    yield chunk_message

-        return cast(str, response.choices[0].message.content)
+            return f()
+        else:
+            return cast(str, response.choices[0].message.content)

     def generate(
         self,
         prompt: str,
         media: Optional[List[Union[str, Path]]] = None,
-    ) -> str:
+        **kwargs: Any,
+    ) -> Union[str, Iterator[Optional[str]]]:
         message: List[Dict[str, Any]] = [
             {
                 "role": "user",
@@ -173,10 +188,21 @@ class OpenAILMM(LMM):
                 },
             )

+        # prefers kwargs from second dictionary over first
+        tmp_kwargs = self.kwargs | kwargs
         response = self.client.chat.completions.create(
-            model=self.model_name, messages=message, **self.kwargs  # type: ignore
+            model=self.model_name, messages=message, **tmp_kwargs  # type: ignore
         )
-        return cast(str, response.choices[0].message.content)
+        if "stream" in tmp_kwargs and tmp_kwargs["stream"]:
+
+            def f() -> Iterator[Optional[str]]:
+                for chunk in response:
+                    chunk_message = chunk.choices[0].delta.content  # type: ignore
+                    yield chunk_message
+
+            return f()
+        else:
+            return cast(str, response.choices[0].message.content)

     def generate_classifier(self, question: str) -> Callable:
         api_doc = T.get_tool_documentation([T.clip])
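With `stream=True` the OpenAI SDK returns an iterable of chunks whose `choices[0].delta.content` can be `None` (for example on the final chunk), and `f()` forwards those values unchanged. A hedged usage sketch, assuming an `OpenAILMM` instance named `lmm`:

    # Print the completion incrementally, skipping empty deltas.
    for chunk in lmm.generate("Describe the image.", stream=True):
        if chunk is not None:
            print(chunk, end="", flush=True)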
@@ -309,20 +335,22 @@ class OllamaLMM(LMM):
         self.url = base_url
         self.model_name = model_name
         self.json_mode = json_mode
-        self.stream = False
+        self.kwargs = kwargs

     def __call__(
         self,
         input: Union[str, List[Message]],
-    ) -> str:
+        **kwargs: Any,
+    ) -> Union[str, Iterator[Optional[str]]]:
         if isinstance(input, str):
-            return self.generate(input)
-        return self.chat(input)
+            return self.generate(input, **kwargs)
+        return self.chat(input, **kwargs)

     def chat(
         self,
         chat: List[Message],
-    ) -> str:
+        **kwargs: Any,
+    ) -> Union[str, Iterator[Optional[str]]]:
         """Chat with the LMM model.

         Parameters:
@@ -341,40 +369,85 @@ class OllamaLMM(LMM):
         url = f"{self.url}/chat"
         model = self.model_name
         messages = fixed_chat
-        data = {"model": model, "messages": messages, "stream": self.stream}
+        data = {"model": model, "messages": messages}
+
+        tmp_kwargs = self.kwargs | kwargs
+        data.update(tmp_kwargs)
         json_data = json.dumps(data)
-        response = requests.post(url, data=json_data)
-        if response.status_code != 200:
-            raise ValueError(f"Request failed with status code {response.status_code}")
-        response = response.json()
-        return response["message"]["content"]  # type: ignore
+        if "stream" in tmp_kwargs and tmp_kwargs["stream"]:
+
+            def f() -> Iterator[Optional[str]]:
+                with requests.post(url, data=json_data, stream=True) as stream:
+                    if stream.status_code != 200:
+                        raise ValueError(
+                            f"Request failed with status code {stream.status_code}"
+                        )
+
+                    for chunk in stream.iter_content(chunk_size=None):
+                        chunk_data = json.loads(chunk)
+                        if chunk_data["done"]:
+                            yield None
+                        else:
+                            yield chunk_data["message"]["content"]
+
+            return f()
+        else:
+            stream = requests.post(url, data=json_data)
+            if stream.status_code != 200:
+                raise ValueError(
+                    f"Request failed with status code {stream.status_code}"
+                )
+            stream = stream.json()
+            return stream["message"]["content"]  # type: ignore

     def generate(
         self,
         prompt: str,
         media: Optional[List[Union[str, Path]]] = None,
-    ) -> str:
+        **kwargs: Any,
+    ) -> Union[str, Iterator[Optional[str]]]:

         url = f"{self.url}/generate"
         data = {
             "model": self.model_name,
             "prompt": prompt,
             "images": [],
-            "stream": self.stream,
         }

-        json_data = json.dumps(data)
         if media and len(media) > 0:
             for m in media:
                 data["images"].append(encode_media(m))  # type: ignore

-        response = requests.post(url, data=json_data)
+        tmp_kwargs = self.kwargs | kwargs
+        data.update(tmp_kwargs)
+        json_data = json.dumps(data)
+        if "stream" in tmp_kwargs and tmp_kwargs["stream"]:
+
+            def f() -> Iterator[Optional[str]]:
+                with requests.post(url, data=json_data, stream=True) as stream:
+                    if stream.status_code != 200:
+                        raise ValueError(
+                            f"Request failed with status code {stream.status_code}"
+                        )
+
+                    for chunk in stream.iter_content(chunk_size=None):
+                        chunk_data = json.loads(chunk)
+                        if chunk_data["done"]:
+                            yield None
+                        else:
+                            yield chunk_data["response"]

-        if response.status_code != 200:
-            raise ValueError(f"Request failed with status code {response.status_code}")
+            return f()
+        else:
+            stream = requests.post(url, data=json_data)
+
+            if stream.status_code != 200:
+                raise ValueError(
+                    f"Request failed with status code {stream.status_code}"
+                )

-        response = response.json()
-        return response["response"]  # type: ignore
+            stream = stream.json()
+            return stream["response"]  # type: ignore


 class ClaudeSonnetLMM(LMM):
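Ollama streams newline-delimited JSON objects, each with a `done` flag; the generator surfaces the terminal `done` record as a `None` sentinel, while the non-streaming branch parses a single JSON body (note that it reuses the name `stream` for the `requests` response object, behaviorally identical to the old `response` variable). A hedged usage sketch, assuming an `OllamaLMM` instance named `lmm`:

    pieces = []
    for chunk in lmm.chat([{"role": "user", "content": "hi"}], stream=True):
        if chunk is None:  # the {"done": true} record ends the stream
            break
        pieces.append(chunk)
    print("".join(pieces))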
@@ -385,27 +458,28 @@ class ClaudeSonnetLMM(LMM):
         api_key: Optional[str] = None,
         model_name: str = "claude-3-sonnet-20240229",
         max_tokens: int = 4096,
-        temperature: float = 0.7,
         **kwargs: Any,
     ):
         self.client = anthropic.Anthropic(api_key=api_key)
         self.model_name = model_name
-        self.max_tokens = max_tokens
-        self.temperature = temperature
+        if "max_tokens" not in kwargs:
+            kwargs["max_tokens"] = max_tokens
         self.kwargs = kwargs

     def __call__(
         self,
         input: Union[str, List[Dict[str, Any]]],
-    ) -> str:
+        **kwargs: Any,
+    ) -> Union[str, Iterator[Optional[str]]]:
         if isinstance(input, str):
-            return self.generate(input)
-        return self.chat(input)
+            return self.generate(input, **kwargs)
+        return self.chat(input, **kwargs)

     def chat(
         self,
         chat: List[Dict[str, Any]],
-    ) -> str:
+        **kwargs: Any,
+    ) -> Union[str, Iterator[Optional[str]]]:
         messages: List[MessageParam] = []
         for msg in chat:
             content: List[Union[TextBlockParam, ImageBlockParam]] = [
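Folding `max_tokens` into `self.kwargs` (and dropping the hard-coded `temperature=0.7`) routes both options through the same kwargs-merge path: `max_tokens` keeps a seeded default, while `temperature` is now only sent when a caller supplies it. An illustration of the seeding logic with made-up values:

    kwargs = {"temperature": 0.2}  # as passed to the constructor
    max_tokens = 4096              # the parameter default
    if "max_tokens" not in kwargs:
        kwargs["max_tokens"] = max_tokens
    assert kwargs == {"temperature": 0.2, "max_tokens": 4096}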
@@ -426,20 +500,35 @@ class ClaudeSonnetLMM(LMM):
             )
             messages.append({"role": msg["role"], "content": content})

+        # prefers kwargs from second dictionary over first
+        tmp_kwargs = self.kwargs | kwargs
         response = self.client.messages.create(
-            model=self.model_name,
-            max_tokens=self.max_tokens,
-            temperature=self.temperature,
-            messages=messages,
-            **self.kwargs,
+            model=self.model_name, messages=messages, **tmp_kwargs
         )
-        return cast(str, response.content[0].text)
+        if "stream" in tmp_kwargs and tmp_kwargs["stream"]:
+
+            def f() -> Iterator[Optional[str]]:
+                for chunk in response:
+                    if (
+                        chunk.type == "message_start"
+                        or chunk.type == "content_block_start"
+                    ):
+                        continue
+                    elif chunk.type == "content_block_delta":
+                        yield chunk.delta.text
+                    elif chunk.type == "message_stop":
+                        yield None
+
+            return f()
+        else:
+            return cast(str, response.content[0].text)

     def generate(
         self,
         prompt: str,
         media: Optional[List[Union[str, Path]]] = None,
-    ) -> str:
+        **kwargs: Any,
+    ) -> Union[str, Iterator[Optional[str]]]:
         content: List[Union[TextBlockParam, ImageBlockParam]] = [
             TextBlockParam(type="text", text=prompt)
         ]
@@ -456,11 +545,28 @@ class ClaudeSonnetLMM(LMM):
                 },
             )
         )
+
+        # prefers kwargs from second dictionary over first
+        tmp_kwargs = self.kwargs | kwargs
         response = self.client.messages.create(
             model=self.model_name,
-            max_tokens=self.max_tokens,
-            temperature=self.temperature,
             messages=[{"role": "user", "content": content}],
-            **self.kwargs,
+            **tmp_kwargs,
         )
-        return cast(str, response.content[0].text)
+        if "stream" in tmp_kwargs and tmp_kwargs["stream"]:
+
+            def f() -> Iterator[Optional[str]]:
+                for chunk in response:
+                    if (
+                        chunk.type == "message_start"
+                        or chunk.type == "content_block_start"
+                    ):
+                        continue
+                    elif chunk.type == "content_block_delta":
+                        yield chunk.delta.text
+                    elif chunk.type == "message_stop":
+                        yield None
+
+            return f()
+        else:
+            return cast(str, response.content[0].text)
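When `stream=True` reaches `client.messages.create`, the Anthropic SDK yields typed server-sent events; the generator keeps only `content_block_delta` text and maps `message_stop` to `None`, giving all three backends the same `Iterator[Optional[str]]` shape. A hedged usage sketch, assuming a `ClaudeSonnetLMM` instance named `lmm` (the same loop works against the other backends):

    for piece in lmm.generate("Summarize the scene.", stream=True):
        if piece is None:  # message_stop
            break
        print(piece, end="", flush=True)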
{vision_agent-0.2.103.dist-info → vision_agent-0.2.104.dist-info}/METADATA RENAMED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: vision-agent
-Version: 0.2.103
+Version: 0.2.104
 Summary: Toolset for Vision Agent
 Author: Landing AI
 Author-email: dev@landing.ai
{vision_agent-0.2.103.dist-info → vision_agent-0.2.104.dist-info}/RECORD RENAMED
@@ -2,8 +2,8 @@ vision_agent/__init__.py,sha256=EAb4-f9iyuEYkBrX4ag1syM8Syx8118_t0R6_C34M9w,57
 vision_agent/agent/__init__.py,sha256=qpduQ9YufJQfMmG6jwKC2xmlbtR2qK8_1eQC1sGA9Ks,135
 vision_agent/agent/agent.py,sha256=Bt8yhjCFXuRdZaHxKEesG40V09nWRt45sZluri1R3AA,575
 vision_agent/agent/agent_utils.py,sha256=JXdl2xz14LKQAmScY-MIW23AD2WBFCsnI0JS6dAyj3Q,1412
-vision_agent/agent/vision_agent.py,sha256=U7VqUR-Io0xkGHpcF03Kq87Y0YQIdZQGqxuXdwjQzgk,8441
-vision_agent/agent/vision_agent_coder.py,sha256=A3x1vb2iYq1Bi0AfUodFh2b0w9G0XfN0Kq0gjY8f5f0,30700
+vision_agent/agent/vision_agent.py,sha256=4vzKYNoScv_sOZiqefo46iKJNZOtqSFvSJif0zZIdLI,8471
+vision_agent/agent/vision_agent_coder.py,sha256=oo3IoRrc-cVdjKq_YsjzkBZNTBtiCTIctGfeC5C7MXM,30926
 vision_agent/agent/vision_agent_coder_prompts.py,sha256=a3R_vHlT2FW3-DSn4OWgzF9zEAx-uKM4ZaTi9Kn-K54,11116
 vision_agent/agent/vision_agent_prompts.py,sha256=hjs-m4ZHR7HE1HtOeX_1rOvTQA2FMEAqEkaBbGPBYDo,6072
 vision_agent/clients/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -12,7 +12,7 @@ vision_agent/clients/landing_public_api.py,sha256=Tjl8uBZWc3dvrCOKg-PCYjw3RC3X5Y
 vision_agent/fonts/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 vision_agent/fonts/default_font_ch_en.ttf,sha256=1YM0Z3XqLDjSNbF7ihQFSAIUdjF9m1rtHiNC_6QosTE,1594400
 vision_agent/lmm/__init__.py,sha256=YuUZRsMHdn8cMOv6iBU8yUqlIOLrbZQqZl9KPnofsHQ,103
-vision_agent/lmm/lmm.py,sha256=KcS6h-8whGFmwt7t4LNlj0hZ4U-rBojYBLKLmrMsF48,15075
+vision_agent/lmm/lmm.py,sha256=TgEwrtQqpnWlBYEvsSU6DbkY3Y7MM8wRb4lMQgSiM0k,19435
 vision_agent/lmm/types.py,sha256=8TSRoTbXyCKVJiH-wHXI2OiGOMSkYv1vLGYeAXtNpOQ,153
 vision_agent/tools/__init__.py,sha256=MK0D8NtIChwGHwqsTz3LeV5BGuQecNVrNzUsyaEwuGA,1926
 vision_agent/tools/meta_tools.py,sha256=v2FrLl0YwM7JwsVRfgfnryd9qorbPRiObestexbnNBs,15170
@@ -27,7 +27,7 @@ vision_agent/utils/image_utils.py,sha256=c_g5i_cFC0C-Yw9gU_NaVgQdmBlyumw3bLIDtCU
 vision_agent/utils/sim.py,sha256=7JvtWGN0Ik5ife3qQYWs7Fm3T8AnAXGFd5HnvDC15mQ,4433
 vision_agent/utils/type_defs.py,sha256=BE12s3JNQy36QvauXHjwyeffVh5enfcvd4vTzSwvEZI,1384
 vision_agent/utils/video.py,sha256=rNmU9KEIkZB5-EztZNlUiKYN0mm_55A_2VGUM0QpqLA,8779
-vision_agent-0.2.103.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
-vision_agent-0.2.103.dist-info/METADATA,sha256=DfZa2bcKHvQxsgAJRBdIEpPGdBjt18TuOwMzXOUIV_w,10729
-vision_agent-0.2.103.dist-info/WHEEL,sha256=7Z8_27uaHI_UZAc4Uox4PpBhQ9Y5_modZXWMxtUi4NU,88
-vision_agent-0.2.103.dist-info/RECORD,,
+vision_agent-0.2.104.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
+vision_agent-0.2.104.dist-info/METADATA,sha256=aSP8goyL8RZS_6SZSzrJZCsIzySrN_domJ2vvvbedQg,10729
+vision_agent-0.2.104.dist-info/WHEEL,sha256=7Z8_27uaHI_UZAc4Uox4PpBhQ9Y5_modZXWMxtUi4NU,88
+vision_agent-0.2.104.dist-info/RECORD,,