vision-agent 0.2.103__py3-none-any.whl → 0.2.104__py3-none-any.whl

Sign up to get free protection for your applications and to get access to all the features.
@@ -63,7 +63,7 @@ def run_conversation(orch: LMM, chat: List[Message]) -> Dict[str, Any]:
63
63
  dir=WORKSPACE,
64
64
  conversation=conversation,
65
65
  )
66
- return extract_json(orch([{"role": "user", "content": prompt}]))
66
+ return extract_json(orch([{"role": "user", "content": prompt}], stream=False)) # type: ignore
67
67
 
68
68
 
69
69
  def run_code_action(code: str, code_interpreter: CodeInterpreter) -> str:
@@ -129,7 +129,7 @@ def write_plans(
129
129
  context = USER_REQ.format(user_request=user_request)
130
130
  prompt = PLAN.format(context=context, tool_desc=tool_desc, feedback=working_memory)
131
131
  chat[-1]["content"] = prompt
132
- return extract_json(model.chat(chat))
132
+ return extract_json(model(chat, stream=False)) # type: ignore
133
133
 
134
134
 
135
135
  def pick_plan(
@@ -160,7 +160,7 @@ def pick_plan(
160
160
  docstring=tool_info, plans=plan_str, previous_attempts="", media=media
161
161
  )
162
162
 
163
- code = extract_code(model(prompt))
163
+ code = extract_code(model(prompt, stream=False)) # type: ignore
164
164
  log_progress(
165
165
  {
166
166
  "type": "log",
@@ -211,7 +211,7 @@ def pick_plan(
211
211
  "code": DefaultImports.prepend_imports(code),
212
212
  }
213
213
  )
214
- code = extract_code(model(prompt))
214
+ code = extract_code(model(prompt, stream=False)) # type: ignore
215
215
  tool_output = code_interpreter.exec_isolation(
216
216
  DefaultImports.prepend_imports(code)
217
217
  )
@@ -251,7 +251,7 @@ def pick_plan(
251
251
  tool_output=tool_output_str[:20_000],
252
252
  )
253
253
  chat[-1]["content"] = prompt
254
- best_plan = extract_json(model(chat))
254
+ best_plan = extract_json(model(chat, stream=False)) # type: ignore
255
255
 
256
256
  if verbosity >= 1:
257
257
  _LOGGER.info(f"Best plan:\n{best_plan}")
@@ -286,7 +286,7 @@ def write_code(
286
286
  feedback=feedback,
287
287
  )
288
288
  chat[-1]["content"] = prompt
289
- return extract_code(coder(chat))
289
+ return extract_code(coder(chat, stream=False)) # type: ignore
290
290
 
291
291
 
292
292
  def write_test(
@@ -310,7 +310,7 @@ def write_test(
310
310
  media=media,
311
311
  )
312
312
  chat[-1]["content"] = prompt
313
- return extract_code(tester(chat))
313
+ return extract_code(tester(chat, stream=False)) # type: ignore
314
314
 
315
315
 
316
316
  def write_and_test_code(
@@ -439,13 +439,14 @@ def debug_code(
439
439
  while not success and count < 3:
440
440
  try:
441
441
  fixed_code_and_test = extract_json(
442
- debugger(
442
+ debugger( # type: ignore
443
443
  FIX_BUG.format(
444
444
  code=code,
445
445
  tests=test,
446
446
  result="\n".join(result.text().splitlines()[-50:]),
447
447
  feedback=format_memory(working_memory + new_working_memory),
448
- )
448
+ ),
449
+ stream=False,
449
450
  )
450
451
  )
451
452
  success = True
vision_agent/lmm/lmm.py CHANGED
@@ -5,7 +5,7 @@ import logging
5
5
  import os
6
6
  from abc import ABC, abstractmethod
7
7
  from pathlib import Path
8
- from typing import Any, Callable, Dict, List, Optional, Union, cast
8
+ from typing import Any, Callable, Dict, Iterator, List, Optional, Union, cast
9
9
 
10
10
  import anthropic
11
11
  import requests
@@ -58,22 +58,24 @@ def encode_media(media: Union[str, Path]) -> str:
58
58
  class LMM(ABC):
59
59
  @abstractmethod
60
60
  def generate(
61
- self, prompt: str, media: Optional[List[Union[str, Path]]] = None
62
- ) -> str:
61
+ self, prompt: str, media: Optional[List[Union[str, Path]]] = None, **kwargs: Any
62
+ ) -> Union[str, Iterator[Optional[str]]]:
63
63
  pass
64
64
 
65
65
  @abstractmethod
66
66
  def chat(
67
67
  self,
68
68
  chat: List[Message],
69
- ) -> str:
69
+ **kwargs: Any,
70
+ ) -> Union[str, Iterator[Optional[str]]]:
70
71
  pass
71
72
 
72
73
  @abstractmethod
73
74
  def __call__(
74
75
  self,
75
76
  input: Union[str, List[Message]],
76
- ) -> str:
77
+ **kwargs: Any,
78
+ ) -> Union[str, Iterator[Optional[str]]]:
77
79
  pass
78
80
 
79
81
 
@@ -104,15 +106,17 @@ class OpenAILMM(LMM):
104
106
  def __call__(
105
107
  self,
106
108
  input: Union[str, List[Message]],
107
- ) -> str:
109
+ **kwargs: Any,
110
+ ) -> Union[str, Iterator[Optional[str]]]:
108
111
  if isinstance(input, str):
109
- return self.generate(input)
110
- return self.chat(input)
112
+ return self.generate(input, **kwargs)
113
+ return self.chat(input, **kwargs)
111
114
 
112
115
  def chat(
113
116
  self,
114
117
  chat: List[Message],
115
- ) -> str:
118
+ **kwargs: Any,
119
+ ) -> Union[str, Iterator[Optional[str]]]:
116
120
  """Chat with the LMM model.
117
121
 
118
122
  Parameters:
@@ -141,17 +145,28 @@ class OpenAILMM(LMM):
141
145
  )
142
146
  fixed_chat.append(fixed_c)
143
147
 
148
+ # prefers kwargs from second dictionary over first
149
+ tmp_kwargs = self.kwargs | kwargs
144
150
  response = self.client.chat.completions.create(
145
- model=self.model_name, messages=fixed_chat, **self.kwargs # type: ignore
151
+ model=self.model_name, messages=fixed_chat, **tmp_kwargs # type: ignore
146
152
  )
153
+ if "stream" in tmp_kwargs and tmp_kwargs["stream"]:
154
+
155
+ def f() -> Iterator[Optional[str]]:
156
+ for chunk in response:
157
+ chunk_message = chunk.choices[0].delta.content # type: ignore
158
+ yield chunk_message
147
159
 
148
- return cast(str, response.choices[0].message.content)
160
+ return f()
161
+ else:
162
+ return cast(str, response.choices[0].message.content)
149
163
 
150
164
  def generate(
151
165
  self,
152
166
  prompt: str,
153
167
  media: Optional[List[Union[str, Path]]] = None,
154
- ) -> str:
168
+ **kwargs: Any,
169
+ ) -> Union[str, Iterator[Optional[str]]]:
155
170
  message: List[Dict[str, Any]] = [
156
171
  {
157
172
  "role": "user",
@@ -173,10 +188,21 @@ class OpenAILMM(LMM):
173
188
  },
174
189
  )
175
190
 
191
+ # prefers kwargs from second dictionary over first
192
+ tmp_kwargs = self.kwargs | kwargs
176
193
  response = self.client.chat.completions.create(
177
- model=self.model_name, messages=message, **self.kwargs # type: ignore
194
+ model=self.model_name, messages=message, **tmp_kwargs # type: ignore
178
195
  )
179
- return cast(str, response.choices[0].message.content)
196
+ if "stream" in tmp_kwargs and tmp_kwargs["stream"]:
197
+
198
+ def f() -> Iterator[Optional[str]]:
199
+ for chunk in response:
200
+ chunk_message = chunk.choices[0].delta.content # type: ignore
201
+ yield chunk_message
202
+
203
+ return f()
204
+ else:
205
+ return cast(str, response.choices[0].message.content)
180
206
 
181
207
  def generate_classifier(self, question: str) -> Callable:
182
208
  api_doc = T.get_tool_documentation([T.clip])
@@ -309,20 +335,22 @@ class OllamaLMM(LMM):
309
335
  self.url = base_url
310
336
  self.model_name = model_name
311
337
  self.json_mode = json_mode
312
- self.stream = False
338
+ self.kwargs = kwargs
313
339
 
314
340
  def __call__(
315
341
  self,
316
342
  input: Union[str, List[Message]],
317
- ) -> str:
343
+ **kwargs: Any,
344
+ ) -> Union[str, Iterator[Optional[str]]]:
318
345
  if isinstance(input, str):
319
- return self.generate(input)
320
- return self.chat(input)
346
+ return self.generate(input, **kwargs)
347
+ return self.chat(input, **kwargs)
321
348
 
322
349
  def chat(
323
350
  self,
324
351
  chat: List[Message],
325
- ) -> str:
352
+ **kwargs: Any,
353
+ ) -> Union[str, Iterator[Optional[str]]]:
326
354
  """Chat with the LMM model.
327
355
 
328
356
  Parameters:
@@ -341,40 +369,85 @@ class OllamaLMM(LMM):
341
369
  url = f"{self.url}/chat"
342
370
  model = self.model_name
343
371
  messages = fixed_chat
344
- data = {"model": model, "messages": messages, "stream": self.stream}
372
+ data = {"model": model, "messages": messages}
373
+
374
+ tmp_kwargs = self.kwargs | kwargs
375
+ data.update(tmp_kwargs)
345
376
  json_data = json.dumps(data)
346
- response = requests.post(url, data=json_data)
347
- if response.status_code != 200:
348
- raise ValueError(f"Request failed with status code {response.status_code}")
349
- response = response.json()
350
- return response["message"]["content"] # type: ignore
377
+ if "stream" in tmp_kwargs and tmp_kwargs["stream"]:
378
+
379
+ def f() -> Iterator[Optional[str]]:
380
+ with requests.post(url, data=json_data, stream=True) as stream:
381
+ if stream.status_code != 200:
382
+ raise ValueError(
383
+ f"Request failed with status code {stream.status_code}"
384
+ )
385
+
386
+ for chunk in stream.iter_content(chunk_size=None):
387
+ chunk_data = json.loads(chunk)
388
+ if chunk_data["done"]:
389
+ yield None
390
+ else:
391
+ yield chunk_data["message"]["content"]
392
+
393
+ return f()
394
+ else:
395
+ stream = requests.post(url, data=json_data)
396
+ if stream.status_code != 200:
397
+ raise ValueError(
398
+ f"Request failed with status code {stream.status_code}"
399
+ )
400
+ stream = stream.json()
401
+ return stream["message"]["content"] # type: ignore
351
402
 
352
403
  def generate(
353
404
  self,
354
405
  prompt: str,
355
406
  media: Optional[List[Union[str, Path]]] = None,
356
- ) -> str:
407
+ **kwargs: Any,
408
+ ) -> Union[str, Iterator[Optional[str]]]:
357
409
 
358
410
  url = f"{self.url}/generate"
359
411
  data = {
360
412
  "model": self.model_name,
361
413
  "prompt": prompt,
362
414
  "images": [],
363
- "stream": self.stream,
364
415
  }
365
416
 
366
- json_data = json.dumps(data)
367
417
  if media and len(media) > 0:
368
418
  for m in media:
369
419
  data["images"].append(encode_media(m)) # type: ignore
370
420
 
371
- response = requests.post(url, data=json_data)
421
+ tmp_kwargs = self.kwargs | kwargs
422
+ data.update(tmp_kwargs)
423
+ json_data = json.dumps(data)
424
+ if "stream" in tmp_kwargs and tmp_kwargs["stream"]:
425
+
426
+ def f() -> Iterator[Optional[str]]:
427
+ with requests.post(url, data=json_data, stream=True) as stream:
428
+ if stream.status_code != 200:
429
+ raise ValueError(
430
+ f"Request failed with status code {stream.status_code}"
431
+ )
432
+
433
+ for chunk in stream.iter_content(chunk_size=None):
434
+ chunk_data = json.loads(chunk)
435
+ if chunk_data["done"]:
436
+ yield None
437
+ else:
438
+ yield chunk_data["response"]
372
439
 
373
- if response.status_code != 200:
374
- raise ValueError(f"Request failed with status code {response.status_code}")
440
+ return f()
441
+ else:
442
+ stream = requests.post(url, data=json_data)
443
+
444
+ if stream.status_code != 200:
445
+ raise ValueError(
446
+ f"Request failed with status code {stream.status_code}"
447
+ )
375
448
 
376
- response = response.json()
377
- return response["response"] # type: ignore
449
+ stream = stream.json()
450
+ return stream["response"] # type: ignore
378
451
 
379
452
 
380
453
  class ClaudeSonnetLMM(LMM):
@@ -385,27 +458,28 @@ class ClaudeSonnetLMM(LMM):
385
458
  api_key: Optional[str] = None,
386
459
  model_name: str = "claude-3-sonnet-20240229",
387
460
  max_tokens: int = 4096,
388
- temperature: float = 0.7,
389
461
  **kwargs: Any,
390
462
  ):
391
463
  self.client = anthropic.Anthropic(api_key=api_key)
392
464
  self.model_name = model_name
393
- self.max_tokens = max_tokens
394
- self.temperature = temperature
465
+ if "max_tokens" not in kwargs:
466
+ kwargs["max_tokens"] = max_tokens
395
467
  self.kwargs = kwargs
396
468
 
397
469
  def __call__(
398
470
  self,
399
471
  input: Union[str, List[Dict[str, Any]]],
400
- ) -> str:
472
+ **kwargs: Any,
473
+ ) -> Union[str, Iterator[Optional[str]]]:
401
474
  if isinstance(input, str):
402
- return self.generate(input)
403
- return self.chat(input)
475
+ return self.generate(input, **kwargs)
476
+ return self.chat(input, **kwargs)
404
477
 
405
478
  def chat(
406
479
  self,
407
480
  chat: List[Dict[str, Any]],
408
- ) -> str:
481
+ **kwargs: Any,
482
+ ) -> Union[str, Iterator[Optional[str]]]:
409
483
  messages: List[MessageParam] = []
410
484
  for msg in chat:
411
485
  content: List[Union[TextBlockParam, ImageBlockParam]] = [
@@ -426,20 +500,35 @@ class ClaudeSonnetLMM(LMM):
426
500
  )
427
501
  messages.append({"role": msg["role"], "content": content})
428
502
 
503
+ # prefers kwargs from second dictionary over first
504
+ tmp_kwargs = self.kwargs | kwargs
429
505
  response = self.client.messages.create(
430
- model=self.model_name,
431
- max_tokens=self.max_tokens,
432
- temperature=self.temperature,
433
- messages=messages,
434
- **self.kwargs,
506
+ model=self.model_name, messages=messages, **tmp_kwargs
435
507
  )
436
- return cast(str, response.content[0].text)
508
+ if "stream" in tmp_kwargs and tmp_kwargs["stream"]:
509
+
510
+ def f() -> Iterator[Optional[str]]:
511
+ for chunk in response:
512
+ if (
513
+ chunk.type == "message_start"
514
+ or chunk.type == "content_block_start"
515
+ ):
516
+ continue
517
+ elif chunk.type == "content_block_delta":
518
+ yield chunk.delta.text
519
+ elif chunk.type == "message_stop":
520
+ yield None
521
+
522
+ return f()
523
+ else:
524
+ return cast(str, response.content[0].text)
437
525
 
438
526
  def generate(
439
527
  self,
440
528
  prompt: str,
441
529
  media: Optional[List[Union[str, Path]]] = None,
442
- ) -> str:
530
+ **kwargs: Any,
531
+ ) -> Union[str, Iterator[Optional[str]]]:
443
532
  content: List[Union[TextBlockParam, ImageBlockParam]] = [
444
533
  TextBlockParam(type="text", text=prompt)
445
534
  ]
@@ -456,11 +545,28 @@ class ClaudeSonnetLMM(LMM):
456
545
  },
457
546
  )
458
547
  )
548
+
549
+ # prefers kwargs from second dictionary over first
550
+ tmp_kwargs = self.kwargs | kwargs
459
551
  response = self.client.messages.create(
460
552
  model=self.model_name,
461
- max_tokens=self.max_tokens,
462
- temperature=self.temperature,
463
553
  messages=[{"role": "user", "content": content}],
464
- **self.kwargs,
554
+ **tmp_kwargs,
465
555
  )
466
- return cast(str, response.content[0].text)
556
+ if "stream" in tmp_kwargs and tmp_kwargs["stream"]:
557
+
558
+ def f() -> Iterator[Optional[str]]:
559
+ for chunk in response:
560
+ if (
561
+ chunk.type == "message_start"
562
+ or chunk.type == "content_block_start"
563
+ ):
564
+ continue
565
+ elif chunk.type == "content_block_delta":
566
+ yield chunk.delta.text
567
+ elif chunk.type == "message_stop":
568
+ yield None
569
+
570
+ return f()
571
+ else:
572
+ return cast(str, response.content[0].text)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: vision-agent
3
- Version: 0.2.103
3
+ Version: 0.2.104
4
4
  Summary: Toolset for Vision Agent
5
5
  Author: Landing AI
6
6
  Author-email: dev@landing.ai
@@ -2,8 +2,8 @@ vision_agent/__init__.py,sha256=EAb4-f9iyuEYkBrX4ag1syM8Syx8118_t0R6_C34M9w,57
2
2
  vision_agent/agent/__init__.py,sha256=qpduQ9YufJQfMmG6jwKC2xmlbtR2qK8_1eQC1sGA9Ks,135
3
3
  vision_agent/agent/agent.py,sha256=Bt8yhjCFXuRdZaHxKEesG40V09nWRt45sZluri1R3AA,575
4
4
  vision_agent/agent/agent_utils.py,sha256=JXdl2xz14LKQAmScY-MIW23AD2WBFCsnI0JS6dAyj3Q,1412
5
- vision_agent/agent/vision_agent.py,sha256=U7VqUR-Io0xkGHpcF03Kq87Y0YQIdZQGqxuXdwjQzgk,8441
6
- vision_agent/agent/vision_agent_coder.py,sha256=A3x1vb2iYq1Bi0AfUodFh2b0w9G0XfN0Kq0gjY8f5f0,30700
5
+ vision_agent/agent/vision_agent.py,sha256=4vzKYNoScv_sOZiqefo46iKJNZOtqSFvSJif0zZIdLI,8471
6
+ vision_agent/agent/vision_agent_coder.py,sha256=oo3IoRrc-cVdjKq_YsjzkBZNTBtiCTIctGfeC5C7MXM,30926
7
7
  vision_agent/agent/vision_agent_coder_prompts.py,sha256=a3R_vHlT2FW3-DSn4OWgzF9zEAx-uKM4ZaTi9Kn-K54,11116
8
8
  vision_agent/agent/vision_agent_prompts.py,sha256=hjs-m4ZHR7HE1HtOeX_1rOvTQA2FMEAqEkaBbGPBYDo,6072
9
9
  vision_agent/clients/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -12,7 +12,7 @@ vision_agent/clients/landing_public_api.py,sha256=Tjl8uBZWc3dvrCOKg-PCYjw3RC3X5Y
12
12
  vision_agent/fonts/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
13
13
  vision_agent/fonts/default_font_ch_en.ttf,sha256=1YM0Z3XqLDjSNbF7ihQFSAIUdjF9m1rtHiNC_6QosTE,1594400
14
14
  vision_agent/lmm/__init__.py,sha256=YuUZRsMHdn8cMOv6iBU8yUqlIOLrbZQqZl9KPnofsHQ,103
15
- vision_agent/lmm/lmm.py,sha256=KcS6h-8whGFmwt7t4LNlj0hZ4U-rBojYBLKLmrMsF48,15075
15
+ vision_agent/lmm/lmm.py,sha256=TgEwrtQqpnWlBYEvsSU6DbkY3Y7MM8wRb4lMQgSiM0k,19435
16
16
  vision_agent/lmm/types.py,sha256=8TSRoTbXyCKVJiH-wHXI2OiGOMSkYv1vLGYeAXtNpOQ,153
17
17
  vision_agent/tools/__init__.py,sha256=MK0D8NtIChwGHwqsTz3LeV5BGuQecNVrNzUsyaEwuGA,1926
18
18
  vision_agent/tools/meta_tools.py,sha256=v2FrLl0YwM7JwsVRfgfnryd9qorbPRiObestexbnNBs,15170
@@ -27,7 +27,7 @@ vision_agent/utils/image_utils.py,sha256=c_g5i_cFC0C-Yw9gU_NaVgQdmBlyumw3bLIDtCU
27
27
  vision_agent/utils/sim.py,sha256=7JvtWGN0Ik5ife3qQYWs7Fm3T8AnAXGFd5HnvDC15mQ,4433
28
28
  vision_agent/utils/type_defs.py,sha256=BE12s3JNQy36QvauXHjwyeffVh5enfcvd4vTzSwvEZI,1384
29
29
  vision_agent/utils/video.py,sha256=rNmU9KEIkZB5-EztZNlUiKYN0mm_55A_2VGUM0QpqLA,8779
30
- vision_agent-0.2.103.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
31
- vision_agent-0.2.103.dist-info/METADATA,sha256=DfZa2bcKHvQxsgAJRBdIEpPGdBjt18TuOwMzXOUIV_w,10729
32
- vision_agent-0.2.103.dist-info/WHEEL,sha256=7Z8_27uaHI_UZAc4Uox4PpBhQ9Y5_modZXWMxtUi4NU,88
33
- vision_agent-0.2.103.dist-info/RECORD,,
30
+ vision_agent-0.2.104.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
31
+ vision_agent-0.2.104.dist-info/METADATA,sha256=aSP8goyL8RZS_6SZSzrJZCsIzySrN_domJ2vvvbedQg,10729
32
+ vision_agent-0.2.104.dist-info/WHEEL,sha256=7Z8_27uaHI_UZAc4Uox4PpBhQ9Y5_modZXWMxtUi4NU,88
33
+ vision_agent-0.2.104.dist-info/RECORD,,