ursa-ai 0.2.7__py3-none-any.whl → 0.2.8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of ursa-ai might be problematic. Click here for more details.

ursa/agents/mp_agent.py CHANGED
@@ -5,39 +5,20 @@ from concurrent.futures import ThreadPoolExecutor
5
5
  from tqdm import tqdm
6
6
 
7
7
  from mp_api.client import MPRester
8
- from langchain.schema import Document
9
8
 
10
9
  import os
11
- import pymupdf
12
- import requests
13
- import feedparser
14
- from PIL import Image
15
- from io import BytesIO
16
- import base64
17
- from urllib.parse import quote
18
10
  from typing_extensions import TypedDict, List
19
- from concurrent.futures import ThreadPoolExecutor, as_completed
11
+ from concurrent.futures import ThreadPoolExecutor
20
12
  from tqdm import tqdm
21
13
  import re
22
14
 
23
- from langchain_community.document_loaders import PyPDFLoader
24
15
  from langchain_core.output_parsers import StrOutputParser
25
16
  from langchain_core.prompts import ChatPromptTemplate
26
17
  from langgraph.graph import StateGraph, END, START
27
- from langchain.text_splitter import RecursiveCharacterTextSplitter
28
- from langchain_chroma import Chroma
29
- from langchain_openai import OpenAIEmbeddings
30
-
31
- from openai import OpenAI
32
18
 
33
19
  from .base import BaseAgent
34
20
 
35
21
 
36
- client = OpenAI()
37
-
38
- embeddings = OpenAIEmbeddings()
39
-
40
-
41
22
  class PaperMetadata(TypedDict):
42
23
  arxiv_id: str
43
24
  full_text: str
@@ -51,74 +32,6 @@ class PaperState(TypedDict, total=False):
51
32
  final_summary: str
52
33
 
53
34
 
54
- def describe_image(image: Image.Image) -> str:
55
- buffered = BytesIO()
56
- image.save(buffered, format="PNG")
57
- img_base64 = base64.b64encode(buffered.getvalue()).decode()
58
-
59
- response = client.chat.completions.create(
60
- model="gpt-4-vision-preview",
61
- messages=[
62
- {
63
- "role": "system",
64
- "content": "You are a scientific assistant who explains plots and scientific diagrams.",
65
- },
66
- {
67
- "role": "user",
68
- "content": [
69
- {
70
- "type": "text",
71
- "text": "Describe this scientific image or plot in detail.",
72
- },
73
- {
74
- "type": "image_url",
75
- "image_url": {
76
- "url": f"data:image/png;base64,{img_base64}"
77
- },
78
- },
79
- ],
80
- },
81
- ],
82
- max_tokens=500,
83
- )
84
- return response.choices[0].message.content.strip()
85
-
86
-
87
- def extract_and_describe_images(
88
- pdf_path: str, max_images: int = 5
89
- ) -> List[str]:
90
- doc = pymupdf.open(pdf_path)
91
- descriptions = []
92
- image_count = 0
93
-
94
- for page_index in range(len(doc)):
95
- if image_count >= max_images:
96
- break
97
- page = doc[page_index]
98
- images = page.get_images(full=True)
99
-
100
- for img_index, img in enumerate(images):
101
- if image_count >= max_images:
102
- break
103
- xref = img[0]
104
- base_image = doc.extract_image(xref)
105
- image_bytes = base_image["image"]
106
- image = Image.open(BytesIO(image_bytes))
107
-
108
- try:
109
- desc = describe_image(image)
110
- descriptions.append(
111
- f"Page {page_index + 1}, Image {img_index + 1}: {desc}"
112
- )
113
- except Exception as e:
114
- descriptions.append(
115
- f"Page {page_index + 1}, Image {img_index + 1}: [Error: {e}]"
116
- )
117
- image_count += 1
118
-
119
- return descriptions
120
-
121
-
122
35
  def remove_surrogates(text: str) -> str:
123
36
  return re.sub(r"[\ud800-\udfff]", "", text)
124
37
 
@@ -131,7 +44,6 @@ class MaterialsProjectAgent(BaseAgent):
131
44
  max_results: int = 3,
132
45
  database_path: str = "mp_database",
133
46
  summaries_path: str = "mp_summaries",
134
- vectorstore_path: str = "mp_vectorstores",
135
47
  **kwargs,
136
48
  ):
137
49
  super().__init__(llm, **kwargs)
@@ -139,13 +51,10 @@ class MaterialsProjectAgent(BaseAgent):
139
51
  self.max_results = max_results
140
52
  self.database_path = database_path
141
53
  self.summaries_path = summaries_path
142
- self.vectorstore_path = vectorstore_path
143
54
 
144
55
  os.makedirs(self.database_path, exist_ok=True)
145
56
  os.makedirs(self.summaries_path, exist_ok=True)
146
- os.makedirs(self.vectorstore_path, exist_ok=True)
147
57
 
148
- self.embeddings = OpenAIEmbeddings() # or your preferred embedding
149
58
  self.graph = self._build_graph()
150
59
 
151
60
  def _fetch_node(self, state: Dict) -> Dict:
@@ -175,24 +84,6 @@ class MaterialsProjectAgent(BaseAgent):
175
84
 
176
85
  return {**state, "materials": mats}
177
86
 
178
- def _get_or_build_vectorstore(self, text: str, mid: str):
179
- """Build or load a Chroma vectorstore for a single material's description."""
180
- persist_dir = os.path.join(self.vectorstore_path, mid)
181
- if os.path.exists(persist_dir):
182
- store = Chroma(
183
- persist_directory=persist_dir,
184
- embedding_function=self.embeddings,
185
- )
186
- else:
187
- splitter = RecursiveCharacterTextSplitter(
188
- chunk_size=500, chunk_overlap=100
189
- )
190
- docs = splitter.create_documents([text])
191
- store = Chroma.from_documents(
192
- docs, self.embeddings, persist_directory=persist_dir
193
- )
194
- return store.as_retriever(search_kwargs={"k": 5})
195
-
196
87
  def _summarize_node(self, state: Dict) -> Dict:
197
88
  """Summarize each material via LLM over its metadata."""
198
89
  # prompt template
@@ -204,7 +95,6 @@ You are a materials-science assistant. Given the following metadata about a mate
204
95
  chain = prompt | self.llm | StrOutputParser()
205
96
 
206
97
  summaries = [None] * len(state["materials"])
207
- relevancy = [0.0] * len(state["materials"])
208
98
 
209
99
  def process(i, mat):
210
100
  mid = mat["material_id"]
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: ursa-ai
3
- Version: 0.2.7
3
+ Version: 0.2.8
4
4
  Summary: Agents for science at LANL
5
5
  Author-email: Mike Grosskopf <mikegros@lanl.gov>, Nathan Debardeleben <ndebard@lanl.gov>, Rahul Somasundaram <rsomasundaram@lanl.gov>, Isaac Michaud <imichaud@lanl.gov>, Avanish Mishra <avanish@lanl.gov>, Arthur Lui <alui@lanl.gov>, Russell Bent <rbent@lanl.gov>, Earl Lawrence <earl@lanl.gov>
6
6
  License-Expression: BSD-3-Clause
@@ -4,7 +4,7 @@ ursa/agents/base.py,sha256=kGbiGn8qu1eKQSv2Y9YZWEv8ngYsyUMTBkAxk8iD9R0,1334
4
4
  ursa/agents/code_review_agent.py,sha256=yVO7nzYI3o0k2HguFw3OSY1IrCy5W8V0YiriYAeviY4,11562
5
5
  ursa/agents/execution_agent.py,sha256=Hw7EZem8qYedXjeLi5RDPrPIhqlW1G-vE66MlZ7g1BY,16607
6
6
  ursa/agents/hypothesizer_agent.py,sha256=p3bLHyqsiGRwYS4nycYcwnpye2j1umWdaOYspGAFRU0,23309
7
- ursa/agents/mp_agent.py,sha256=Kv793S2x6gavdgBD68wxvTPNFKLDLyI0FSs8iXCkcVQ,9732
7
+ ursa/agents/mp_agent.py,sha256=PvCbneJslKSVqWysoGX2_DTYb_JKBWQrYuJvZOVDlw4,6104
8
8
  ursa/agents/planning_agent.py,sha256=5KSRk_gDsUrv_6zSxd7CqXhhMCYtnlfNlxSI9tSbqzc,5422
9
9
  ursa/agents/recall_agent.py,sha256=UcNRZLbx3j3cHaLEZul4__KzWV4SnUhLTjX9GeoYbHM,823
10
10
  ursa/agents/websearch_agent.py,sha256=zDS4IF-WJgsvSmV42HEO582rt3zCh_fJjteh7VpSNe4,7715
@@ -19,8 +19,8 @@ ursa/tools/write_code.py,sha256=DtCsUMZegYm0mk-HMPG5Zo3Ba1gbGfnXHsv1NZTdDs8,1220
19
19
  ursa/util/diff_renderer.py,sha256=gHawyUtBLeOq32A25_etDSy-HXAPyZQrnzfYGtHoEIQ,4086
20
20
  ursa/util/memory_logger.py,sha256=Qu8JRjqvXvchnVh6s-91te_xnfOAK1fJDyf1DvsRWnI,5737
21
21
  ursa/util/parse.py,sha256=M0cjyQWmjatxX4WbVmDRUiirTLyW-t_Aemlrlrsc5nA,2811
22
- ursa_ai-0.2.7.dist-info/licenses/LICENSE,sha256=4Vr6_u2zTHIUvYjoOBg9ztDbfpV3hyCFv3mTCS87gYU,1482
23
- ursa_ai-0.2.7.dist-info/METADATA,sha256=93ph0QLYscdrN2SpPVlEVjVtUaxDBpIE2BYMyTTJAkY,6848
24
- ursa_ai-0.2.7.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
25
- ursa_ai-0.2.7.dist-info/top_level.txt,sha256=OjA1gRYSUAeiXGnpqPC8iOOGfcjFO1IlP848qMnYSdY,5
26
- ursa_ai-0.2.7.dist-info/RECORD,,
22
+ ursa_ai-0.2.8.dist-info/licenses/LICENSE,sha256=4Vr6_u2zTHIUvYjoOBg9ztDbfpV3hyCFv3mTCS87gYU,1482
23
+ ursa_ai-0.2.8.dist-info/METADATA,sha256=AoP6uLIGtPXutp46JzTFKUSv_oh5cXCWQHLtQcCH5eg,6848
24
+ ursa_ai-0.2.8.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
25
+ ursa_ai-0.2.8.dist-info/top_level.txt,sha256=OjA1gRYSUAeiXGnpqPC8iOOGfcjFO1IlP848qMnYSdY,5
26
+ ursa_ai-0.2.8.dist-info/RECORD,,