ursa-ai 0.2.7.tar.gz → 0.2.8.tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of ursa-ai might be problematic.
- {ursa_ai-0.2.7 → ursa_ai-0.2.8}/PKG-INFO +1 -1
- {ursa_ai-0.2.7 → ursa_ai-0.2.8}/src/ursa/agents/mp_agent.py +1 -111
- {ursa_ai-0.2.7 → ursa_ai-0.2.8}/src/ursa_ai.egg-info/PKG-INFO +1 -1
- {ursa_ai-0.2.7 → ursa_ai-0.2.8}/LICENSE +0 -0
- {ursa_ai-0.2.7 → ursa_ai-0.2.8}/README.md +0 -0
- {ursa_ai-0.2.7 → ursa_ai-0.2.8}/pyproject.toml +0 -0
- {ursa_ai-0.2.7 → ursa_ai-0.2.8}/setup.cfg +0 -0
- {ursa_ai-0.2.7 → ursa_ai-0.2.8}/src/ursa/agents/__init__.py +0 -0
- {ursa_ai-0.2.7 → ursa_ai-0.2.8}/src/ursa/agents/arxiv_agent.py +0 -0
- {ursa_ai-0.2.7 → ursa_ai-0.2.8}/src/ursa/agents/base.py +0 -0
- {ursa_ai-0.2.7 → ursa_ai-0.2.8}/src/ursa/agents/code_review_agent.py +0 -0
- {ursa_ai-0.2.7 → ursa_ai-0.2.8}/src/ursa/agents/execution_agent.py +0 -0
- {ursa_ai-0.2.7 → ursa_ai-0.2.8}/src/ursa/agents/hypothesizer_agent.py +0 -0
- {ursa_ai-0.2.7 → ursa_ai-0.2.8}/src/ursa/agents/planning_agent.py +0 -0
- {ursa_ai-0.2.7 → ursa_ai-0.2.8}/src/ursa/agents/recall_agent.py +0 -0
- {ursa_ai-0.2.7 → ursa_ai-0.2.8}/src/ursa/agents/websearch_agent.py +0 -0
- {ursa_ai-0.2.7 → ursa_ai-0.2.8}/src/ursa/prompt_library/code_review_prompts.py +0 -0
- {ursa_ai-0.2.7 → ursa_ai-0.2.8}/src/ursa/prompt_library/execution_prompts.py +0 -0
- {ursa_ai-0.2.7 → ursa_ai-0.2.8}/src/ursa/prompt_library/hypothesizer_prompts.py +0 -0
- {ursa_ai-0.2.7 → ursa_ai-0.2.8}/src/ursa/prompt_library/literature_prompts.py +0 -0
- {ursa_ai-0.2.7 → ursa_ai-0.2.8}/src/ursa/prompt_library/planning_prompts.py +0 -0
- {ursa_ai-0.2.7 → ursa_ai-0.2.8}/src/ursa/prompt_library/websearch_prompts.py +0 -0
- {ursa_ai-0.2.7 → ursa_ai-0.2.8}/src/ursa/tools/run_command.py +0 -0
- {ursa_ai-0.2.7 → ursa_ai-0.2.8}/src/ursa/tools/write_code.py +0 -0
- {ursa_ai-0.2.7 → ursa_ai-0.2.8}/src/ursa/util/diff_renderer.py +0 -0
- {ursa_ai-0.2.7 → ursa_ai-0.2.8}/src/ursa/util/memory_logger.py +0 -0
- {ursa_ai-0.2.7 → ursa_ai-0.2.8}/src/ursa/util/parse.py +0 -0
- {ursa_ai-0.2.7 → ursa_ai-0.2.8}/src/ursa_ai.egg-info/SOURCES.txt +0 -0
- {ursa_ai-0.2.7 → ursa_ai-0.2.8}/src/ursa_ai.egg-info/dependency_links.txt +0 -0
- {ursa_ai-0.2.7 → ursa_ai-0.2.8}/src/ursa_ai.egg-info/requires.txt +0 -0
- {ursa_ai-0.2.7 → ursa_ai-0.2.8}/src/ursa_ai.egg-info/top_level.txt +0 -0
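
The hunks below can be reproduced from the public sdists. A sketch using only the standard library plus PyPI's JSON API (the helper names are ours; tar member paths follow the `ursa_ai-<version>/` layout shown in the listing above):

```python
import difflib
import io
import json
import tarfile
import urllib.request


def fetch_sdist(version: str) -> tarfile.TarFile:
    # Standard PyPI JSON API: one record per release, with download URLs.
    meta = json.load(urllib.request.urlopen(
        f"https://pypi.org/pypi/ursa-ai/{version}/json"))
    url = next(u["url"] for u in meta["urls"] if u["packagetype"] == "sdist")
    data = urllib.request.urlopen(url).read()
    return tarfile.open(fileobj=io.BytesIO(data), mode="r:gz")


def read_member(tf: tarfile.TarFile, version: str, relpath: str):
    # sdist members are rooted at ursa_ai-<version>/
    member = tf.extractfile(f"ursa_ai-{version}/{relpath}")
    return member.read().decode().splitlines(keepends=True)


old, new = fetch_sdist("0.2.7"), fetch_sdist("0.2.8")
path = "src/ursa/agents/mp_agent.py"
print("".join(difflib.unified_diff(
    read_member(old, "0.2.7", path),
    read_member(new, "0.2.8", path),
    fromfile=f"ursa_ai-0.2.7/{path}",
    tofile=f"ursa_ai-0.2.8/{path}",
)))
```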
{ursa_ai-0.2.7 → ursa_ai-0.2.8}/PKG-INFO

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: ursa-ai
-Version: 0.2.7
+Version: 0.2.8
 Summary: Agents for science at LANL
 Author-email: Mike Grosskopf <mikegros@lanl.gov>, Nathan Debardeleben <ndebard@lanl.gov>, Rahul Somasundaram <rsomasundaram@lanl.gov>, Isaac Michaud <imichaud@lanl.gov>, Avanish Mishra <avanish@lanl.gov>, Arthur Lui <alui@lanl.gov>, Russell Bent <rbent@lanl.gov>, Earl Lawrence <earl@lanl.gov>
 License-Expression: BSD-3-Clause
{ursa_ai-0.2.7 → ursa_ai-0.2.8}/src/ursa/agents/mp_agent.py

@@ -5,39 +5,20 @@ from concurrent.futures import ThreadPoolExecutor
 from tqdm import tqdm

 from mp_api.client import MPRester
-from langchain.schema import Document

 import os
-import pymupdf
-import requests
-import feedparser
-from PIL import Image
-from io import BytesIO
-import base64
-from urllib.parse import quote
 from typing_extensions import TypedDict, List
-from concurrent.futures import ThreadPoolExecutor
+from concurrent.futures import ThreadPoolExecutor
 from tqdm import tqdm
 import re

-from langchain_community.document_loaders import PyPDFLoader
 from langchain_core.output_parsers import StrOutputParser
 from langchain_core.prompts import ChatPromptTemplate
 from langgraph.graph import StateGraph, END, START
-from langchain.text_splitter import RecursiveCharacterTextSplitter
-from langchain_chroma import Chroma
-from langchain_openai import OpenAIEmbeddings
-
-from openai import OpenAI

 from .base import BaseAgent


-client = OpenAI()
-
-embeddings = OpenAIEmbeddings()
-
-
 class PaperMetadata(TypedDict):
     arxiv_id: str
     full_text: str
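The practical effect of this hunk is that mp_agent no longer builds OpenAI clients at import time: the module-level `client = OpenAI()` and `embeddings = OpenAIEmbeddings()` are gone, so importing the module in 0.2.8 no longer requires OpenAI credentials, and the langchain_chroma / langchain_openai / pymupdf imports disappear with them. Read off the context and `+` lines, the surviving module header is roughly the following (a reconstruction from this hunk, not a verbatim copy of the file):

```python
from tqdm import tqdm

from mp_api.client import MPRester

import os
from typing_extensions import TypedDict, List
from concurrent.futures import ThreadPoolExecutor  # duplicates an import above this hunk
from tqdm import tqdm  # duplicate import the diff leaves in place
import re

from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate
from langgraph.graph import StateGraph, END, START

from .base import BaseAgent


class PaperMetadata(TypedDict):
    arxiv_id: str
    full_text: str
```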
@@ -51,74 +32,6 @@ class PaperState(TypedDict, total=False):
     final_summary: str


-def describe_image(image: Image.Image) -> str:
-    buffered = BytesIO()
-    image.save(buffered, format="PNG")
-    img_base64 = base64.b64encode(buffered.getvalue()).decode()
-
-    response = client.chat.completions.create(
-        model="gpt-4-vision-preview",
-        messages=[
-            {
-                "role": "system",
-                "content": "You are a scientific assistant who explains plots and scientific diagrams.",
-            },
-            {
-                "role": "user",
-                "content": [
-                    {
-                        "type": "text",
-                        "text": "Describe this scientific image or plot in detail.",
-                    },
-                    {
-                        "type": "image_url",
-                        "image_url": {
-                            "url": f"data:image/png;base64,{img_base64}"
-                        },
-                    },
-                ],
-            },
-        ],
-        max_tokens=500,
-    )
-    return response.choices[0].message.content.strip()
-
-
-def extract_and_describe_images(
-    pdf_path: str, max_images: int = 5
-) -> List[str]:
-    doc = pymupdf.open(pdf_path)
-    descriptions = []
-    image_count = 0
-
-    for page_index in range(len(doc)):
-        if image_count >= max_images:
-            break
-        page = doc[page_index]
-        images = page.get_images(full=True)
-
-        for img_index, img in enumerate(images):
-            if image_count >= max_images:
-                break
-            xref = img[0]
-            base_image = doc.extract_image(xref)
-            image_bytes = base_image["image"]
-            image = Image.open(BytesIO(image_bytes))
-
-            try:
-                desc = describe_image(image)
-                descriptions.append(
-                    f"Page {page_index + 1}, Image {img_index + 1}: {desc}"
-                )
-            except Exception as e:
-                descriptions.append(
-                    f"Page {page_index + 1}, Image {img_index + 1}: [Error: {e}]"
-                )
-            image_count += 1
-
-    return descriptions
-
-
 def remove_surrogates(text: str) -> str:
     return re.sub(r"[\ud800-\udfff]", "", text)

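This hunk deletes the PDF image-description pipeline outright; 0.2.8 ships no replacement. If you depended on that behavior, the removed code ports almost directly to a standalone helper. A minimal sketch based on the deleted functions, with the client and model passed in instead of built at import time (the default model name here is our assumption, since the hard-coded gpt-4-vision-preview the old code used has since been deprecated by OpenAI):

```python
import base64
from io import BytesIO
from typing import List

import pymupdf
from PIL import Image
from openai import OpenAI


def describe_pdf_images(
    pdf_path: str,
    client: OpenAI,
    model: str = "gpt-4o",  # assumption: any vision-capable chat model
    max_images: int = 5,
) -> List[str]:
    """Describe up to max_images embedded images, mirroring the removed helpers."""
    doc = pymupdf.open(pdf_path)
    descriptions: List[str] = []
    for page_index, page in enumerate(doc):
        for img_index, img in enumerate(page.get_images(full=True)):
            if len(descriptions) >= max_images:
                return descriptions
            # xref -> raw bytes, then re-encode as PNG for a known MIME type
            image_bytes = doc.extract_image(img[0])["image"]
            buffered = BytesIO()
            Image.open(BytesIO(image_bytes)).save(buffered, format="PNG")
            b64 = base64.b64encode(buffered.getvalue()).decode()
            try:
                response = client.chat.completions.create(
                    model=model,
                    messages=[
                        {"role": "system", "content": "You are a scientific assistant who explains plots and scientific diagrams."},
                        {"role": "user", "content": [
                            {"type": "text", "text": "Describe this scientific image or plot in detail."},
                            {"type": "image_url", "image_url": {"url": f"data:image/png;base64,{b64}"}},
                        ]},
                    ],
                    max_tokens=500,
                )
                desc = response.choices[0].message.content.strip()
            except Exception as e:  # keep going on a bad image, as the old code did
                desc = f"[Error: {e}]"
            descriptions.append(f"Page {page_index + 1}, Image {img_index + 1}: {desc}")
    return descriptions
```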
@@ -131,7 +44,6 @@ class MaterialsProjectAgent(BaseAgent):
         max_results: int = 3,
         database_path: str = "mp_database",
         summaries_path: str = "mp_summaries",
-        vectorstore_path: str = "mp_vectorstores",
         **kwargs,
     ):
         super().__init__(llm, **kwargs)

@@ -139,13 +51,10 @@ class MaterialsProjectAgent(BaseAgent):
         self.max_results = max_results
         self.database_path = database_path
         self.summaries_path = summaries_path
-        self.vectorstore_path = vectorstore_path

         os.makedirs(self.database_path, exist_ok=True)
         os.makedirs(self.summaries_path, exist_ok=True)
-        os.makedirs(self.vectorstore_path, exist_ok=True)

-        self.embeddings = OpenAIEmbeddings()  # or your preferred embedding
         self.graph = self._build_graph()

     def _fetch_node(self, state: Dict) -> Dict:

@@ -175,24 +84,6 @@ class MaterialsProjectAgent(BaseAgent):

         return {**state, "materials": mats}

-    def _get_or_build_vectorstore(self, text: str, mid: str):
-        """Build or load a Chroma vectorstore for a single material's description."""
-        persist_dir = os.path.join(self.vectorstore_path, mid)
-        if os.path.exists(persist_dir):
-            store = Chroma(
-                persist_directory=persist_dir,
-                embedding_function=self.embeddings,
-            )
-        else:
-            splitter = RecursiveCharacterTextSplitter(
-                chunk_size=500, chunk_overlap=100
-            )
-            docs = splitter.create_documents([text])
-            store = Chroma.from_documents(
-                docs, self.embeddings, persist_directory=persist_dir
-            )
-        return store.as_retriever(search_kwargs={"k": 5})
-
     def _summarize_node(self, state: Dict) -> Dict:
         """Summarize each material via LLM over its metadata."""
         # prompt template
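With `vectorstore_path` gone in the two hunks above and `_get_or_build_vectorstore` deleted here, 0.2.8 drops the per-material Chroma retrieval layer entirely, along with the hard-wired `OpenAIEmbeddings()` default. The deleted method is self-contained enough to keep outside the agent; a sketch with the embeddings object supplied by the caller rather than assumed (the function name is ours, the chunking parameters and k are copied from 0.2.7):

```python
import os

from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_chroma import Chroma


def build_material_retriever(text: str, mid: str, embeddings,
                             root: str = "mp_vectorstores"):
    """Build or reload a per-material Chroma retriever, as the removed
    _get_or_build_vectorstore did."""
    persist_dir = os.path.join(root, mid)
    if os.path.exists(persist_dir):
        # Reuse an existing persisted store for this material id
        store = Chroma(persist_directory=persist_dir,
                       embedding_function=embeddings)
    else:
        # Chunk the description and embed it into a fresh store
        splitter = RecursiveCharacterTextSplitter(chunk_size=500,
                                                  chunk_overlap=100)
        docs = splitter.create_documents([text])
        store = Chroma.from_documents(docs, embeddings,
                                      persist_directory=persist_dir)
    return store.as_retriever(search_kwargs={"k": 5})
```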
@@ -204,7 +95,6 @@ You are a materials-science assistant. Given the following metadata about a mate
         chain = prompt | self.llm | StrOutputParser()

         summaries = [None] * len(state["materials"])
-        relevancy = [0.0] * len(state["materials"])

         def process(i, mat):
             mid = mat["material_id"]
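The only change in this hunk drops `relevancy`, a parallel list the old code allocated but 0.2.8 no longer uses. The context lines show the pattern the summarize node keeps: preallocate one index-stable slot per material, then fan `process` out over a thread pool. A self-contained sketch of that pattern (the body of `process` and the executor setup sit outside this hunk, so both are assumptions; `summarize` stands in for the prompt | llm | parser chain):

```python
from concurrent.futures import ThreadPoolExecutor

from tqdm import tqdm

# Stand-ins for the agent's real inputs (illustration only):
materials = [{"material_id": f"mp-{i}"} for i in range(10)]

def summarize(mat: dict) -> str:
    # In mp_agent.py this is the chain invocation on the material's metadata.
    return f"summary of {mat['material_id']}"

# Preallocating keeps results aligned with input order even though
# worker threads finish out of order.
summaries = [None] * len(materials)

def process(i: int, mat: dict) -> None:
    summaries[i] = summarize(mat)

with ThreadPoolExecutor() as pool:
    futures = [pool.submit(process, i, m) for i, m in enumerate(materials)]
    for f in tqdm(futures):
        f.result()  # propagate any worker exception

print(summaries)
```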
{ursa_ai-0.2.7 → ursa_ai-0.2.8}/src/ursa_ai.egg-info/PKG-INFO

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: ursa-ai
-Version: 0.2.7
+Version: 0.2.8
 Summary: Agents for science at LANL
 Author-email: Mike Grosskopf <mikegros@lanl.gov>, Nathan Debardeleben <ndebard@lanl.gov>, Rahul Somasundaram <rsomasundaram@lanl.gov>, Isaac Michaud <imichaud@lanl.gov>, Avanish Mishra <avanish@lanl.gov>, Arthur Lui <alui@lanl.gov>, Russell Bent <rbent@lanl.gov>, Earl Lawrence <earl@lanl.gov>
 License-Expression: BSD-3-Clause
The 28 remaining files listed above, from {ursa_ai-0.2.7 → ursa_ai-0.2.8}/LICENSE through {ursa_ai-0.2.7 → ursa_ai-0.2.8}/src/ursa_ai.egg-info/top_level.txt, are without changes.