ursa-ai 0.2.13__tar.gz → 0.4.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {ursa_ai-0.2.13/src/ursa_ai.egg-info → ursa_ai-0.4.0}/PKG-INFO +4 -2
- {ursa_ai-0.2.13 → ursa_ai-0.4.0}/pyproject.toml +3 -1
- {ursa_ai-0.2.13 → ursa_ai-0.4.0}/src/ursa/agents/__init__.py +2 -0
- {ursa_ai-0.2.13 → ursa_ai-0.4.0}/src/ursa/agents/arxiv_agent.py +22 -2
- {ursa_ai-0.2.13 → ursa_ai-0.4.0}/src/ursa/agents/execution_agent.py +2 -2
- {ursa_ai-0.2.13 → ursa_ai-0.4.0}/src/ursa/agents/hypothesizer_agent.py +22 -24
- ursa_ai-0.4.0/src/ursa/agents/lammps_agent.py +392 -0
- {ursa_ai-0.2.13 → ursa_ai-0.4.0/src/ursa_ai.egg-info}/PKG-INFO +4 -2
- {ursa_ai-0.2.13 → ursa_ai-0.4.0}/src/ursa_ai.egg-info/SOURCES.txt +1 -0
- {ursa_ai-0.2.13 → ursa_ai-0.4.0}/src/ursa_ai.egg-info/requires.txt +3 -1
- {ursa_ai-0.2.13 → ursa_ai-0.4.0}/LICENSE +0 -0
- {ursa_ai-0.2.13 → ursa_ai-0.4.0}/README.md +0 -0
- {ursa_ai-0.2.13 → ursa_ai-0.4.0}/setup.cfg +0 -0
- {ursa_ai-0.2.13 → ursa_ai-0.4.0}/src/ursa/agents/base.py +0 -0
- {ursa_ai-0.2.13 → ursa_ai-0.4.0}/src/ursa/agents/code_review_agent.py +0 -0
- {ursa_ai-0.2.13 → ursa_ai-0.4.0}/src/ursa/agents/mp_agent.py +0 -0
- {ursa_ai-0.2.13 → ursa_ai-0.4.0}/src/ursa/agents/planning_agent.py +0 -0
- {ursa_ai-0.2.13 → ursa_ai-0.4.0}/src/ursa/agents/recall_agent.py +0 -0
- {ursa_ai-0.2.13 → ursa_ai-0.4.0}/src/ursa/agents/websearch_agent.py +0 -0
- {ursa_ai-0.2.13 → ursa_ai-0.4.0}/src/ursa/prompt_library/code_review_prompts.py +0 -0
- {ursa_ai-0.2.13 → ursa_ai-0.4.0}/src/ursa/prompt_library/execution_prompts.py +0 -0
- {ursa_ai-0.2.13 → ursa_ai-0.4.0}/src/ursa/prompt_library/hypothesizer_prompts.py +0 -0
- {ursa_ai-0.2.13 → ursa_ai-0.4.0}/src/ursa/prompt_library/literature_prompts.py +0 -0
- {ursa_ai-0.2.13 → ursa_ai-0.4.0}/src/ursa/prompt_library/planning_prompts.py +0 -0
- {ursa_ai-0.2.13 → ursa_ai-0.4.0}/src/ursa/prompt_library/websearch_prompts.py +0 -0
- {ursa_ai-0.2.13 → ursa_ai-0.4.0}/src/ursa/tools/run_command.py +0 -0
- {ursa_ai-0.2.13 → ursa_ai-0.4.0}/src/ursa/tools/write_code.py +0 -0
- {ursa_ai-0.2.13 → ursa_ai-0.4.0}/src/ursa/util/diff_renderer.py +0 -0
- {ursa_ai-0.2.13 → ursa_ai-0.4.0}/src/ursa/util/memory_logger.py +0 -0
- {ursa_ai-0.2.13 → ursa_ai-0.4.0}/src/ursa/util/parse.py +0 -0
- {ursa_ai-0.2.13 → ursa_ai-0.4.0}/src/ursa_ai.egg-info/dependency_links.txt +0 -0
- {ursa_ai-0.2.13 → ursa_ai-0.4.0}/src/ursa_ai.egg-info/top_level.txt +0 -0
{ursa_ai-0.2.13/src/ursa_ai.egg-info → ursa_ai-0.4.0}/PKG-INFO

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: ursa-ai
-Version: 0.2.13
+Version: 0.4.0
 Summary: Agents for science at LANL
 Author-email: Mike Grosskopf <mikegros@lanl.gov>, Nathan Debardeleben <ndebard@lanl.gov>, Rahul Somasundaram <rsomasundaram@lanl.gov>, Isaac Michaud <imichaud@lanl.gov>, Avanish Mishra <avanish@lanl.gov>, Arthur Lui <alui@lanl.gov>, Russell Bent <rbent@lanl.gov>, Earl Lawrence <earl@lanl.gov>
 License-Expression: BSD-3-Clause
@@ -19,7 +19,7 @@ Description-Content-Type: text/markdown
 License-File: LICENSE
 Requires-Dist: arxiv<3.0,>=2.2.0
 Requires-Dist: beautifulsoup4<5.0,>=4.13.4
-Requires-Dist:
+Requires-Dist: randomname<0.3,>=0.2.1
 Requires-Dist: langchain<0.4,>=0.3.27
 Requires-Dist: langchain-community<0.4,>=0.3.29
 Requires-Dist: langchain-litellm<0.3,>=0.2.2
@@ -38,6 +38,8 @@ Requires-Dist: langchain-anthropic<0.4,>=0.3.19
 Requires-Dist: langgraph-checkpoint-sqlite<3.0,>=2.0.10
 Requires-Dist: langchain-ollama<0.4,>=0.3.6
 Requires-Dist: ddgs>=9.5.5
+Requires-Dist: atomman>=1.5.2
+Requires-Dist: trafilatura>=1.6.1
 Dynamic: license-file
 
 # URSA - The Universal Research and Scientific Agent
{ursa_ai-0.2.13 → ursa_ai-0.4.0}/pyproject.toml

@@ -19,7 +19,7 @@ requires-python = ">=3.10"
 dependencies = [
     "arxiv>=2.2.0,<3.0",
     "beautifulsoup4>=4.13.4,<5.0",
-    "
+    "randomname>=0.2.1,<0.3",
     "langchain>=0.3.27,<0.4",
     "langchain-community>=0.3.29,<0.4",
     "langchain-litellm>=0.2.2,<0.3",
@@ -38,6 +38,8 @@ dependencies = [
     "langgraph-checkpoint-sqlite>=2.0.10,<3.0",
     "langchain-ollama>=0.3.6,<0.4",
     "ddgs>=9.5.5",
+    "atomman>=1.5.2",
+    "trafilatura>=1.6.1",
 ]
 classifiers = [
     "Operating System :: OS Independent",
{ursa_ai-0.2.13 → ursa_ai-0.4.0}/src/ursa/agents/__init__.py

@@ -9,6 +9,8 @@ from .execution_agent import ExecutionAgent as ExecutionAgent
 from .execution_agent import ExecutionState as ExecutionState
 from .hypothesizer_agent import HypothesizerAgent as HypothesizerAgent
 from .hypothesizer_agent import HypothesizerState as HypothesizerState
+from .lammps_agent import LammpsAgent as LammpsAgent
+from .lammps_agent import LammpsState as LammpsState
 from .mp_agent import MaterialsProjectAgent as MaterialsProjectAgent
 from .planning_agent import PlanningAgent as PlanningAgent
 from .planning_agent import PlanningState as PlanningState
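Because the new module is re-exported here alongside the existing agents, downstream code can pull it straight from the agents package:

import json  # only needed if you serialize the state afterwards

from ursa.agents import LammpsAgent, LammpsState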
{ursa_ai-0.2.13 → ursa_ai-0.4.0}/src/ursa/agents/arxiv_agent.py

@@ -156,9 +156,29 @@ class ArxivAgent(BaseAgent):
         if self.download_papers:
             encoded_query = quote(query)
             url = f"http://export.arxiv.org/api/query?search_query=all:{encoded_query}&start=0&max_results={self.max_results}"
-
+            # print(f"URL is {url}")  # if verbose
+            entries = []
+            try:
+                response = requests.get(url, timeout=10)
+                response.raise_for_status()
+
+                feed = feedparser.parse(response.content)
+                # print(f"parsed response status is {feed.status}")  # if verbose
+                entries = feed.entries
+                if feed.bozo:
+                    raise Exception("Feed from arXiv looks like garbage =(")
+            except requests.exceptions.Timeout:
+                print("Request timed out while fetching papers.")
+            except requests.exceptions.RequestException as e:
+                print(f"Request error encountered while fetching papers: {e}")
+            except ValueError as ve:
+                print(f"Value error occurred while fetching papers: {ve}")
+            except Exception as e:
+                print(
+                    f"An unexpected error occurred while fetching papers: {e}"
+                )
 
-            for i, entry in enumerate(
+            for i, entry in enumerate(entries):
                 full_id = entry.id.split("/abs/")[-1]
                 arxiv_id = full_id.split("/")[-1]
                 title = entry.title.strip()
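The new fetch path wraps the arXiv API call in requests with a timeout, parses the response bytes with feedparser, and falls back to an empty entries list on any failure so the loop below degrades gracefully. A minimal standalone sketch of the same pattern; the query string and max_results value here are illustrative only:

import feedparser
import requests
from urllib.parse import quote

query = "high entropy alloys"  # illustrative query
url = (
    "http://export.arxiv.org/api/query"
    f"?search_query=all:{quote(query)}&start=0&max_results=3"
)

entries = []
try:
    response = requests.get(url, timeout=10)
    response.raise_for_status()
    feed = feedparser.parse(response.content)
    if feed.bozo:  # feedparser sets this flag when the XML is malformed
        raise Exception("Feed from arXiv looks like garbage =(")
    entries = feed.entries
except requests.exceptions.RequestException as e:
    print(f"Request error encountered while fetching papers: {e}")

for entry in entries:
    # Same ID extraction as the agent's loop above
    print(entry.id.split("/abs/")[-1], entry.title.strip())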
{ursa_ai-0.2.13 → ursa_ai-0.4.0}/src/ursa/agents/execution_agent.py

@@ -5,7 +5,7 @@ import subprocess
 from pathlib import Path
 from typing import Annotated, Any, Literal, Optional
 
-import
+import randomname
 from langchain_community.tools import (
     DuckDuckGoSearchResults,
 )  # TavilySearchResults,
@@ -75,7 +75,7 @@ class ExecutionAgent(BaseAgent):
     def query_executor(self, state: ExecutionState) -> ExecutionState:
         new_state = state.copy()
         if "workspace" not in new_state.keys():
-            new_state["workspace"] =
+            new_state["workspace"] = randomname.get_name()
             print(
                 f"{RED}Creating the folder {BLUE}{BOLD}{new_state['workspace']}{RESET}{RED} for this project.{RESET}"
             )
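The swapped-in randomname dependency supplies the default workspace name above. A quick sketch of what it produces, assuming randomname's get_name API (which returns hyphenated adjective-noun slugs):

import randomname

# get_name() yields a readable slug such as "sleek-voxel", so each run
# without an explicit workspace gets a distinct, human-friendly folder name.
workspace = randomname.get_name()
print(workspace)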
{ursa_ai-0.2.13 → ursa_ai-0.4.0}/src/ursa/agents/hypothesizer_agent.py

@@ -35,7 +35,7 @@ class HypothesizerState(TypedDict):
     agent1_solution: List[str]  # List to store each iteration of solutions
     agent2_critiques: List[str]  # List to store critiques
     agent3_perspectives: List[str]  # List to store competitor perspectives
-
+    solution: str  # Refined solution
     summary_report: str  # the final summarized report
     visited_sites: List[str]
 
@@ -262,12 +262,10 @@ class HypothesizerAgent(BaseAgent):
         )
         return new_state
 
-    def
-
-    ) -> HypothesizerState:
-        """Generate the final, refined solution based on all iterations."""
+    def generate_solution(self, state: HypothesizerState) -> HypothesizerState:
+        """Generate the overall, refined solution based on all iterations."""
         print(
-            f"[iteration {state['current_iteration']} - DEBUG] Entering
+            f"[iteration {state['current_iteration']} - DEBUG] Entering generate_solution."
         )
         prompt = f"Original question: {state['question']}\n\n"
         prompt += "Evolution of solutions:\n"
@@ -280,23 +278,23 @@ class HypothesizerAgent(BaseAgent):
             f"Competitor perspective: {state['agent3_perspectives'][i]}\n"
         )
 
-        prompt += "\nBased on this iterative process, provide the
+        prompt += "\nBased on this iterative process, provide the overall, refined solution."
 
         print(
-            f"[iteration {state['current_iteration']} - DEBUG] Generating
+            f"[iteration {state['current_iteration']} - DEBUG] Generating overall solution with LLM..."
         )
-
+        solution = self.llm.invoke(prompt)
         print(
-            f"[iteration {state['current_iteration']} - DEBUG]
-
+            f"[iteration {state['current_iteration']} - DEBUG] Overall solution obtained. Preview:",
+            solution.content[:200],
             "...",
         )
 
         new_state = state.copy()
-        new_state["
+        new_state["solution"] = solution.content
 
         print(
-            f"[iteration {state['current_iteration']} - DEBUG] Exiting
+            f"[iteration {state['current_iteration']} - DEBUG] Exiting generate_solution."
         )
         return new_state
 
@@ -363,13 +361,13 @@ class HypothesizerAgent(BaseAgent):
 
         {iteration_details}
 
-        The
+        The solution we arrived at was:
 
-        {state["
+        {state["solution"]}
 
         Now produce a valid LaTeX document. Be sure to use a table of contents.
         It must start with an Executive Summary (that may be multiple pages) which summarizes
-        the entire iterative process. Following that, we should include the
+        the entire iterative process. Following that, we should include the solution in full,
         not summarized, but reformatted for appropriate LaTeX. And then, finally (and this will be
         quite long), we must take all the steps - solutions, critiques, and competitor perspectives
         and *NOT SUMMARIZE THEM* but merely reformat them for the reader. This will be in an Appendix
@@ -387,7 +385,7 @@ class HypothesizerAgent(BaseAgent):
         """
 
         # Now produce a valid LaTeX document that nicely summarizes this entire iterative process.
-        # It must include the
+        # It must include the overall solution in full, not summarized, but reformatted for appropriate
         # LaTeX. The summarization is for the other steps.
 
         all_visited_sites = state.get("visited_sites", [])
@@ -455,7 +453,7 @@ class HypothesizerAgent(BaseAgent):
         self.graph.add_node("agent2", self.agent2_critique)
         self.graph.add_node("agent3", self.agent3_competitor_perspective)
         self.graph.add_node("increment_iteration", self.increment_iteration)
-        self.graph.add_node("finalize", self.
+        self.graph.add_node("finalize", self.generate_solution)
         self.graph.add_node("print_sites", self.print_visited_sites)
         self.graph.add_node(
             "summarize_as_latex", self.summarize_process_as_latex
@@ -497,7 +495,7 @@ class HypothesizerAgent(BaseAgent):
             agent1_solution=[],
             agent2_critiques=[],
             agent3_perspectives=[],
-
+            solution="",
         )
         # Run the graph
         result = self.action.invoke(
@@ -507,7 +505,7 @@ class HypothesizerAgent(BaseAgent):
                 "configurable": {"thread_id": self.thread_id},
             },
         )
-        return result["
+        return result["solution"]
 
 
 def should_continue(state: HypothesizerState) -> Literal["continue", "finish"]:
@@ -580,7 +578,7 @@ if __name__ == "__main__":
         agent1_solution=[],
         agent2_critiques=[],
         agent3_perspectives=[],
-
+        solution="",
     )
 
     print("[DEBUG] Invoking the graph...")
@@ -596,9 +594,9 @@ if __name__ == "__main__":
 
     print("[DEBUG] Graph invocation complete.")
 
-    # Print the
-    print("
-    print(result["
+    # Print the overall solution
+    print("Overall Solution:")
+    print(result["solution"])
 
     # print("Summarized Report:")
     # print(summary_text)
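These edits are a consistent rename: the state key and the return value become solution, and the finalize node now points at generate_solution. A stripped-down sketch of that node-plus-state pattern in LangGraph; the toy state and node body are illustrative stand-ins, not the agent's actual prompt logic:

from typing import TypedDict

from langgraph.graph import END, StateGraph


class ToyState(TypedDict):
    question: str
    solution: str


def generate_solution(state: ToyState) -> ToyState:
    # Stand-in for the LLM call; the real node builds a prompt from every
    # iteration's solutions, critiques, and perspectives before invoking the LLM.
    new_state = state.copy()
    new_state["solution"] = f"Refined answer to: {state['question']}"
    return new_state


graph = StateGraph(ToyState)
graph.add_node("finalize", generate_solution)
graph.set_entry_point("finalize")
graph.add_edge("finalize", END)

app = graph.compile()
result = app.invoke({"question": "How do we reduce drag?", "solution": ""})
print(result["solution"])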
ursa_ai-0.4.0/src/ursa/agents/lammps_agent.py

@@ -0,0 +1,392 @@
+import json
+import os
+import subprocess
+from typing import Any, Dict, List, Optional, TypedDict
+
+import atomman as am
+import tiktoken
+import trafilatura
+from langchain_core.output_parsers import StrOutputParser
+from langchain_core.prompts import ChatPromptTemplate
+from langgraph.graph import END, StateGraph
+
+from .base import BaseAgent
+
+
+class LammpsState(TypedDict, total=False):
+    simulation_task: str
+    elements: List[str]
+
+    matches: List[Any]
+    db_message: str
+
+    idx: int
+    summaries: List[str]
+    full_texts: List[str]
+
+    summaries_combined: str
+    choice_json: str
+    chosen_index: int
+
+    input_script: str
+    run_returncode: Optional[int]
+    run_stdout: str
+    run_stderr: str
+
+    fix_attempts: int
+
+
+class LammpsAgent(BaseAgent):
+    def __init__(
+        self,
+        llm,
+        max_potentials: int = 5,
+        max_fix_attempts: int = 10,
+        mpi_procs: int = 8,
+        workspace: str = "./workspace",
+        lammps_cmd: str = "lmp_mpi",
+        mpirun_cmd: str = "mpirun",
+        tiktoken_model: str = "o3",
+        max_tokens: int = 200000,
+        **kwargs,
+    ):
+        self.max_potentials = max_potentials
+        self.max_fix_attempts = max_fix_attempts
+        self.mpi_procs = mpi_procs
+        self.lammps_cmd = lammps_cmd
+        self.mpirun_cmd = mpirun_cmd
+        self.tiktoken_model = tiktoken_model
+        self.max_tokens = max_tokens
+
+        self.pair_styles = [
+            "eam",
+            "eam/alloy",
+            "eam/fs",
+            "meam",
+            "adp",  # classical, HEA-relevant
+            "kim",  # OpenKIM models
+            "snap",
+            "quip",
+            "mlip",
+            "pace",
+            "nep",  # ML/ACE families (if available)
+        ]
+
+        self.workspace = workspace
+        os.makedirs(self.workspace, exist_ok=True)
+
+        super().__init__(llm, **kwargs)
+
+        self.str_parser = StrOutputParser()
+
+        self.summ_chain = (
+            ChatPromptTemplate.from_template(
+                "Here is some data about an interatomic potential: {metadata}\n\n"
+                "Briefly summarize why it could be useful for this task: {simulation_task}."
+            )
+            | self.llm
+            | self.str_parser
+        )
+
+        self.choose_chain = (
+            ChatPromptTemplate.from_template(
+                "Here are the summaries of a certain number of interatomic potentials: {summaries_combined}\n\n"
+                "Pick one potential which would be most useful for this task: {simulation_task}.\n\n"
+                "Return your answer **only** as valid JSON, with no extra text or formatting.\n\n"
+                "Use this exact schema:\n"
+                "{{\n"
+                '  "Chosen index": <int>,\n'
+                '  "rationale": "<string>",\n'
+                '  "Potential name": "<string>"\n'
+                "}}\n"
+            )
+            | self.llm
+            | self.str_parser
+        )
+
+        self.author_chain = (
+            ChatPromptTemplate.from_template(
+                "Your task is to write a LAMMPS input file for this purpose: {simulation_task}.\n"
+                "Here is metadata about the interatomic potential that will be used: {metadata}.\n"
+                "Note that all potential files are in the './' directory.\n"
+                "Here is some information about the pair_style and pair_coeff that might be useful in writing the input file: {pair_info}.\n"
+                "Ensure that all output data is written only to the './log.lammps' file. Do not create any other output file.\n"
+                "To create the log, use only the 'log ./log.lammps' command. Do not use any other command like 'echo' or 'screen'.\n"
+                "Return your answer **only** as valid JSON, with no extra text or formatting.\n"
+                "Use this exact schema:\n"
+                "{{\n"
+                '  "input_script": "<string>"\n'
+                "}}\n"
+            )
+            | self.llm
+            | self.str_parser
+        )
+
+        self.fix_chain = (
+            ChatPromptTemplate.from_template(
+                "You are part of a larger scientific workflow whose purpose is to accomplish this task: {simulation_task}\n"
+                "For this purpose, this input file for LAMMPS was written: {input_script}\n"
+                "However, when running the simulation, an error was raised.\n"
+                "Here is the full stdout message that includes the error message: {err_message}\n"
+                "Your task is to write a new input file that resolves the error.\n"
+                "Here is metadata about the interatomic potential that will be used: {metadata}.\n"
+                "Note that all potential files are in the './' directory.\n"
+                "Here is some information about the pair_style and pair_coeff that might be useful in writing the input file: {pair_info}.\n"
+                "Ensure that all output data is written only to the './log.lammps' file. Do not create any other output file.\n"
+                "To create the log, use only the 'log ./log.lammps' command. Do not use any other command like 'echo' or 'screen'.\n"
+                "Return your answer **only** as valid JSON, with no extra text or formatting.\n"
+                "Use this exact schema:\n"
+                "{{\n"
+                '  "input_script": "<string>"\n'
+                "}}\n"
+            )
+            | self.llm
+            | self.str_parser
+        )
+
+        self.graph = self._build_graph().compile()
+
+    @staticmethod
+    def _safe_json_loads(s: str) -> Dict[str, Any]:
+        s = s.strip()
+        if s.startswith("```"):
+            s = s.strip("`")
+            i = s.find("\n")
+            if i != -1:
+                s = s[i + 1 :].strip()
+        return json.loads(s)
+
+    def _fetch_and_trim_text(self, url: str) -> str:
+        downloaded = trafilatura.fetch_url(url)
+        if not downloaded:
+            return "No metadata available"
+        text = trafilatura.extract(
+            downloaded,
+            include_comments=False,
+            include_tables=True,
+            include_links=False,
+            favor_recall=True,
+        )
+        if not text:
+            return "No metadata available"
+        text = text.strip()
+        try:
+            enc = tiktoken.encoding_for_model(self.tiktoken_model)
+            toks = enc.encode(text)
+            if len(toks) > self.max_tokens:
+                toks = toks[: self.max_tokens]
+                text = enc.decode(toks)
+        except Exception:
+            pass
+        return text
+
+    def _find_potentials(self, state: LammpsState) -> LammpsState:
+        db = am.library.Database(remote=True)
+        matches = db.get_lammps_potentials(
+            pair_style=self.pair_styles, elements=state["elements"]
+        )
+        msg_lines = []
+        if not list(matches):
+            msg_lines.append("No potentials found for this task in NIST.")
+        else:
+            msg_lines.append("Found these potentials in NIST:")
+            for rec in matches:
+                msg_lines.append(f"{rec.id} {rec.pair_style} {rec.symbols}")
+        return {
+            **state,
+            "matches": list(matches),
+            "db_message": "\n".join(msg_lines),
+            "idx": 0,
+            "summaries": [],
+            "full_texts": [],
+            "fix_attempts": 0,
+        }
+
+    def _should_summarize(self, state: LammpsState) -> str:
+        matches = state.get("matches", [])
+        i = state.get("idx", 0)
+        if not matches:
+            print("No potentials found in NIST for this task. Exiting....")
+            return "done_no_matches"
+        if i < min(self.max_potentials, len(matches)):
+            return "summarize_one"
+        return "summarize_done"
+
+    def _summarize_one(self, state: LammpsState) -> LammpsState:
+        i = state["idx"]
+        print(f"Summarizing potential #{i}")
+        match = state["matches"][i]
+        md = match.metadata()
+
+        if md.get("comments") is None:
+            text = "No metadata available"
+            summary = "No summary available"
+        else:
+            lines = md["comments"].split("\n")
+            url = lines[1] if len(lines) > 1 else ""
+            text = (
+                self._fetch_and_trim_text(url)
+                if url
+                else "No metadata available"
+            )
+            summary = self.summ_chain.invoke({
+                "metadata": text,
+                "simulation_task": state["simulation_task"],
+            })
+
+        return {
+            **state,
+            "idx": i + 1,
+            "summaries": [*state["summaries"], summary],
+            "full_texts": [*state["full_texts"], text],
+        }
+
+    def _build_summaries(self, state: LammpsState) -> LammpsState:
+        parts = []
+        for i, s in enumerate(state["summaries"]):
+            rec = state["matches"][i]
+            parts.append(f"\nSummary of potential #{i}: {rec.id}\n{s}\n")
+        return {**state, "summaries_combined": "".join(parts)}
+
+    def _choose(self, state: LammpsState) -> LammpsState:
+        print("Choosing one potential for this task...")
+        choice = self.choose_chain.invoke({
+            "summaries_combined": state["summaries_combined"],
+            "simulation_task": state["simulation_task"],
+        })
+        choice_dict = self._safe_json_loads(choice)
+        chosen_index = int(choice_dict["Chosen index"])
+        print(f"Chosen potential #{chosen_index}")
+        print("Rationale for choosing this potential:")
+        print(choice_dict["rationale"])
+        return {**state, "choice_json": choice, "chosen_index": chosen_index}
+
+    def _author(self, state: LammpsState) -> LammpsState:
+        print("First attempt at writing LAMMPS input file....")
+        match = state["matches"][state["chosen_index"]]
+        match.download_files(self.workspace)
+        text = state["full_texts"][state["chosen_index"]]
+        pair_info = match.pair_info()
+        authored_json = self.author_chain.invoke({
+            "simulation_task": state["simulation_task"],
+            "metadata": text,
+            "pair_info": pair_info,
+        })
+        script_dict = self._safe_json_loads(authored_json)
+        input_script = script_dict["input_script"]
+        with open(os.path.join(self.workspace, "in.lammps"), "w") as f:
+            f.write(input_script)
+        return {**state, "input_script": input_script}
+
+    def _run_lammps(self, state: LammpsState) -> LammpsState:
+        print("Running LAMMPS....")
+        result = subprocess.run(
+            [
+                self.mpirun_cmd,
+                "-np",
+                str(self.mpi_procs),
+                self.lammps_cmd,
+                "-in",
+                "in.lammps",
+            ],
+            cwd=self.workspace,
+            stdout=subprocess.PIPE,
+            stderr=subprocess.PIPE,
+            text=True,
+            check=False,
+        )
+        return {
+            **state,
+            "run_returncode": result.returncode,
+            "run_stdout": result.stdout,
+            "run_stderr": result.stderr,
+        }
+
+    def _route_run(self, state: LammpsState) -> str:
+        rc = state.get("run_returncode", 0)
+        attempts = state.get("fix_attempts", 0)
+        if rc == 0:
+            print("LAMMPS run successful! Exiting...")
+            return "done_success"
+        if attempts < self.max_fix_attempts:
+            print("LAMMPS run Failed. Attempting to rewrite input file...")
+            return "need_fix"
+        print("LAMMPS run Failed and maximum fix attempts reached. Exiting...")
+        return "done_failed"
+
+    def _fix(self, state: LammpsState) -> LammpsState:
+        match = state["matches"][state["chosen_index"]]
+        text = state["full_texts"][state["chosen_index"]]
+        pair_info = match.pair_info()
+        err_blob = state.get("run_stdout")
+
+        fixed_json = self.fix_chain.invoke({
+            "simulation_task": state["simulation_task"],
+            "input_script": state["input_script"],
+            "err_message": err_blob,
+            "metadata": text,
+            "pair_info": pair_info,
+        })
+        script_dict = self._safe_json_loads(fixed_json)
+        new_input = script_dict["input_script"]
+        with open(os.path.join(self.workspace, "in.lammps"), "w") as f:
+            f.write(new_input)
+        return {
+            **state,
+            "input_script": new_input,
+            "fix_attempts": state.get("fix_attempts", 0) + 1,
+        }
+
+    def _build_graph(self):
+        g = StateGraph(LammpsState)
+
+        g.add_node("find_potentials", self._find_potentials)
+        g.add_node("summarize_one", self._summarize_one)
+        g.add_node("build_summaries", self._build_summaries)
+        g.add_node("choose", self._choose)
+        g.add_node("author", self._author)
+        g.add_node("run_lammps", self._run_lammps)
+        g.add_node("fix", self._fix)
+
+        g.set_entry_point("find_potentials")
+
+        g.add_conditional_edges(
+            "find_potentials",
+            self._should_summarize,
+            {
+                "summarize_one": "summarize_one",
+                "summarize_done": "build_summaries",
+                "done_no_matches": END,
+            },
+        )
+
+        g.add_conditional_edges(
+            "summarize_one",
+            self._should_summarize,
+            {
+                "summarize_one": "summarize_one",
+                "summarize_done": "build_summaries",
+            },
+        )
+
+        g.add_edge("build_summaries", "choose")
+        g.add_edge("choose", "author")
+        g.add_edge("author", "run_lammps")
+
+        g.add_conditional_edges(
+            "run_lammps",
+            self._route_run,
+            {
+                "need_fix": "fix",
+                "done_success": END,
+                "done_failed": END,
+            },
+        )
+        g.add_edge("fix", "run_lammps")
+        return g
+
+    def run(self, simulation_task, elements):
+        return self.graph.invoke(
+            {"simulation_task": simulation_task, "elements": elements},
+            {"recursion_limit": 999_999},
+        )
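A brief usage sketch may help orient readers to the new agent. Everything model-specific below is illustrative: it assumes a LiteLLM-backed chat model (langchain-litellm is already a dependency) plus a working LAMMPS binary (lmp_mpi) and mpirun on PATH, as the constructor defaults expect:

from langchain_litellm import ChatLiteLLM  # any LangChain chat model should work here

from ursa.agents import LammpsAgent

llm = ChatLiteLLM(model="openai/gpt-4o")  # illustrative model choice

agent = LammpsAgent(
    llm,
    workspace="./hea_workspace",  # potential files, in.lammps, and log.lammps land here
    mpi_procs=4,
)

# The graph queries the NIST potential library via atomman, summarizes up to
# max_potentials candidates, picks one, authors in.lammps, and retries failed
# runs up to max_fix_attempts times.
final_state = agent.run(
    simulation_task="Compute the melting point of a CoCrFeMnNi high-entropy alloy",
    elements=["Co", "Cr", "Fe", "Mn", "Ni"],
)
print(final_state.get("run_returncode"))

The return value is the final LammpsState dict, so fields like run_returncode, input_script, and db_message can be inspected directly after the run.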
{ursa_ai-0.2.13 → ursa_ai-0.4.0/src/ursa_ai.egg-info}/PKG-INFO

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: ursa-ai
-Version: 0.2.13
+Version: 0.4.0
 Summary: Agents for science at LANL
 Author-email: Mike Grosskopf <mikegros@lanl.gov>, Nathan Debardeleben <ndebard@lanl.gov>, Rahul Somasundaram <rsomasundaram@lanl.gov>, Isaac Michaud <imichaud@lanl.gov>, Avanish Mishra <avanish@lanl.gov>, Arthur Lui <alui@lanl.gov>, Russell Bent <rbent@lanl.gov>, Earl Lawrence <earl@lanl.gov>
 License-Expression: BSD-3-Clause
@@ -19,7 +19,7 @@ Description-Content-Type: text/markdown
 License-File: LICENSE
 Requires-Dist: arxiv<3.0,>=2.2.0
 Requires-Dist: beautifulsoup4<5.0,>=4.13.4
-Requires-Dist:
+Requires-Dist: randomname<0.3,>=0.2.1
 Requires-Dist: langchain<0.4,>=0.3.27
 Requires-Dist: langchain-community<0.4,>=0.3.29
 Requires-Dist: langchain-litellm<0.3,>=0.2.2
@@ -38,6 +38,8 @@ Requires-Dist: langchain-anthropic<0.4,>=0.3.19
 Requires-Dist: langgraph-checkpoint-sqlite<3.0,>=2.0.10
 Requires-Dist: langchain-ollama<0.4,>=0.3.6
 Requires-Dist: ddgs>=9.5.5
+Requires-Dist: atomman>=1.5.2
+Requires-Dist: trafilatura>=1.6.1
 Dynamic: license-file
 
 # URSA - The Universal Research and Scientific Agent
{ursa_ai-0.2.13 → ursa_ai-0.4.0}/src/ursa_ai.egg-info/SOURCES.txt

@@ -7,6 +7,7 @@ src/ursa/agents/base.py
 src/ursa/agents/code_review_agent.py
 src/ursa/agents/execution_agent.py
 src/ursa/agents/hypothesizer_agent.py
+src/ursa/agents/lammps_agent.py
 src/ursa/agents/mp_agent.py
 src/ursa/agents/planning_agent.py
 src/ursa/agents/recall_agent.py
{ursa_ai-0.2.13 → ursa_ai-0.4.0}/src/ursa_ai.egg-info/requires.txt

@@ -1,6 +1,6 @@
 arxiv<3.0,>=2.2.0
 beautifulsoup4<5.0,>=4.13.4
-
+randomname<0.3,>=0.2.1
 langchain<0.4,>=0.3.27
 langchain-community<0.4,>=0.3.29
 langchain-litellm<0.3,>=0.2.2
@@ -19,3 +19,5 @@ langchain-anthropic<0.4,>=0.3.19
 langgraph-checkpoint-sqlite<3.0,>=2.0.10
 langchain-ollama<0.4,>=0.3.6
 ddgs>=9.5.5
+atomman>=1.5.2
+trafilatura>=1.6.1