ursa-ai 0.2.5__tar.gz → 0.2.6__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
- {ursa_ai-0.2.5/src/ursa_ai.egg-info → ursa_ai-0.2.6}/PKG-INFO +3 -3
- {ursa_ai-0.2.5 → ursa_ai-0.2.6}/README.md +2 -2
- {ursa_ai-0.2.5 → ursa_ai-0.2.6}/pyproject.toml +1 -0
- ursa_ai-0.2.6/src/ursa/agents/__init__.py +9 -0
- {ursa_ai-0.2.5 → ursa_ai-0.2.6}/src/ursa/agents/arxiv_agent.py +184 -107
- {ursa_ai-0.2.5 → ursa_ai-0.2.6}/src/ursa/agents/base.py +2 -1
- {ursa_ai-0.2.5 → ursa_ai-0.2.6}/src/ursa/agents/code_review_agent.py +42 -14
- {ursa_ai-0.2.5 → ursa_ai-0.2.6}/src/ursa/agents/execution_agent.py +24 -9
- {ursa_ai-0.2.5 → ursa_ai-0.2.6}/src/ursa/agents/hypothesizer_agent.py +13 -6
- {ursa_ai-0.2.5 → ursa_ai-0.2.6}/src/ursa/agents/mp_agent.py +73 -37
- {ursa_ai-0.2.5 → ursa_ai-0.2.6}/src/ursa/agents/planning_agent.py +22 -6
- {ursa_ai-0.2.5 → ursa_ai-0.2.6}/src/ursa/agents/recall_agent.py +1 -2
- {ursa_ai-0.2.5 → ursa_ai-0.2.6}/src/ursa/agents/websearch_agent.py +55 -12
- {ursa_ai-0.2.5 → ursa_ai-0.2.6}/src/ursa/prompt_library/code_review_prompts.py +5 -5
- {ursa_ai-0.2.5 → ursa_ai-0.2.6}/src/ursa/prompt_library/execution_prompts.py +4 -4
- {ursa_ai-0.2.5 → ursa_ai-0.2.6}/src/ursa/prompt_library/literature_prompts.py +4 -4
- {ursa_ai-0.2.5 → ursa_ai-0.2.6}/src/ursa/prompt_library/planning_prompts.py +4 -4
- {ursa_ai-0.2.5 → ursa_ai-0.2.6}/src/ursa/prompt_library/websearch_prompts.py +14 -14
- {ursa_ai-0.2.5 → ursa_ai-0.2.6}/src/ursa/util/diff_renderer.py +10 -3
- {ursa_ai-0.2.5 → ursa_ai-0.2.6}/src/ursa/util/memory_logger.py +9 -6
- {ursa_ai-0.2.5 → ursa_ai-0.2.6/src/ursa_ai.egg-info}/PKG-INFO +3 -3
- ursa_ai-0.2.5/src/ursa/agents/__init__.py +0 -10
- {ursa_ai-0.2.5 → ursa_ai-0.2.6}/LICENSE +0 -0
- {ursa_ai-0.2.5 → ursa_ai-0.2.6}/setup.cfg +0 -0
- {ursa_ai-0.2.5 → ursa_ai-0.2.6}/src/ursa/prompt_library/hypothesizer_prompts.py +0 -0
- {ursa_ai-0.2.5 → ursa_ai-0.2.6}/src/ursa/tools/run_command.py +0 -0
- {ursa_ai-0.2.5 → ursa_ai-0.2.6}/src/ursa/tools/write_code.py +0 -0
- {ursa_ai-0.2.5 → ursa_ai-0.2.6}/src/ursa/util/parse.py +0 -0
- {ursa_ai-0.2.5 → ursa_ai-0.2.6}/src/ursa_ai.egg-info/SOURCES.txt +0 -0
- {ursa_ai-0.2.5 → ursa_ai-0.2.6}/src/ursa_ai.egg-info/dependency_links.txt +0 -0
- {ursa_ai-0.2.5 → ursa_ai-0.2.6}/src/ursa_ai.egg-info/requires.txt +0 -0
- {ursa_ai-0.2.5 → ursa_ai-0.2.6}/src/ursa_ai.egg-info/top_level.txt +0 -0
{ursa_ai-0.2.5/src/ursa_ai.egg-info → ursa_ai-0.2.6}/PKG-INFO

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: ursa-ai
-Version: 0.2.5
+Version: 0.2.6
 Summary: Agents for science at LANL
 Author-email: Mike Grosskopf <mikegros@lanl.gov>, Nathan Debardeleben <ndebard@lanl.gov>, Rahul Somasundaram <rsomasundaram@lanl.gov>, Isaac Michaud <imichaud@lanl.gov>, Avanish Mishra <avanish@lanl.gov>, Arthur Lui <alui@lanl.gov>, Russell Bent <rbent@lanl.gov>, Earl Lawrence <earl@lanl.gov>
 License-Expression: BSD-3-Clause

@@ -42,7 +42,7 @@ Dynamic: license-file

 # URSA - The Universal Research and Scientific Agent

-<img src="
+<img src="https://github.com/lanl/ursa/raw/main/logos/logo.png" alt="URSA Logo" width="200" height="200">

 [![PyPI Version][pypi-version]](https://pypi.org/project/ursa-ai/)
 [![PyPI Downloads][total-downloads]](https://pepy.tech/projects/ursa-ai)

@@ -115,7 +115,7 @@ You have a duty for ensuring that you use URSA responsibly.

 URSA has been developed at Los Alamos National Laboratory as part of the ArtIMis project.

-<img src="
+<img src="https://github.com/lanl/ursa/raw/main/logos/artimis.png" alt="ArtIMis Logo" width="200" height="200">

 ### Notice of Copyright Assertion (O4958):
 *This program is Open-Source under the BSD-3 License.
{ursa_ai-0.2.5 → ursa_ai-0.2.6}/README.md

@@ -1,6 +1,6 @@
 # URSA - The Universal Research and Scientific Agent

-<img src="
+<img src="https://github.com/lanl/ursa/raw/main/logos/logo.png" alt="URSA Logo" width="200" height="200">

 [![PyPI Version][pypi-version]](https://pypi.org/project/ursa-ai/)
 [![PyPI Downloads][total-downloads]](https://pepy.tech/projects/ursa-ai)

@@ -73,7 +73,7 @@ You have a duty for ensuring that you use URSA responsibly.

 URSA has been developed at Los Alamos National Laboratory as part of the ArtIMis project.

-<img src="
+<img src="https://github.com/lanl/ursa/raw/main/logos/artimis.png" alt="ArtIMis Logo" width="200" height="200">

 ### Notice of Copyright Assertion (O4958):
 *This program is Open-Source under the BSD-3 License.
ursa_ai-0.2.6/src/ursa/agents/__init__.py (new file)

@@ -0,0 +1,9 @@
+from .planning_agent import PlanningAgent, PlanningState
+from .websearch_agent import WebSearchAgent, WebSearchState
+from .execution_agent import ExecutionAgent, ExecutionState
+from .code_review_agent import CodeReviewAgent, CodeReviewState
+from .hypothesizer_agent import HypothesizerAgent, HypothesizerState
+from .arxiv_agent import ArxivAgent, PaperState, PaperMetadata
+from .recall_agent import RecallAgent
+from .base import BaseAgent, BaseChatModel
+from .mp_agent import MaterialsProjectAgent
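The new `src/ursa/agents/__init__.py` re-exports the agent classes at package level, so downstream code can import them from `ursa.agents` rather than from the individual modules. A minimal usage sketch under that assumption; it relies only on names and defaults visible in this diff, and presumes credentials for the default "openai/o3-mini" model string are available in the environment:

```python
# Minimal sketch: assumes `ursa-ai` 0.2.6 is installed and credentials for the
# default "openai/o3-mini" model string are configured in the environment.
from ursa.agents import ArxivAgent  # PlanningAgent, ExecutionAgent, etc. are exported the same way

# Constructor arguments and defaults as shown in the arxiv_agent.py diff below.
agent = ArxivAgent(max_results=3, download_papers=True)

summary = agent.run(
    arxiv_search_query="Experimental Constraints on neutron star radius",
    context="What are the constraints on the neutron star radius?",
)
print(summary)
```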
{ursa_ai-0.2.5 → ursa_ai-0.2.6}/src/ursa/agents/arxiv_agent.py

@@ -1,5 +1,5 @@
 import os
-import pymupdf
+import pymupdf
 import requests
 import feedparser
 from PIL import Image
@@ -29,10 +29,12 @@ except:
 # embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001")
 # embeddings = OpenAIEmbeddings()

+
 class PaperMetadata(TypedDict):
     arxiv_id: str
     full_text: str

+
 class PaperState(TypedDict, total=False):
     query: str
     context: str
@@ -42,11 +44,13 @@ class PaperState(TypedDict, total=False):


 def describe_image(image: Image.Image) -> str:
-    if
-    print(
+    if "OpenAI" not in globals():
+        print(
+            "Vision transformer for summarizing images currently only implemented for OpenAI API."
+        )
         return ""
     client = OpenAI()
-
+
     buffered = BytesIO()
     image.save(buffered, format="PNG")
     img_base64 = base64.b64encode(buffered.getvalue()).decode()
@@ -54,12 +58,23 @@ def describe_image(image: Image.Image) -> str:
     response = client.chat.completions.create(
         model="gpt-4-vision-preview",
         messages=[
-            {
+            {
+                "role": "system",
+                "content": "You are a scientific assistant who explains plots and scientific diagrams.",
+            },
             {
                 "role": "user",
                 "content": [
-                    {
-
+                    {
+                        "type": "text",
+                        "text": "Describe this scientific image or plot in detail.",
+                    },
+                    {
+                        "type": "image_url",
+                        "image_url": {
+                            "url": f"data:image/png;base64,{img_base64}"
+                        },
+                    },
                 ],
             },
         ],
@@ -68,7 +83,9 @@ def describe_image(image: Image.Image) -> str:
     return response.choices[0].message.content.strip()


-def extract_and_describe_images(pdf_path: str, max_images: int = 5) -> List[str]:
+def extract_and_describe_images(
+    pdf_path: str, max_images: int = 5
+) -> List[str]:
     doc = pymupdf.open(pdf_path)
     descriptions = []
     image_count = 0
@@ -89,98 +106,117 @@ def extract_and_describe_images(pdf_path: str, max_images: int = 5) -> List[str]

             try:
                 desc = describe_image(image)
-                descriptions.append(
+                descriptions.append(
+                    f"Page {page_index + 1}, Image {img_index + 1}: {desc}"
+                )
             except Exception as e:
-                descriptions.append(
+                descriptions.append(
+                    f"Page {page_index + 1}, Image {img_index + 1}: [Error: {e}]"
+                )
             image_count += 1

     return descriptions


 def remove_surrogates(text: str) -> str:
-    return re.sub(r
+    return re.sub(r"[\ud800-\udfff]", "", text)


 class ArxivAgent(BaseAgent):
-    def __init__(
-
-
-
-
-
-
-
-
-
+    def __init__(
+        self,
+        llm="openai/o3-mini",
+        summarize: bool = True,
+        process_images=True,
+        max_results: int = 3,
+        download_papers: bool = True,
+        rag_embedding=None,
+        database_path="arxiv_papers",
+        summaries_path="arxiv_generated_summaries",
+        vectorstore_path="arxiv_vectorstores",
+        **kwargs,
+    ):
         super().__init__(llm, **kwargs)
-        self.summarize
-        self.process_images
-        self.max_results
-        self.database_path
-        self.summaries_path
+        self.summarize = summarize
+        self.process_images = process_images
+        self.max_results = max_results
+        self.database_path = database_path
+        self.summaries_path = summaries_path
         self.vectorstore_path = vectorstore_path
-        self.download_papers
-        self.rag_embedding
-
+        self.download_papers = download_papers
+        self.rag_embedding = rag_embedding
+
         self.graph = self._build_graph()

         os.makedirs(self.database_path, exist_ok=True)

         os.makedirs(self.summaries_path, exist_ok=True)

-
     def _fetch_papers(self, query: str) -> List[PaperMetadata]:
-
         if self.download_papers:
-
             encoded_query = quote(query)
             url = f"http://export.arxiv.org/api/query?search_query=all:{encoded_query}&start=0&max_results={self.max_results}"
             feed = feedparser.parse(url)
-
-            for i,entry in enumerate(feed.entries):
-                full_id = entry.id.split(
-                arxiv_id = full_id.split(
+
+            for i, entry in enumerate(feed.entries):
+                full_id = entry.id.split("/abs/")[-1]
+                arxiv_id = full_id.split("/")[-1]
                 title = entry.title.strip()
                 authors = ", ".join(author.name for author in entry.authors)
                 pdf_url = f"https://arxiv.org/pdf/{full_id}.pdf"
-                pdf_filename = os.path.join(
-
+                pdf_filename = os.path.join(
+                    self.database_path, f"{arxiv_id}.pdf"
+                )
+
                 if os.path.exists(pdf_filename):
-                    print(
+                    print(
+                        f"Paper # {i + 1}, Title: {title}, already exists in database"
+                    )
                 else:
-                    print(f"Downloading paper # {i+1}, Title: {title}")
+                    print(f"Downloading paper # {i + 1}, Title: {title}")
                     response = requests.get(pdf_url)
-                    with open(pdf_filename,
+                    with open(pdf_filename, "wb") as f:
                         f.write(response.content)
-

         papers = []

-        pdf_files = [
-
-
+        pdf_files = [
+            f
+            for f in os.listdir(self.database_path)
+            if f.lower().endswith(".pdf")
+        ]
+
+        for i, pdf_filename in enumerate(pdf_files):
             full_text = ""
-            arxiv_id = pdf_filename.split(
-            vec_save_loc =
+            arxiv_id = pdf_filename.split(".pdf")[0]
+            vec_save_loc = self.vectorstore_path + "/" + arxiv_id

             if self.summarize and not os.path.exists(vec_save_loc):
                 try:
-                    loader = PyPDFLoader(
+                    loader = PyPDFLoader(
+                        os.path.join(self.database_path, pdf_filename)
+                    )
                     pages = loader.load()
                     full_text = "\n".join([p.page_content for p in pages])
-
+
                     if self.process_images:
-                        image_descriptions = extract_and_describe_images(
-
-
+                        image_descriptions = extract_and_describe_images(
+                            os.path.join(self.database_path, pdf_filename)
+                        )
+                        full_text += (
+                            "\n\n[Image Interpretations]\n"
+                            + "\n".join(image_descriptions)
+                        )
+
                 except Exception as e:
                     full_text = f"Error loading paper: {e}"
-
-            papers.append(
-
-
-
+
+            papers.append(
+                {
+                    "arxiv_id": arxiv_id,
+                    "full_text": full_text,
+                }
+            )

         return papers

@@ -188,24 +224,28 @@ class ArxivAgent(BaseAgent):
         papers = self._fetch_papers(state["query"])
         return {**state, "papers": papers}

-
     def _get_or_build_vectorstore(self, paper_text: str, arxiv_id: str):
         os.makedirs(self.vectorstore_path, exist_ok=True)
-
+
         persist_directory = os.path.join(self.vectorstore_path, arxiv_id)
-
+
         if os.path.exists(persist_directory):
-            vectorstore = Chroma(
+            vectorstore = Chroma(
+                persist_directory=persist_directory,
+                embedding_function=self.rag_embedding,
+            )
         else:
-            splitter = RecursiveCharacterTextSplitter(
+            splitter = RecursiveCharacterTextSplitter(
+                chunk_size=1000, chunk_overlap=200
+            )
             docs = splitter.create_documents([paper_text])
-            vectorstore = Chroma.from_documents(
-
+            vectorstore = Chroma.from_documents(
+                docs, self.rag_embedding, persist_directory=persist_directory
+            )
+
         return vectorstore.as_retriever(search_kwargs={"k": 5})
-

     def _summarize_node(self, state: PaperState) -> PaperState:
-
         prompt = ChatPromptTemplate.from_template("""
 You are a scientific assistant responsible for summarizing extracts from research papers, in the context of the following task: {context}
@@ -213,79 +253,115 @@ class ArxivAgent(BaseAgent):

 {retrieved_content}
 """)
-
+
         chain = prompt | self.llm | StrOutputParser()

         summaries = [None] * len(state["papers"])
         relevancy_scores = [0.0] * len(state["papers"])
-
+
         def process_paper(i, paper):
             arxiv_id = paper["arxiv_id"]
-            summary_filename = os.path.join(
-
+            summary_filename = os.path.join(
+                self.summaries_path, f"{arxiv_id}_summary.txt"
+            )
+
             try:
                 cleaned_text = remove_surrogates(paper["full_text"])
                 if self.rag_embedding:
-                    retriever = self._get_or_build_vectorstore(
+                    retriever = self._get_or_build_vectorstore(
+                        cleaned_text, arxiv_id
+                    )

-                    relevant_docs_with_scores =
+                    relevant_docs_with_scores = (
+                        retriever.vectorstore.similarity_search_with_score(
+                            state["context"], k=5
+                        )
+                    )

                     if relevant_docs_with_scores:
-                        score = sum(
+                        score = sum(
+                            [s for _, s in relevant_docs_with_scores]
+                        ) / len(relevant_docs_with_scores)
                         relevancy_scores[i] = abs(1.0 - score)
                     else:
                         relevancy_scores[i] = 0.0
-
-                    retrieved_content = "\n\n".join(
+
+                    retrieved_content = "\n\n".join(
+                        [
+                            doc.page_content
+                            for doc, _ in relevant_docs_with_scores
+                        ]
+                    )
                 else:
                     retrieved_content = cleaned_text
-
-                summary = chain.invoke(
-
+
+                summary = chain.invoke(
+                    {
+                        "retrieved_content": retrieved_content,
+                        "context": state["context"],
+                    }
+                )
+
             except Exception as e:
                 summary = f"Error summarizing paper: {e}"
                 relevancy_scores[i] = 0.0
-
+
             with open(summary_filename, "w") as f:
                 f.write(summary)

             return i, summary
-
-        if ('papers' not in state or len(state['papers']) == 0):
-            print(f"No papers retrieved - bad query or network connection to ArXiv?")
-            return {**state, "summaries": None}

-
-
+        if "papers" not in state or len(state["papers"]) == 0:
+            print(
+                f"No papers retrieved - bad query or network connection to ArXiv?"
+            )
+            return {**state, "summaries": None}

-
+        with ThreadPoolExecutor(
+            max_workers=min(32, len(state["papers"]))
+        ) as executor:
+            futures = [
+                executor.submit(process_paper, i, paper)
+                for i, paper in enumerate(state["papers"])
+            ]
+
+            for future in tqdm(
+                as_completed(futures),
+                total=len(futures),
+                desc="Summarizing Papers",
+            ):
                 i, result = future.result()
                 summaries[i] = result

         if self.rag_embedding:
             print(f"\nMax Relevancy Score: {max(relevancy_scores)}")
             print(f"Min Relevancy Score: {min(relevancy_scores)}")
-            print(
-
-
+            print(
+                f"Median Relevancy Score: {statistics.median(relevancy_scores)}\n"
+            )

+        return {**state, "summaries": summaries}

-
     def _aggregate_node(self, state: PaperState) -> PaperState:
         summaries = state["summaries"]
         papers = state["papers"]
         formatted = []

-        if
+        if (
+            "summaries" not in state
+            or state["summaries"] is None
+            or "papers" not in state
+            or state["papers"] is None
+        ):
             return {**state, "final_summary": None}

         for i, (paper, summary) in enumerate(zip(papers, summaries)):
-            citation = f"[{i+1}] Arxiv ID: {paper['arxiv_id']}"
+            citation = f"[{i + 1}] Arxiv ID: {paper['arxiv_id']}"
             formatted.append(f"{citation}\n\nSummary:\n{summary}")

         combined = "\n\n" + ("\n\n" + "-" * 40 + "\n\n").join(formatted)

-        with open(self.summaries_path+
+        with open(self.summaries_path + "/summaries_combined.txt", "w") as f:
             f.write(combined)

         prompt = ChatPromptTemplate.from_template("""
@@ -300,15 +376,15 @@ class ArxivAgent(BaseAgent):

         chain = prompt | self.llm | StrOutputParser()

-        final_summary = chain.invoke(
+        final_summary = chain.invoke(
+            {"Summaries": combined, "context": state["context"]}
+        )

-        with open(self.summaries_path+
+        with open(self.summaries_path + "/final_summary.txt", "w") as f:
             f.write(final_summary)

         return {**state, "final_summary": final_summary}

-
-
     def _build_graph(self):
         builder = StateGraph(PaperState)
         builder.add_node("fetch_papers", self._fetch_node)
@@ -325,25 +401,26 @@ class ArxivAgent(BaseAgent):
         else:
             builder.set_entry_point("fetch_papers")
             builder.set_finish_point("fetch_papers")
-
+
         graph = builder.compile()
         return graph

     def run(self, arxiv_search_query: str, context: str) -> str:
-        result = self.graph.invoke(
+        result = self.graph.invoke(
+            {"query": arxiv_search_query, "context": context}
+        )

         if self.summarize:
             return result.get("final_summary", "No summary generated.")
         else:
             return "\n\nFinished Fetching papers!"
-
-
+

 if __name__ == "__main__":
     agent = ArxivAgent()
-    result = agent.run(
-
-
-
-
+    result = agent.run(
+        arxiv_search_query="Experimental Constraints on neutron star radius",
+        context="What are the constraints on the neutron star radius and what uncertainties are there on the constraints?",
+    )

+    print(result)
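Because the constructor accepts an optional `rag_embedding`, and `_get_or_build_vectorstore` uses it to build a persistent Chroma store per paper while `_summarize_node` then retrieves the top-k chunks instead of the full text, retrieval-augmented summarization can be enabled by passing a LangChain-compatible embedding object. A hedged sketch of that mode; the `langchain_openai` import path is an assumption (the module only shows a commented-out `OpenAIEmbeddings()` line), and any compatible embedding class should work:

```python
# Sketch only: OpenAIEmbeddings is an assumed embedding backend; ArxivAgent
# forwards rag_embedding to Chroma as the embedding function, so any
# LangChain-compatible embedding object should be acceptable.
from langchain_openai import OpenAIEmbeddings

from ursa.agents import ArxivAgent

agent = ArxivAgent(
    max_results=3,
    rag_embedding=OpenAIEmbeddings(),       # enables the per-paper Chroma retrieval path
    vectorstore_path="arxiv_vectorstores",  # default path shown in the diff above
)

report = agent.run(
    arxiv_search_query="Experimental Constraints on neutron star radius",
    context="What are the constraints on the neutron star radius?",
)
print(report)
```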
{ursa_ai-0.2.5 → ursa_ai-0.2.6}/src/ursa/agents/base.py

@@ -5,6 +5,7 @@ from langchain_core.load import dumps

 import json

+
 class BaseAgent:
     # llm: BaseChatModel
     # llm_with_tools: Runnable[LanguageModelInput, BaseMessage]
@@ -35,7 +36,7 @@ class BaseAgent:

         self.checkpointer = checkpointer
         self.thread_id = self.__class__.__name__
-
+
     def write_state(self, filename, state):
         json_state = dumps(state, ensure_ascii=False)
         with open(filename, "w") as f: