ursa-ai 0.4.2__py3-none-any.whl → 0.6.0rc1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

@@ -0,0 +1,303 @@
+ import os
+ import re
+ import statistics
+ from functools import cached_property
+ from threading import Lock
+ from typing import Any, Mapping, TypedDict
+
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
+ from langchain_chroma import Chroma
+ from langchain_community.document_loaders import PyPDFLoader
+ from langchain_core.embeddings import Embeddings
+ from langchain_core.output_parsers import StrOutputParser
+ from langchain_core.prompts import ChatPromptTemplate
+ from langgraph.graph import StateGraph
+ from tqdm import tqdm
+
+ from ursa.agents.base import BaseAgent
+
+
+ class RAGMetadata(TypedDict):
+     k: int
+     num_results: int
+     relevance_scores: list[float]
+
+
+ class RAGState(TypedDict, total=False):
+     context: str
+     doc_texts: list[str]
+     doc_ids: list[str]
+     summary: str
+     rag_metadata: RAGMetadata
+
+
+ def remove_surrogates(text: str) -> str:
+     return re.sub(r"[\ud800-\udfff]", "", text)
+
+
+ class RAGAgent(BaseAgent):
+     def __init__(
+         self,
+         embedding: Embeddings,
+         llm="openai/o3-mini",
+         return_k: int = 10,
+         chunk_size: int = 1000,
+         chunk_overlap: int = 200,
+         database_path: str = "database",
+         summaries_path: str = "database",
+         vectorstore_path: str = "vectorstore",
+         **kwargs,
+     ):
+         super().__init__(llm, **kwargs)
+         self.retriever = None
+         self._vs_lock = Lock()
+         self.return_k = return_k
+         self.embedding = embedding
+         self.chunk_size = chunk_size
+         self.chunk_overlap = chunk_overlap
+         self.database_path = database_path
+         self.summaries_path = summaries_path
+         self.vectorstore_path = vectorstore_path
+
+         os.makedirs(self.vectorstore_path, exist_ok=True)
+         self.vectorstore = self._open_global_vectorstore()
+
+     @cached_property
+     def graph(self):
+         return self._build_graph()
+
+     @property
+     def _action(self):
+         return self.graph
+
+     @property
+     def manifest_path(self) -> str:
+         return os.path.join(self.vectorstore_path, "_ingested_ids.txt")
+
+     @property
+     def manifest_exists(self) -> bool:
+         return os.path.exists(self.manifest_path)
+
+     def _open_global_vectorstore(self) -> Chroma:
+         return Chroma(
+             persist_directory=self.vectorstore_path,
+             embedding_function=self.embedding,
+             collection_metadata={"hnsw:space": "cosine"},
+         )
+
+     def _paper_exists_in_vectorstore(self, doc_id: str) -> bool:
+         try:
+             col = self.vectorstore._collection
+             res = col.get(where={"id": doc_id}, limit=1)
+             return len(res.get("ids", [])) > 0
+         except Exception:
+             if not self.manifest_exists:
+                 return False
+             with open(self.manifest_path, "r") as f:
+                 return any(line.strip() == doc_id for line in f)
+
+     def _mark_paper_ingested(self, arxiv_id: str) -> None:
+         with open(self.manifest_path, "a") as f:
+             f.write(f"{arxiv_id}\n")
+
+     def _ensure_doc_in_vectorstore(self, paper_text: str, doc_id: str) -> None:
+         splitter = RecursiveCharacterTextSplitter(
+             chunk_size=self.chunk_size, chunk_overlap=self.chunk_overlap
+         )
+         docs = splitter.create_documents(
+             [paper_text], metadatas=[{"id": doc_id}]
+         )
+         with self._vs_lock:
+             if not self._paper_exists_in_vectorstore(doc_id):
+                 ids = [f"{doc_id}::{i}" for i, _ in enumerate(docs)]
+                 self.vectorstore.add_documents(docs, ids=ids)
+                 self._mark_paper_ingested(doc_id)
+
+     def _get_global_retriever(self, k: int = 5):
+         return self.vectorstore, self.vectorstore.as_retriever(
+             search_kwargs={"k": k}
+         )
+
+     def _read_docs_node(self, state: RAGState) -> RAGState:
+         print("[RAG Agent] Reading Documents....")
+         papers = []
+         new_state = state.copy()
+
+         pdf_files = [
+             f
+             for f in os.listdir(self.database_path)
+             if f.lower().endswith(".pdf")
+         ]
+
+         doc_ids = [
+             pdf_filename.rsplit(".pdf", 1)[0] for pdf_filename in pdf_files
+         ]
+         # Filter filenames and IDs together so doc_texts and doc_ids stay
+         # aligned one-to-one for the ingest step.
+         kept = [
+             (pdf_filename, doc_id)
+             for pdf_filename, doc_id in zip(pdf_files, doc_ids)
+             if not self._paper_exists_in_vectorstore(doc_id)
+         ]
+         pdf_files = [pdf_filename for pdf_filename, _ in kept]
+         doc_ids = [doc_id for _, doc_id in kept]
+
+         for pdf_filename in tqdm(pdf_files, desc="RAG parsing text"):
+             full_text = ""
+
+             try:
+                 loader = PyPDFLoader(
+                     os.path.join(self.database_path, pdf_filename)
+                 )
+                 pages = loader.load()
+                 full_text = "\n".join([p.page_content for p in pages])
+
+             except Exception as e:
+                 full_text = f"Error loading paper: {e}"
+
+             papers.append(full_text)
+
+         new_state["doc_texts"] = papers
+         new_state["doc_ids"] = doc_ids
+
+         return new_state
+
+     def _ingest_docs_node(self, state: RAGState) -> RAGState:
+         splitter = RecursiveCharacterTextSplitter(
+             chunk_size=self.chunk_size, chunk_overlap=self.chunk_overlap
+         )
+
+         if "doc_texts" not in state:
+             raise RuntimeError("Unexpected error: doc_texts not in state!")
+
+         if "doc_ids" not in state:
+             raise RuntimeError("Unexpected error: doc_ids not in state!")
+
+         batch_docs, batch_ids = [], []
+         for paper, doc_id in tqdm(
+             zip(state["doc_texts"], state["doc_ids"]),
+             total=len(state["doc_texts"]),
+             desc="RAG Ingesting",
+         ):
+             cleaned_text = remove_surrogates(paper)
+             docs = splitter.create_documents(
+                 [cleaned_text], metadatas=[{"id": doc_id}]
+             )
+             ids = [f"{doc_id}::{i}" for i, _ in enumerate(docs)]
+             batch_docs.extend(docs)
+             batch_ids.extend(ids)
+
+         if state["doc_texts"]:
+             print("[RAG Agent] Ingesting Documents Into RAG Database....")
+             with self._vs_lock:
+                 self.vectorstore.add_documents(batch_docs, ids=batch_ids)
+                 # Record document-level IDs (not chunk IDs) so the manifest
+                 # matches the lookups in _paper_exists_in_vectorstore.
+                 for doc_id in state["doc_ids"]:
+                     self._mark_paper_ingested(doc_id)
+
+         return state
+
+     def _retrieve_and_summarize_node(self, state: RAGState) -> RAGState:
+         print(
+             "[RAG Agent] Retrieving Contextually Relevant Information From Database..."
+         )
+         prompt = ChatPromptTemplate.from_template("""
+         You are a scientific assistant responsible for summarizing extracts from research papers, in the context of the following task: {context}
+
+         Summarize the retrieved scientific content below.
+         Cite sources by ID when relevant: {source_ids}
+
+         {retrieved_content}
+         """)
+         chain = prompt | self.llm | StrOutputParser()
+
+         # 2) One retrieval over the global DB with the task context
+         try:
+             if "context" not in state:
+                 raise RuntimeError("Unexpected error: context not in state!")
+
+             results = self.vectorstore.similarity_search_with_relevance_scores(
+                 state["context"], k=self.return_k
+             )
+
+             relevance_scores = [score for _, score in results]
+         except Exception as e:
+             print(f"RAG failed due to: {e}")
+             return {**state, "summary": ""}
+
+         source_ids_list = []
+         for doc, _ in results:
+             aid = doc.metadata.get("id")
+             if aid and aid not in source_ids_list:
+                 source_ids_list.append(aid)
+         source_ids = ", ".join(source_ids_list)
+
+         retrieved_content = (
+             "\n\n".join(doc.page_content for doc, _ in results)
+             if results
+             else ""
+         )
+
+         print("[RAG Agent] Summarizing Retrieved Information From Database...")
+         # 3) One summary based on retrieved chunks
+         rag_summary = chain.invoke({
+             "retrieved_content": retrieved_content,
+             "context": state["context"],
+             "source_ids": source_ids,
+         })
+
+         # Persist a single file for the batch (optional)
+         batch_name = "RAG_summary.txt"
+         os.makedirs(self.summaries_path, exist_ok=True)
+         with open(os.path.join(self.summaries_path, batch_name), "w") as f:
+             f.write(rag_summary)
+
+         # Diagnostics
+         if relevance_scores:
+             print(f"\nMax Relevance Score: {max(relevance_scores):.4f}")
+             print(f"Min Relevance Score: {min(relevance_scores):.4f}")
+             print(
+                 f"Median Relevance Score: {statistics.median(relevance_scores):.4f}\n"
+             )
+         else:
+             print("\nNo RAG results retrieved (score list empty).\n")
+
+         # Return the updated state with the summary and retrieval diagnostics
+         return {
+             **state,
+             "summary": rag_summary,
+             "rag_metadata": {
+                 "k": self.return_k,
+                 "num_results": len(results),
+                 "relevance_scores": relevance_scores,
+             },
+         }
+
+     def _invoke(
+         self, inputs: Mapping[str, Any], recursion_limit: int = 100000, **_
+     ):
+         config = self.build_config(
+             recursion_limit=recursion_limit, tags=["graph"]
+         )
+         return self._action.invoke(inputs, config)
+
+     def _build_graph(self):
+         graph = StateGraph(RAGState)
+
+         self.add_node(graph, self._read_docs_node)
+         self.add_node(graph, self._ingest_docs_node)
+         self.add_node(graph, self._retrieve_and_summarize_node)
+
+         graph.add_edge("_read_docs_node", "_ingest_docs_node")
+         graph.add_edge("_ingest_docs_node", "_retrieve_and_summarize_node")
+
+         graph.set_entry_point("_read_docs_node")
+         graph.set_finish_point("_retrieve_and_summarize_node")
+
+         return graph.compile(checkpointer=self.checkpointer)
+
+
+ # NOTE: Run test in `tests/agents/test_rag_agent/test_rag_agent.py` via:
+ #
+ # pytest -s tests/agents/test_rag_agent
+ #
+ # OR
+ #
+ # uv run pytest -s tests/agents/test_rag_agent
+ #
+ # NOTE: You may need to `rm -rf workspace/rag-agent` to remove the vectorstore.
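
For orientation, here is a minimal usage sketch of the new RAGAgent (not part of the diff). The module path ursa.agents.rag_agent, the embedding choice, and a public invoke() wrapper around _invoke inherited from BaseAgent are assumptions based on the code above, not confirmed by the package.

# Hypothetical sketch: module path, embedding model, and the invoke()
# entry point inherited from BaseAgent are assumptions.
from langchain_openai import OpenAIEmbeddings

from ursa.agents.rag_agent import RAGAgent  # module path assumed

agent = RAGAgent(
    embedding=OpenAIEmbeddings(model="text-embedding-3-small"),
    llm="openai/o3-mini",
    database_path="papers",  # directory containing the PDFs to ingest
)
# The compiled graph reads PDFs, ingests new ones, then retrieves and
# summarizes chunks relevant to the supplied task context.
result = agent.invoke({"context": "radiation transport in dense plasmas"})
print(result["summary"])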
@@ -1,23 +1,53 @@
+ from typing import Any, Mapping, TypedDict
+
+ from langgraph.graph import StateGraph
+
  from .base import BaseAgent


+ class RecallState(TypedDict):
+     query: str
+     memory: str
+
+
  class RecallAgent(BaseAgent):
      def __init__(self, llm, memory, **kwargs):
          super().__init__(llm, **kwargs)
          self.memorydb = memory
+         self._action = self._build_graph()

-     def remember(self, query):
-         memories = self.memorydb.retrieve(query)
+     def _remember(self, state: RecallState) -> RecallState:
+         memories = self.memorydb.retrieve(state["query"])
          summarize_query = f"""
          You are being given the critical task of generating a detailed description of logged information
          to an important official to make a decision. Summarize the following memories that are related to
          the statement. Ensure that any specific details that are important are retained in the summary.

-         Query: {query}
+         Query: {state["query"]}

          """

          for memory in memories:
              summarize_query += f"Memory: {memory} \n\n"
-         memory = self.llm.invoke(summarize_query).content
-         return memory
+         state["memory"] = self.llm.invoke(summarize_query).content
+         return state
+
+     def _build_graph(self):
+         graph = StateGraph(RecallState)
+
+         self.add_node(graph, self._remember)
+         graph.set_entry_point("_remember")
+         graph.set_finish_point("_remember")
+         return graph.compile(checkpointer=self.checkpointer)
+
+     def _invoke(
+         self, inputs: Mapping[str, Any], recursion_limit: int = 100000, **_
+     ):
+         config = self.build_config(
+             recursion_limit=recursion_limit, tags=["graph"]
+         )
+         if "query" not in inputs:
+             raise KeyError("'query' is a required argument")
+
+         output = self._action.invoke(inputs, config)
+         return output["memory"]
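
A hedged sketch of how the reworked RecallAgent might be driven (not from the diff): the module path, the invoke() wrapper delegating to _invoke, and the memory object are all assumptions.

# Hypothetical sketch: the module path and invoke() wrapper are assumed;
# `memory_store` stands in for any object exposing retrieve(query).
from langchain_openai import ChatOpenAI

from ursa.agents.recall_agent import RecallAgent  # path assumed

llm = ChatOpenAI(model="gpt-4o-mini")
recall_agent = RecallAgent(llm=llm, memory=memory_store)
summary = recall_agent.invoke({"query": "settings used in the last run"})
print(summary)  # _invoke returns output["memory"], a plain summary string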
@@ -1,6 +1,6 @@
  # from langchain_community.tools import TavilySearchResults
  # from langchain_core.runnables.graph import MermaidDrawMethod
- from typing import Annotated, Any, List, Optional
+ from typing import Annotated, Any, List, Mapping, Optional

  import requests
  from bs4 import BeautifulSoup
@@ -8,7 +8,7 @@ from langchain_community.tools import DuckDuckGoSearchResults
  from langchain_core.language_models import BaseChatModel
  from langchain_core.messages import HumanMessage, SystemMessage, ToolMessage
  from langchain_openai import ChatOpenAI
- from langgraph.graph import END, START, StateGraph
+ from langgraph.graph import StateGraph
  from langgraph.graph.message import add_messages
  from langgraph.prebuilt import InjectedState, create_react_agent
  from pydantic import Field
@@ -57,9 +57,9 @@ class WebSearchAgent(BaseAgent):
          self.has_internet = self._check_for_internet(
              kwargs.get("url", "http://www.lanl.gov")
          )
-         self._initialize_agent()
+         self._build_graph()

-     def review_node(self, state: WebSearchState) -> WebSearchState:
+     def _review_node(self, state: WebSearchState) -> WebSearchState:
          if not self.has_internet:
              return {
                  "messages": [
@@ -78,7 +78,7 @@ class WebSearchAgent(BaseAgent):
          )
          return {"messages": [HumanMessage(content=res.content)]}

-     def response_node(self, state: WebSearchState) -> WebSearchState:
+     def _response_node(self, state: WebSearchState) -> WebSearchState:
          if not self.has_internet:
              return {
                  "messages": [
@@ -111,60 +111,50 @@ class WebSearchAgent(BaseAgent):
          except (requests.ConnectionError, requests.Timeout):
              return False

-     def state_store_node(self, state: WebSearchState) -> WebSearchState:
+     def _state_store_node(self, state: WebSearchState) -> WebSearchState:
          state["thread_id"] = self.thread_id
          return state
          # return dict(**state, thread_id=self.thread_id)

-     def _initialize_agent(self):
-         self.graph = StateGraph(WebSearchState)
-         self.graph.add_node("state_store", self.state_store_node)
-         self.graph.add_node(
-             "websearch",
-             create_react_agent(
-                 self.llm,
-                 self.tools,
-                 state_schema=WebSearchState,
-                 prompt=self.websearch_prompt,
-             ),
+     def _create_react(self, state: WebSearchState) -> WebSearchState:
+         react_agent = create_react_agent(
+             self.llm,
+             self.tools,
+             state_schema=WebSearchState,
+             prompt=self.websearch_prompt,
          )
-
-         self.graph.add_node("review", self.review_node)
-         self.graph.add_node("response", self.response_node)
-
-         self.graph.add_edge(START, "state_store")
-         self.graph.add_edge("state_store", "websearch")
-         self.graph.add_edge("websearch", "review")
-         self.graph.add_edge("response", END)
-
-         self.graph.add_conditional_edges(
-             "review",
+         return react_agent.invoke(state)
+
+     def _build_graph(self):
+         graph = StateGraph(WebSearchState)
+         self.add_node(graph, self._state_store_node)
+         self.add_node(graph, self._create_react)
+         self.add_node(graph, self._review_node)
+         self.add_node(graph, self._response_node)
+
+         graph.set_entry_point("_state_store_node")
+         graph.add_edge("_state_store_node", "_create_react")
+         graph.add_edge("_create_react", "_review_node")
+         graph.set_finish_point("_response_node")
+
+         graph.add_conditional_edges(
+             "_review_node",
              should_continue,
-             {"websearch": "websearch", "response": "response"},
-         )
-         self.action = self.graph.compile(checkpointer=self.checkpointer)
-         # self.action.get_graph().draw_mermaid_png(output_file_path="./websearch_agent_graph.png", draw_method=MermaidDrawMethod.PYPPETEER)
-
-     def run(self, prompt, recursion_limit=100):
-         if not self.has_internet:
-             return {
-                 "messages": [
-                     HumanMessage(
-                         content="No internet for WebSearch Agent. No research carried out."
-                     )
-                 ]
-             }
-         inputs = {
-             "messages": [HumanMessage(content=prompt)],
-             "model": self.llm,
-         }
-         return self.action.invoke(
-             inputs,
              {
-                 "recursion_limit": recursion_limit,
-                 "configurable": {"thread_id": self.thread_id},
+                 "_create_react": "_create_react",
+                 "_response_node": "_response_node",
              },
          )
+         self._action = graph.compile(checkpointer=self.checkpointer)
+         # self._action.get_graph().draw_mermaid_png(output_file_path="./websearch_agent_graph.png", draw_method=MermaidDrawMethod.PYPPETEER)
+
+     def _invoke(
+         self, inputs: Mapping[str, Any], recursion_limit: int = 1000, **_
+     ):
+         config = self.build_config(
+             recursion_limit=recursion_limit, tags=["graph"]
+         )
+         return self._action.invoke(inputs, config)


  def process_content(
@@ -204,10 +194,10 @@ search_tool = DuckDuckGoSearchResults(output_format="json", num_results=10)

  def should_continue(state: WebSearchState):
      if len(state["messages"]) > (state.get("max_websearch_steps", 100) + 3):
-         return "response"
+         return "_response_node"
      if "[APPROVED]" in state["messages"][-1].content:
-         return "response"
-     return "websearch"
+         return "_response_node"
+     return "_create_react"


  def main():
@@ -220,7 +210,7 @@ def main():
          "messages": [HumanMessage(content=problem_string)],
          "model": model,
      }
-     result = websearcher.action.invoke(
+     result = websearcher.invoke(
          inputs,
          {
              "recursion_limit": 10000,
ursa/cli/__init__.py ADDED
@@ -0,0 +1,127 @@
+ from pathlib import Path
+ from typing import Annotated, Optional
+
+ from rich.console import Console
+ from typer import Option, Typer
+
+ app = Typer()
+
+
+ # TODO: add help
+ @app.command()
+ def run(
+     workspace: Annotated[
+         Path, Option(help="Directory to store ursa output")
+     ] = Path(".ursa"),
+     llm_model_name: Annotated[
+         str,
+         Option(
+             help="Name of LLM to use for agent tasks", envvar="URSA_LLM_NAME"
+         ),
+     ] = "gpt-5",
+     llm_base_url: Annotated[
+         str, Option(help="Base url for LLM.", envvar="URSA_LLM_BASE_URL")
+     ] = "https://api.openai.com/v1",
+     llm_api_key: Annotated[
+         Optional[str], Option(help="API key for LLM", envvar="URSA_LLM_API_KEY")
+     ] = None,
+     max_completion_tokens: Annotated[
+         int, Option(help="Maximum tokens for LLM to output")
+     ] = 50000,
+     emb_model_name: Annotated[
+         str, Option(help="Embedding model name", envvar="URSA_EMB_NAME")
+     ] = "text-embedding-3-small",
+     emb_base_url: Annotated[
+         str,
+         Option(help="Base url for embedding model", envvar="URSA_EMB_BASE_URL"),
+     ] = "https://api.openai.com/v1",
+     emb_api_key: Annotated[
+         Optional[str],
+         Option(help="API key for embedding model", envvar="URSA_EMB_API_KEY"),
+     ] = None,
+     share_key: Annotated[
+         bool,
+         Option(
+             help=(
+                 "Whether or not the LLM and embedding model share the same "
+                 "API key. If yes, then you can specify only one of them."
+             )
+         ),
+     ] = False,
+     arxiv_summarize: Annotated[
+         bool,
+         Option(
+             help="Whether or not to allow ArxivAgent to summarize response."
+         ),
+     ] = True,
+     arxiv_process_images: Annotated[
+         bool,
+         Option(help="Whether or not to allow ArxivAgent to process images."),
+     ] = False,
+     arxiv_max_results: Annotated[
+         int,
+         Option(
+             help="Maximum number of results for ArxivAgent to retrieve from ArXiv."
+         ),
+     ] = 10,
+     arxiv_database_path: Annotated[
+         Optional[Path],
+         Option(
+             help="Path to download/downloaded ArXiv documents; used by ArxivAgent."
+         ),
+     ] = None,
+     arxiv_summaries_path: Annotated[
+         Optional[Path],
+         Option(help="Path to store ArXiv paper summaries; used by ArxivAgent."),
+     ] = None,
+     arxiv_vectorstore_path: Annotated[
+         Optional[Path],
+         Option(
+             help="Path to store ArXiv paper vector store; used by ArxivAgent."
+         ),
+     ] = None,
+     arxiv_download_papers: Annotated[
+         bool,
+         Option(
+             help="Whether or not to allow ArxivAgent to download ArXiv papers."
+         ),
+     ] = True,
+     ssl_verify: Annotated[
+         bool, Option(help="Whether or not to verify SSL certificates.")
+     ] = True,
+ ) -> None:
+     console = Console()
+     with console.status("[grey50]Loading ursa ..."):
+         from ursa.cli.hitl import HITL, UrsaRepl
+
+         hitl = HITL(
+             workspace=workspace,
+             llm_model_name=llm_model_name,
+             llm_base_url=llm_base_url,
+             llm_api_key=llm_api_key,
+             max_completion_tokens=max_completion_tokens,
+             emb_model_name=emb_model_name,
+             emb_base_url=emb_base_url,
+             emb_api_key=emb_api_key,
+             share_key=share_key,
+             arxiv_summarize=arxiv_summarize,
+             arxiv_process_images=arxiv_process_images,
+             arxiv_max_results=arxiv_max_results,
+             arxiv_database_path=arxiv_database_path,
+             arxiv_summaries_path=arxiv_summaries_path,
+             arxiv_vectorstore_path=arxiv_vectorstore_path,
+             arxiv_download_papers=arxiv_download_papers,
+             ssl_verify=ssl_verify,
+         )
+     UrsaRepl(hitl).run()
+
+
+ @app.command()
+ def version() -> None:
+     from importlib.metadata import version as get_version
+
+     print(get_version("ursa-ai"))
+
+
+ def main():
+     app()
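
The new CLI can be exercised without a console entry point; a minimal smoke test using Typer's test runner (the subcommand names come from the code above, everything else is a sketch):

# Hypothetical sketch: drive the Typer app directly with CliRunner.
from typer.testing import CliRunner

from ursa.cli import app

runner = CliRunner()
result = runner.invoke(app, ["version"])  # prints the installed ursa-ai version
print(result.output)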