ursa-ai 0.5.0__py3-none-any.whl → 0.6.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of ursa-ai might be problematic.
- ursa/__init__.py +0 -0
- ursa/agents/arxiv_agent.py +77 -47
- ursa/agents/base.py +369 -2
- ursa/agents/code_review_agent.py +3 -1
- ursa/agents/execution_agent.py +92 -48
- ursa/agents/hypothesizer_agent.py +39 -42
- ursa/agents/lammps_agent.py +51 -29
- ursa/agents/mp_agent.py +45 -20
- ursa/agents/optimization_agent.py +405 -0
- ursa/agents/planning_agent.py +63 -28
- ursa/agents/rag_agent.py +75 -44
- ursa/agents/recall_agent.py +35 -5
- ursa/agents/websearch_agent.py +44 -54
- ursa/cli/__init__.py +127 -0
- ursa/cli/hitl.py +426 -0
- ursa/observability/pricing.py +319 -0
- ursa/observability/timing.py +1441 -0
- ursa/prompt_library/__init__.py +0 -0
- ursa/prompt_library/execution_prompts.py +7 -0
- ursa/prompt_library/optimization_prompts.py +131 -0
- ursa/tools/__init__.py +0 -0
- ursa/tools/feasibility_checker.py +114 -0
- ursa/tools/feasibility_tools.py +1075 -0
- ursa/tools/write_code.py +1 -1
- ursa/util/__init__.py +0 -0
- ursa/util/helperFunctions.py +142 -0
- ursa/util/optimization_schema.py +78 -0
- ursa/util/parse.py +1 -1
- {ursa_ai-0.5.0.dist-info → ursa_ai-0.6.0.dist-info}/METADATA +123 -4
- ursa_ai-0.6.0.dist-info/RECORD +43 -0
- ursa_ai-0.6.0.dist-info/entry_points.txt +2 -0
- ursa_ai-0.5.0.dist-info/RECORD +0 -28
- {ursa_ai-0.5.0.dist-info → ursa_ai-0.6.0.dist-info}/WHEEL +0 -0
- {ursa_ai-0.5.0.dist-info → ursa_ai-0.6.0.dist-info}/licenses/LICENSE +0 -0
- {ursa_ai-0.5.0.dist-info → ursa_ai-0.6.0.dist-info}/top_level.txt +0 -0
ursa/agents/rag_agent.py
CHANGED
@@ -1,8 +1,9 @@
 import os
 import re
 import statistics
+from functools import cached_property
 from threading import Lock
-from typing import
+from typing import Any, Mapping, TypedDict
 
 from langchain.text_splitter import RecursiveCharacterTextSplitter
 from langchain_chroma import Chroma
@@ -11,15 +12,23 @@ from langchain_core.embeddings import Embeddings
 from langchain_core.output_parsers import StrOutputParser
 from langchain_core.prompts import ChatPromptTemplate
 from langgraph.graph import StateGraph
+from tqdm import tqdm
 
 from ursa.agents.base import BaseAgent
 
 
+class RAGMetadata(TypedDict):
+    k: int
+    num_results: int
+    relevance_scores: list[float]
+
+
 class RAGState(TypedDict, total=False):
     context: str
-    doc_texts:
-    doc_ids:
+    doc_texts: list[str]
+    doc_ids: list[str]
     summary: str
+    rag_metadata: RAGMetadata
 
 
 def remove_surrogates(text: str) -> str:
@@ -29,8 +38,8 @@ def remove_surrogates(text: str) -> str:
 class RAGAgent(BaseAgent):
     def __init__(
         self,
+        embedding: Embeddings,
         llm="openai/o3-mini",
-        embedding: Optional[Embeddings] = None,
         return_k: int = 10,
         chunk_size: int = 1000,
         chunk_overlap: int = 200,
@@ -49,11 +58,18 @@
         self.database_path = database_path
         self.summaries_path = summaries_path
         self.vectorstore_path = vectorstore_path
-        self.graph = self._build_graph()
 
         os.makedirs(self.vectorstore_path, exist_ok=True)
         self.vectorstore = self._open_global_vectorstore()
 
+    @cached_property
+    def graph(self):
+        return self._build_graph()
+
+    @property
+    def _action(self):
+        return self.graph
+
     @property
     def manifest_path(self) -> str:
         return os.path.join(self.vectorstore_path, "_ingested_ids.txt")
@@ -66,6 +82,7 @@
         return Chroma(
             persist_directory=self.vectorstore_path,
             embedding_function=self.embedding,
+            collection_metadata={"hnsw:space": "cosine"},
         )
 
     def _paper_exists_in_vectorstore(self, doc_id: str) -> bool:
@@ -101,7 +118,7 @@
             search_kwargs={"k": k}
         )
 
-    def
+    def _read_docs_node(self, state: RAGState) -> RAGState:
         print("[RAG Agent] Reading Documents....")
         papers = []
         new_state = state.copy()
@@ -121,7 +138,7 @@
             if not self._paper_exists_in_vectorstore(id)
         ]
 
-        for pdf_filename in pdf_files:
+        for pdf_filename in tqdm(pdf_files, desc="RAG parsing text"):
             full_text = ""
 
             try:
@@ -141,13 +158,23 @@
 
         return new_state
 
-    def
+    def _ingest_docs_node(self, state: RAGState) -> RAGState:
         splitter = RecursiveCharacterTextSplitter(
             chunk_size=self.chunk_size, chunk_overlap=self.chunk_overlap
         )
 
+        if "doc_texts" not in state:
+            raise RuntimeError("Unexpected error: doc_texts not in state!")
+
+        if "doc_ids" not in state:
+            raise RuntimeError("Unexpected error: doc_ids not in state!")
+
         batch_docs, batch_ids = [], []
-        for paper, id in
+        for paper, id in tqdm(
+            zip(state["doc_texts"], state["doc_ids"]),
+            total=len(state["doc_texts"]),
+            desc="RAG Ingesting",
+        ):
             cleaned_text = remove_surrogates(paper)
             docs = splitter.create_documents(
                 [cleaned_text], metadatas=[{"id": id}]
@@ -160,12 +187,12 @@
         print("[RAG Agent] Ingesting Documents Into RAG Database....")
         with self._vs_lock:
             self.vectorstore.add_documents(batch_docs, ids=batch_ids)
-        for id in
+        for id in batch_ids:
             self._mark_paper_ingested(id)
 
         return state
 
-    def
+    def _retrieve_and_summarize_node(self, state: RAGState) -> RAGState:
         print(
             "[RAG Agent] Retrieving Contextually Relevant Information From Database..."
         )
@@ -181,9 +208,14 @@
 
         # 2) One retrieval over the global DB with the task context
         try:
-
+            if "context" not in state:
+                raise RuntimeError("Unexpected error: context not in state!")
+
+            results = self.vectorstore.similarity_search_with_relevance_scores(
                 state["context"], k=self.return_k
             )
+
+            relevance_scores = [score for _, score in results]
         except Exception as e:
             print(f"RAG failed due to: {e}")
             return {**state, "summary": ""}
@@ -195,13 +227,6 @@
             source_ids_list.append(aid)
         source_ids = ", ".join(source_ids_list)
 
-        # Compute a simple similarity-based quality score
-        relevancy_scores = []
-        if results:
-            distances = [score for _, score in results]
-            sims = [1.0 / (1.0 + d) for d in distances]  # map distance -> [0,1)
-            relevancy_scores = sims
-
         retrieved_content = (
             "\n\n".join(doc.page_content for doc, _ in results)
             if results
@@ -223,11 +248,11 @@
             f.write(rag_summary)
 
         # Diagnostics
-        if
-        print(f"\nMax
-        print(f"Min
+        if relevance_scores:
+            print(f"\nMax Relevance Score: {max(relevance_scores):.4f}")
+            print(f"Min Relevance Score: {min(relevance_scores):.4f}")
             print(
-            f"Median
+                f"Median Relevance Score: {statistics.median(relevance_scores):.4f}\n"
             )
         else:
             print("\nNo RAG results retrieved (score list empty).\n")
@@ -239,34 +264,40 @@
             "rag_metadata": {
                 "k": self.return_k,
                 "num_results": len(results),
-                "
+                "relevance_scores": relevance_scores,
             },
         }
 
-    def
-
-
-
-
-
-
+    def _invoke(
+        self, inputs: Mapping[str, Any], recursion_limit: int = 100000, **_
+    ):
+        config = self.build_config(
+            recursion_limit=recursion_limit, tags=["graph"]
+        )
+        return self._action.invoke(inputs, config)
 
-
-
+    def _build_graph(self):
+        graph = StateGraph(RAGState)
 
-        graph
-
+        self.add_node(graph, self._read_docs_node)
+        self.add_node(graph, self._ingest_docs_node)
+        self.add_node(graph, self._retrieve_and_summarize_node)
 
-
-
+        graph.add_edge("_read_docs_node", "_ingest_docs_node")
+        graph.add_edge("_ingest_docs_node", "_retrieve_and_summarize_node")
 
-
+        graph.set_entry_point("_read_docs_node")
+        graph.set_finish_point("_retrieve_and_summarize_node")
 
+        return graph.compile(checkpointer=self.checkpointer)
 
-if __name__ == "__main__":
-    agent = RAGAgent(database_path="workspace/arxiv_papers_neutron_star")
-    result = agent.run(
-        context="What are the constraints on the neutron star radius and what uncertainties are there on the constraints?",
-    )
 
-
+# NOTE: Run test in `tests/agents/test_rag_agent/test_rag_agent.py` via:
+#
+# pytest -s tests/agents/test_rag_agent
+#
+# OR
+#
+# uv run pytest -s tests/agents/test_rag_agent
+#
+# NOTE: You may need to `rm -rf workspace/rag-agent` to remove the vectorstore.
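
The net effect of these changes: `embedding` is now a required constructor argument, the Chroma collection is created with cosine distance, and the inline `__main__` demo is replaced by a compiled LangGraph pipeline reached through `_invoke`. A minimal usage sketch follows; it is not part of the diff, and it assumes BaseAgent exposes a public `invoke` wrapper around `_invoke` and that `OpenAIEmbeddings` plus the `workspace/...` path are acceptable stand-ins.

# Hypothetical sketch, not from the package source.
from langchain_openai import OpenAIEmbeddings

from ursa.agents.rag_agent import RAGAgent

agent = RAGAgent(
    embedding=OpenAIEmbeddings(model="text-embedding-3-small"),  # assumed model
    llm="openai/o3-mini",
    database_path="workspace/arxiv_papers",  # assumed directory of PDFs
)

# The compiled graph runs _read_docs_node -> _ingest_docs_node ->
# _retrieve_and_summarize_node; "context" seeds the retrieval query.
result = agent.invoke({"context": "What constrains the neutron star radius?"})
print(result["summary"])
print(result["rag_metadata"]["relevance_scores"])

Because the collection now declares `{"hnsw:space": "cosine"}`, `similarity_search_with_relevance_scores` yields scores already normalized to [0, 1], which is presumably why the hand-rolled distance-to-similarity mapping (`1.0 / (1.0 + d)`) was dropped.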
ursa/agents/recall_agent.py
CHANGED
@@ -1,23 +1,53 @@
+from typing import Any, Mapping, TypedDict
+
+from langgraph.graph import StateGraph
+
 from .base import BaseAgent
 
 
+class RecallState(TypedDict):
+    query: str
+    memory: str
+
+
 class RecallAgent(BaseAgent):
     def __init__(self, llm, memory, **kwargs):
         super().__init__(llm, **kwargs)
         self.memorydb = memory
+        self._action = self._build_graph()
 
-    def
-        memories = self.memorydb.retrieve(query)
+    def _remember(self, state: RecallState) -> str:
+        memories = self.memorydb.retrieve(state["query"])
         summarize_query = f"""
         You are being given the critical task of generating a detailed description of logged information
         to an important official to make a decision. Summarize the following memories that are related to
         the statement. Ensure that any specific details that are important are retained in the summary.
 
-        Query: {query}
+        Query: {state["query"]}
 
         """
 
         for memory in memories:
             summarize_query += f"Memory: {memory} \n\n"
-        memory = self.llm.invoke(summarize_query).content
-        return
+        state["memory"] = self.llm.invoke(summarize_query).content
+        return state
+
+    def _build_graph(self):
+        graph = StateGraph(RecallState)
+
+        self.add_node(graph, self._remember)
+        graph.set_entry_point("_remember")
+        graph.set_finish_point("_remember")
+        return graph.compile(checkpointer=self.checkpointer)
+
+    def _invoke(
+        self, inputs: Mapping[str, Any], recursion_limit: int = 100000, **_
+    ):
+        config = self.build_config(
+            recursion_limit=recursion_limit, tags=["graph"]
+        )
+        if "query" not in inputs:
+            raise ValueError("'query' is a required argument")
+
+        output = self._action.invoke(inputs, config)
+        return output["memory"]
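
A usage sketch for the reworked RecallAgent, again assuming a public `invoke` wrapper on BaseAgent; the `TinyMemory` stand-in is hypothetical and only has to provide the `retrieve(query)` method the agent calls.

# Hypothetical sketch, not from the package source.
from langchain_openai import ChatOpenAI

from ursa.agents.recall_agent import RecallAgent


class TinyMemory:
    """Stand-in memory store; RecallAgent only needs retrieve(query)."""

    def __init__(self, entries: list[str]):
        self.entries = entries

    def retrieve(self, query: str) -> list[str]:
        return [e for e in self.entries if query.lower() in e.lower()]


agent = RecallAgent(
    llm=ChatOpenAI(model="gpt-4o-mini"),  # assumed model
    memory=TinyMemory(["2025-01-10: adopted pytest for the test harness"]),
)

# _invoke rejects inputs without "query" and returns just the summary string.
print(agent.invoke({"query": "test harness"}))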
ursa/agents/websearch_agent.py
CHANGED
@@ -1,6 +1,6 @@
 # from langchain_community.tools import TavilySearchResults
 # from langchain_core.runnables.graph import MermaidDrawMethod
-from typing import Annotated, Any, List, Optional
+from typing import Annotated, Any, List, Mapping, Optional
 
 import requests
 from bs4 import BeautifulSoup
@@ -8,7 +8,7 @@ from langchain_community.tools import DuckDuckGoSearchResults
 from langchain_core.language_models import BaseChatModel
 from langchain_core.messages import HumanMessage, SystemMessage, ToolMessage
 from langchain_openai import ChatOpenAI
-from langgraph.graph import
+from langgraph.graph import StateGraph
 from langgraph.graph.message import add_messages
 from langgraph.prebuilt import InjectedState, create_react_agent
 from pydantic import Field
@@ -57,9 +57,9 @@ class WebSearchAgent(BaseAgent):
         self.has_internet = self._check_for_internet(
             kwargs.get("url", "http://www.lanl.gov")
         )
-        self.
+        self._build_graph()
 
-    def
+    def _review_node(self, state: WebSearchState) -> WebSearchState:
         if not self.has_internet:
             return {
                 "messages": [
@@ -78,7 +78,7 @@
         )
         return {"messages": [HumanMessage(content=res.content)]}
 
-    def
+    def _response_node(self, state: WebSearchState) -> WebSearchState:
         if not self.has_internet:
             return {
                 "messages": [
@@ -111,60 +111,50 @@
         except (requests.ConnectionError, requests.Timeout):
             return False
 
-    def
+    def _state_store_node(self, state: WebSearchState) -> WebSearchState:
         state["thread_id"] = self.thread_id
         return state
         # return dict(**state, thread_id=self.thread_id)
 
-    def
-
-
-
-
-
-                self.llm,
-                self.tools,
-                state_schema=WebSearchState,
-                prompt=self.websearch_prompt,
-            ),
+    def _create_react(self, state: WebSearchState) -> WebSearchState:
+        react_agent = create_react_agent(
+            self.llm,
+            self.tools,
+            state_schema=WebSearchState,
+            prompt=self.websearch_prompt,
         )
-
-
-
-
-        self.graph
-        self.graph
-        self.graph
-        self.graph
-
-
-
+        return react_agent.invoke(state)
+
+    def _build_graph(self):
+        graph = StateGraph(WebSearchState)
+        self.add_node(graph, self._state_store_node)
+        self.add_node(graph, self._create_react)
+        self.add_node(graph, self._review_node)
+        self.add_node(graph, self._response_node)
+
+        graph.set_entry_point("_state_store_node")
+        graph.add_edge("_state_store_node", "_create_react")
+        graph.add_edge("_create_react", "_review_node")
+        graph.set_finish_point("_response_node")
+
+        graph.add_conditional_edges(
+            "_review_node",
             should_continue,
-            {"websearch": "websearch", "response": "response"},
-        )
-        self.action = self.graph.compile(checkpointer=self.checkpointer)
-        # self.action.get_graph().draw_mermaid_png(output_file_path="./websearch_agent_graph.png", draw_method=MermaidDrawMethod.PYPPETEER)
-
-    def run(self, prompt, recursion_limit=100):
-        if not self.has_internet:
-            return {
-                "messages": [
-                    HumanMessage(
-                        content="No internet for WebSearch Agent. No research carried out."
-                    )
-                ]
-            }
-        inputs = {
-            "messages": [HumanMessage(content=prompt)],
-            "model": self.llm,
-        }
-        return self.action.invoke(
-            inputs,
             {
-                "
-                "
+                "_create_react": "_create_react",
+                "_response_node": "_response_node",
             },
         )
+        self._action = graph.compile(checkpointer=self.checkpointer)
+        # self._action.get_graph().draw_mermaid_png(output_file_path="./websearch_agent_graph.png", draw_method=MermaidDrawMethod.PYPPETEER)
+
+    def _invoke(
+        self, inputs: Mapping[str, Any], recursion_limit: int = 1000, **_
+    ):
+        config = self.build_config(
+            recursion_limit=recursion_limit, tags=["graph"]
+        )
+        return self._action.invoke(inputs, config)
 
 
 def process_content(
@@ -204,10 +194,10 @@ search_tool = DuckDuckGoSearchResults(output_format="json", num_results=10)
 
 def should_continue(state: WebSearchState):
     if len(state["messages"]) > (state.get("max_websearch_steps", 100) + 3):
-        return "
+        return "_response_node"
     if "[APPROVED]" in state["messages"][-1].content:
-        return "
-    return "
+        return "_response_node"
+    return "_create_react"
@@ -220,7 +210,7 @@ def main():
         "messages": [HumanMessage(content=problem_string)],
         "model": model,
     }
-    result = websearcher.
+    result = websearcher.invoke(
         inputs,
         {
             "recursion_limit": 10000,
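
The package's own `main()` (partially visible above) shows the new calling convention: the old `run(prompt)` helper is gone, and callers build the message/state dict themselves. A trimmed sketch, with the model choice and prompt as placeholders:

# Hypothetical sketch, not from the package source.
from langchain_core.messages import HumanMessage
from langchain_openai import ChatOpenAI

from ursa.agents.websearch_agent import WebSearchAgent

model = ChatOpenAI(model="gpt-4o-mini")  # assumed model
agent = WebSearchAgent(llm=model)

# should_continue loops _review_node back to _create_react until the review
# message contains "[APPROVED]" or the step budget runs out, then the graph
# exits through _response_node.
result = agent.invoke(
    {
        "messages": [HumanMessage(content="Find recent LIGO observation news.")],
        "model": model,
    },
    {"recursion_limit": 10000},
)
print(result["messages"][-1].content)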
ursa/cli/__init__.py
ADDED
@@ -0,0 +1,127 @@
+from pathlib import Path
+from typing import Annotated, Optional
+
+from rich.console import Console
+from typer import Option, Typer
+
+app = Typer()
+
+
+# TODO: add help
+@app.command()
+def run(
+    workspace: Annotated[
+        Path, Option(help="Directory to store ursa output")
+    ] = Path(".ursa"),
+    llm_model_name: Annotated[
+        str,
+        Option(
+            help="Name of LLM to use for agent tasks", envvar="URSA_LLM_NAME"
+        ),
+    ] = "gpt-5",
+    llm_base_url: Annotated[
+        str, Option(help="Base url for LLM.", envvar="URSA_LLM_BASE_URL")
+    ] = "https://api.openai.com/v1",
+    llm_api_key: Annotated[
+        Optional[str], Option(help="API key for LLM", envvar="URSA_LLM_API_KEY")
+    ] = None,
+    max_completion_tokens: Annotated[
+        int, Option(help="Maximum tokens for LLM to output")
+    ] = 50000,
+    emb_model_name: Annotated[
+        str, Option(help="Embedding model name", envvar="URSA_EMB_NAME")
+    ] = "text-embedding-3-small",
+    emb_base_url: Annotated[
+        str,
+        Option(help="Base url for embedding model", envvar="URSA_EMB_BASE_URL"),
+    ] = "https://api.openai.com/v1",
+    emb_api_key: Annotated[
+        Optional[str],
+        Option(help="API key for embedding model", envvar="URSA_EMB_API_KEY"),
+    ] = None,
+    share_key: Annotated[
+        bool,
+        Option(
+            help=(
+                "Whether or not the LLM and embedding model share the same "
+                "API key. If yes, then you can specify only one of them."
+            )
+        ),
+    ] = False,
+    arxiv_summarize: Annotated[
+        bool,
+        Option(
+            help="Whether or not to allow ArxivAgent to summarize response."
+        ),
+    ] = True,
+    arxiv_process_images: Annotated[
+        bool,
+        Option(help="Whether or not to allow ArxivAgent to process images."),
+    ] = False,
+    arxiv_max_results: Annotated[
+        int,
+        Option(
+            help="Maximum number of results for ArxivAgent to retrieve from ArXiv."
+        ),
+    ] = 10,
+    arxiv_database_path: Annotated[
+        Optional[Path],
+        Option(
+            help="Path to download/downloaded ArXiv documents; used by ArxivAgent."
+        ),
+    ] = None,
+    arxiv_summaries_path: Annotated[
+        Optional[Path],
+        Option(help="Path to store ArXiv paper summaries; used by ArxivAgent."),
+    ] = None,
+    arxiv_vectorstore_path: Annotated[
+        Optional[Path],
+        Option(
+            help="Path to store ArXiv paper vector store; used by ArxivAgent."
+        ),
+    ] = None,
+    arxiv_download_papers: Annotated[
+        bool,
+        Option(
+            help="Whether or not to allow ArxivAgent to download ArXiv papers."
+        ),
+    ] = True,
+    ssl_verify: Annotated[
+        bool, Option(help="Whether or not to verify SSL certificates.")
+    ] = True,
+) -> None:
+    console = Console()
+    with console.status("[grey50]Loading ursa ..."):
+        from ursa.cli.hitl import HITL, UrsaRepl
+
+    hitl = HITL(
+        workspace=workspace,
+        llm_model_name=llm_model_name,
+        llm_base_url=llm_base_url,
+        llm_api_key=llm_api_key,
+        max_completion_tokens=max_completion_tokens,
+        emb_model_name=emb_model_name,
+        emb_base_url=emb_base_url,
+        emb_api_key=emb_api_key,
+        share_key=share_key,
+        arxiv_summarize=arxiv_summarize,
+        arxiv_process_images=arxiv_process_images,
+        arxiv_max_results=arxiv_max_results,
+        arxiv_database_path=arxiv_database_path,
+        arxiv_summaries_path=arxiv_summaries_path,
+        arxiv_vectorstore_path=arxiv_vectorstore_path,
+        arxiv_download_papers=arxiv_download_papers,
+        ssl_verify=ssl_verify,
+    )
+    UrsaRepl(hitl).run()
+
+
+@app.command()
+def version() -> None:
+    from importlib.metadata import version as get_version
+
+    print(get_version("ursa-ai"))
+
+
+def main():
+    app()
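
Together with the new `entry_points.txt`, this module presumably installs a console script (the name `ursa` below is an assumption). The commands can also be exercised in-process with Typer's test runner:

# Hypothetical sketch, not from the package source.
from typer.testing import CliRunner

from ursa.cli import app

runner = CliRunner()

# `version` prints the installed ursa-ai version and exits.
result = runner.invoke(app, ["version"])
print(result.output)

# `run` drops into the human-in-the-loop REPL, so it is better launched from
# a shell, e.g.:  ursa run --workspace .ursa --llm-model-name gpt-5
# with secrets supplied via env vars such as URSA_LLM_API_KEY.

Note that Typer derives the flag names from the parameter names (`llm_model_name` becomes `--llm-model-name`), and each option's `envvar` gives an environment-variable fallback.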