PyPI - ws-bom-robot-app - Versions diffs - 0.0.95__tar.gz → 0.0.97__tar.gz - Mend

ws-bom-robot-app 0.0.95.tar.gz → 0.0.97.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (84) hide show

{ws_bom_robot_app-0.0.95/ws_bom_robot_app.egg-info → ws_bom_robot_app-0.0.97}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: ws_bom_robot_app
-Version: 0.0.95
+Version: 0.0.97
 Summary: A FastAPI application serving ws bom/robot/llm platform ai.
 Home-page: https://github.com/websolutespa/bom
 Author: Websolute Spa

{ws_bom_robot_app-0.0.95 → ws_bom_robot_app-0.0.97}/setup.py RENAMED Viewed

@@ -4,7 +4,7 @@ _requirements = [line.split('#')[0].strip() for line in open("requirements.txt")
 setup(
     name="ws_bom_robot_app",
-    version="0.0.95",
+    version="0.0.97",
     description="A FastAPI application serving ws bom/robot/llm platform ai.",
     long_description=open("README.md", encoding='utf-8').read(),
     long_description_content_type="text/markdown",

{ws_bom_robot_app-0.0.95 → ws_bom_robot_app-0.0.97}/ws_bom_robot_app/llm/agent_lcel.py RENAMED Viewed

@@ -1,4 +1,4 @@
-from typing import Any, Optional
+from typing import Any, Optional, Literal
 from langchain.agents import AgentExecutor, create_tool_calling_agent
 from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
 from langchain_core.runnables import RunnableLambda
@@ -12,8 +12,10 @@ from ws_bom_robot_app.llm.defaut_prompt import default_prompt, tool_prompt
 class AgentLcel:
-    def __init__(self, llm: LlmInterface, sys_message: str, sys_context: AgentContext, tools: list, rules: LlmRules = None):
+    def __init__(self, llm: LlmInterface, sys_message: str, sys_context: AgentContext, tools: list, ouput_model: str | dict = None,  rules: LlmRules = None):
         self.sys_message = chevron.render(template=sys_message,data=sys_context)
+        self.ouput_model = ouput_model
+        self.output_parser =  None
         self.__llm = llm
         self.__tools = tools
         self.rules = rules
@@ -27,6 +29,10 @@ class AgentLcel:
         message : LlmMessage = input[self.memory_key][-1]
         rules_prompt = await get_rules(self.embeddings, self.rules, message.content) if self.rules else ""
         system = default_prompt + (tool_prompt(render_text_description(self.__tools)) if len(self.__tools)>0 else "") + self.sys_message + rules_prompt
+        if isinstance(self.ouput_model, dict):
+          output_parser = self.__llm.get_parser(type="json", model=self.ouput_model)
+          system += "\n\nFormat instructions:\n{format_instructions}".strip()
         prompt = ChatPromptTemplate(
             messages=[
                 SystemMessage(content=system), #from ("system",system) to avoid improper f-string substitutions
@@ -35,6 +41,13 @@ class AgentLcel:
             ],
             template_format=None,
             )
+        if output_parser:
+          prompt.partial(
+              format_instructions=output_parser.get_format_instructions()
+            )
+          self.output_parser = output_parser
+        else:
+          self.output_parser = self.__llm.get_parser(type="text")
         return prompt
     def __create_agent(self) -> AgentExecutor:
@@ -45,6 +58,6 @@ class AgentLcel:
           }
           | RunnableLambda(self.__create_prompt)
           | self.__llm_with_tools
-          | self.__llm.get_parser()
+          | self.__llm.get_parser("text", None if not self.output_parser else "json", self.ouput_model)
       )
       return AgentExecutor(agent=agent,tools=self.__tools,verbose=False)

{ws_bom_robot_app-0.0.95 → ws_bom_robot_app-0.0.97}/ws_bom_robot_app/llm/main.py RENAMED Viewed

@@ -127,6 +127,7 @@ async def __stream(rq: StreamRequest, ctx: Request, queue: Queue, formatted: boo
       sys_message=rq.system_message,
       sys_context=rq.system_context,
       tools=get_structured_tools(__llm, tools=rq.app_tools, callbacks=[callbacks], queue=queue),
+      ouput_model=rq.jsonOutputModel,
       rules=rq.rules
   )
   try:

{ws_bom_robot_app-0.0.95 → ws_bom_robot_app-0.0.97}/ws_bom_robot_app/llm/models/api.py RENAMED Viewed

@@ -150,6 +150,8 @@ class LlmApp(BaseModel):
   app_tools: Optional[List[LlmAppTool]] = Field([], validation_alias=AliasChoices("appTools","app_tools"))
   vector_type: Optional[str] = "faiss"
   vector_db: Optional[str] = Field(None, validation_alias=AliasChoices("vectorDb","vector_db"))
+  output_model: Optional[str] = Field(None, validation_alias=AliasChoices("outputModel","output_model"))
+  jsonOutputModel: Optional[dict] = Field(None, validation_alias=AliasChoices("jsonOutputModel","json_output_model"))
   rules: Optional[LlmRules] = None
   fine_tuned_model: Optional[str] = Field(None, validation_alias=AliasChoices("fineTunedModel","fine_tuned_model"))
   lang_chain_tracing: Optional[bool] = Field(False, validation_alias=AliasChoices("langChainTracing","lang_chain_tracing"))

{ws_bom_robot_app-0.0.95 → ws_bom_robot_app-0.0.97}/ws_bom_robot_app/llm/providers/llm_manager.py RENAMED Viewed

@@ -1,4 +1,4 @@
-from typing import Optional
+from typing import Optional, Literal
 from urllib.parse import urlparse
 from langchain_core.embeddings import Embeddings
 from langchain_core.language_models import BaseChatModel
@@ -34,9 +34,14 @@ class LlmInterface:
         from langchain.agents.format_scratchpad.openai_tools import format_to_openai_tool_messages
         return format_to_openai_tool_messages(intermediate_steps=intermadiate_steps)
-    def get_parser(self):
+    def get_parser(self, type: Literal["text", "json"] = "text", model: Optional[dict] = None):
+        """Return the output parser matching the requested output kind.
+
+        Args:
+            type: "json" for a JsonOutputParser, anything else for the
+                OpenAI tools agent parser (default "text").
+            model: sample dict used only when type is "json"; every key becomes
+                a required field whose type is the Python class of its value.
+                When missing or empty, a schema-less JsonOutputParser is returned.
+        """
         from langchain.agents.output_parsers.openai_tools import OpenAIToolsAgentOutputParser
+        from langchain_core.output_parsers import JsonOutputParser
+        from pydantic import create_model
+        if type == "json":
+          if not model:
+            # Nothing to derive a schema from: parse plain JSON instead of crashing on None.items()
+            return JsonOutputParser()
+          # The `type` parameter shadows the builtin inside this method, so type(v)
+          # would call the string "json"; read each value's class directly instead.
+          fields = {k: (v.__class__, ...) for k, v in model.items()}
+          return JsonOutputParser(pydantic_object=create_model('json_schema', **fields))
         return OpenAIToolsAgentOutputParser()
     async def _format_multimodal_image_message(self, message: dict) -> dict:
         return {
             "type": "image_url",

ws_bom_robot_app-0.0.97/ws_bom_robot_app/llm/vector_store/integration/api.py ADDED Viewed

@@ -0,0 +1,216 @@
+import asyncio, logging, aiohttp
+from ws_bom_robot_app.llm.vector_store.integration.base import IntegrationStrategy
+from langchain_core.documents import Document
+from ws_bom_robot_app.llm.vector_store.loader.base import Loader
+from typing import List, Union, Optional, Dict, Any, Literal
+from pydantic import BaseModel, Field, AliasChoices, field_validator
+import json
+import os
+class AuthConfig(BaseModel):
+  """
+  Describes how a request to the remote API is authenticated.
+  Attributes:
+    type: Authentication scheme (bearer, basic, api_key, custom, none); defaults to none
+    token: Secret sent for the bearer and api_key schemes
+    username: User name for the basic scheme
+    password: Password for the basic scheme
+    header_name: Header that carries the api_key value (alias: headerName)
+    prefix: Text placed before the secret (e.g., 'Bearer', 'Token')
+  """
+  type: Literal["bearer", "basic", "api_key", "custom", "none"] = "none"
+  token: Optional[str] = None
+  username: Optional[str] = None
+  password: Optional[str] = None
+  header_name: Optional[str] = Field(None, validation_alias=AliasChoices("headerName", "header_name"))
+  prefix: Optional[str] = None
+class ApiParams(BaseModel):
+  """
+  Settings accepted by the generic API integration.
+  Attributes:
+    url: Endpoint to call (alias: endpoint)
+    method: HTTP verb, one of GET, POST, PUT, DELETE, PATCH (default GET)
+    headers: Extra request headers
+    params: Query-string parameters
+    body: Payload for POST/PUT/PATCH; a dict is sent as JSON, a string as raw data
+    auth: Authentication settings (an AuthConfig, or a dict that is turned into one)
+    response_data_path: Dot-notation path to the payload inside the response (e.g., 'data.items', 'results')
+    max_retries: How many times a failed request is retried (default 5)
+    retry_delay: Base delay in seconds for the exponential backoff (default 1.0)
+    timeout: Total request timeout in seconds (default 30)
+  """
+  url: str = Field(validation_alias=AliasChoices("url", "endpoint"))
+  method: Literal["GET", "POST", "PUT", "DELETE", "PATCH"] = "GET"
+  headers: Optional[Dict[str, str]] = Field(default_factory=dict)
+  params: Optional[Dict[str, Any]] = Field(default_factory=dict)
+  body: Optional[Union[Dict[str, Any], str]] = None
+  auth: Optional[AuthConfig] = Field(default_factory=AuthConfig)
+  response_data_path: Optional[str] = Field(None, validation_alias=AliasChoices("responseDataPath", "response_data_path"))
+  max_retries: int = Field(5, validation_alias=AliasChoices("maxRetries", "max_retries"))
+  retry_delay: float = Field(1.0, validation_alias=AliasChoices("retryDelay", "retry_delay"))
+  timeout: int = 30
+  @field_validator('auth', mode='before')
+  @classmethod
+  def parse_auth(cls, v):
+    """Turn a raw dict into AuthConfig; replace any empty value with the default AuthConfig"""
+    if isinstance(v, dict):
+      return AuthConfig(**v)
+    if v:
+      return v
+    return AuthConfig()
+class Api(IntegrationStrategy):
+  """
+  Generic API Integration that supports:
+  - Multiple HTTP methods (GET, POST, PUT, DELETE, PATCH)
+  - Bearer, Basic and API-key authentication (the 'custom' and 'none' types
+    add no auth header here; custom schemes can be passed through `headers`)
+  - Custom headers and parameters
+  - Automatic retry with exponential backoff (HTTP 429, client errors, timeouts)
+  - Flexible response data extraction through a dot-notation path
+  """
+  def __init__(self, knowledgebase_path: str, data: dict[str, Union[str, int, list]]):
+    """Validate the raw integration settings into an ApiParams model"""
+    super().__init__(knowledgebase_path, data)
+    self.__data = ApiParams.model_validate(self.data)
+  def working_subdirectory(self) -> str:
+    """Return the working sub-directory name used by this integration"""
+    return 'api_integration'
+  async def run(self) -> None:
+    """Fetch data from the API and save to JSON file"""
+    _data = await self.__fetch_data()
+    json_file_path = os.path.join(self.working_directory, 'api_data.json')
+    with open(json_file_path, 'w', encoding='utf-8') as f:
+      json.dump(_data, f, ensure_ascii=False, indent=2)
+    logging.info(f"Saved {len(_data) if isinstance(_data, list) else 1} items to {json_file_path}")
+  async def load(self) -> list[Document]:
+    """Load data from API and convert to documents"""
+    await self.run()
+    await asyncio.sleep(1)
+    return await Loader(self.working_directory).load()
+  def __prepare_headers(self) -> Dict[str, str]:
+    """
+    Prepare request headers with authentication.
+    Raises:
+      ValueError: bearer or api_key authentication is configured without a token
+    """
+    headers = dict(self.__data.headers) if self.__data.headers else {}
+    # Header names are case-insensitive: do not add a second Content-Type when
+    # the caller already supplied one with different casing
+    has_content_type = any(name.lower() == 'content-type' for name in headers)
+    if not has_content_type and self.__data.method in ("POST", "PUT", "PATCH"):
+      headers['Content-Type'] = 'application/json'
+    # Add authentication
+    auth = self.__data.auth
+    if auth.type == "bearer":
+      if not auth.token:
+        # Fail early instead of sending the literal text "None" as credential
+        raise ValueError("Bearer authentication requires a token")
+      prefix = auth.prefix or "Bearer"
+      headers['Authorization'] = f"{prefix} {auth.token}"
+    elif auth.type == "basic":
+      import base64
+      # A missing user name or password is encoded as empty, not as the text "None"
+      credentials = f"{auth.username or ''}:{auth.password or ''}"
+      encoded = base64.b64encode(credentials.encode()).decode()
+      headers['Authorization'] = f"Basic {encoded}"
+    elif auth.type == "api_key" and auth.header_name:
+      if not auth.token:
+        raise ValueError("API key authentication requires a token")
+      prefix = f"{auth.prefix} " if auth.prefix else ""
+      headers[auth.header_name] = f"{prefix}{auth.token}"
+    return headers
+  def __get_nested_value(self, data: Any, path: Optional[str]) -> Any:
+    """
+    Extract nested value from data using dot notation path.
+    Dict levels are looked up by key and list levels by numeric index
+    (e.g., 'data.items.0'). Returns None as soon as a segment cannot be
+    resolved; returns data unchanged when path is empty.
+    """
+    if not path:
+      return data
+    current = data
+    for key in path.split('.'):
+      if isinstance(current, dict):
+        current = current.get(key)
+      elif isinstance(current, list) and key.isdecimal():
+        # isdecimal() only accepts characters int() can parse, and an index
+        # past the end means "not found" rather than an IndexError
+        index = int(key)
+        current = current[index] if index < len(current) else None
+      else:
+        return None
+      if current is None:
+        return None
+    return current
+  async def __make_request(
+    self,
+    url: str,
+    headers: Dict[str, str],
+    params: Optional[Dict[str, Any]] = None
+  ) -> Any:
+    """
+    Make HTTP request with retry logic.
+    HTTP 429 responses, aiohttp client errors and timeouts are retried up to
+    max_retries times, waiting retry_delay * 2**attempt seconds in between.
+    Returns the decoded JSON body, or {"text": <body>} for a non-JSON response.
+    Raises:
+      Exception: when the retries are exhausted
+    """
+    max_retries = self.__data.max_retries
+    # The request arguments are the same for every attempt, so build them once
+    request_kwargs: Dict[str, Any] = {
+      "headers": headers,
+      "params": params or self.__data.params
+    }
+    # Add body for POST/PUT/PATCH
+    if self.__data.method in ("POST", "PUT", "PATCH") and self.__data.body:
+      if isinstance(self.__data.body, dict):
+        request_kwargs["json"] = self.__data.body
+      else:
+        request_kwargs["data"] = self.__data.body
+    timeout = aiohttp.ClientTimeout(total=self.__data.timeout)
+    retry_count = 0
+    # One session serves all attempts instead of a new one per retry
+    async with aiohttp.ClientSession(timeout=timeout) as session:
+      while retry_count <= max_retries:
+        try:
+          async with session.request(self.__data.method, url, **request_kwargs) as response:
+            # Check response status
+            if response.status == 429:  # Rate limit
+              retry_count += 1
+              if retry_count > max_retries:
+                raise Exception("Rate limit exceeded. Maximum retries reached.")
+              wait_time = self.__data.retry_delay * (2 ** retry_count)
+              logging.warning(f"Rate limited. Waiting {wait_time}s (Attempt {retry_count}/{max_retries})")
+              await asyncio.sleep(wait_time)
+              continue
+            response.raise_for_status()
+            # Parse response
+            try:
+              return await response.json()
+            except aiohttp.ContentTypeError:
+              text = await response.text()
+              logging.warning(f"Non-JSON response received: {text[:200]}")
+              return {"text": text}
+        except (aiohttp.ClientError, asyncio.TimeoutError) as e:
+          # An elapsed total timeout surfaces as asyncio.TimeoutError, which a
+          # ClientError-only handler would let escape without any retry
+          reason = str(e) or e.__class__.__name__
+          retry_count += 1
+          if retry_count > max_retries:
+            raise Exception(f"Request failed after {max_retries} retries: {reason}") from e
+          wait_time = self.__data.retry_delay * (2 ** retry_count)
+          logging.warning(f"Request error: {reason}. Retrying in {wait_time}s...")
+          await asyncio.sleep(wait_time)
+    raise Exception("Maximum retries exceeded")
+  async def __fetch_data(self) -> Any:
+    """
+    Fetch data from API.
+    Returns the value found at response_data_path, or the whole response when
+    no path is configured or the path does not resolve.
+    """
+    headers = self.__prepare_headers()
+    response = await self.__make_request(self.__data.url, headers)
+    # Extract data from response using path if specified
+    data = self.__get_nested_value(response, self.__data.response_data_path)
+    return data if data is not None else response

{ws_bom_robot_app-0.0.95 → ws_bom_robot_app-0.0.97}/ws_bom_robot_app/llm/vector_store/integration/googledrive.py RENAMED Viewed

@@ -1,10 +1,38 @@
 import asyncio
+import json
+from pathlib import Path
 from ws_bom_robot_app.llm.vector_store.integration.base import IntegrationStrategy, UnstructuredIngest
 from unstructured_ingest.processes.connectors.google_drive import GoogleDriveConnectionConfig, GoogleDriveDownloaderConfig, GoogleDriveIndexerConfig, GoogleDriveAccessConfig
+from unstructured_ingest.data_types.file_data import FileData as OriginalFileData, BatchFileData as OriginalBatchFileData
 from langchain_core.documents import Document
 from ws_bom_robot_app.llm.vector_store.loader.base import Loader
 from typing import Union
 from pydantic import BaseModel, Field, AliasChoices
+# UTF-8 safe FileData classes
+class FileData(OriginalFileData):
+  # Overrides the JSON load/save helpers so the text encoding is always explicit
+  @classmethod
+  def from_file(cls, path: str):
+    """Read a JSON file and validate it into this model, trying several encodings in turn"""
+    path = Path(path).resolve()
+    if not (path.exists() and path.is_file()):
+      raise ValueError(f"file path not valid: {path}")
+    candidates = ('utf-8', 'cp1252', 'iso-8859-1', 'latin-1')
+    for codec in candidates:
+      try:
+        with path.open("r", encoding=codec) as handle:
+          payload = json.load(handle)
+          return cls.model_validate(payload)
+      except UnicodeError:
+        # covers UnicodeDecodeError as well (its subclass); move on to the next codec
+        pass
+    raise ValueError(f"Could not decode file {path} with any supported encoding")
+  def to_file(self, path: str) -> None:
+    """Write the model as indented UTF-8 JSON, creating parent folders when needed"""
+    path = Path(path).resolve()
+    path.parent.mkdir(parents=True, exist_ok=True)
+    with path.open("w", encoding="utf-8") as handle:
+      json.dump(self.model_dump(), handle, indent=2, ensure_ascii=False)
+# Batch variant: OriginalBatchFileData comes first in the bases, FileData second,
+# so FileData's from_file/to_file overrides apply unless OriginalBatchFileData
+# defines its own (not visible here - TODO confirm).
+class BatchFileData(OriginalBatchFileData, FileData):
+  pass
 class GoogleDriveParams(BaseModel):
   """
   GoogleDriveParams is a model that holds parameters for Google Drive integration.
@@ -42,26 +70,27 @@ class GoogleDrive(IntegrationStrategy):
     super().__init__(knowledgebase_path, data)
     self.__data = GoogleDriveParams.model_validate(self.data)
     self.__unstructured_ingest = UnstructuredIngest(self.working_directory)
+    self._apply_encoding_fix()
+  def _apply_encoding_fix(self):
+    """
+    Replace FileData classes with UTF-8 safe versions.
+    Rebinds FileData, BatchFileData and file_data_from_file on the
+    unstructured_ingest.data_types.file_data module. Code that imported those
+    names before this call keeps its own references (NOTE(review): confirm the
+    pipeline resolves them through the module at call time).
+    """
+    import unstructured_ingest.data_types.file_data as fd
+    from pydantic import ValidationError
+    def _file_data_from_file(path: str) -> FileData:
+      # Try the batch model first and fall back to the plain model when the
+      # record does not validate as a batch. The previous `if path` test was
+      # true for every real path, so the FileData fallback was never reached.
+      try:
+        return BatchFileData.from_file(path)
+      except ValidationError:
+        return FileData.from_file(path)
+    fd.FileData = FileData
+    fd.BatchFileData = BatchFileData
+    fd.file_data_from_file = _file_data_from_file
   def working_subdirectory(self) -> str:
+    # Fixed sub-directory name for the Google Drive integration
     return 'googledrive'
   def run(self) -> None:
-    indexer_config = GoogleDriveIndexerConfig(
-      extensions=self.__data.extensions,
-      recursive=self.__data.recursive
-    )
-    downloader_config = GoogleDriveDownloaderConfig(
-      download_dir=self.working_directory
-    )
-    connection_config = GoogleDriveConnectionConfig(
-      access_config=GoogleDriveAccessConfig(
-        service_account_key=self.__data.service_account_key
-        ),
-      drive_id=self.__data.drive_id
-    )
+    # Builds the indexer, downloader and connection configs from the validated
+    # params and runs the ingest pipeline; this is a blocking call.
     self.__unstructured_ingest.pipeline(
-      indexer_config,
-      downloader_config,
-      connection_config).run()
+      GoogleDriveIndexerConfig(extensions=self.__data.extensions, recursive=self.__data.recursive),
+      GoogleDriveDownloaderConfig(download_dir=self.working_directory),
+      GoogleDriveConnectionConfig(
+        access_config=GoogleDriveAccessConfig(service_account_key=self.__data.service_account_key),
+        drive_id=self.__data.drive_id
+      )
+    ).run()
   async def load(self) -> list[Document]:
       await asyncio.to_thread(self.run)
       await asyncio.sleep(1)

{ws_bom_robot_app-0.0.95 → ws_bom_robot_app-0.0.97}/ws_bom_robot_app/llm/vector_store/integration/manager.py RENAMED Viewed

@@ -14,6 +14,7 @@ from ws_bom_robot_app.llm.vector_store.integration.sitemap import Sitemap
 from ws_bom_robot_app.llm.vector_store.integration.slack import Slack
 from ws_bom_robot_app.llm.vector_store.integration.thron import Thron
 from ws_bom_robot_app.llm.vector_store.integration.shopify import Shopify
+from ws_bom_robot_app.llm.vector_store.integration.api import Api
 class IntegrationManager:
   _list: dict[str, Type[IntegrationStrategy]] = {
     "llmkbazure": Azure,
@@ -30,6 +31,7 @@ class IntegrationManager:
     "llmkbslack": Slack,
     "llmkbthron": Thron,
     "llmkbshopify": Shopify,
+    "llmkbapi": Api,
   }
   @classmethod
   def get_strategy(cls, name: str, knowledgebase_path: str, data: dict[str, str]) -> IntegrationStrategy:

{ws_bom_robot_app-0.0.95 → ws_bom_robot_app-0.0.97/ws_bom_robot_app.egg-info}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: ws_bom_robot_app
-Version: 0.0.95
+Version: 0.0.97
 Summary: A FastAPI application serving ws bom/robot/llm platform ai.
 Home-page: https://github.com/websolutespa/bom
 Author: Websolute Spa

{ws_bom_robot_app-0.0.95 → ws_bom_robot_app-0.0.97}/ws_bom_robot_app.egg-info/SOURCES.txt RENAMED Viewed

@@ -59,6 +59,7 @@ ws_bom_robot_app/llm/vector_store/db/faiss.py
 ws_bom_robot_app/llm/vector_store/db/manager.py
 ws_bom_robot_app/llm/vector_store/db/qdrant.py
 ws_bom_robot_app/llm/vector_store/integration/__init__.py
+ws_bom_robot_app/llm/vector_store/integration/api.py
 ws_bom_robot_app/llm/vector_store/integration/azure.py
 ws_bom_robot_app/llm/vector_store/integration/base.py
 ws_bom_robot_app/llm/vector_store/integration/confluence.py