ws-bom-robot-app 0.0.95__tar.gz → 0.0.97__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (84)
  1. {ws_bom_robot_app-0.0.95/ws_bom_robot_app.egg-info → ws_bom_robot_app-0.0.97}/PKG-INFO +1 -1
  2. {ws_bom_robot_app-0.0.95 → ws_bom_robot_app-0.0.97}/setup.py +1 -1
  3. {ws_bom_robot_app-0.0.95 → ws_bom_robot_app-0.0.97}/ws_bom_robot_app/llm/agent_lcel.py +16 -3
  4. {ws_bom_robot_app-0.0.95 → ws_bom_robot_app-0.0.97}/ws_bom_robot_app/llm/main.py +1 -0
  5. {ws_bom_robot_app-0.0.95 → ws_bom_robot_app-0.0.97}/ws_bom_robot_app/llm/models/api.py +2 -0
  6. {ws_bom_robot_app-0.0.95 → ws_bom_robot_app-0.0.97}/ws_bom_robot_app/llm/providers/llm_manager.py +7 -2
  7. ws_bom_robot_app-0.0.97/ws_bom_robot_app/llm/vector_store/integration/api.py +216 -0
  8. {ws_bom_robot_app-0.0.95 → ws_bom_robot_app-0.0.97}/ws_bom_robot_app/llm/vector_store/integration/googledrive.py +45 -16
  9. {ws_bom_robot_app-0.0.95 → ws_bom_robot_app-0.0.97}/ws_bom_robot_app/llm/vector_store/integration/manager.py +2 -0
  10. {ws_bom_robot_app-0.0.95 → ws_bom_robot_app-0.0.97/ws_bom_robot_app.egg-info}/PKG-INFO +1 -1
  11. {ws_bom_robot_app-0.0.95 → ws_bom_robot_app-0.0.97}/ws_bom_robot_app.egg-info/SOURCES.txt +1 -0
  12. {ws_bom_robot_app-0.0.95 → ws_bom_robot_app-0.0.97}/MANIFEST.in +0 -0
  13. {ws_bom_robot_app-0.0.95 → ws_bom_robot_app-0.0.97}/README.md +0 -0
  14. {ws_bom_robot_app-0.0.95 → ws_bom_robot_app-0.0.97}/pyproject.toml +0 -0
  15. {ws_bom_robot_app-0.0.95 → ws_bom_robot_app-0.0.97}/requirements.txt +0 -0
  16. {ws_bom_robot_app-0.0.95 → ws_bom_robot_app-0.0.97}/setup.cfg +0 -0
  17. {ws_bom_robot_app-0.0.95 → ws_bom_robot_app-0.0.97}/ws_bom_robot_app/__init__.py +0 -0
  18. {ws_bom_robot_app-0.0.95 → ws_bom_robot_app-0.0.97}/ws_bom_robot_app/auth.py +0 -0
  19. {ws_bom_robot_app-0.0.95 → ws_bom_robot_app-0.0.97}/ws_bom_robot_app/config.py +0 -0
  20. {ws_bom_robot_app-0.0.95 → ws_bom_robot_app-0.0.97}/ws_bom_robot_app/cron_manager.py +0 -0
  21. {ws_bom_robot_app-0.0.95 → ws_bom_robot_app-0.0.97}/ws_bom_robot_app/llm/__init__.py +0 -0
  22. {ws_bom_robot_app-0.0.95 → ws_bom_robot_app-0.0.97}/ws_bom_robot_app/llm/agent_context.py +0 -0
  23. {ws_bom_robot_app-0.0.95 → ws_bom_robot_app-0.0.97}/ws_bom_robot_app/llm/agent_description.py +0 -0
  24. {ws_bom_robot_app-0.0.95 → ws_bom_robot_app-0.0.97}/ws_bom_robot_app/llm/agent_handler.py +0 -0
  25. {ws_bom_robot_app-0.0.95 → ws_bom_robot_app-0.0.97}/ws_bom_robot_app/llm/api.py +0 -0
  26. {ws_bom_robot_app-0.0.95 → ws_bom_robot_app-0.0.97}/ws_bom_robot_app/llm/defaut_prompt.py +0 -0
  27. {ws_bom_robot_app-0.0.95 → ws_bom_robot_app-0.0.97}/ws_bom_robot_app/llm/evaluator.py +0 -0
  28. {ws_bom_robot_app-0.0.95 → ws_bom_robot_app-0.0.97}/ws_bom_robot_app/llm/feedbacks/__init__.py +0 -0
  29. {ws_bom_robot_app-0.0.95 → ws_bom_robot_app-0.0.97}/ws_bom_robot_app/llm/feedbacks/feedback_manager.py +0 -0
  30. {ws_bom_robot_app-0.0.95 → ws_bom_robot_app-0.0.97}/ws_bom_robot_app/llm/models/__init__.py +0 -0
  31. {ws_bom_robot_app-0.0.95 → ws_bom_robot_app-0.0.97}/ws_bom_robot_app/llm/models/base.py +0 -0
  32. {ws_bom_robot_app-0.0.95 → ws_bom_robot_app-0.0.97}/ws_bom_robot_app/llm/models/feedback.py +0 -0
  33. {ws_bom_robot_app-0.0.95 → ws_bom_robot_app-0.0.97}/ws_bom_robot_app/llm/models/kb.py +0 -0
  34. {ws_bom_robot_app-0.0.95 → ws_bom_robot_app-0.0.97}/ws_bom_robot_app/llm/nebuly_handler.py +0 -0
  35. {ws_bom_robot_app-0.0.95 → ws_bom_robot_app-0.0.97}/ws_bom_robot_app/llm/providers/__init__.py +0 -0
  36. {ws_bom_robot_app-0.0.95 → ws_bom_robot_app-0.0.97}/ws_bom_robot_app/llm/tools/__init__.py +0 -0
  37. {ws_bom_robot_app-0.0.95 → ws_bom_robot_app-0.0.97}/ws_bom_robot_app/llm/tools/models/__init__.py +0 -0
  38. {ws_bom_robot_app-0.0.95 → ws_bom_robot_app-0.0.97}/ws_bom_robot_app/llm/tools/models/main.py +0 -0
  39. {ws_bom_robot_app-0.0.95 → ws_bom_robot_app-0.0.97}/ws_bom_robot_app/llm/tools/tool_builder.py +0 -0
  40. {ws_bom_robot_app-0.0.95 → ws_bom_robot_app-0.0.97}/ws_bom_robot_app/llm/tools/tool_manager.py +0 -0
  41. {ws_bom_robot_app-0.0.95 → ws_bom_robot_app-0.0.97}/ws_bom_robot_app/llm/tools/utils.py +0 -0
  42. {ws_bom_robot_app-0.0.95 → ws_bom_robot_app-0.0.97}/ws_bom_robot_app/llm/utils/__init__.py +0 -0
  43. {ws_bom_robot_app-0.0.95 → ws_bom_robot_app-0.0.97}/ws_bom_robot_app/llm/utils/agent.py +0 -0
  44. {ws_bom_robot_app-0.0.95 → ws_bom_robot_app-0.0.97}/ws_bom_robot_app/llm/utils/chunker.py +0 -0
  45. {ws_bom_robot_app-0.0.95 → ws_bom_robot_app-0.0.97}/ws_bom_robot_app/llm/utils/cleanup.py +0 -0
  46. {ws_bom_robot_app-0.0.95 → ws_bom_robot_app-0.0.97}/ws_bom_robot_app/llm/utils/cms.py +0 -0
  47. {ws_bom_robot_app-0.0.95 → ws_bom_robot_app-0.0.97}/ws_bom_robot_app/llm/utils/download.py +0 -0
  48. {ws_bom_robot_app-0.0.95 → ws_bom_robot_app-0.0.97}/ws_bom_robot_app/llm/utils/print.py +0 -0
  49. {ws_bom_robot_app-0.0.95 → ws_bom_robot_app-0.0.97}/ws_bom_robot_app/llm/utils/secrets.py +0 -0
  50. {ws_bom_robot_app-0.0.95 → ws_bom_robot_app-0.0.97}/ws_bom_robot_app/llm/utils/webhooks.py +0 -0
  51. {ws_bom_robot_app-0.0.95 → ws_bom_robot_app-0.0.97}/ws_bom_robot_app/llm/vector_store/__init__.py +0 -0
  52. {ws_bom_robot_app-0.0.95 → ws_bom_robot_app-0.0.97}/ws_bom_robot_app/llm/vector_store/db/__init__.py +0 -0
  53. {ws_bom_robot_app-0.0.95 → ws_bom_robot_app-0.0.97}/ws_bom_robot_app/llm/vector_store/db/base.py +0 -0
  54. {ws_bom_robot_app-0.0.95 → ws_bom_robot_app-0.0.97}/ws_bom_robot_app/llm/vector_store/db/chroma.py +0 -0
  55. {ws_bom_robot_app-0.0.95 → ws_bom_robot_app-0.0.97}/ws_bom_robot_app/llm/vector_store/db/faiss.py +0 -0
  56. {ws_bom_robot_app-0.0.95 → ws_bom_robot_app-0.0.97}/ws_bom_robot_app/llm/vector_store/db/manager.py +0 -0
  57. {ws_bom_robot_app-0.0.95 → ws_bom_robot_app-0.0.97}/ws_bom_robot_app/llm/vector_store/db/qdrant.py +0 -0
  58. {ws_bom_robot_app-0.0.95 → ws_bom_robot_app-0.0.97}/ws_bom_robot_app/llm/vector_store/generator.py +0 -0
  59. {ws_bom_robot_app-0.0.95 → ws_bom_robot_app-0.0.97}/ws_bom_robot_app/llm/vector_store/integration/__init__.py +0 -0
  60. {ws_bom_robot_app-0.0.95 → ws_bom_robot_app-0.0.97}/ws_bom_robot_app/llm/vector_store/integration/azure.py +0 -0
  61. {ws_bom_robot_app-0.0.95 → ws_bom_robot_app-0.0.97}/ws_bom_robot_app/llm/vector_store/integration/base.py +0 -0
  62. {ws_bom_robot_app-0.0.95 → ws_bom_robot_app-0.0.97}/ws_bom_robot_app/llm/vector_store/integration/confluence.py +0 -0
  63. {ws_bom_robot_app-0.0.95 → ws_bom_robot_app-0.0.97}/ws_bom_robot_app/llm/vector_store/integration/dropbox.py +0 -0
  64. {ws_bom_robot_app-0.0.95 → ws_bom_robot_app-0.0.97}/ws_bom_robot_app/llm/vector_store/integration/gcs.py +0 -0
  65. {ws_bom_robot_app-0.0.95 → ws_bom_robot_app-0.0.97}/ws_bom_robot_app/llm/vector_store/integration/github.py +0 -0
  66. {ws_bom_robot_app-0.0.95 → ws_bom_robot_app-0.0.97}/ws_bom_robot_app/llm/vector_store/integration/jira.py +0 -0
  67. {ws_bom_robot_app-0.0.95 → ws_bom_robot_app-0.0.97}/ws_bom_robot_app/llm/vector_store/integration/s3.py +0 -0
  68. {ws_bom_robot_app-0.0.95 → ws_bom_robot_app-0.0.97}/ws_bom_robot_app/llm/vector_store/integration/sftp.py +0 -0
  69. {ws_bom_robot_app-0.0.95 → ws_bom_robot_app-0.0.97}/ws_bom_robot_app/llm/vector_store/integration/sharepoint.py +0 -0
  70. {ws_bom_robot_app-0.0.95 → ws_bom_robot_app-0.0.97}/ws_bom_robot_app/llm/vector_store/integration/shopify.py +0 -0
  71. {ws_bom_robot_app-0.0.95 → ws_bom_robot_app-0.0.97}/ws_bom_robot_app/llm/vector_store/integration/sitemap.py +0 -0
  72. {ws_bom_robot_app-0.0.95 → ws_bom_robot_app-0.0.97}/ws_bom_robot_app/llm/vector_store/integration/slack.py +0 -0
  73. {ws_bom_robot_app-0.0.95 → ws_bom_robot_app-0.0.97}/ws_bom_robot_app/llm/vector_store/integration/thron.py +0 -0
  74. {ws_bom_robot_app-0.0.95 → ws_bom_robot_app-0.0.97}/ws_bom_robot_app/llm/vector_store/loader/__init__.py +0 -0
  75. {ws_bom_robot_app-0.0.95 → ws_bom_robot_app-0.0.97}/ws_bom_robot_app/llm/vector_store/loader/base.py +0 -0
  76. {ws_bom_robot_app-0.0.95 → ws_bom_robot_app-0.0.97}/ws_bom_robot_app/llm/vector_store/loader/docling.py +0 -0
  77. {ws_bom_robot_app-0.0.95 → ws_bom_robot_app-0.0.97}/ws_bom_robot_app/llm/vector_store/loader/json_loader.py +0 -0
  78. {ws_bom_robot_app-0.0.95 → ws_bom_robot_app-0.0.97}/ws_bom_robot_app/main.py +0 -0
  79. {ws_bom_robot_app-0.0.95 → ws_bom_robot_app-0.0.97}/ws_bom_robot_app/subprocess_runner.py +0 -0
  80. {ws_bom_robot_app-0.0.95 → ws_bom_robot_app-0.0.97}/ws_bom_robot_app/task_manager.py +0 -0
  81. {ws_bom_robot_app-0.0.95 → ws_bom_robot_app-0.0.97}/ws_bom_robot_app/util.py +0 -0
  82. {ws_bom_robot_app-0.0.95 → ws_bom_robot_app-0.0.97}/ws_bom_robot_app.egg-info/dependency_links.txt +0 -0
  83. {ws_bom_robot_app-0.0.95 → ws_bom_robot_app-0.0.97}/ws_bom_robot_app.egg-info/requires.txt +0 -0
  84. {ws_bom_robot_app-0.0.95 → ws_bom_robot_app-0.0.97}/ws_bom_robot_app.egg-info/top_level.txt +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: ws_bom_robot_app
3
- Version: 0.0.95
3
+ Version: 0.0.97
4
4
  Summary: A FastAPI application serving ws bom/robot/llm platform ai.
5
5
  Home-page: https://github.com/websolutespa/bom
6
6
  Author: Websolute Spa
@@ -4,7 +4,7 @@ _requirements = [line.split('#')[0].strip() for line in open("requirements.txt")
4
4
 
5
5
  setup(
6
6
  name="ws_bom_robot_app",
7
- version="0.0.95",
7
+ version="0.0.97",
8
8
  description="A FastAPI application serving ws bom/robot/llm platform ai.",
9
9
  long_description=open("README.md", encoding='utf-8').read(),
10
10
  long_description_content_type="text/markdown",
@@ -1,4 +1,4 @@
1
- from typing import Any, Optional
1
+ from typing import Any, Optional, Literal
2
2
  from langchain.agents import AgentExecutor, create_tool_calling_agent
3
3
  from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
4
4
  from langchain_core.runnables import RunnableLambda
@@ -12,8 +12,10 @@ from ws_bom_robot_app.llm.defaut_prompt import default_prompt, tool_prompt
12
12
 
13
13
  class AgentLcel:
14
14
 
15
- def __init__(self, llm: LlmInterface, sys_message: str, sys_context: AgentContext, tools: list, rules: LlmRules = None):
15
+ def __init__(self, llm: LlmInterface, sys_message: str, sys_context: AgentContext, tools: list, ouput_model: str | dict = None, rules: LlmRules = None):
16
16
  self.sys_message = chevron.render(template=sys_message,data=sys_context)
17
+ self.ouput_model = ouput_model
18
+ self.output_parser = None
17
19
  self.__llm = llm
18
20
  self.__tools = tools
19
21
  self.rules = rules
@@ -27,6 +29,10 @@ class AgentLcel:
27
29
  message : LlmMessage = input[self.memory_key][-1]
28
30
  rules_prompt = await get_rules(self.embeddings, self.rules, message.content) if self.rules else ""
29
31
  system = default_prompt + (tool_prompt(render_text_description(self.__tools)) if len(self.__tools)>0 else "") + self.sys_message + rules_prompt
32
+ if isinstance(self.ouput_model, dict):
33
+ output_parser = self.__llm.get_parser(type="json", model=self.ouput_model)
34
+ system += "\n\nFormat instructions:\n{format_instructions}".strip()
35
+
30
36
  prompt = ChatPromptTemplate(
31
37
  messages=[
32
38
  SystemMessage(content=system), #from ("system",system) to avoid improper f-string substitutions
@@ -35,6 +41,13 @@ class AgentLcel:
35
41
  ],
36
42
  template_format=None,
37
43
  )
44
+ if output_parser:
45
+ prompt.partial(
46
+ format_instructions=output_parser.get_format_instructions()
47
+ )
48
+ self.output_parser = output_parser
49
+ else:
50
+ self.output_parser = self.__llm.get_parser(type="text")
38
51
  return prompt
39
52
 
40
53
  def __create_agent(self) -> AgentExecutor:
@@ -45,6 +58,6 @@ class AgentLcel:
45
58
  }
46
59
  | RunnableLambda(self.__create_prompt)
47
60
  | self.__llm_with_tools
48
- | self.__llm.get_parser()
61
+ | self.__llm.get_parser("text", None if not self.output_parser else "json", self.ouput_model)
49
62
  )
50
63
  return AgentExecutor(agent=agent,tools=self.__tools,verbose=False)
@@ -127,6 +127,7 @@ async def __stream(rq: StreamRequest, ctx: Request, queue: Queue, formatted: boo
127
127
  sys_message=rq.system_message,
128
128
  sys_context=rq.system_context,
129
129
  tools=get_structured_tools(__llm, tools=rq.app_tools, callbacks=[callbacks], queue=queue),
130
+ ouput_model=rq.jsonOutputModel,
130
131
  rules=rq.rules
131
132
  )
132
133
  try:
@@ -150,6 +150,8 @@ class LlmApp(BaseModel):
150
150
  app_tools: Optional[List[LlmAppTool]] = Field([], validation_alias=AliasChoices("appTools","app_tools"))
151
151
  vector_type: Optional[str] = "faiss"
152
152
  vector_db: Optional[str] = Field(None, validation_alias=AliasChoices("vectorDb","vector_db"))
153
+ output_model: Optional[str] = Field(None, validation_alias=AliasChoices("outputModel","output_model"))
154
+ jsonOutputModel: Optional[dict] = Field(None, validation_alias=AliasChoices("jsonOutputModel","json_output_model"))
153
155
  rules: Optional[LlmRules] = None
154
156
  fine_tuned_model: Optional[str] = Field(None, validation_alias=AliasChoices("fineTunedModel","fine_tuned_model"))
155
157
  lang_chain_tracing: Optional[bool] = Field(False, validation_alias=AliasChoices("langChainTracing","lang_chain_tracing"))
@@ -1,4 +1,4 @@
1
- from typing import Optional
1
+ from typing import Optional, Literal
2
2
  from urllib.parse import urlparse
3
3
  from langchain_core.embeddings import Embeddings
4
4
  from langchain_core.language_models import BaseChatModel
@@ -34,9 +34,14 @@ class LlmInterface:
34
34
  from langchain.agents.format_scratchpad.openai_tools import format_to_openai_tool_messages
35
35
  return format_to_openai_tool_messages(intermediate_steps=intermadiate_steps)
36
36
 
37
- def get_parser(self):
37
+ def get_parser(self, type: Literal["text", "json"] = "text", model: Optional[dict] = None):
38
38
  from langchain.agents.output_parsers.openai_tools import OpenAIToolsAgentOutputParser
39
+ from langchain_core.output_parsers import JsonOutputParser
40
+ from pydantic import create_model
41
+ if type == "json":
42
+ return JsonOutputParser(pydantic_object=create_model('json_schema', **{k: (type(v), ...) for k, v in model.items()}))
39
43
  return OpenAIToolsAgentOutputParser()
44
+
40
45
  async def _format_multimodal_image_message(self, message: dict) -> dict:
41
46
  return {
42
47
  "type": "image_url",
@@ -0,0 +1,216 @@
1
+ import asyncio, logging, aiohttp
2
+ from ws_bom_robot_app.llm.vector_store.integration.base import IntegrationStrategy
3
+ from langchain_core.documents import Document
4
+ from ws_bom_robot_app.llm.vector_store.loader.base import Loader
5
+ from typing import List, Union, Optional, Dict, Any, Literal
6
+ from pydantic import BaseModel, Field, AliasChoices, field_validator
7
+ import json
8
+ import os
9
+
10
+
11
+ class AuthConfig(BaseModel):
12
+ """
13
+ Configuration for API authentication.
14
+
15
+ Attributes:
16
+ type: Type of authentication (bearer, basic, api_key, custom, none)
17
+ token: Bearer token or API key value
18
+ username: Username for basic auth
19
+ password: Password for basic auth
20
+ header_name: Custom header name for API key
21
+ prefix: Prefix for the auth value (e.g., 'Bearer', 'Token')
22
+ """
23
+ type: Literal["bearer", "basic", "api_key", "custom", "none"] = Field(default="none")
24
+ token: Optional[str] = Field(default=None)
25
+ username: Optional[str] = Field(default=None)
26
+ password: Optional[str] = Field(default=None)
27
+ header_name: Optional[str] = Field(default=None, validation_alias=AliasChoices("headerName", "header_name"))
28
+ prefix: Optional[str] = Field(default=None)
29
+
30
+
31
+ class ApiParams(BaseModel):
32
+ """
33
+ Generic API Integration Parameters.
34
+
35
+ Attributes:
36
+ url: The base URL of the API endpoint
37
+ method: HTTP method (GET, POST, PUT, DELETE, PATCH)
38
+ headers: Custom headers to include in the request
39
+ params: Query parameters for the request
40
+ body: Request body for POST/PUT/PATCH requests
41
+ auth: Authentication configuration
42
+ response_data_path: JSON path to extract data from response (e.g., 'data.items', 'results')
43
+ max_retries: Maximum number of retry attempts for failed requests
44
+ retry_delay: Base delay in seconds between retries (uses exponential backoff)
45
+ timeout: Request timeout in seconds
46
+ """
47
+ url: str = Field(validation_alias=AliasChoices("url", "endpoint"))
48
+ method: Literal["GET", "POST", "PUT", "DELETE", "PATCH"] = Field(default="GET")
49
+ headers: Optional[Dict[str, str]] = Field(default_factory=dict)
50
+ params: Optional[Dict[str, Any]] = Field(default_factory=dict)
51
+ body: Optional[Union[Dict[str, Any], str]] = Field(default=None)
52
+ auth: Optional[AuthConfig] = Field(default_factory=lambda: AuthConfig())
53
+ response_data_path: Optional[str] = Field(default=None, validation_alias=AliasChoices("responseDataPath", "response_data_path"))
54
+ max_retries: int = Field(default=5, validation_alias=AliasChoices("maxRetries", "max_retries"))
55
+ retry_delay: float = Field(default=1.0, validation_alias=AliasChoices("retryDelay", "retry_delay"))
56
+ timeout: int = Field(default=30)
57
+
58
+ @field_validator('auth', mode='before')
59
+ @classmethod
60
+ def parse_auth(cls, v):
61
+ """Parse auth config from dict if needed"""
62
+ if isinstance(v, dict):
63
+ return AuthConfig(**v)
64
+ return v or AuthConfig()
65
+
66
+
67
+ class Api(IntegrationStrategy):
68
+ """
69
+ Generic API Integration that supports:
70
+ - Multiple HTTP methods (GET, POST, PUT, DELETE, PATCH)
71
+ - Various authentication types (Bearer, Basic, API Key, Custom)
72
+ - Custom headers and parameters
73
+ - Automatic retry with exponential backoff
74
+ - Flexible response data extraction
75
+ """
76
+
77
+ def __init__(self, knowledgebase_path: str, data: dict[str, Union[str, int, list]]):
78
+ super().__init__(knowledgebase_path, data)
79
+ self.__data = ApiParams.model_validate(self.data)
80
+
81
+ def working_subdirectory(self) -> str:
82
+ return 'api_integration'
83
+
84
+ async def run(self) -> None:
85
+ """Fetch data from the API and save to JSON file"""
86
+ _data = await self.__fetch_data()
87
+ json_file_path = os.path.join(self.working_directory, 'api_data.json')
88
+ with open(json_file_path, 'w', encoding='utf-8') as f:
89
+ json.dump(_data, f, ensure_ascii=False, indent=2)
90
+ logging.info(f"Saved {len(_data) if isinstance(_data, list) else 1} items to {json_file_path}")
91
+
92
+ async def load(self) -> list[Document]:
93
+ """Load data from API and convert to documents"""
94
+ await self.run()
95
+ await asyncio.sleep(1)
96
+ return await Loader(self.working_directory).load()
97
+
98
+ def __prepare_headers(self) -> Dict[str, str]:
99
+ """Prepare request headers with authentication"""
100
+ headers = self.__data.headers.copy() if self.__data.headers else {}
101
+
102
+ # Add Content-Type if not present
103
+ if 'Content-Type' not in headers and self.__data.method in ["POST", "PUT", "PATCH"]:
104
+ headers['Content-Type'] = 'application/json'
105
+
106
+ # Add authentication
107
+ auth = self.__data.auth
108
+ if auth.type == "bearer":
109
+ prefix = auth.prefix or "Bearer"
110
+ headers['Authorization'] = f"{prefix} {auth.token}"
111
+ elif auth.type == "basic":
112
+ import base64
113
+ credentials = f"{auth.username}:{auth.password}"
114
+ encoded = base64.b64encode(credentials.encode()).decode()
115
+ headers['Authorization'] = f"Basic {encoded}"
116
+ elif auth.type == "api_key" and auth.header_name:
117
+ prefix = f"{auth.prefix} " if auth.prefix else ""
118
+ headers[auth.header_name] = f"{prefix}{auth.token}"
119
+
120
+ return headers
121
+
122
+ def __get_nested_value(self, data: Any, path: Optional[str]) -> Any:
123
+ """Extract nested value from data using dot notation path"""
124
+ if not path:
125
+ return data
126
+
127
+ keys = path.split('.')
128
+ current = data
129
+ for key in keys:
130
+ if isinstance(current, dict):
131
+ current = current.get(key)
132
+ elif isinstance(current, list) and key.isdigit():
133
+ current = current[int(key)]
134
+ else:
135
+ return None
136
+
137
+ if current is None:
138
+ return None
139
+
140
+ return current
141
+
142
+ async def __make_request(
143
+ self,
144
+ url: str,
145
+ headers: Dict[str, str],
146
+ params: Optional[Dict[str, Any]] = None
147
+ ) -> Dict[str, Any]:
148
+ """Make HTTP request with retry logic"""
149
+ retry_count = 0
150
+
151
+ while retry_count <= self.__data.max_retries:
152
+ try:
153
+ timeout = aiohttp.ClientTimeout(total=self.__data.timeout)
154
+
155
+ async with aiohttp.ClientSession(timeout=timeout) as session:
156
+ request_kwargs = {
157
+ "headers": headers,
158
+ "params": params or self.__data.params
159
+ }
160
+
161
+ # Add body for POST/PUT/PATCH
162
+ if self.__data.method in ["POST", "PUT", "PATCH"] and self.__data.body:
163
+ if isinstance(self.__data.body, dict):
164
+ request_kwargs["json"] = self.__data.body
165
+ else:
166
+ request_kwargs["data"] = self.__data.body
167
+
168
+ async with session.request(
169
+ self.__data.method,
170
+ url,
171
+ **request_kwargs
172
+ ) as response:
173
+ # Check response status
174
+ if response.status == 429: # Rate limit
175
+ retry_count += 1
176
+ if retry_count > self.__data.max_retries:
177
+ raise Exception("Rate limit exceeded. Maximum retries reached.")
178
+
179
+ wait_time = self.__data.retry_delay * (2 ** retry_count)
180
+ logging.warning(f"Rate limited. Waiting {wait_time}s (Attempt {retry_count}/{self.__data.max_retries})")
181
+ await asyncio.sleep(wait_time)
182
+ continue
183
+
184
+ response.raise_for_status()
185
+
186
+ # Parse response
187
+ try:
188
+ data = await response.json()
189
+ return data
190
+ except aiohttp.ContentTypeError:
191
+ text = await response.text()
192
+ logging.warning(f"Non-JSON response received: {text[:200]}")
193
+ return {"text": text}
194
+
195
+ except aiohttp.ClientError as e:
196
+ retry_count += 1
197
+ if retry_count > self.__data.max_retries:
198
+ raise Exception(f"Request failed after {self.__data.max_retries} retries: {e}")
199
+
200
+ wait_time = self.__data.retry_delay * (2 ** retry_count)
201
+ logging.warning(f"Request error: {e}. Retrying in {wait_time}s...")
202
+ await asyncio.sleep(wait_time)
203
+ continue
204
+
205
+ raise Exception("Maximum retries exceeded")
206
+
207
+ async def __fetch_data(self) -> Any:
208
+ """Fetch data from API"""
209
+ headers = self.__prepare_headers()
210
+ response = await self.__make_request(self.__data.url, headers)
211
+
212
+ # Extract data from response using path if specified
213
+ data = self.__get_nested_value(response, self.__data.response_data_path)
214
+ result = data if data is not None else response
215
+
216
+ return result
@@ -1,10 +1,38 @@
1
1
  import asyncio
2
+ import json
3
+ from pathlib import Path
2
4
  from ws_bom_robot_app.llm.vector_store.integration.base import IntegrationStrategy, UnstructuredIngest
3
5
  from unstructured_ingest.processes.connectors.google_drive import GoogleDriveConnectionConfig, GoogleDriveDownloaderConfig, GoogleDriveIndexerConfig, GoogleDriveAccessConfig
6
+ from unstructured_ingest.data_types.file_data import FileData as OriginalFileData, BatchFileData as OriginalBatchFileData
4
7
  from langchain_core.documents import Document
5
8
  from ws_bom_robot_app.llm.vector_store.loader.base import Loader
6
9
  from typing import Union
7
10
  from pydantic import BaseModel, Field, AliasChoices
11
+
12
+ # UTF-8 safe FileData classes
13
+ class FileData(OriginalFileData):
14
+ @classmethod
15
+ def from_file(cls, path: str):
16
+ path = Path(path).resolve()
17
+ if not path.exists() or not path.is_file():
18
+ raise ValueError(f"file path not valid: {path}")
19
+ for encoding in ['utf-8', 'cp1252', 'iso-8859-1', 'latin-1']:
20
+ try:
21
+ with open(str(path), "r", encoding=encoding) as f:
22
+ return cls.model_validate(json.load(f))
23
+ except (UnicodeDecodeError, UnicodeError):
24
+ continue
25
+ raise ValueError(f"Could not decode file {path} with any supported encoding")
26
+
27
+ def to_file(self, path: str) -> None:
28
+ path = Path(path).resolve()
29
+ path.parent.mkdir(parents=True, exist_ok=True)
30
+ with open(str(path), "w", encoding="utf-8") as f:
31
+ json.dump(self.model_dump(), f, indent=2, ensure_ascii=False)
32
+
33
+ class BatchFileData(OriginalBatchFileData, FileData):
34
+ pass
35
+
8
36
  class GoogleDriveParams(BaseModel):
9
37
  """
10
38
  GoogleDriveParams is a model that holds parameters for Google Drive integration.
@@ -42,26 +70,27 @@ class GoogleDrive(IntegrationStrategy):
42
70
  super().__init__(knowledgebase_path, data)
43
71
  self.__data = GoogleDriveParams.model_validate(self.data)
44
72
  self.__unstructured_ingest = UnstructuredIngest(self.working_directory)
73
+ self._apply_encoding_fix()
74
+
75
+ def _apply_encoding_fix(self):
76
+ """Replace FileData classes with UTF-8 safe versions"""
77
+ import unstructured_ingest.data_types.file_data as fd
78
+ fd.FileData = FileData
79
+ fd.BatchFileData = BatchFileData
80
+ fd.file_data_from_file = lambda path: BatchFileData.from_file(path) if path else FileData.from_file(path)
81
+
45
82
  def working_subdirectory(self) -> str:
46
83
  return 'googledrive'
84
+
47
85
  def run(self) -> None:
48
- indexer_config = GoogleDriveIndexerConfig(
49
- extensions=self.__data.extensions,
50
- recursive=self.__data.recursive
51
- )
52
- downloader_config = GoogleDriveDownloaderConfig(
53
- download_dir=self.working_directory
54
- )
55
- connection_config = GoogleDriveConnectionConfig(
56
- access_config=GoogleDriveAccessConfig(
57
- service_account_key=self.__data.service_account_key
58
- ),
59
- drive_id=self.__data.drive_id
60
- )
61
86
  self.__unstructured_ingest.pipeline(
62
- indexer_config,
63
- downloader_config,
64
- connection_config).run()
87
+ GoogleDriveIndexerConfig(extensions=self.__data.extensions, recursive=self.__data.recursive),
88
+ GoogleDriveDownloaderConfig(download_dir=self.working_directory),
89
+ GoogleDriveConnectionConfig(
90
+ access_config=GoogleDriveAccessConfig(service_account_key=self.__data.service_account_key),
91
+ drive_id=self.__data.drive_id
92
+ )
93
+ ).run()
65
94
  async def load(self) -> list[Document]:
66
95
  await asyncio.to_thread(self.run)
67
96
  await asyncio.sleep(1)
@@ -14,6 +14,7 @@ from ws_bom_robot_app.llm.vector_store.integration.sitemap import Sitemap
14
14
  from ws_bom_robot_app.llm.vector_store.integration.slack import Slack
15
15
  from ws_bom_robot_app.llm.vector_store.integration.thron import Thron
16
16
  from ws_bom_robot_app.llm.vector_store.integration.shopify import Shopify
17
+ from ws_bom_robot_app.llm.vector_store.integration.api import Api
17
18
  class IntegrationManager:
18
19
  _list: dict[str, Type[IntegrationStrategy]] = {
19
20
  "llmkbazure": Azure,
@@ -30,6 +31,7 @@ class IntegrationManager:
30
31
  "llmkbslack": Slack,
31
32
  "llmkbthron": Thron,
32
33
  "llmkbshopify": Shopify,
34
+ "llmkbapi": Api,
33
35
  }
34
36
  @classmethod
35
37
  def get_strategy(cls, name: str, knowledgebase_path: str, data: dict[str, str]) -> IntegrationStrategy:
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: ws_bom_robot_app
3
- Version: 0.0.95
3
+ Version: 0.0.97
4
4
  Summary: A FastAPI application serving ws bom/robot/llm platform ai.
5
5
  Home-page: https://github.com/websolutespa/bom
6
6
  Author: Websolute Spa
@@ -59,6 +59,7 @@ ws_bom_robot_app/llm/vector_store/db/faiss.py
59
59
  ws_bom_robot_app/llm/vector_store/db/manager.py
60
60
  ws_bom_robot_app/llm/vector_store/db/qdrant.py
61
61
  ws_bom_robot_app/llm/vector_store/integration/__init__.py
62
+ ws_bom_robot_app/llm/vector_store/integration/api.py
62
63
  ws_bom_robot_app/llm/vector_store/integration/azure.py
63
64
  ws_bom_robot_app/llm/vector_store/integration/base.py
64
65
  ws_bom_robot_app/llm/vector_store/integration/confluence.py