ws-bom-robot-app 0.0.105__py3-none-any.whl → 0.0.107__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -66,6 +66,7 @@ class LlmAppToolChainSettings(BaseModel):
66
66
 
67
67
  class LlmAppToolDbSettings(BaseModel):
68
68
  connection_string: Optional[str] = Field(None, validation_alias=AliasChoices("connectionString","connection_string"))
69
+ additionalPrompt: Optional[str] = Field(None, validation_alias=AliasChoices("additionalPrompt","additional_prompt"))
69
70
 
70
71
  class LlmAppTool(BaseModel):
71
72
  id: Optional[str] = None
@@ -235,6 +236,9 @@ class RulesRequest(VectorDbRequest):
235
236
  rules: List[str]
236
237
 
237
238
  class KbRequest(VectorDbRequest):
239
+ chucking_method: Optional[str] = Field("recursive", validation_alias=AliasChoices("chunkingMethod","chunking_method"))
240
+ chuck_size: Optional[int] = Field(3_000, validation_alias=AliasChoices("chunkSize","chunk_size"))
241
+ chunk_overlap: Optional[int] = Field(300, validation_alias=AliasChoices("chunkOverlap","chunk_overlap"))
238
242
  files: Optional[List[str]] = []
239
243
  integrations: Optional[List[LlmKbIntegration]] = []
240
244
  endpoints: Optional[List[LlmKbEndpoint]] = []
@@ -78,6 +78,80 @@ class ToolManager:
78
78
  source=app_tool.function_id,
79
79
  )
80
80
 
81
+ async def __download_sqlite_file(self, db_uri: str) -> str:
82
+ """
83
+ Scarica il file SQLite dalla CMS se necessario e restituisce il percorso locale.
84
+ Usa la stessa logica dell'integrazione Sitemap.
85
+
86
+ Args:
87
+ db_uri: URI del database o nome del file SQLite
88
+
89
+ Returns:
90
+ str: URI del database locale (sqlite:///path/to/file.db)
91
+ """
92
+ import os
93
+ from ws_bom_robot_app.config import config
94
+ from ws_bom_robot_app.llm.utils.download import download_file
95
+
96
+ if not db_uri.endswith('.db') and not db_uri.endswith('.sqlite') and not db_uri.endswith('.sqlite3'):
97
+ return db_uri
98
+
99
+ if db_uri.startswith('sqlite:///'):
100
+ file_path = db_uri.replace('sqlite:///', '')
101
+ if os.path.isabs(file_path) and os.path.exists(file_path):
102
+ return db_uri
103
+ filename = os.path.basename(file_path)
104
+ else:
105
+ filename = db_uri
106
+
107
+ db_folder = os.path.join(config.robot_data_folder, 'db')
108
+ os.makedirs(db_folder, exist_ok=True)
109
+
110
+ local_db_path = os.path.join(db_folder, filename)
111
+
112
+ if os.path.exists(local_db_path):
113
+ return f"sqlite:///{local_db_path}"
114
+
115
+ cms_file_url = f"{config.robot_cms_host}/{config.robot_cms_kb_folder}/{filename}"
116
+ auth = config.robot_cms_auth
117
+
118
+ try:
119
+ result = await download_file(cms_file_url, local_db_path, authorization=auth)
120
+ if result:
121
+ return f"sqlite:///{local_db_path}"
122
+ else:
123
+ raise ValueError(f"File SQLite {filename} non trovato nella CMS")
124
+ except Exception as e:
125
+ raise ValueError(f"Errore durante il download del file SQLite {filename}: {str(e)}")
126
+
127
+ async def __query_database(self, query: str, app_tool: LlmAppTool):
128
+ from langchain_community.agent_toolkits.sql.base import create_sql_agent
129
+ from langchain_community.utilities import SQLDatabase
130
+
131
+ secrets = app_tool.secrets_to_dict()
132
+
133
+ db_uri = app_tool.db_settings.connection_string
134
+ additional_prompt = app_tool.db_settings.additionalPrompt
135
+ if not db_uri:
136
+ raise ValueError("Database URI not found in tool secrets")
137
+
138
+ db_uri = await self.__download_sqlite_file(db_uri)
139
+
140
+ db = SQLDatabase.from_uri(db_uri)
141
+ llm = self.llm.get_llm()
142
+
143
+ agent = create_sql_agent(
144
+ llm=llm,
145
+ db=db,
146
+ agent_type="tool-calling",
147
+ suffix=additional_prompt if additional_prompt else None,
148
+ )
149
+
150
+ result = await agent.ainvoke({"input": query}, config={"callbacks": []})
151
+ if result and "output" in result:
152
+ return result["output"]
153
+ return None
154
+
81
155
  #region functions
82
156
  async def document_retriever(self, query: str) -> list:
83
157
  """
@@ -99,9 +173,11 @@ class ToolManager:
99
173
  """
100
174
  if (
101
175
  self.app_tool.type == "function" and self.app_tool.vector_db
102
- #and self.settings.get("dataSource") == "knowledgebase"
176
+ and self.app_tool.data_source == "knowledgebase"
103
177
  ):
104
178
  return await self.__extract_documents(query, self.app_tool)
179
+ elif self.app_tool.type == "function" and self.app_tool.data_source == "database":
180
+ return await self.__query_database(query, self.app_tool)
105
181
 
106
182
  async def image_generator(self, query: str, language: str = "it"):
107
183
  """
@@ -1,13 +1,17 @@
1
1
  from langchain_core.documents import Document
2
2
  from langchain_text_splitters import RecursiveCharacterTextSplitter
3
+ from langchain_text_splitters import MarkdownHeaderTextSplitter
3
4
 
4
5
  class DocumentChunker:
5
6
  @staticmethod
6
- def chunk(documents: list[Document]) -> list[Document]:
7
- return DocumentChunker.chunk_recursive(documents)
7
+ def chunk(documents: list[Document], chucking_method: str = "recursive", chunk_size: int=3_000, chunk_overlap: int=300) -> list[Document]:
8
+ if chucking_method == "recursive":
9
+ return DocumentChunker.chunk_recursive(documents, chunk_size=chunk_size, chunk_overlap=chunk_overlap)
10
+ elif chucking_method == "markdownHeader":
11
+ return DocumentChunker.chunk_markdown(documents, chunk_size=chunk_size, chunk_overlap=chunk_overlap)
8
12
 
9
13
  @staticmethod
10
- def chunk_recursive(documents: list[Document], chunk_size: int=3_000) -> list[Document]:
14
+ def chunk_recursive(documents: list[Document], chunk_size: int=3_000, chunk_overlap: int=300) -> list[Document]:
11
15
  """
12
16
  Recursively split documents into smaller chunks while preserving metadata.
13
17
 
@@ -17,16 +21,16 @@ class DocumentChunker:
17
21
 
18
22
  Args:
19
23
  documents (list[Document]): A list of Document objects to be chunked.
20
- chunk_size (int, optional): The maximum size of each chunk in characters.
24
+ chunk_size (int, optional): The maximum size of each chunk in characters.
21
25
  Defaults to 3,000.
22
26
 
23
27
  Returns:
24
- list[Document]: A list of Document objects where each document's content is
25
- at most chunk_size characters. Each chunk preserves the metadata from
28
+ list[Document]: A list of Document objects where each document's content is
29
+ at most chunk_size characters. Each chunk preserves the metadata from
26
30
  its original document.
27
31
 
28
32
  Notes:
29
- - Chunk overlap is automatically set to 10% of the chunk_size to maintain
33
+ - Chunk overlap is automatically set to 10% of the chunk_size to maintain
30
34
  context between chunks.
31
35
  - Documents smaller than or equal to chunk_size are returned unchanged.
32
36
  - Metadata from the original document is copied to all resulting chunks.
@@ -80,3 +84,69 @@ class DocumentChunker:
80
84
  )
81
85
  return chunked_documents
82
86
 
87
+ @staticmethod
88
+ def chunk_markdown(documents: list[Document], chunk_size: int=3_000, chunk_overlap: int=300) -> list[Document]:
89
+ """
90
+ Splits markdown documents based on headers and then into smaller chunks.
91
+
92
+ This function takes a list of Document objects containing markdown content and splits
93
+ them based on markdown headers (# H1, ## H2, ### H3, etc.). After splitting by headers,
94
+ it further chunks large sections using RecursiveCharacterTextSplitter to ensure no
95
+ chunk exceeds the specified size.
96
+
97
+ Args:
98
+ documents (list[Document]): A list of Document objects with markdown content.
99
+ chunk_size (int, optional): The maximum size of each chunk in characters.
100
+ Defaults to 3,000.
101
+
102
+ Returns:
103
+ list[Document]: A list of Document objects where each document represents a chunk.
104
+ Metadata includes the markdown header hierarchy and original document metadata.
105
+
106
+ Note:
107
+ - Headers are split at levels: H1 (#), H2 (##), and H3 (###)
108
+ - Header information is preserved in the metadata
109
+ - Large sections are further split to respect chunk_size limit
110
+ - Chunk overlap is set to 10% of chunk_size for context preservation
111
+ """
112
+ # Define headers to split on
113
+ headers_to_split_on = [
114
+ ("#", "h1"),
115
+ ("##", "h2"),
116
+ ("###", "h3"),
117
+ ("####", "h4"),
118
+ ]
119
+
120
+ markdown_splitter = MarkdownHeaderTextSplitter(
121
+ headers_to_split_on=headers_to_split_on,
122
+ strip_headers=False
123
+ )
124
+
125
+ # Secondary splitter for large sections
126
+ text_splitter = RecursiveCharacterTextSplitter(
127
+ chunk_size=chunk_size,
128
+ chunk_overlap=chunk_overlap
129
+ )
130
+
131
+ chunked_documents = []
132
+ for doc in documents:
133
+ # First split by markdown headers
134
+ md_chunks = markdown_splitter.split_text(doc.page_content)
135
+
136
+ # Then split large sections if needed
137
+ for md_chunk in md_chunks:
138
+ # Merge metadata from original doc and header metadata
139
+ merged_metadata = {**doc.metadata, **md_chunk.metadata}
140
+ if len(md_chunk.page_content) <= chunk_size:
141
+ chunked_documents.append(
142
+ Document(page_content=md_chunk.page_content, metadata=merged_metadata)
143
+ )
144
+ else:
145
+ # Further split large sections
146
+ sub_chunks = text_splitter.split_text(md_chunk.page_content)
147
+ for sub_chunk in sub_chunks:
148
+ chunked_documents.append(
149
+ Document(page_content=sub_chunk, metadata=merged_metadata)
150
+ )
151
+
152
+ return chunked_documents
@@ -107,6 +107,9 @@ class VectorDBStrategy(ABC):
107
107
  embeddings: Embeddings,
108
108
  documents: List[Document],
109
109
  storage_id: str,
110
+ chucking_method: str,
111
+ chunk_size: int,
112
+ chunk_overlap: int,
110
113
  **kwargs
111
114
  ) -> Optional[str]:
112
115
  pass
@@ -46,11 +46,14 @@ class Chroma(VectorDBStrategy):
46
46
  embeddings: Embeddings,
47
47
  documents: list[Document],
48
48
  storage_id: str,
49
+ chucking_method: str,
50
+ chunk_size: int,
51
+ chunk_overlap: int,
49
52
  **kwargs
50
53
  ) -> Optional[str]:
51
54
  try:
52
55
  documents = self._remove_empty_documents(documents)
53
- chunked_docs = DocumentChunker.chunk(documents)
56
+ chunked_docs = DocumentChunker.chunk(documents, chucking_method, chunk_size, chunk_overlap)
54
57
  batches = self._batch_documents_by_tokens(chunked_docs)
55
58
  logging.info(f"documents: {len(documents)}, after chunking: {len(chunked_docs)}, processing batches: {len(batches)}")
56
59
  _instance: CHROMA = None
@@ -30,11 +30,14 @@ class Faiss(VectorDBStrategy):
30
30
  embeddings: Embeddings,
31
31
  documents: list[Document],
32
32
  storage_id: str,
33
+ chucking_method: str,
34
+ chunk_size: int,
35
+ chunk_overlap: int,
33
36
  **kwargs
34
37
  ) -> Optional[str]:
35
38
  try:
36
39
  documents = self._remove_empty_documents(documents)
37
- chunked_docs = DocumentChunker.chunk(documents)
40
+ chunked_docs = DocumentChunker.chunk(documents, chucking_method, chunk_size, chunk_overlap)
38
41
  batches = self._batch_documents_by_tokens(chunked_docs)
39
42
  logging.info(f"documents: {len(documents)}, after chunking: {len(chunked_docs)}, processing batches: {len(batches)}")
40
43
  _instance: FAISS = None
@@ -14,14 +14,17 @@ class Qdrant(VectorDBStrategy):
14
14
  embeddings: Embeddings,
15
15
  documents: list[Document],
16
16
  storage_id: str,
17
+ chucking_method: str,
18
+ chunk_size: int,
19
+ chunk_overlap: int,
17
20
  **kwargs
18
21
  ) -> Optional[str]:
19
22
  try:
20
23
  documents = self._remove_empty_documents(documents)
21
- chunked_docs = DocumentChunker.chunk(documents)
24
+ chunked_docs = DocumentChunker.chunk(documents, chucking_method, chunk_size, chunk_overlap)
22
25
  batches = self._batch_documents_by_tokens(chunked_docs)
23
26
  logging.info(f"documents: {len(documents)}, after chunking: {len(chunked_docs)}, processing batches: {len(batches)}")
24
- _instance: QDRANT = None
27
+ _instance: QDRANT = None
25
28
  if not os.path.exists(storage_id):
26
29
  os.makedirs(storage_id)
27
30
 
@@ -45,10 +48,10 @@ class Qdrant(VectorDBStrategy):
45
48
  # add a small delay to avoid rate limiting
46
49
  if i < len(batches) - 1: # except last batch
47
50
  await asyncio.sleep(1)
48
- if _instance:
51
+ if _instance:
49
52
  self._clear_cache(storage_id)
50
53
  logging.info(f"Successfully created {Qdrant.__name__} index with {len(chunked_docs)} total documents")
51
- return storage_id
54
+ return storage_id
52
55
  except Exception as e:
53
56
  logging.error(f"{Qdrant.__name__} create error: {e}")
54
57
  raise e
@@ -112,7 +112,7 @@ async def kb(rq: KbRequest) -> VectorDbResponse:
112
112
  db_file_path = await aiofiles.os.wrap(shutil.make_archive)(
113
113
  os.path.join(_config.robot_data_folder, _config.robot_data_db_folder, _config.robot_data_db_folder_out, db_name),
114
114
  "zip",
115
- await VectorDbManager.get_strategy(rq.vector_type).create(rq.embeddings(), documents, store_path, return_folder_path=True)
115
+ await VectorDbManager.get_strategy(rq.vector_type).create(rq.embeddings(), documents, store_path, rq.chucking_method, rq.chuck_size, rq.chunk_overlap, return_folder_path=True)
116
116
  )
117
117
  return VectorDbResponse(file = os.path.basename(db_file_path), vector_type=rq.vector_type)
118
118
  except Exception as e:
@@ -1,216 +1,216 @@
1
- import asyncio, logging, aiohttp
2
- from ws_bom_robot_app.llm.vector_store.integration.base import IntegrationStrategy
3
- from langchain_core.documents import Document
4
- from ws_bom_robot_app.llm.vector_store.loader.base import Loader
5
- from typing import List, Union, Optional, Dict, Any, Literal
6
- from pydantic import BaseModel, Field, AliasChoices, field_validator
7
- import json
8
- import os
9
-
10
-
11
- class AuthConfig(BaseModel):
12
- """
13
- Configuration for API authentication.
14
-
15
- Attributes:
16
- type: Type of authentication (bearer, basic, api_key, custom, none)
17
- token: Bearer token or API key value
18
- username: Username for basic auth
19
- password: Password for basic auth
20
- header_name: Custom header name for API key
21
- prefix: Prefix for the auth value (e.g., 'Bearer', 'Token')
22
- """
23
- type: Literal["bearer", "basic", "api_key", "custom", "none"] = Field(default="none")
24
- token: Optional[str] = Field(default=None)
25
- username: Optional[str] = Field(default=None)
26
- password: Optional[str] = Field(default=None)
27
- header_name: Optional[str] = Field(default=None, validation_alias=AliasChoices("headerName", "header_name"))
28
- prefix: Optional[str] = Field(default=None)
29
-
30
-
31
- class ApiParams(BaseModel):
32
- """
33
- Generic API Integration Parameters.
34
-
35
- Attributes:
36
- url: The base URL of the API endpoint
37
- method: HTTP method (GET, POST, PUT, DELETE, PATCH)
38
- headers: Custom headers to include in the request
39
- params: Query parameters for the request
40
- body: Request body for POST/PUT/PATCH requests
41
- auth: Authentication configuration
42
- response_data_path: JSON path to extract data from response (e.g., 'data.items', 'results')
43
- max_retries: Maximum number of retry attempts for failed requests
44
- retry_delay: Base delay in seconds between retries (uses exponential backoff)
45
- timeout: Request timeout in seconds
46
- """
47
- url: str = Field(validation_alias=AliasChoices("url", "endpoint"))
48
- method: Literal["GET", "POST", "PUT", "DELETE", "PATCH"] = Field(default="GET")
49
- headers: Optional[Dict[str, str]] = Field(default_factory=dict)
50
- params: Optional[Dict[str, Any]] = Field(default_factory=dict)
51
- body: Optional[Union[Dict[str, Any], str]] = Field(default=None)
52
- auth: Optional[AuthConfig] = Field(default_factory=lambda: AuthConfig())
53
- response_data_path: Optional[str] = Field(default=None, validation_alias=AliasChoices("responseDataPath", "response_data_path"))
54
- max_retries: int = Field(default=5, validation_alias=AliasChoices("maxRetries", "max_retries"))
55
- retry_delay: float = Field(default=1.0, validation_alias=AliasChoices("retryDelay", "retry_delay"))
56
- timeout: int = Field(default=30)
57
-
58
- @field_validator('auth', mode='before')
59
- @classmethod
60
- def parse_auth(cls, v):
61
- """Parse auth config from dict if needed"""
62
- if isinstance(v, dict):
63
- return AuthConfig(**v)
64
- return v or AuthConfig()
65
-
66
-
67
- class Api(IntegrationStrategy):
68
- """
69
- Generic API Integration that supports:
70
- - Multiple HTTP methods (GET, POST, PUT, DELETE, PATCH)
71
- - Various authentication types (Bearer, Basic, API Key, Custom)
72
- - Custom headers and parameters
73
- - Automatic retry with exponential backoff
74
- - Flexible response data extraction
75
- """
76
-
77
- def __init__(self, knowledgebase_path: str, data: dict[str, Union[str, int, list]]):
78
- super().__init__(knowledgebase_path, data)
79
- self.__data = ApiParams.model_validate(self.data)
80
-
81
- def working_subdirectory(self) -> str:
82
- return 'api_integration'
83
-
84
- async def run(self) -> None:
85
- """Fetch data from the API and save to JSON file"""
86
- _data = await self.__fetch_data()
87
- json_file_path = os.path.join(self.working_directory, 'api_data.json')
88
- with open(json_file_path, 'w', encoding='utf-8') as f:
89
- json.dump(_data, f, ensure_ascii=False, indent=2)
90
- logging.info(f"Saved {len(_data) if isinstance(_data, list) else 1} items to {json_file_path}")
91
-
92
- async def load(self) -> list[Document]:
93
- """Load data from API and convert to documents"""
94
- await self.run()
95
- await asyncio.sleep(1)
96
- return await Loader(self.working_directory).load()
97
-
98
- def __prepare_headers(self) -> Dict[str, str]:
99
- """Prepare request headers with authentication"""
100
- headers = self.__data.headers.copy() if self.__data.headers else {}
101
-
102
- # Add Content-Type if not present
103
- if 'Content-Type' not in headers and self.__data.method in ["POST", "PUT", "PATCH"]:
104
- headers['Content-Type'] = 'application/json'
105
-
106
- # Add authentication
107
- auth = self.__data.auth
108
- if auth.type == "bearer":
109
- prefix = auth.prefix or "Bearer"
110
- headers['Authorization'] = f"{prefix} {auth.token}"
111
- elif auth.type == "basic":
112
- import base64
113
- credentials = f"{auth.username}:{auth.password}"
114
- encoded = base64.b64encode(credentials.encode()).decode()
115
- headers['Authorization'] = f"Basic {encoded}"
116
- elif auth.type == "api_key" and auth.header_name:
117
- prefix = f"{auth.prefix} " if auth.prefix else ""
118
- headers[auth.header_name] = f"{prefix}{auth.token}"
119
-
120
- return headers
121
-
122
- def __get_nested_value(self, data: Any, path: Optional[str]) -> Any:
123
- """Extract nested value from data using dot notation path"""
124
- if not path:
125
- return data
126
-
127
- keys = path.split('.')
128
- current = data
129
- for key in keys:
130
- if isinstance(current, dict):
131
- current = current.get(key)
132
- elif isinstance(current, list) and key.isdigit():
133
- current = current[int(key)]
134
- else:
135
- return None
136
-
137
- if current is None:
138
- return None
139
-
140
- return current
141
-
142
- async def __make_request(
143
- self,
144
- url: str,
145
- headers: Dict[str, str],
146
- params: Optional[Dict[str, Any]] = None
147
- ) -> Dict[str, Any]:
148
- """Make HTTP request with retry logic"""
149
- retry_count = 0
150
-
151
- while retry_count <= self.__data.max_retries:
152
- try:
153
- timeout = aiohttp.ClientTimeout(total=self.__data.timeout)
154
-
155
- async with aiohttp.ClientSession(timeout=timeout) as session:
156
- request_kwargs = {
157
- "headers": headers,
158
- "params": params or self.__data.params
159
- }
160
-
161
- # Add body for POST/PUT/PATCH
162
- if self.__data.method in ["POST", "PUT", "PATCH"] and self.__data.body:
163
- if isinstance(self.__data.body, dict):
164
- request_kwargs["json"] = self.__data.body
165
- else:
166
- request_kwargs["data"] = self.__data.body
167
-
168
- async with session.request(
169
- self.__data.method,
170
- url,
171
- **request_kwargs
172
- ) as response:
173
- # Check response status
174
- if response.status == 429: # Rate limit
175
- retry_count += 1
176
- if retry_count > self.__data.max_retries:
177
- raise Exception("Rate limit exceeded. Maximum retries reached.")
178
-
179
- wait_time = self.__data.retry_delay * (2 ** retry_count)
180
- logging.warning(f"Rate limited. Waiting {wait_time}s (Attempt {retry_count}/{self.__data.max_retries})")
181
- await asyncio.sleep(wait_time)
182
- continue
183
-
184
- response.raise_for_status()
185
-
186
- # Parse response
187
- try:
188
- data = await response.json()
189
- return data
190
- except aiohttp.ContentTypeError:
191
- text = await response.text()
192
- logging.warning(f"Non-JSON response received: {text[:200]}")
193
- return {"text": text}
194
-
195
- except aiohttp.ClientError as e:
196
- retry_count += 1
197
- if retry_count > self.__data.max_retries:
198
- raise Exception(f"Request failed after {self.__data.max_retries} retries: {e}")
199
-
200
- wait_time = self.__data.retry_delay * (2 ** retry_count)
201
- logging.warning(f"Request error: {e}. Retrying in {wait_time}s...")
202
- await asyncio.sleep(wait_time)
203
- continue
204
-
205
- raise Exception("Maximum retries exceeded")
206
-
207
- async def __fetch_data(self) -> Any:
208
- """Fetch data from API"""
209
- headers = self.__prepare_headers()
210
- response = await self.__make_request(self.__data.url, headers)
211
-
212
- # Extract data from response using path if specified
213
- data = self.__get_nested_value(response, self.__data.response_data_path)
214
- result = data if data is not None else response
215
-
216
- return result
1
+ import asyncio, logging, aiohttp
2
+ from ws_bom_robot_app.llm.vector_store.integration.base import IntegrationStrategy
3
+ from langchain_core.documents import Document
4
+ from ws_bom_robot_app.llm.vector_store.loader.base import Loader
5
+ from typing import List, Union, Optional, Dict, Any, Literal
6
+ from pydantic import BaseModel, Field, AliasChoices, field_validator
7
+ import json
8
+ import os
9
+
10
+
11
+ class AuthConfig(BaseModel):
12
+ """
13
+ Configuration for API authentication.
14
+
15
+ Attributes:
16
+ type: Type of authentication (bearer, basic, api_key, custom, none)
17
+ token: Bearer token or API key value
18
+ username: Username for basic auth
19
+ password: Password for basic auth
20
+ header_name: Custom header name for API key
21
+ prefix: Prefix for the auth value (e.g., 'Bearer', 'Token')
22
+ """
23
+ type: Literal["bearer", "basic", "api_key", "custom", "none"] = Field(default="none")
24
+ token: Optional[str] = Field(default=None)
25
+ username: Optional[str] = Field(default=None)
26
+ password: Optional[str] = Field(default=None)
27
+ header_name: Optional[str] = Field(default=None, validation_alias=AliasChoices("headerName", "header_name"))
28
+ prefix: Optional[str] = Field(default=None)
29
+
30
+
31
+ class ApiParams(BaseModel):
32
+ """
33
+ Generic API Integration Parameters.
34
+
35
+ Attributes:
36
+ url: The base URL of the API endpoint
37
+ method: HTTP method (GET, POST, PUT, DELETE, PATCH)
38
+ headers: Custom headers to include in the request
39
+ params: Query parameters for the request
40
+ body: Request body for POST/PUT/PATCH requests
41
+ auth: Authentication configuration
42
+ response_data_path: JSON path to extract data from response (e.g., 'data.items', 'results')
43
+ max_retries: Maximum number of retry attempts for failed requests
44
+ retry_delay: Base delay in seconds between retries (uses exponential backoff)
45
+ timeout: Request timeout in seconds
46
+ """
47
+ url: str = Field(validation_alias=AliasChoices("url", "endpoint"))
48
+ method: Literal["GET", "POST", "PUT", "DELETE", "PATCH"] = Field(default="GET")
49
+ headers: Optional[Dict[str, str]] = Field(default_factory=dict)
50
+ params: Optional[Dict[str, Any]] = Field(default_factory=dict)
51
+ body: Optional[Union[Dict[str, Any], str]] = Field(default=None)
52
+ auth: Optional[AuthConfig] = Field(default_factory=lambda: AuthConfig())
53
+ response_data_path: Optional[str] = Field(default=None, validation_alias=AliasChoices("responseDataPath", "response_data_path"))
54
+ max_retries: int = Field(default=5, validation_alias=AliasChoices("maxRetries", "max_retries"))
55
+ retry_delay: float = Field(default=1.0, validation_alias=AliasChoices("retryDelay", "retry_delay"))
56
+ timeout: int = Field(default=30)
57
+
58
+ @field_validator('auth', mode='before')
59
+ @classmethod
60
+ def parse_auth(cls, v):
61
+ """Parse auth config from dict if needed"""
62
+ if isinstance(v, dict):
63
+ return AuthConfig(**v)
64
+ return v or AuthConfig()
65
+
66
+
67
+ class Api(IntegrationStrategy):
68
+ """
69
+ Generic API Integration that supports:
70
+ - Multiple HTTP methods (GET, POST, PUT, DELETE, PATCH)
71
+ - Various authentication types (Bearer, Basic, API Key, Custom)
72
+ - Custom headers and parameters
73
+ - Automatic retry with exponential backoff
74
+ - Flexible response data extraction
75
+ """
76
+
77
+ def __init__(self, knowledgebase_path: str, data: dict[str, Union[str, int, list]]):
78
+ super().__init__(knowledgebase_path, data)
79
+ self.__data = ApiParams.model_validate(self.data)
80
+
81
+ def working_subdirectory(self) -> str:
82
+ return 'api_integration'
83
+
84
+ async def run(self) -> None:
85
+ """Fetch data from the API and save to JSON file"""
86
+ _data = await self.__fetch_data()
87
+ json_file_path = os.path.join(self.working_directory, 'api_data.json')
88
+ with open(json_file_path, 'w', encoding='utf-8') as f:
89
+ json.dump(_data, f, ensure_ascii=False, indent=2)
90
+ logging.info(f"Saved {len(_data) if isinstance(_data, list) else 1} items to {json_file_path}")
91
+
92
+ async def load(self) -> list[Document]:
93
+ """Load data from API and convert to documents"""
94
+ await self.run()
95
+ await asyncio.sleep(1)
96
+ return await Loader(self.working_directory).load()
97
+
98
+ def __prepare_headers(self) -> Dict[str, str]:
99
+ """Prepare request headers with authentication"""
100
+ headers = self.__data.headers.copy() if self.__data.headers else {}
101
+
102
+ # Add Content-Type if not present
103
+ if 'Content-Type' not in headers and self.__data.method in ["POST", "PUT", "PATCH"]:
104
+ headers['Content-Type'] = 'application/json'
105
+
106
+ # Add authentication
107
+ auth = self.__data.auth
108
+ if auth.type == "bearer":
109
+ prefix = auth.prefix or "Bearer"
110
+ headers['Authorization'] = f"{prefix} {auth.token}"
111
+ elif auth.type == "basic":
112
+ import base64
113
+ credentials = f"{auth.username}:{auth.password}"
114
+ encoded = base64.b64encode(credentials.encode()).decode()
115
+ headers['Authorization'] = f"Basic {encoded}"
116
+ elif auth.type == "api_key" and auth.header_name:
117
+ prefix = f"{auth.prefix} " if auth.prefix else ""
118
+ headers[auth.header_name] = f"{prefix}{auth.token}"
119
+
120
+ return headers
121
+
122
+ def __get_nested_value(self, data: Any, path: Optional[str]) -> Any:
123
+ """Extract nested value from data using dot notation path"""
124
+ if not path:
125
+ return data
126
+
127
+ keys = path.split('.')
128
+ current = data
129
+ for key in keys:
130
+ if isinstance(current, dict):
131
+ current = current.get(key)
132
+ elif isinstance(current, list) and key.isdigit():
133
+ current = current[int(key)]
134
+ else:
135
+ return None
136
+
137
+ if current is None:
138
+ return None
139
+
140
+ return current
141
+
142
+ async def __make_request(
143
+ self,
144
+ url: str,
145
+ headers: Dict[str, str],
146
+ params: Optional[Dict[str, Any]] = None
147
+ ) -> Dict[str, Any]:
148
+ """Make HTTP request with retry logic"""
149
+ retry_count = 0
150
+
151
+ while retry_count <= self.__data.max_retries:
152
+ try:
153
+ timeout = aiohttp.ClientTimeout(total=self.__data.timeout)
154
+
155
+ async with aiohttp.ClientSession(timeout=timeout) as session:
156
+ request_kwargs = {
157
+ "headers": headers,
158
+ "params": params or self.__data.params
159
+ }
160
+
161
+ # Add body for POST/PUT/PATCH
162
+ if self.__data.method in ["POST", "PUT", "PATCH"] and self.__data.body:
163
+ if isinstance(self.__data.body, dict):
164
+ request_kwargs["json"] = self.__data.body
165
+ else:
166
+ request_kwargs["data"] = self.__data.body
167
+
168
+ async with session.request(
169
+ self.__data.method,
170
+ url,
171
+ **request_kwargs
172
+ ) as response:
173
+ # Check response status
174
+ if response.status == 429: # Rate limit
175
+ retry_count += 1
176
+ if retry_count > self.__data.max_retries:
177
+ raise Exception("Rate limit exceeded. Maximum retries reached.")
178
+
179
+ wait_time = self.__data.retry_delay * (2 ** retry_count)
180
+ logging.warning(f"Rate limited. Waiting {wait_time}s (Attempt {retry_count}/{self.__data.max_retries})")
181
+ await asyncio.sleep(wait_time)
182
+ continue
183
+
184
+ response.raise_for_status()
185
+
186
+ # Parse response
187
+ try:
188
+ data = await response.json()
189
+ return data
190
+ except aiohttp.ContentTypeError:
191
+ text = await response.text()
192
+ logging.warning(f"Non-JSON response received: {text[:200]}")
193
+ return {"text": text}
194
+
195
+ except aiohttp.ClientError as e:
196
+ retry_count += 1
197
+ if retry_count > self.__data.max_retries:
198
+ raise Exception(f"Request failed after {self.__data.max_retries} retries: {e}")
199
+
200
+ wait_time = self.__data.retry_delay * (2 ** retry_count)
201
+ logging.warning(f"Request error: {e}. Retrying in {wait_time}s...")
202
+ await asyncio.sleep(wait_time)
203
+ continue
204
+
205
+ raise Exception("Maximum retries exceeded")
206
+
207
+ async def __fetch_data(self) -> Any:
208
+ """Fetch data from API"""
209
+ headers = self.__prepare_headers()
210
+ response = await self.__make_request(self.__data.url, headers)
211
+
212
+ # Extract data from response using path if specified
213
+ data = self.__get_nested_value(response, self.__data.response_data_path)
214
+ result = data if data is not None else response
215
+
216
+ return result
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: ws_bom_robot_app
3
- Version: 0.0.105
3
+ Version: 0.0.107
4
4
  Summary: A FastAPI application serving ws bom/robot/llm platform ai.
5
5
  Home-page: https://github.com/websolutespa/bom
6
6
  Author: Websolute Spa
@@ -19,7 +19,7 @@ ws_bom_robot_app/llm/nebuly_handler.py,sha256=wFO2UG849kv5hmjM5EoOp0Jsloy-BtQjrR
19
19
  ws_bom_robot_app/llm/feedbacks/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
20
20
  ws_bom_robot_app/llm/feedbacks/feedback_manager.py,sha256=vNcZLG9IKhurAk7hjBqyFgQTjnh3Cd4GnxeYsX7ZdiA,2922
21
21
  ws_bom_robot_app/llm/models/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
22
- ws_bom_robot_app/llm/models/api.py,sha256=QfJ--9ONGi0b6Ua5km7N2MM7fkT1lu2kSRxMBg6ZlbI,12346
22
+ ws_bom_robot_app/llm/models/api.py,sha256=jvoU8z82L7xGkqo2a2m--0OxZrENoLVPuucA-rdr74o,12798
23
23
  ws_bom_robot_app/llm/models/base.py,sha256=1TqxuTK3rjJEALn7lvgoen_1ba3R2brAgGx6EDTtDZo,152
24
24
  ws_bom_robot_app/llm/models/feedback.py,sha256=pYNQGxNOBgeAAfdJLI95l7ePLBI5tVdsgnyjp5oMOQU,1722
25
25
  ws_bom_robot_app/llm/models/kb.py,sha256=oVSw6_dmNxikAHrPqcfxDXz9M0ezLIYuxpgvzfs_Now,9514
@@ -27,13 +27,13 @@ ws_bom_robot_app/llm/providers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5
27
27
  ws_bom_robot_app/llm/providers/llm_manager.py,sha256=oVeEmZUnR1ysV-BI_zpwQ-gpXqmhSzjKFQQAHtaFGSI,16596
28
28
  ws_bom_robot_app/llm/tools/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
29
29
  ws_bom_robot_app/llm/tools/tool_builder.py,sha256=CtZwJ94aj0YGA3yVWkyCUxNE7WgU2zWjhl_tEfEskxw,3432
30
- ws_bom_robot_app/llm/tools/tool_manager.py,sha256=ThiaBCDOn192NOaLHsxbNZXYs5fptqlfgCHW-9h2eVY,15989
30
+ ws_bom_robot_app/llm/tools/tool_manager.py,sha256=6qPzFbcp9zH3tiNMfGBPV_N4WAp65LUIfmjOXvNU8mI,18973
31
31
  ws_bom_robot_app/llm/tools/utils.py,sha256=Ba7ScFZPVJ3ke8KLO8ik1wyR2f_zC99Bikqx0OGnKoI,1924
32
32
  ws_bom_robot_app/llm/tools/models/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
33
33
  ws_bom_robot_app/llm/tools/models/main.py,sha256=1hICqHs-KS2heenkH7b2eH0N2GrPaaNGBrn64cl_A40,827
34
34
  ws_bom_robot_app/llm/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
35
35
  ws_bom_robot_app/llm/utils/agent.py,sha256=uFuSfYMfGIE2WCKGNSKL-T2SDFn-tUKvbAYbGTPIw6g,1445
36
- ws_bom_robot_app/llm/utils/chunker.py,sha256=u0l2t3bIQihOpLRlcrO23bNBda3kgzayyGAIR0YZUqQ,4069
36
+ ws_bom_robot_app/llm/utils/chunker.py,sha256=-WfDG6xUU_oOUJmWhDlQbI1hsGCRkmnKyqkY_bEG8WA,7420
37
37
  ws_bom_robot_app/llm/utils/cleanup.py,sha256=ARLZTX4mLbkLCEnMdIWYDYEAPOjzfy1laLGkYnxZe30,3063
38
38
  ws_bom_robot_app/llm/utils/cms.py,sha256=gfIXvY3DxgbgDf0LCzyekWitaduxKGLHfV6gbRmh8zk,6960
39
39
  ws_bom_robot_app/llm/utils/download.py,sha256=rvc88E63UGHnFVlJJeMb05Z2FcBYIITqKnIE3ldEu6I,7293
@@ -41,15 +41,15 @@ ws_bom_robot_app/llm/utils/print.py,sha256=HK3zhZOd4cEyXZ8QcudLtTIfqqtMOERce_yTo
41
41
  ws_bom_robot_app/llm/utils/secrets.py,sha256=-HtqLIDVIJrpvGC5YhPAVyLsq8P4ChVM5g3GOfdwqVk,878
42
42
  ws_bom_robot_app/llm/utils/webhooks.py,sha256=LAAZqyN6VhV13wu4X-X85TwdDgAV2rNvIwQFIIc0FJM,2114
43
43
  ws_bom_robot_app/llm/vector_store/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
44
- ws_bom_robot_app/llm/vector_store/generator.py,sha256=W_hi_UOPaSjnEuazhUFIrMAwTvz64Du8_gpiVAxFlVc,6451
44
+ ws_bom_robot_app/llm/vector_store/generator.py,sha256=nSj8aLARr4h1SJlkEI7X1hDef195fAPKEi2fFkl7_wM,6504
45
45
  ws_bom_robot_app/llm/vector_store/db/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
46
- ws_bom_robot_app/llm/vector_store/db/base.py,sha256=pIlHTg83bLdGfbZorilSqeJ5QKgpGU8fxF3c-5pLWJo,8490
47
- ws_bom_robot_app/llm/vector_store/db/chroma.py,sha256=oe0p_OlqTGFXEvtC8AXiElUWnfHXTXk97_suYh7kirU,4622
48
- ws_bom_robot_app/llm/vector_store/db/faiss.py,sha256=rCMq_dhg1-NM8G5L_VEdDIvgmkWLXL3r5EreaqxR3Oc,3925
46
+ ws_bom_robot_app/llm/vector_store/db/base.py,sha256=GhTkOq4ms_vUf_nuncyskUpI6kWPKDQi5dfLU5zduFY,8576
47
+ ws_bom_robot_app/llm/vector_store/db/chroma.py,sha256=9tnEKQLvBt5TPthULR08ktDkcpFjuIxuYV7REFp9kuY,4752
48
+ ws_bom_robot_app/llm/vector_store/db/faiss.py,sha256=lHpBZV1s_OZTiRlcVM-KJBf2wWWkzvYm_gt57BdbbUs,4055
49
49
  ws_bom_robot_app/llm/vector_store/db/manager.py,sha256=5rqBvc0QKmHFUgVHqBAr1Y4FZRl-w-ylGMjgXZywrdA,533
50
- ws_bom_robot_app/llm/vector_store/db/qdrant.py,sha256=-36YOXjNtDeWveREnGd1SZF3hT7_Peg_pAT5uoxXcQU,3237
50
+ ws_bom_robot_app/llm/vector_store/db/qdrant.py,sha256=v3YKLZ9_ysaNB64UVA1JCYg-W1BMGfo9CLCG4roXtJ4,3323
51
51
  ws_bom_robot_app/llm/vector_store/integration/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
52
- ws_bom_robot_app/llm/vector_store/integration/api.py,sha256=0fW2uKW3P_WaX8J18LRTMTVZ1VP3t_bHkocDtm1f_uc,8184
52
+ ws_bom_robot_app/llm/vector_store/integration/api.py,sha256=jivsqw3iMr4isnxi-jQYtFWPtBcTgIDe88hiUqXv5NE,8400
53
53
  ws_bom_robot_app/llm/vector_store/integration/azure.py,sha256=OEa96Dlf1CX0tjrTjX4KP3D_HTn249ukc9sluPbdOyU,3389
54
54
  ws_bom_robot_app/llm/vector_store/integration/base.py,sha256=Aat4vQCmsrTiWrzUi2_h-RKzJLhObsKFfZrzs1TnQP8,4385
55
55
  ws_bom_robot_app/llm/vector_store/integration/confluence.py,sha256=TMmGe53tHRTgHJ7nA8DqZVodo3aMEzHrrSdl0-I0-S0,4350
@@ -70,7 +70,7 @@ ws_bom_robot_app/llm/vector_store/loader/__init__.py,sha256=47DEQpj8HBSa-_TImW-5
70
70
  ws_bom_robot_app/llm/vector_store/loader/base.py,sha256=InpRwKPxp0tuM4drezBvxxAWHe3XTmu60MGvFsT7RPE,7176
71
71
  ws_bom_robot_app/llm/vector_store/loader/docling.py,sha256=RFYSZkZAYtU8wJSd1rN2T0lVo-wK1-ddtr6bH2fBr6Q,5170
72
72
  ws_bom_robot_app/llm/vector_store/loader/json_loader.py,sha256=LDppW0ZATo4_1hh-KlsAM3TLawBvwBxva_a7k5Oz1sc,858
73
- ws_bom_robot_app-0.0.105.dist-info/METADATA,sha256=dOAkYLVKxkr3-FcVVFK475-jH80yd_s3DQSB4bLnl0c,11011
74
- ws_bom_robot_app-0.0.105.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
75
- ws_bom_robot_app-0.0.105.dist-info/top_level.txt,sha256=Yl0akyHVbynsBX_N7wx3H3ZTkcMLjYyLJs5zBMDAKcM,17
76
- ws_bom_robot_app-0.0.105.dist-info/RECORD,,
73
+ ws_bom_robot_app-0.0.107.dist-info/METADATA,sha256=ctg-rgQKDvVvLr_L_wJxP1FwhNdl6NZRYdYB5JhV9Ys,11011
74
+ ws_bom_robot_app-0.0.107.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
75
+ ws_bom_robot_app-0.0.107.dist-info/top_level.txt,sha256=Yl0akyHVbynsBX_N7wx3H3ZTkcMLjYyLJs5zBMDAKcM,17
76
+ ws_bom_robot_app-0.0.107.dist-info/RECORD,,
@@ -1,5 +1,5 @@
1
1
  Wheel-Version: 1.0
2
- Generator: setuptools (80.9.0)
2
+ Generator: setuptools (80.10.2)
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
5
5