ws-bom-robot-app 0.0.88__py3-none-any.whl → 0.0.89__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (28)
  1. ws_bom_robot_app/llm/agent_description.py +123 -123
  2. ws_bom_robot_app/llm/agent_handler.py +174 -174
  3. ws_bom_robot_app/llm/agent_lcel.py +50 -50
  4. ws_bom_robot_app/llm/defaut_prompt.py +15 -15
  5. ws_bom_robot_app/llm/feedbacks/feedback_manager.py +66 -66
  6. ws_bom_robot_app/llm/main.py +158 -158
  7. ws_bom_robot_app/llm/models/feedback.py +30 -30
  8. ws_bom_robot_app/llm/nebuly_handler.py +185 -185
  9. ws_bom_robot_app/llm/tools/tool_builder.py +68 -68
  10. ws_bom_robot_app/llm/tools/tool_manager.py +332 -332
  11. ws_bom_robot_app/llm/tools/utils.py +41 -41
  12. ws_bom_robot_app/llm/utils/agent.py +34 -34
  13. ws_bom_robot_app/llm/utils/cms.py +114 -114
  14. ws_bom_robot_app/llm/utils/download.py +183 -183
  15. ws_bom_robot_app/llm/utils/print.py +29 -29
  16. ws_bom_robot_app/llm/vector_store/db/base.py +3 -0
  17. ws_bom_robot_app/llm/vector_store/db/chroma.py +1 -0
  18. ws_bom_robot_app/llm/vector_store/db/faiss.py +1 -0
  19. ws_bom_robot_app/llm/vector_store/db/qdrant.py +1 -0
  20. ws_bom_robot_app/llm/vector_store/generator.py +137 -137
  21. ws_bom_robot_app/llm/vector_store/integration/shopify.py +143 -143
  22. ws_bom_robot_app/llm/vector_store/integration/sitemap.py +3 -1
  23. ws_bom_robot_app/llm/vector_store/integration/thron.py +236 -102
  24. ws_bom_robot_app/llm/vector_store/loader/json_loader.py +25 -25
  25. {ws_bom_robot_app-0.0.88.dist-info → ws_bom_robot_app-0.0.89.dist-info}/METADATA +2 -2
  26. {ws_bom_robot_app-0.0.88.dist-info → ws_bom_robot_app-0.0.89.dist-info}/RECORD +28 -28
  27. {ws_bom_robot_app-0.0.88.dist-info → ws_bom_robot_app-0.0.89.dist-info}/WHEEL +0 -0
  28. {ws_bom_robot_app-0.0.88.dist-info → ws_bom_robot_app-0.0.89.dist-info}/top_level.txt +0 -0
ws_bom_robot_app/llm/utils/download.py (all 183 lines removed and re-added; the old and new contents are identical, shown once below)
@@ -1,183 +1,183 @@
import httpx
from typing import List,Optional
import os, logging, aiohttp, asyncio, hashlib, json
import uuid
from pydantic import BaseModel
import base64, requests, mimetypes
from urllib.parse import urlparse
from tqdm.asyncio import tqdm
from ws_bom_robot_app.config import config
import aiofiles

async def download_files(urls: List[str], destination_folder: str, authorization: str = None):
  tasks = [download_file(file, os.path.join(destination_folder, os.path.basename(file)), authorization=authorization) for file in urls]
  results = await asyncio.gather(*tasks, return_exceptions=False)
  for i, result in enumerate(results):
    if not result:
      raise Exception(f"Download failed for file: {urls[i]}")

async def download_file(url: str, destination: str, chunk_size: int = 8192, authorization: str = None) -> Optional[str]:
  """
  Downloads a file from a given URL to a destination path asynchronously.

  Args:
      url: The URL of the file to download
      destination: The local path where the file should be saved
      chunk_size: Size of chunks to download (default: 8192 bytes)

  Returns:
      str: Path to the downloaded file if successful, None otherwise

  Raises:
      Various exceptions are caught and logged
  """
  try:
    # Ensure the destination directory exists
    os.makedirs(os.path.dirname(os.path.abspath(destination)), exist_ok=True)

    async with httpx.AsyncClient(timeout=30.0) as client:
      if authorization:
        headers = {'Authorization': authorization}
      async with client.stream("GET", url, headers=headers) as response:
        # Check if the request was successful
        if response.status_code != 200:
          logging.error(f"Failed to download file. Status code: {response.status_code}")
          return None

        # Get the total file size if available
        total_size = int(response.headers.get('content-length', 0))
        # Open the destination file and write chunks
        with open(destination, 'wb') as f:
          with tqdm(
            total=total_size,
            desc="Downloading",
            unit='B',
            unit_scale=True,
            unit_divisor=1024
          ) as pbar:
            async for chunk in response.aiter_bytes(chunk_size):
              if chunk:
                f.write(chunk)
                pbar.update(len(chunk))

    logging.info(f"File downloaded successfully to {destination}")
    return destination

  except httpx.RequestError as e:
    logging.error(f"Network error occurred: {str(e)}")
    return None
  except asyncio.TimeoutError:
    logging.error("Download timed out")
    return None
  except IOError as e:
    logging.error(f"IO error occurred: {str(e)}")
    return None
  except Exception as e:
    logging.error(f"Unexpected error occurred: {str(e)}")
    return None
  finally:
    # If download failed and file was partially created, clean it up
    if os.path.exists(destination) and os.path.getsize(destination) == 0:
      try:
        os.remove(destination)
        logging.info(f"Cleaned up incomplete download: {destination}")
      except OSError:
        pass

class Base64File(BaseModel):
  """Base64 encoded file representation"""
  url: str
  base64_url: str
  base64_content: str
  name: str
  extension: str
  mime_type: str

  @staticmethod
  def _is_base64_data_uri(url: str) -> bool:
    """Check if URL is already a base64 data URI"""
    return (isinstance(url, str) and
            url.startswith('data:') and
            ';base64,' in url and
            len(url.split(',')) == 2)

  async def from_url(url: str) -> "Base64File":
    """Download file and return as base64 data URI"""
    def _cache_file(url: str) -> str:
      _hash = hashlib.md5(url.encode()).hexdigest()
      return os.path.join(config.robot_data_folder, config.robot_data_attachment_folder, f"{_hash}.json")
    async def from_cache(url: str) -> "Base64File":
      """Check if file is already downloaded and return data"""
      _file = _cache_file(url)
      if os.path.exists(_file):
        try:
          async with aiofiles.open(_file, 'rb') as f:
            content = await f.read()
            return Base64File(**json.loads(content))
        except Exception as e:
          logging.error(f"Error reading cache file {_file}: {e}")
          return None
      return None
    async def to_cache(file: "Base64File", url: str) -> None:
      """Save file to cache"""
      _file = _cache_file(url)
      try:
        async with aiofiles.open(_file, 'wb') as f:
          await f.write(file.model_dump_json().encode('utf-8'))
      except Exception as e:
        logging.error(f"Error writing cache file {_file}: {e}")

    # special case: base64 data URI
    if Base64File._is_base64_data_uri(url):
      mime_type = url.split(';')[0].replace('data:', '')
      base64_content = url.split(',')[1]
      extension=mime_type.split('/')[-1]
      name = f"file-{uuid.uuid4()}.{extension}"
      return Base64File(
        url=url,
        base64_url=url,
        base64_content=base64_content,
        name=name,
        extension=extension,
        mime_type=mime_type
      )

    # default download
    _error = None
    try:
      if _content := await from_cache(url):
        return _content
      async with httpx.AsyncClient(timeout=30.0) as client:
        response = await client.get(url, headers={"User-Agent": "Mozilla/5.0"})
        logging.info(f"Downloading {url} - Status: {response.status_code}")
        response.raise_for_status()
        content = response.read()
        # mime type detection
        mime_type = response.headers.get('content-type', '').split(';')[0]
        if not mime_type:
          mime_type, _ = mimetypes.guess_type(urlparse(url).path)
        if not mime_type:
          mime_type = 'application/octet-stream'
        # to base64
        base64_content = base64.b64encode(content).decode('utf-8')
        name = url.split('/')[-1]
        extension = name.split('.')[-1]
    except Exception as e:
      _error = f"Failed to download file from {url}: {e}"
      logging.error(_error)
      base64_content = base64.b64encode(_error.encode('utf-8')).decode('utf-8')
      name = "download_error.txt"
      mime_type = "text/plain"
      extension = "txt"

    _file = Base64File(
      url=url,
      base64_url= f"data:{mime_type};base64,{base64_content}",
      base64_content=base64_content,
      name=name,
      extension=extension,
      mime_type=mime_type
    )
    if not _error:
      await to_cache(_file, url)
    return _file
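For orientation, a minimal usage sketch of the module above; the URLs, destination folder, and printed fields are illustrative assumptions, not part of the package:

# minimal sketch, assuming ws-bom-robot-app is installed; URLs and paths are placeholders
import asyncio
from ws_bom_robot_app.llm.utils.download import download_files, Base64File

async def main():
    # download a couple of files into a local folder, optionally passing an Authorization header value
    await download_files(
        ["https://example.com/a.pdf", "https://example.com/b.png"],
        destination_folder="./downloads",
        authorization=None,
    )
    # fetch a single resource as a base64 data URI (cached on disk under an md5 hash of the URL)
    attachment = await Base64File.from_url("https://example.com/a.pdf")
    print(attachment.mime_type, attachment.base64_url[:48])

asyncio.run(main())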
ws_bom_robot_app/llm/utils/print.py (all 29 lines removed and re-added; the old and new contents are identical, shown once below)
@@ -1,29 +1,29 @@
import os, sys, json

class HiddenPrints:
  def __enter__(self):
    self._original_stdout = sys.stdout
    self._original_stderr = sys.stderr

    sys.stdout = open(os.devnull, 'w')
    sys.stderr = open(os.devnull, 'w')

  def __exit__(self, exc_type, exc_val, exc_tb):
    sys.stdout.close()
    sys.stderr.close()
    sys.stdout = self._original_stdout
    sys.stderr = self._original_stderr

def print_json(data) -> str:
  return print_single_json(data) + ","

def print_single_json(data) -> str:
  return json.dumps(data, sort_keys=True)

def print_string(data: str) -> str:
  if data != "":
    return print_json(data)

def print_single_string(data: str) -> str:
  if data != "":
    return print_single_json(data)
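A small usage sketch of these helpers (assumes the package is installed; the payload is a placeholder):

# minimal sketch of HiddenPrints and print_single_json usage
from ws_bom_robot_app.llm.utils.print import HiddenPrints, print_single_json

with HiddenPrints():
    print("this is suppressed")  # stdout/stderr are redirected to os.devnull inside the context

# serialize a payload with sorted keys, e.g. for deterministic streamed output
print(print_single_json({"b": 2, "a": 1}))  # -> {"a": 1, "b": 2}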
ws_bom_robot_app/llm/vector_store/db/base.py
@@ -178,6 +178,9 @@ class VectorDBStrategy(ABC):
     return await retriever.ainvoke(query, config={"source": kwargs.get("source", "retriever")})

   @staticmethod
+  def _remove_empty_documents(docs: List[Document]) -> List[Document]:
+    return [doc for doc in docs if doc.page_content and doc.page_content.strip()]
+  @staticmethod
   def _remove_duplicates(docs: List[Document]) -> List[Document]:
     seen = set()
     return [doc for doc in docs if not (doc.page_content in seen or seen.add(doc.page_content))]
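For context, a standalone sketch of the filtering the new _remove_empty_documents helper performs, using langchain-core's Document; the free-function form and sample documents here are illustrative and independent of the package:

# minimal sketch, assuming langchain-core is available
from typing import List
from langchain_core.documents import Document

def remove_empty_documents(docs: List[Document]) -> List[Document]:
    # keep only documents whose page_content is non-empty after stripping whitespace
    return [doc for doc in docs if doc.page_content and doc.page_content.strip()]

docs = [Document(page_content="hello"), Document(page_content="   "), Document(page_content="")]
print(len(remove_empty_documents(docs)))  # -> 1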
ws_bom_robot_app/llm/vector_store/db/chroma.py
@@ -49,6 +49,7 @@ class Chroma(VectorDBStrategy):
     **kwargs
   ) -> Optional[str]:
     try:
+      documents = self._remove_empty_documents(documents)
       chunked_docs = DocumentChunker.chunk(documents)
       batches = self._batch_documents_by_tokens(chunked_docs)
       logging.info(f"documents: {len(documents)}, after chunking: {len(chunked_docs)}, processing batches: {len(batches)}")
ws_bom_robot_app/llm/vector_store/db/faiss.py
@@ -33,6 +33,7 @@ class Faiss(VectorDBStrategy):
     **kwargs
   ) -> Optional[str]:
     try:
+      documents = self._remove_empty_documents(documents)
       chunked_docs = DocumentChunker.chunk(documents)
       batches = self._batch_documents_by_tokens(chunked_docs)
       logging.info(f"documents: {len(documents)}, after chunking: {len(chunked_docs)}, processing batches: {len(batches)}")
ws_bom_robot_app/llm/vector_store/db/qdrant.py
@@ -17,6 +17,7 @@ class Qdrant(VectorDBStrategy):
     **kwargs
   ) -> Optional[str]:
     try:
+      documents = self._remove_empty_documents(documents)
       chunked_docs = DocumentChunker.chunk(documents)
       if not os.path.exists(storage_id):
         os.makedirs(storage_id)
  os.makedirs(storage_id)