ws-bom-robot-app 0.0.73__tar.gz → 0.0.74__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {ws_bom_robot_app-0.0.73/ws_bom_robot_app.egg-info → ws_bom_robot_app-0.0.74}/PKG-INFO +4 -4
- {ws_bom_robot_app-0.0.73 → ws_bom_robot_app-0.0.74}/requirements.txt +3 -3
- {ws_bom_robot_app-0.0.73 → ws_bom_robot_app-0.0.74}/setup.py +1 -1
- {ws_bom_robot_app-0.0.73 → ws_bom_robot_app-0.0.74}/ws_bom_robot_app/llm/vector_store/db/base.py +47 -0
- {ws_bom_robot_app-0.0.73 → ws_bom_robot_app-0.0.74}/ws_bom_robot_app/llm/vector_store/db/chroma.py +27 -8
- {ws_bom_robot_app-0.0.73 → ws_bom_robot_app-0.0.74}/ws_bom_robot_app/llm/vector_store/db/faiss.py +34 -8
- {ws_bom_robot_app-0.0.73 → ws_bom_robot_app-0.0.74/ws_bom_robot_app.egg-info}/PKG-INFO +4 -4
- {ws_bom_robot_app-0.0.73 → ws_bom_robot_app-0.0.74}/ws_bom_robot_app.egg-info/requires.txt +3 -3
- {ws_bom_robot_app-0.0.73 → ws_bom_robot_app-0.0.74}/MANIFEST.in +0 -0
- {ws_bom_robot_app-0.0.73 → ws_bom_robot_app-0.0.74}/README.md +0 -0
- {ws_bom_robot_app-0.0.73 → ws_bom_robot_app-0.0.74}/pyproject.toml +0 -0
- {ws_bom_robot_app-0.0.73 → ws_bom_robot_app-0.0.74}/setup.cfg +0 -0
- {ws_bom_robot_app-0.0.73 → ws_bom_robot_app-0.0.74}/ws_bom_robot_app/__init__.py +0 -0
- {ws_bom_robot_app-0.0.73 → ws_bom_robot_app-0.0.74}/ws_bom_robot_app/auth.py +0 -0
- {ws_bom_robot_app-0.0.73 → ws_bom_robot_app-0.0.74}/ws_bom_robot_app/config.py +0 -0
- {ws_bom_robot_app-0.0.73 → ws_bom_robot_app-0.0.74}/ws_bom_robot_app/cron_manager.py +0 -0
- {ws_bom_robot_app-0.0.73 → ws_bom_robot_app-0.0.74}/ws_bom_robot_app/llm/__init__.py +0 -0
- {ws_bom_robot_app-0.0.73 → ws_bom_robot_app-0.0.74}/ws_bom_robot_app/llm/agent_context.py +0 -0
- {ws_bom_robot_app-0.0.73 → ws_bom_robot_app-0.0.74}/ws_bom_robot_app/llm/agent_description.py +0 -0
- {ws_bom_robot_app-0.0.73 → ws_bom_robot_app-0.0.74}/ws_bom_robot_app/llm/agent_handler.py +0 -0
- {ws_bom_robot_app-0.0.73 → ws_bom_robot_app-0.0.74}/ws_bom_robot_app/llm/agent_lcel.py +0 -0
- {ws_bom_robot_app-0.0.73 → ws_bom_robot_app-0.0.74}/ws_bom_robot_app/llm/api.py +0 -0
- {ws_bom_robot_app-0.0.73 → ws_bom_robot_app-0.0.74}/ws_bom_robot_app/llm/defaut_prompt.py +0 -0
- {ws_bom_robot_app-0.0.73 → ws_bom_robot_app-0.0.74}/ws_bom_robot_app/llm/feedbacks/__init__.py +0 -0
- {ws_bom_robot_app-0.0.73 → ws_bom_robot_app-0.0.74}/ws_bom_robot_app/llm/feedbacks/feedback_manager.py +0 -0
- {ws_bom_robot_app-0.0.73 → ws_bom_robot_app-0.0.74}/ws_bom_robot_app/llm/main.py +0 -0
- {ws_bom_robot_app-0.0.73 → ws_bom_robot_app-0.0.74}/ws_bom_robot_app/llm/models/__init__.py +0 -0
- {ws_bom_robot_app-0.0.73 → ws_bom_robot_app-0.0.74}/ws_bom_robot_app/llm/models/api.py +0 -0
- {ws_bom_robot_app-0.0.73 → ws_bom_robot_app-0.0.74}/ws_bom_robot_app/llm/models/base.py +0 -0
- {ws_bom_robot_app-0.0.73 → ws_bom_robot_app-0.0.74}/ws_bom_robot_app/llm/models/feedback.py +0 -0
- {ws_bom_robot_app-0.0.73 → ws_bom_robot_app-0.0.74}/ws_bom_robot_app/llm/models/kb.py +0 -0
- {ws_bom_robot_app-0.0.73 → ws_bom_robot_app-0.0.74}/ws_bom_robot_app/llm/nebuly_handler.py +0 -0
- {ws_bom_robot_app-0.0.73 → ws_bom_robot_app-0.0.74}/ws_bom_robot_app/llm/providers/__init__.py +0 -0
- {ws_bom_robot_app-0.0.73 → ws_bom_robot_app-0.0.74}/ws_bom_robot_app/llm/providers/llm_manager.py +0 -0
- {ws_bom_robot_app-0.0.73 → ws_bom_robot_app-0.0.74}/ws_bom_robot_app/llm/settings.py +0 -0
- {ws_bom_robot_app-0.0.73 → ws_bom_robot_app-0.0.74}/ws_bom_robot_app/llm/tools/__init__.py +0 -0
- {ws_bom_robot_app-0.0.73 → ws_bom_robot_app-0.0.74}/ws_bom_robot_app/llm/tools/models/__init__.py +0 -0
- {ws_bom_robot_app-0.0.73 → ws_bom_robot_app-0.0.74}/ws_bom_robot_app/llm/tools/models/main.py +0 -0
- {ws_bom_robot_app-0.0.73 → ws_bom_robot_app-0.0.74}/ws_bom_robot_app/llm/tools/tool_builder.py +0 -0
- {ws_bom_robot_app-0.0.73 → ws_bom_robot_app-0.0.74}/ws_bom_robot_app/llm/tools/tool_manager.py +0 -0
- {ws_bom_robot_app-0.0.73 → ws_bom_robot_app-0.0.74}/ws_bom_robot_app/llm/tools/utils.py +0 -0
- {ws_bom_robot_app-0.0.73 → ws_bom_robot_app-0.0.74}/ws_bom_robot_app/llm/utils/__init__.py +0 -0
- {ws_bom_robot_app-0.0.73 → ws_bom_robot_app-0.0.74}/ws_bom_robot_app/llm/utils/agent.py +0 -0
- {ws_bom_robot_app-0.0.73 → ws_bom_robot_app-0.0.74}/ws_bom_robot_app/llm/utils/chunker.py +0 -0
- {ws_bom_robot_app-0.0.73 → ws_bom_robot_app-0.0.74}/ws_bom_robot_app/llm/utils/cms.py +0 -0
- {ws_bom_robot_app-0.0.73 → ws_bom_robot_app-0.0.74}/ws_bom_robot_app/llm/utils/download.py +0 -0
- {ws_bom_robot_app-0.0.73 → ws_bom_robot_app-0.0.74}/ws_bom_robot_app/llm/utils/kb.py +0 -0
- {ws_bom_robot_app-0.0.73 → ws_bom_robot_app-0.0.74}/ws_bom_robot_app/llm/utils/print.py +0 -0
- {ws_bom_robot_app-0.0.73 → ws_bom_robot_app-0.0.74}/ws_bom_robot_app/llm/utils/secrets.py +0 -0
- {ws_bom_robot_app-0.0.73 → ws_bom_robot_app-0.0.74}/ws_bom_robot_app/llm/utils/webhooks.py +0 -0
- {ws_bom_robot_app-0.0.73 → ws_bom_robot_app-0.0.74}/ws_bom_robot_app/llm/vector_store/__init__.py +0 -0
- {ws_bom_robot_app-0.0.73 → ws_bom_robot_app-0.0.74}/ws_bom_robot_app/llm/vector_store/db/__init__.py +0 -0
- {ws_bom_robot_app-0.0.73 → ws_bom_robot_app-0.0.74}/ws_bom_robot_app/llm/vector_store/db/manager.py +0 -0
- {ws_bom_robot_app-0.0.73 → ws_bom_robot_app-0.0.74}/ws_bom_robot_app/llm/vector_store/db/qdrant.py +0 -0
- {ws_bom_robot_app-0.0.73 → ws_bom_robot_app-0.0.74}/ws_bom_robot_app/llm/vector_store/generator.py +0 -0
- {ws_bom_robot_app-0.0.73 → ws_bom_robot_app-0.0.74}/ws_bom_robot_app/llm/vector_store/integration/__init__.py +0 -0
- {ws_bom_robot_app-0.0.73 → ws_bom_robot_app-0.0.74}/ws_bom_robot_app/llm/vector_store/integration/azure.py +0 -0
- {ws_bom_robot_app-0.0.73 → ws_bom_robot_app-0.0.74}/ws_bom_robot_app/llm/vector_store/integration/base.py +0 -0
- {ws_bom_robot_app-0.0.73 → ws_bom_robot_app-0.0.74}/ws_bom_robot_app/llm/vector_store/integration/confluence.py +0 -0
- {ws_bom_robot_app-0.0.73 → ws_bom_robot_app-0.0.74}/ws_bom_robot_app/llm/vector_store/integration/dropbox.py +0 -0
- {ws_bom_robot_app-0.0.73 → ws_bom_robot_app-0.0.74}/ws_bom_robot_app/llm/vector_store/integration/gcs.py +0 -0
- {ws_bom_robot_app-0.0.73 → ws_bom_robot_app-0.0.74}/ws_bom_robot_app/llm/vector_store/integration/github.py +0 -0
- {ws_bom_robot_app-0.0.73 → ws_bom_robot_app-0.0.74}/ws_bom_robot_app/llm/vector_store/integration/googledrive.py +0 -0
- {ws_bom_robot_app-0.0.73 → ws_bom_robot_app-0.0.74}/ws_bom_robot_app/llm/vector_store/integration/jira.py +0 -0
- {ws_bom_robot_app-0.0.73 → ws_bom_robot_app-0.0.74}/ws_bom_robot_app/llm/vector_store/integration/manager.py +0 -0
- {ws_bom_robot_app-0.0.73 → ws_bom_robot_app-0.0.74}/ws_bom_robot_app/llm/vector_store/integration/s3.py +0 -0
- {ws_bom_robot_app-0.0.73 → ws_bom_robot_app-0.0.74}/ws_bom_robot_app/llm/vector_store/integration/sftp.py +0 -0
- {ws_bom_robot_app-0.0.73 → ws_bom_robot_app-0.0.74}/ws_bom_robot_app/llm/vector_store/integration/sharepoint.py +0 -0
- {ws_bom_robot_app-0.0.73 → ws_bom_robot_app-0.0.74}/ws_bom_robot_app/llm/vector_store/integration/sitemap.py +0 -0
- {ws_bom_robot_app-0.0.73 → ws_bom_robot_app-0.0.74}/ws_bom_robot_app/llm/vector_store/integration/slack.py +0 -0
- {ws_bom_robot_app-0.0.73 → ws_bom_robot_app-0.0.74}/ws_bom_robot_app/llm/vector_store/integration/thron.py +0 -0
- {ws_bom_robot_app-0.0.73 → ws_bom_robot_app-0.0.74}/ws_bom_robot_app/llm/vector_store/loader/__init__.py +0 -0
- {ws_bom_robot_app-0.0.73 → ws_bom_robot_app-0.0.74}/ws_bom_robot_app/llm/vector_store/loader/base.py +0 -0
- {ws_bom_robot_app-0.0.73 → ws_bom_robot_app-0.0.74}/ws_bom_robot_app/llm/vector_store/loader/docling.py +0 -0
- {ws_bom_robot_app-0.0.73 → ws_bom_robot_app-0.0.74}/ws_bom_robot_app/llm/vector_store/loader/json_loader.py +0 -0
- {ws_bom_robot_app-0.0.73 → ws_bom_robot_app-0.0.74}/ws_bom_robot_app/main.py +0 -0
- {ws_bom_robot_app-0.0.73 → ws_bom_robot_app-0.0.74}/ws_bom_robot_app/task_manager.py +0 -0
- {ws_bom_robot_app-0.0.73 → ws_bom_robot_app-0.0.74}/ws_bom_robot_app/util.py +0 -0
- {ws_bom_robot_app-0.0.73 → ws_bom_robot_app-0.0.74}/ws_bom_robot_app.egg-info/SOURCES.txt +0 -0
- {ws_bom_robot_app-0.0.73 → ws_bom_robot_app-0.0.74}/ws_bom_robot_app.egg-info/dependency_links.txt +0 -0
- {ws_bom_robot_app-0.0.73 → ws_bom_robot_app-0.0.74}/ws_bom_robot_app.egg-info/top_level.txt +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: ws_bom_robot_app
|
|
3
|
-
Version: 0.0.73
|
|
3
|
+
Version: 0.0.74
|
|
4
4
|
Summary: A FastAPI application serving ws bom/robot/llm platform ai.
|
|
5
5
|
Home-page: https://github.com/websolutespa/bom
|
|
6
6
|
Author: Websolute Spa
|
|
@@ -19,7 +19,7 @@ Requires-Dist: fastapi[standard]==0.115.14
|
|
|
19
19
|
Requires-Dist: chevron==0.14.0
|
|
20
20
|
Requires-Dist: langchain==0.3.26
|
|
21
21
|
Requires-Dist: langchain-community==0.3.26
|
|
22
|
-
Requires-Dist: langchain-core==0.3.
|
|
22
|
+
Requires-Dist: langchain-core==0.3.72
|
|
23
23
|
Requires-Dist: langchain-openai==0.3.27
|
|
24
24
|
Requires-Dist: langchain-anthropic==0.3.6
|
|
25
25
|
Requires-Dist: langchain-ibm==0.3.14
|
|
@@ -28,8 +28,8 @@ Requires-Dist: langchain-google-vertexai==2.0.27
|
|
|
28
28
|
Requires-Dist: langchain-groq==0.3.5
|
|
29
29
|
Requires-Dist: langchain-ollama==0.3.3
|
|
30
30
|
Requires-Dist: faiss-cpu==1.11.0
|
|
31
|
-
Requires-Dist: chromadb==1.0.
|
|
32
|
-
Requires-Dist: langchain_chroma==0.2.
|
|
31
|
+
Requires-Dist: chromadb==1.0.15
|
|
32
|
+
Requires-Dist: langchain_chroma==0.2.5
|
|
33
33
|
Requires-Dist: fastembed==0.7.1
|
|
34
34
|
Requires-Dist: langchain-qdrant==0.2.0
|
|
35
35
|
Requires-Dist: qdrant-client==1.15.0
|
|
@@ -10,7 +10,7 @@ chevron==0.14.0
|
|
|
10
10
|
#framework
|
|
11
11
|
langchain==0.3.26
|
|
12
12
|
langchain-community==0.3.26
|
|
13
|
-
langchain-core==0.3.
|
|
13
|
+
langchain-core==0.3.72
|
|
14
14
|
langchain-openai==0.3.27
|
|
15
15
|
langchain-anthropic==0.3.6 #issue get_models() from 0.3.7
|
|
16
16
|
langchain-ibm==0.3.14
|
|
@@ -21,8 +21,8 @@ langchain-ollama==0.3.3
|
|
|
21
21
|
|
|
22
22
|
#vector DB
|
|
23
23
|
faiss-cpu==1.11.0
|
|
24
|
-
chromadb==1.0.
|
|
25
|
-
langchain_chroma==0.2.
|
|
24
|
+
chromadb==1.0.15
|
|
25
|
+
langchain_chroma==0.2.5
|
|
26
26
|
fastembed==0.7.1 #qdrant sparse embedding
|
|
27
27
|
langchain-qdrant==0.2.0
|
|
28
28
|
qdrant-client==1.15.0
|
|
@@ -4,7 +4,7 @@ _requirements = [line.split('#')[0].strip() for line in open("requirements.txt")
|
|
|
4
4
|
|
|
5
5
|
setup(
|
|
6
6
|
name="ws_bom_robot_app",
|
|
7
|
-
version="0.0.73",
|
|
7
|
+
version="0.0.74",
|
|
8
8
|
description="A FastAPI application serving ws bom/robot/llm platform ai.",
|
|
9
9
|
long_description=open("README.md", encoding='utf-8').read(),
|
|
10
10
|
long_description_content_type="text/markdown",
|
{ws_bom_robot_app-0.0.73 → ws_bom_robot_app-0.0.74}/ws_bom_robot_app/llm/vector_store/db/base.py
RENAMED
|
@@ -7,6 +7,7 @@ from langchain_core.language_models import BaseChatModel
|
|
|
7
7
|
from langchain_core.vectorstores.base import VectorStoreRetriever, VectorStore
|
|
8
8
|
from langchain.retrievers import SelfQueryRetriever
|
|
9
9
|
from langchain.chains.query_constructor.schema import AttributeInfo
|
|
10
|
+
import tiktoken
|
|
10
11
|
|
|
11
12
|
class VectorDBStrategy(ABC):
|
|
12
13
|
class VectorDBStrategy:
|
|
@@ -49,6 +50,52 @@ class VectorDBStrategy(ABC):
|
|
|
49
50
|
Asynchronously invokes multiple retrievers in parallel, then merges
|
|
50
51
|
their results while removing duplicates.
|
|
51
52
|
"""
|
|
53
|
+
def __init__(self):
|
|
54
|
+
self.max_tokens_per_batch = 300_000 * 0.8 # conservative limit below 300k openai limit: https://platform.openai.com/docs/api-reference/embeddings/create
|
|
55
|
+
try:
|
|
56
|
+
self.encoding = tiktoken.get_encoding("cl100k_base") # text-embedding-3-small, text-embedding-3-large: https://cookbook.openai.com/examples/how_to_count_tokens_with_tiktoken
|
|
57
|
+
except Exception:
|
|
58
|
+
self.encoding = None
|
|
59
|
+
|
|
60
|
+
def _count_tokens(self, text: str) -> int:
|
|
61
|
+
"""Count tokens in text using tiktoken or fallback estimation"""
|
|
62
|
+
if self.encoding:
|
|
63
|
+
try:
|
|
64
|
+
return len(self.encoding.encode(text))
|
|
65
|
+
except Exception:
|
|
66
|
+
pass
|
|
67
|
+
# fallback: rough estimation (1 token ≈ 4 characters)
|
|
68
|
+
return len(text) // 4
|
|
69
|
+
|
|
70
|
+
def _batch_documents_by_tokens(self, documents: list[Document]) -> list[list[Document]]:
|
|
71
|
+
"""Split documents into batches based on token count"""
|
|
72
|
+
if not documents:
|
|
73
|
+
return []
|
|
74
|
+
batches = []
|
|
75
|
+
current_batch = []
|
|
76
|
+
current_token_count = 0
|
|
77
|
+
|
|
78
|
+
for doc in documents:
|
|
79
|
+
doc_tokens = self._count_tokens(doc.page_content)
|
|
80
|
+
# check if adding this document exceeds the limit
|
|
81
|
+
if current_token_count + doc_tokens > self.max_tokens_per_batch:
|
|
82
|
+
# start new batch if current batch is not empty
|
|
83
|
+
if current_batch:
|
|
84
|
+
batches.append(current_batch)
|
|
85
|
+
# reset current batch
|
|
86
|
+
current_batch = [doc]
|
|
87
|
+
current_token_count = doc_tokens # reset to current doc's tokens
|
|
88
|
+
else:
|
|
89
|
+
# add to current batch
|
|
90
|
+
current_batch.append(doc)
|
|
91
|
+
current_token_count += doc_tokens
|
|
92
|
+
|
|
93
|
+
# add final batch if not empty
|
|
94
|
+
if current_batch:
|
|
95
|
+
batches.append(current_batch)
|
|
96
|
+
|
|
97
|
+
return batches
|
|
98
|
+
|
|
52
99
|
_CACHE: dict[str, VectorStore] = {}
|
|
53
100
|
def _clear_cache(self, key: str):
|
|
54
101
|
if key in self._CACHE:
|
{ws_bom_robot_app-0.0.73 → ws_bom_robot_app-0.0.74}/ws_bom_robot_app/llm/vector_store/db/chroma.py
RENAMED
|
@@ -38,6 +38,9 @@ class Chroma(VectorDBStrategy):
|
|
|
38
38
|
Returns:
|
|
39
39
|
CHROMA: The retrieved or newly created Chroma instance.
|
|
40
40
|
"""
|
|
41
|
+
def __init__(self):
|
|
42
|
+
super().__init__()
|
|
43
|
+
|
|
41
44
|
async def create(
|
|
42
45
|
self,
|
|
43
46
|
embeddings: Embeddings,
|
|
@@ -47,19 +50,35 @@ class Chroma(VectorDBStrategy):
|
|
|
47
50
|
) -> Optional[str]:
|
|
48
51
|
try:
|
|
49
52
|
chunked_docs = DocumentChunker.chunk(documents)
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
53
|
+
batches = self._batch_documents_by_tokens(chunked_docs)
|
|
54
|
+
logging.info(f"documents: {len(documents)}, after chunking: {len(chunked_docs)}, processing batches: {len(batches)}")
|
|
55
|
+
_instance: CHROMA = None
|
|
56
|
+
for i, batch in enumerate(batches):
|
|
57
|
+
batch_tokens = sum(self._count_tokens(doc.page_content) for doc in batch)
|
|
58
|
+
logging.info(f"processing batch {i+1}/{len(batches)} with {len(batch)} docs ({batch_tokens:,} tokens)")
|
|
59
|
+
# create instance from first batch
|
|
60
|
+
if _instance is None:
|
|
61
|
+
_instance = await asyncio.to_thread(
|
|
62
|
+
CHROMA.from_documents,
|
|
63
|
+
documents=batch,
|
|
64
|
+
embedding=embeddings,
|
|
65
|
+
persist_directory=storage_id
|
|
66
|
+
)
|
|
67
|
+
else:
|
|
68
|
+
# merge to existing instance
|
|
69
|
+
await _instance.aadd_documents(batch)
|
|
70
|
+
# add a small delay to avoid rate limiting
|
|
71
|
+
if i < len(batches) - 1: # except last batch
|
|
72
|
+
await asyncio.sleep(1)
|
|
73
|
+
if _instance:
|
|
74
|
+
self._clear_cache(storage_id)
|
|
75
|
+
logging.info(f"Successfully created {Chroma.__name__} index with {len(chunked_docs)} total documents")
|
|
57
76
|
return storage_id
|
|
58
77
|
except Exception as e:
|
|
59
78
|
logging.error(f"{Chroma.__name__} create error: {e}")
|
|
60
79
|
raise e
|
|
61
80
|
finally:
|
|
62
|
-
del documents
|
|
81
|
+
del documents, chunked_docs, _instance
|
|
63
82
|
gc.collect()
|
|
64
83
|
|
|
65
84
|
def get_loader(
|
{ws_bom_robot_app-0.0.73 → ws_bom_robot_app-0.0.74}/ws_bom_robot_app/llm/vector_store/db/faiss.py
RENAMED
|
@@ -22,6 +22,9 @@ class Faiss(VectorDBStrategy):
|
|
|
22
22
|
was previously loaded and cached, it returns the cached instance; otherwise,
|
|
23
23
|
it loads the index from local storage and caches it for subsequent use.
|
|
24
24
|
"""
|
|
25
|
+
def __init__(self):
|
|
26
|
+
super().__init__()
|
|
27
|
+
|
|
25
28
|
async def create(
|
|
26
29
|
self,
|
|
27
30
|
embeddings: Embeddings,
|
|
@@ -31,19 +34,42 @@ class Faiss(VectorDBStrategy):
|
|
|
31
34
|
) -> Optional[str]:
|
|
32
35
|
try:
|
|
33
36
|
chunked_docs = DocumentChunker.chunk(documents)
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
37
|
+
batches = self._batch_documents_by_tokens(chunked_docs)
|
|
38
|
+
logging.info(f"documents: {len(documents)}, after chunking: {len(chunked_docs)}, processing batches: {len(batches)}")
|
|
39
|
+
_instance: FAISS = None
|
|
40
|
+
for i, batch in enumerate(batches):
|
|
41
|
+
batch_tokens = sum(self._count_tokens(doc.page_content) for doc in batch)
|
|
42
|
+
logging.info(f"processing batch {i+1}/{len(batches)} with {len(batch)} docs ({batch_tokens:,} tokens)")
|
|
43
|
+
# init
|
|
44
|
+
_batch_instance = await asyncio.to_thread(
|
|
45
|
+
FAISS.from_documents,
|
|
46
|
+
batch,
|
|
47
|
+
embeddings
|
|
48
|
+
)
|
|
49
|
+
# create instance from first batch
|
|
50
|
+
if _instance is None:
|
|
51
|
+
_instance = _batch_instance
|
|
52
|
+
else:
|
|
53
|
+
# merge to existing instance
|
|
54
|
+
await asyncio.to_thread(
|
|
55
|
+
_instance.merge_from,
|
|
56
|
+
_batch_instance
|
|
57
|
+
)
|
|
58
|
+
del _batch_instance
|
|
59
|
+
gc.collect()
|
|
60
|
+
# add a small delay to avoid rate limiting
|
|
61
|
+
if i < len(batches) - 1: # except last batch
|
|
62
|
+
await asyncio.sleep(1)
|
|
63
|
+
if _instance:
|
|
64
|
+
await asyncio.to_thread(_instance.save_local, storage_id)
|
|
65
|
+
self._clear_cache(storage_id)
|
|
66
|
+
logging.info(f"Successfully created {Faiss.__name__} index with {len(chunked_docs)} total documents")
|
|
41
67
|
return storage_id
|
|
42
68
|
except Exception as e:
|
|
43
69
|
logging.error(f"{Faiss.__name__} create error: {e}")
|
|
44
70
|
raise e
|
|
45
71
|
finally:
|
|
46
|
-
del documents, _instance
|
|
72
|
+
del documents, chunked_docs, _instance
|
|
47
73
|
gc.collect()
|
|
48
74
|
|
|
49
75
|
def get_loader(
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: ws_bom_robot_app
|
|
3
|
-
Version: 0.0.73
|
|
3
|
+
Version: 0.0.74
|
|
4
4
|
Summary: A FastAPI application serving ws bom/robot/llm platform ai.
|
|
5
5
|
Home-page: https://github.com/websolutespa/bom
|
|
6
6
|
Author: Websolute Spa
|
|
@@ -19,7 +19,7 @@ Requires-Dist: fastapi[standard]==0.115.14
|
|
|
19
19
|
Requires-Dist: chevron==0.14.0
|
|
20
20
|
Requires-Dist: langchain==0.3.26
|
|
21
21
|
Requires-Dist: langchain-community==0.3.26
|
|
22
|
-
Requires-Dist: langchain-core==0.3.
|
|
22
|
+
Requires-Dist: langchain-core==0.3.72
|
|
23
23
|
Requires-Dist: langchain-openai==0.3.27
|
|
24
24
|
Requires-Dist: langchain-anthropic==0.3.6
|
|
25
25
|
Requires-Dist: langchain-ibm==0.3.14
|
|
@@ -28,8 +28,8 @@ Requires-Dist: langchain-google-vertexai==2.0.27
|
|
|
28
28
|
Requires-Dist: langchain-groq==0.3.5
|
|
29
29
|
Requires-Dist: langchain-ollama==0.3.3
|
|
30
30
|
Requires-Dist: faiss-cpu==1.11.0
|
|
31
|
-
Requires-Dist: chromadb==1.0.
|
|
32
|
-
Requires-Dist: langchain_chroma==0.2.
|
|
31
|
+
Requires-Dist: chromadb==1.0.15
|
|
32
|
+
Requires-Dist: langchain_chroma==0.2.5
|
|
33
33
|
Requires-Dist: fastembed==0.7.1
|
|
34
34
|
Requires-Dist: langchain-qdrant==0.2.0
|
|
35
35
|
Requires-Dist: qdrant-client==1.15.0
|
|
@@ -7,7 +7,7 @@ fastapi[standard]==0.115.14
|
|
|
7
7
|
chevron==0.14.0
|
|
8
8
|
langchain==0.3.26
|
|
9
9
|
langchain-community==0.3.26
|
|
10
|
-
langchain-core==0.3.
|
|
10
|
+
langchain-core==0.3.72
|
|
11
11
|
langchain-openai==0.3.27
|
|
12
12
|
langchain-anthropic==0.3.6
|
|
13
13
|
langchain-ibm==0.3.14
|
|
@@ -16,8 +16,8 @@ langchain-google-vertexai==2.0.27
|
|
|
16
16
|
langchain-groq==0.3.5
|
|
17
17
|
langchain-ollama==0.3.3
|
|
18
18
|
faiss-cpu==1.11.0
|
|
19
|
-
chromadb==1.0.
|
|
20
|
-
langchain_chroma==0.2.
|
|
19
|
+
chromadb==1.0.15
|
|
20
|
+
langchain_chroma==0.2.5
|
|
21
21
|
fastembed==0.7.1
|
|
22
22
|
langchain-qdrant==0.2.0
|
|
23
23
|
qdrant-client==1.15.0
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{ws_bom_robot_app-0.0.73 → ws_bom_robot_app-0.0.74}/ws_bom_robot_app/llm/agent_description.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{ws_bom_robot_app-0.0.73 → ws_bom_robot_app-0.0.74}/ws_bom_robot_app/llm/feedbacks/__init__.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{ws_bom_robot_app-0.0.73 → ws_bom_robot_app-0.0.74}/ws_bom_robot_app/llm/providers/__init__.py
RENAMED
|
File without changes
|
{ws_bom_robot_app-0.0.73 → ws_bom_robot_app-0.0.74}/ws_bom_robot_app/llm/providers/llm_manager.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
{ws_bom_robot_app-0.0.73 → ws_bom_robot_app-0.0.74}/ws_bom_robot_app/llm/tools/models/__init__.py
RENAMED
|
File without changes
|
{ws_bom_robot_app-0.0.73 → ws_bom_robot_app-0.0.74}/ws_bom_robot_app/llm/tools/models/main.py
RENAMED
|
File without changes
|
{ws_bom_robot_app-0.0.73 → ws_bom_robot_app-0.0.74}/ws_bom_robot_app/llm/tools/tool_builder.py
RENAMED
|
File without changes
|
{ws_bom_robot_app-0.0.73 → ws_bom_robot_app-0.0.74}/ws_bom_robot_app/llm/tools/tool_manager.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{ws_bom_robot_app-0.0.73 → ws_bom_robot_app-0.0.74}/ws_bom_robot_app/llm/vector_store/__init__.py
RENAMED
|
File without changes
|
{ws_bom_robot_app-0.0.73 → ws_bom_robot_app-0.0.74}/ws_bom_robot_app/llm/vector_store/db/__init__.py
RENAMED
|
File without changes
|
{ws_bom_robot_app-0.0.73 → ws_bom_robot_app-0.0.74}/ws_bom_robot_app/llm/vector_store/db/manager.py
RENAMED
|
File without changes
|
{ws_bom_robot_app-0.0.73 → ws_bom_robot_app-0.0.74}/ws_bom_robot_app/llm/vector_store/db/qdrant.py
RENAMED
|
File without changes
|
{ws_bom_robot_app-0.0.73 → ws_bom_robot_app-0.0.74}/ws_bom_robot_app/llm/vector_store/generator.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{ws_bom_robot_app-0.0.73 → ws_bom_robot_app-0.0.74}/ws_bom_robot_app/llm/vector_store/loader/base.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{ws_bom_robot_app-0.0.73 → ws_bom_robot_app-0.0.74}/ws_bom_robot_app.egg-info/dependency_links.txt
RENAMED
|
File without changes
|
|
File without changes
|