ws-bom-robot-app 0.0.39__tar.gz → 0.0.41__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (75) hide show
  1. {ws_bom_robot_app-0.0.39/ws_bom_robot_app.egg-info → ws_bom_robot_app-0.0.41}/PKG-INFO +1 -1
  2. {ws_bom_robot_app-0.0.39 → ws_bom_robot_app-0.0.41}/setup.py +1 -1
  3. ws_bom_robot_app-0.0.41/ws_bom_robot_app/llm/vector_store/db/base.py +143 -0
  4. ws_bom_robot_app-0.0.41/ws_bom_robot_app/llm/vector_store/db/chroma.py +77 -0
  5. ws_bom_robot_app-0.0.41/ws_bom_robot_app/llm/vector_store/db/faiss.py +64 -0
  6. ws_bom_robot_app-0.0.41/ws_bom_robot_app/llm/vector_store/db/manager.py +15 -0
  7. ws_bom_robot_app-0.0.41/ws_bom_robot_app/llm/vector_store/db/qdrant.py +58 -0
  8. ws_bom_robot_app-0.0.41/ws_bom_robot_app/llm/vector_store/integration/jira.py +118 -0
  9. ws_bom_robot_app-0.0.41/ws_bom_robot_app/llm/vector_store/loader/__init__.py +0 -0
  10. {ws_bom_robot_app-0.0.39 → ws_bom_robot_app-0.0.41/ws_bom_robot_app.egg-info}/PKG-INFO +1 -1
  11. {ws_bom_robot_app-0.0.39 → ws_bom_robot_app-0.0.41}/ws_bom_robot_app.egg-info/SOURCES.txt +6 -0
  12. ws_bom_robot_app-0.0.39/ws_bom_robot_app/llm/vector_store/integration/jira.py +0 -114
  13. {ws_bom_robot_app-0.0.39 → ws_bom_robot_app-0.0.41}/MANIFEST.in +0 -0
  14. {ws_bom_robot_app-0.0.39 → ws_bom_robot_app-0.0.41}/README.md +0 -0
  15. {ws_bom_robot_app-0.0.39 → ws_bom_robot_app-0.0.41}/pyproject.toml +0 -0
  16. {ws_bom_robot_app-0.0.39 → ws_bom_robot_app-0.0.41}/requirements.txt +0 -0
  17. {ws_bom_robot_app-0.0.39 → ws_bom_robot_app-0.0.41}/setup.cfg +0 -0
  18. {ws_bom_robot_app-0.0.39 → ws_bom_robot_app-0.0.41}/ws_bom_robot_app/__init__.py +0 -0
  19. {ws_bom_robot_app-0.0.39 → ws_bom_robot_app-0.0.41}/ws_bom_robot_app/auth.py +0 -0
  20. {ws_bom_robot_app-0.0.39 → ws_bom_robot_app-0.0.41}/ws_bom_robot_app/config.py +0 -0
  21. {ws_bom_robot_app-0.0.39 → ws_bom_robot_app-0.0.41}/ws_bom_robot_app/cron_manager.py +0 -0
  22. {ws_bom_robot_app-0.0.39 → ws_bom_robot_app-0.0.41}/ws_bom_robot_app/llm/__init__.py +0 -0
  23. {ws_bom_robot_app-0.0.39 → ws_bom_robot_app-0.0.41}/ws_bom_robot_app/llm/agent_description.py +0 -0
  24. {ws_bom_robot_app-0.0.39 → ws_bom_robot_app-0.0.41}/ws_bom_robot_app/llm/agent_handler.py +0 -0
  25. {ws_bom_robot_app-0.0.39 → ws_bom_robot_app-0.0.41}/ws_bom_robot_app/llm/agent_lcel.py +0 -0
  26. {ws_bom_robot_app-0.0.39 → ws_bom_robot_app-0.0.41}/ws_bom_robot_app/llm/api.py +0 -0
  27. {ws_bom_robot_app-0.0.39 → ws_bom_robot_app-0.0.41}/ws_bom_robot_app/llm/defaut_prompt.py +0 -0
  28. {ws_bom_robot_app-0.0.39 → ws_bom_robot_app-0.0.41}/ws_bom_robot_app/llm/main.py +0 -0
  29. {ws_bom_robot_app-0.0.39 → ws_bom_robot_app-0.0.41}/ws_bom_robot_app/llm/models/__init__.py +0 -0
  30. {ws_bom_robot_app-0.0.39 → ws_bom_robot_app-0.0.41}/ws_bom_robot_app/llm/models/api.py +0 -0
  31. {ws_bom_robot_app-0.0.39 → ws_bom_robot_app-0.0.41}/ws_bom_robot_app/llm/models/base.py +0 -0
  32. {ws_bom_robot_app-0.0.39 → ws_bom_robot_app-0.0.41}/ws_bom_robot_app/llm/models/kb.py +0 -0
  33. {ws_bom_robot_app-0.0.39 → ws_bom_robot_app-0.0.41}/ws_bom_robot_app/llm/providers/__init__.py +0 -0
  34. {ws_bom_robot_app-0.0.39 → ws_bom_robot_app-0.0.41}/ws_bom_robot_app/llm/providers/llm_manager.py +0 -0
  35. {ws_bom_robot_app-0.0.39 → ws_bom_robot_app-0.0.41}/ws_bom_robot_app/llm/settings.py +0 -0
  36. {ws_bom_robot_app-0.0.39 → ws_bom_robot_app-0.0.41}/ws_bom_robot_app/llm/tools/__init__.py +0 -0
  37. {ws_bom_robot_app-0.0.39 → ws_bom_robot_app-0.0.41}/ws_bom_robot_app/llm/tools/models/__init__.py +0 -0
  38. {ws_bom_robot_app-0.0.39 → ws_bom_robot_app-0.0.41}/ws_bom_robot_app/llm/tools/models/main.py +0 -0
  39. {ws_bom_robot_app-0.0.39 → ws_bom_robot_app-0.0.41}/ws_bom_robot_app/llm/tools/tool_builder.py +0 -0
  40. {ws_bom_robot_app-0.0.39 → ws_bom_robot_app-0.0.41}/ws_bom_robot_app/llm/tools/tool_manager.py +0 -0
  41. {ws_bom_robot_app-0.0.39 → ws_bom_robot_app-0.0.41}/ws_bom_robot_app/llm/tools/utils.py +0 -0
  42. {ws_bom_robot_app-0.0.39 → ws_bom_robot_app-0.0.41}/ws_bom_robot_app/llm/utils/__init__.py +0 -0
  43. {ws_bom_robot_app-0.0.39 → ws_bom_robot_app-0.0.41}/ws_bom_robot_app/llm/utils/agent.py +0 -0
  44. {ws_bom_robot_app-0.0.39 → ws_bom_robot_app-0.0.41}/ws_bom_robot_app/llm/utils/chunker.py +0 -0
  45. {ws_bom_robot_app-0.0.39 → ws_bom_robot_app-0.0.41}/ws_bom_robot_app/llm/utils/download.py +0 -0
  46. {ws_bom_robot_app-0.0.39 → ws_bom_robot_app-0.0.41}/ws_bom_robot_app/llm/utils/kb.py +0 -0
  47. {ws_bom_robot_app-0.0.39 → ws_bom_robot_app-0.0.41}/ws_bom_robot_app/llm/utils/print.py +0 -0
  48. {ws_bom_robot_app-0.0.39 → ws_bom_robot_app-0.0.41}/ws_bom_robot_app/llm/utils/secrets.py +0 -0
  49. {ws_bom_robot_app-0.0.39 → ws_bom_robot_app-0.0.41}/ws_bom_robot_app/llm/utils/webhooks.py +0 -0
  50. {ws_bom_robot_app-0.0.39 → ws_bom_robot_app-0.0.41}/ws_bom_robot_app/llm/vector_store/__init__.py +0 -0
  51. {ws_bom_robot_app-0.0.39/ws_bom_robot_app/llm/vector_store/integration → ws_bom_robot_app-0.0.41/ws_bom_robot_app/llm/vector_store/db}/__init__.py +0 -0
  52. {ws_bom_robot_app-0.0.39 → ws_bom_robot_app-0.0.41}/ws_bom_robot_app/llm/vector_store/generator.py +0 -0
  53. {ws_bom_robot_app-0.0.39/ws_bom_robot_app/llm/vector_store/loader → ws_bom_robot_app-0.0.41/ws_bom_robot_app/llm/vector_store/integration}/__init__.py +0 -0
  54. {ws_bom_robot_app-0.0.39 → ws_bom_robot_app-0.0.41}/ws_bom_robot_app/llm/vector_store/integration/azure.py +0 -0
  55. {ws_bom_robot_app-0.0.39 → ws_bom_robot_app-0.0.41}/ws_bom_robot_app/llm/vector_store/integration/base.py +0 -0
  56. {ws_bom_robot_app-0.0.39 → ws_bom_robot_app-0.0.41}/ws_bom_robot_app/llm/vector_store/integration/confluence.py +0 -0
  57. {ws_bom_robot_app-0.0.39 → ws_bom_robot_app-0.0.41}/ws_bom_robot_app/llm/vector_store/integration/dropbox.py +0 -0
  58. {ws_bom_robot_app-0.0.39 → ws_bom_robot_app-0.0.41}/ws_bom_robot_app/llm/vector_store/integration/gcs.py +0 -0
  59. {ws_bom_robot_app-0.0.39 → ws_bom_robot_app-0.0.41}/ws_bom_robot_app/llm/vector_store/integration/github.py +0 -0
  60. {ws_bom_robot_app-0.0.39 → ws_bom_robot_app-0.0.41}/ws_bom_robot_app/llm/vector_store/integration/googledrive.py +0 -0
  61. {ws_bom_robot_app-0.0.39 → ws_bom_robot_app-0.0.41}/ws_bom_robot_app/llm/vector_store/integration/manager.py +0 -0
  62. {ws_bom_robot_app-0.0.39 → ws_bom_robot_app-0.0.41}/ws_bom_robot_app/llm/vector_store/integration/s3.py +0 -0
  63. {ws_bom_robot_app-0.0.39 → ws_bom_robot_app-0.0.41}/ws_bom_robot_app/llm/vector_store/integration/sftp.py +0 -0
  64. {ws_bom_robot_app-0.0.39 → ws_bom_robot_app-0.0.41}/ws_bom_robot_app/llm/vector_store/integration/sharepoint.py +0 -0
  65. {ws_bom_robot_app-0.0.39 → ws_bom_robot_app-0.0.41}/ws_bom_robot_app/llm/vector_store/integration/sitemap.py +0 -0
  66. {ws_bom_robot_app-0.0.39 → ws_bom_robot_app-0.0.41}/ws_bom_robot_app/llm/vector_store/integration/slack.py +0 -0
  67. {ws_bom_robot_app-0.0.39 → ws_bom_robot_app-0.0.41}/ws_bom_robot_app/llm/vector_store/loader/base.py +0 -0
  68. {ws_bom_robot_app-0.0.39 → ws_bom_robot_app-0.0.41}/ws_bom_robot_app/llm/vector_store/loader/docling.py +0 -0
  69. {ws_bom_robot_app-0.0.39 → ws_bom_robot_app-0.0.41}/ws_bom_robot_app/llm/vector_store/loader/json_loader.py +0 -0
  70. {ws_bom_robot_app-0.0.39 → ws_bom_robot_app-0.0.41}/ws_bom_robot_app/main.py +0 -0
  71. {ws_bom_robot_app-0.0.39 → ws_bom_robot_app-0.0.41}/ws_bom_robot_app/task_manager.py +0 -0
  72. {ws_bom_robot_app-0.0.39 → ws_bom_robot_app-0.0.41}/ws_bom_robot_app/util.py +0 -0
  73. {ws_bom_robot_app-0.0.39 → ws_bom_robot_app-0.0.41}/ws_bom_robot_app.egg-info/dependency_links.txt +0 -0
  74. {ws_bom_robot_app-0.0.39 → ws_bom_robot_app-0.0.41}/ws_bom_robot_app.egg-info/requires.txt +0 -0
  75. {ws_bom_robot_app-0.0.39 → ws_bom_robot_app-0.0.41}/ws_bom_robot_app.egg-info/top_level.txt +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.2
2
2
  Name: ws_bom_robot_app
3
- Version: 0.0.39
3
+ Version: 0.0.41
4
4
  Summary: A FastAPI application serving ws bom/robot/llm platform ai.
5
5
  Home-page: https://github.com/websolutespa/bom
6
6
  Author: Websolute Spa
@@ -4,7 +4,7 @@ _requirements = [line.split('#')[0].strip() for line in open("requirements.txt")
4
4
 
5
5
  setup(
6
6
  name="ws_bom_robot_app",
7
- version="0.0.39",
7
+ version="0.0.41",
8
8
  description="A FastAPI application serving ws bom/robot/llm platform ai.",
9
9
  long_description=open("README.md", encoding='utf-8').read(),
10
10
  long_description_content_type="text/markdown",
@@ -0,0 +1,143 @@
1
+ from abc import ABC, abstractmethod
2
+ from typing import Any, Optional, List, Dict
3
+ import asyncio
4
+ from langchain_core.documents import Document
5
+ from langchain_core.embeddings import Embeddings
6
+ from langchain_core.language_models import BaseChatModel
7
+ from langchain_core.vectorstores.base import VectorStoreRetriever, VectorStore
8
+ from langchain.retrievers import SelfQueryRetriever
9
+ from langchain.chains.query_constructor.schema import AttributeInfo
10
+
11
class VectorDBStrategy(ABC):
    """
    Strategy interface for vector databases.

    Concrete strategies implement ``create`` and ``get_loader``; this base
    class builds retrievers on top of the loader and implements the search
    entry point ``invoke``.

    Attributes:
        _CACHE (dict[str, VectorStore]):
            Loaded VectorStore instances (e.g. Faiss, Chroma, Qdrant) keyed by
            storage ID. It is a class attribute that is mutated in place, so
            it is shared by every strategy that does not rebind it.

    Methods:
        create(embeddings, documents, storage_id, **kwargs):
            Asynchronously build a vector store for ``documents`` and return
            its storage ID.
        get_loader(embeddings, storage_id, **kwargs):
            Return the vector store associated with ``storage_id``.
        get_retriever(embeddings, storage_id, search_type, search_kwargs, **kwargs):
            Wrap the loaded store in a VectorStoreRetriever.
        supports_self_query():
            Whether ``invoke`` may use a SelfQueryRetriever (default True).
        invoke(embeddings, storage_id, query, search_type, search_kwargs, **kwargs):
            Search documents for ``query`` using self-query, "mixed"
            (similarity + mmr) or a plain retriever.
    """
    _CACHE: dict[str, VectorStore] = {}

    def _clear_cache(self, key: str):
        """Drop the cached store for ``key``; a missing key is ignored."""
        self._CACHE.pop(key, None)

    @abstractmethod
    async def create(
        self,
        embeddings: Embeddings,
        documents: List[Document],
        storage_id: str,
        **kwargs
    ) -> Optional[str]:
        """Build and persist a vector store; return its storage ID."""
        pass

    @abstractmethod
    def get_loader(
        self,
        embeddings: Embeddings,
        storage_id: str,
        **kwargs
    ) -> VectorStore:
        """Return the vector store identified by ``storage_id``."""
        pass

    def get_retriever(
        self,
        embeddings: Embeddings,
        storage_id: str,
        search_type: str,
        search_kwargs: Dict[str, Any],
        **kwargs
    ) -> VectorStoreRetriever:
        """
        Return a retriever over the store loaded for ``storage_id``.

        ``search_type`` and ``search_kwargs`` are handed to ``as_retriever``
        unchanged. Extra ``**kwargs`` are accepted but not forwarded.
        """
        store = self.get_loader(embeddings, storage_id)
        return store.as_retriever(
            search_type=search_type,
            search_kwargs=search_kwargs
        )

    def supports_self_query(self) -> bool:
        """Return True when ``invoke`` may use a self-query retriever."""
        return True

    @staticmethod
    def _get_self_query_retriever(llm: BaseChatModel, store: VectorStore, description: str, metadata: list[AttributeInfo]) -> SelfQueryRetriever:
        """Build a SelfQueryRetriever (limit enabled, verbose) over ``store``."""
        return SelfQueryRetriever.from_llm(
            llm=llm,
            vectorstore=store,
            document_contents=description,
            metadata_field_info=metadata,
            enable_limit=True,
            verbose=True
        )

    async def invoke(
        self,
        embeddings: Embeddings,
        storage_id: str,
        query: str,
        search_type: str,
        search_kwargs: Dict[str, Any],
        **kwargs
    ) -> List[Document]:
        """
        Search the store identified by ``storage_id`` for ``query``.

        Resolution order:
          1. Self-query, when the strategy supports it, both ``app_tool`` and
             ``llm`` are present in ``kwargs`` and the tool yields a non-empty
             description and metadata.
          2. ``search_type == "mixed"``: a similarity retriever and an mmr
             retriever run concurrently and their results are merged without
             duplicates.
          3. Otherwise a single retriever of the requested ``search_type``.
        """
        if self.supports_self_query() and "app_tool" in kwargs and "llm" in kwargs:
            # Imported here rather than at module level.
            # NOTE(review): presumably to avoid a circular import - confirm.
            from ws_bom_robot_app.llm.tools.tool_manager import LlmAppTool
            app_tool: LlmAppTool = kwargs["app_tool"]
            _description, _metadata = app_tool.get_vector_filtering()
            if _description and _metadata:
                llm: BaseChatModel = kwargs["llm"]
                retriever = VectorDBStrategy._get_self_query_retriever(
                    llm,
                    self.get_loader(embeddings, storage_id),
                    _description,
                    _metadata
                )
                return await retriever.ainvoke(query)
        if search_type == "mixed":
            similarity_retriever = self.get_retriever(embeddings, storage_id, "similarity", search_kwargs)
            # Only the keys below are passed to the mmr retriever; defaults
            # apply when the caller did not provide them.
            mmr_kwargs = {
                "k": search_kwargs.get("k", 4),
                "fetch_k": search_kwargs.get("fetch_k", 20),
                "lambda_mult": search_kwargs.get("lambda_mult", 0.2),
            }
            mmr_retriever = self.get_retriever(embeddings, storage_id, "mmr", mmr_kwargs)
            return await VectorDBStrategy._combine_search([similarity_retriever, mmr_retriever], query)
        retriever = self.get_retriever(embeddings, storage_id, search_type, search_kwargs)
        return await retriever.ainvoke(query)

    @staticmethod
    def _remove_duplicates(docs: List[Document]) -> List[Document]:
        """Return ``docs`` keeping the first document for each ``page_content``."""
        seen = set()
        unique: List[Document] = []
        for doc in docs:
            if doc.page_content in seen:
                continue
            seen.add(doc.page_content)
            unique.append(doc)
        return unique

    @staticmethod
    async def _combine_search(
        retrievers: List[VectorStoreRetriever],
        query: str
    ) -> List[Document]:
        """Run every retriever concurrently and merge results without duplicates."""
        results = await asyncio.gather(*(retriever.ainvoke(query) for retriever in retrievers))
        return VectorDBStrategy._remove_duplicates([doc for res in results for doc in res])
@@ -0,0 +1,77 @@
1
+ from langchain_chroma import Chroma as CHROMA
2
+ from langchain_core.documents import Document
3
+ from typing import Any, Optional
4
+ import asyncio, gc, logging
5
+ from langchain_core.embeddings import Embeddings
6
+ from ws_bom_robot_app.llm.utils.chunker import DocumentChunker
7
+ from ws_bom_robot_app.llm.vector_store.db.base import VectorDBStrategy
8
+
9
+
10
class Chroma(VectorDBStrategy):
    """
    Vector database strategy backed by a persisted Chroma store.

    Methods:
        create(embeddings, documents, storage_id, **kwargs):
            Chunk ``documents``, embed them and persist a Chroma store in the
            ``storage_id`` directory. Returns ``storage_id`` on success; on
            failure the error is logged and re-raised.
        get_loader(embeddings, storage_id, **kwargs):
            Return the Chroma instance for ``storage_id``, creating and
            caching it on first use.
    """
    # One collection name for both writing and reading. Previously create()
    # omitted the name (falling back to the library default) while
    # get_loader() opened "default".
    # NOTE(review): the langchain_chroma default is believed to be
    # "langchain", which would make freshly created stores look empty to
    # get_loader() - confirm against the installed version.
    _COLLECTION_NAME = "default"

    async def create(
        self,
        embeddings: Embeddings,
        documents: list[Document],
        storage_id: str,
        **kwargs
    ) -> Optional[str]:
        """
        Build and persist a Chroma store for ``documents``.

        Args:
            embeddings: Embeddings used to embed the chunked documents.
            documents: Documents to chunk and embed.
            storage_id: Directory in which the store is persisted.
            **kwargs: Accepted for interface compatibility; unused.

        Returns:
            ``storage_id`` when the store has been created.

        Raises:
            Exception: Any error raised while chunking or building the store
                is logged and re-raised.
        """
        try:
            chunked_docs = DocumentChunker.chunk(documents)
            # from_documents is synchronous; run it off the event loop.
            await asyncio.to_thread(
                CHROMA.from_documents,
                documents=chunked_docs,
                embedding=embeddings,
                collection_name=self._COLLECTION_NAME,
                persist_directory=storage_id
            )
            # Invalidate any cached instance so the next get_loader() reloads.
            self._clear_cache(storage_id)
            return storage_id
        except Exception as e:
            logging.error(f"{Chroma.__name__} create error: {e}")
            raise
        finally:
            # Drop the local reference to the input before forcing a GC pass.
            del documents
            gc.collect()

    def get_loader(
        self,
        embeddings: Embeddings,
        storage_id: str,
        **kwargs
    ) -> CHROMA:
        """
        Return the cached Chroma instance for ``storage_id``.

        On a cache miss a new instance is opened on the ``storage_id``
        directory with ``embeddings`` as the embedding function and cached.
        """
        if storage_id not in self._CACHE:
            self._CACHE[storage_id] = CHROMA(
                collection_name=self._COLLECTION_NAME,
                embedding_function=embeddings,
                persist_directory=storage_id
            )
        return self._CACHE[storage_id]
@@ -0,0 +1,64 @@
1
+ from langchain_community.vectorstores.faiss import FAISS
2
+ from langchain_core.documents import Document
3
+ from typing import Any, Optional
4
+ import asyncio, gc, logging
5
+ from langchain_core.embeddings import Embeddings
6
+ from ws_bom_robot_app.llm.utils.chunker import DocumentChunker
7
+ from ws_bom_robot_app.llm.vector_store.db.base import VectorDBStrategy
8
+
9
class Faiss(VectorDBStrategy):
    """
    Vector database strategy backed by a FAISS index stored on local disk.

    Methods:
        create(embeddings, documents, storage_id, **kwargs):
            Chunk ``documents``, build a FAISS index with ``embeddings`` and
            save it under ``storage_id``. Returns ``storage_id`` on success;
            on failure the error is logged and re-raised.
        get_loader(embeddings, storage_id, **kwargs):
            Return the FAISS index for ``storage_id``, loading it from local
            storage and caching it on first use.
        supports_self_query():
            Always False for this strategy.
    """

    async def create(
        self,
        embeddings: Embeddings,
        documents: list[Document],
        storage_id: str,
        **kwargs
    ) -> Optional[str]:
        """
        Build a FAISS index for ``documents`` and save it to ``storage_id``.

        Returns:
            ``storage_id`` when the index has been saved.

        Raises:
            Exception: Any error raised while chunking, indexing or saving is
                logged and re-raised.
        """
        # Bound before the try block so the clean-up in ``finally`` cannot
        # fail with UnboundLocalError (and mask the real error) when chunking
        # or index construction raises before the assignment.
        _instance = None
        try:
            chunked_docs = DocumentChunker.chunk(documents)
            # Both calls are synchronous; run them off the event loop.
            _instance = await asyncio.to_thread(
                FAISS.from_documents,
                chunked_docs,
                embeddings
            )
            await asyncio.to_thread(_instance.save_local, storage_id)
            # Invalidate any cached index so the next get_loader() reloads.
            self._clear_cache(storage_id)
            return storage_id
        except Exception as e:
            logging.error(f"{Faiss.__name__} create error: {e}")
            raise
        finally:
            # Release local references before forcing a GC pass.
            del documents, _instance
            gc.collect()

    def get_loader(
        self,
        embeddings: Embeddings,
        storage_id: str,
        **kwargs
    ) -> FAISS:
        """
        Return the cached FAISS index for ``storage_id``, loading it on a miss.
        """
        if storage_id not in self._CACHE:
            # SECURITY: allow_dangerous_deserialization enables pickle-based
            # loading; storage_id must only ever point at index folders
            # produced by this application, never at untrusted data.
            self._CACHE[storage_id] = FAISS.load_local(
                folder_path=storage_id,
                embeddings=embeddings,
                allow_dangerous_deserialization=True
            )
        return self._CACHE[storage_id]

    def supports_self_query(self) -> bool:
        """Self-query retrieval is disabled for this strategy."""
        return False
@@ -0,0 +1,15 @@
1
+ from ws_bom_robot_app.llm.vector_store.db.base import VectorDBStrategy
2
+ from ws_bom_robot_app.llm.vector_store.db.chroma import Chroma
3
+ from ws_bom_robot_app.llm.vector_store.db.faiss import Faiss
4
+ from ws_bom_robot_app.llm.vector_store.db.qdrant import Qdrant
5
+
6
class VectorDbManager:
    """Registry that maps a vector database name to its strategy instance."""

    # Key of the strategy returned when the requested name is not registered.
    _DEFAULT = "faiss"

    _list: dict[str, VectorDBStrategy] = {
        "chroma": Chroma(),
        "faiss": Faiss(),
        "qdrant": Qdrant()
    }

    @classmethod
    def get_strategy(cls, name: str) -> VectorDBStrategy:
        """
        Return the strategy registered under ``name`` (case-insensitive).

        Unknown, empty or ``None`` names fall back to the registered Faiss
        strategy. The registered instance is reused instead of building a new
        ``Faiss()`` on every call.
        """
        key = (name or "").lower()
        strategy = cls._list.get(key)
        if strategy is None:
            strategy = cls._list[cls._DEFAULT]
        return strategy
@@ -0,0 +1,58 @@
1
+ from langchain_qdrant import QdrantVectorStore as QDRANT, FastEmbedSparse, RetrievalMode
2
+ from qdrant_client import QdrantClient
3
+ from langchain_core.documents import Document
4
+ from typing import Any, Optional
5
+ import asyncio, gc, logging, os
6
+ from langchain_core.embeddings import Embeddings
7
+ from ws_bom_robot_app.llm.utils.chunker import DocumentChunker
8
+ from ws_bom_robot_app.llm.vector_store.db.base import VectorDBStrategy
9
+
10
+
11
class Qdrant(VectorDBStrategy):
    """
    Vector database strategy backed by a local (path-based) Qdrant store
    configured for hybrid retrieval.

    Methods:
        create(embeddings, documents, storage_id, **kwargs):
            Chunk ``documents`` and build the "default" collection under the
            ``storage_id`` directory. Returns ``storage_id`` on success; on
            failure the error is logged and re-raised.
        get_loader(embeddings, storage_id, **kwargs):
            Return the Qdrant store for ``storage_id``, opening and caching it
            on first use.

    Both methods accept an optional ``sparse_embedding`` keyword argument;
    when it is absent a new ``FastEmbedSparse()`` is used.
    """
    _COLLECTION_NAME = "default"

    @staticmethod
    def _sparse_embedding(options: dict) -> Any:
        """Return the caller-supplied sparse embedding or a FastEmbedSparse."""
        if 'sparse_embedding' in options:
            return options['sparse_embedding']
        return FastEmbedSparse()

    async def create(
        self,
        embeddings: Embeddings,
        documents: list[Document],
        storage_id: str,
        **kwargs
    ) -> Optional[str]:
        """
        Build the Qdrant collection for ``documents`` under ``storage_id``.

        Returns:
            ``storage_id`` when the collection has been created.

        Raises:
            Exception: Any error raised while chunking or building the
                collection is logged and re-raised.
        """
        try:
            chunked_docs = DocumentChunker.chunk(documents)
            # exist_ok avoids the check-then-create race of exists()+makedirs().
            os.makedirs(storage_id, exist_ok=True)

            def _create():
                QDRANT.from_documents(
                    documents=chunked_docs,
                    embedding=embeddings,
                    sparse_embedding=Qdrant._sparse_embedding(kwargs),
                    collection_name=Qdrant._COLLECTION_NAME,
                    path=storage_id,
                    retrieval_mode=RetrievalMode.HYBRID
                )

            # from_documents is synchronous; run it off the event loop.
            await asyncio.to_thread(_create)
            # Invalidate any cached store so the next get_loader() reloads.
            self._clear_cache(storage_id)
            return storage_id
        except Exception as e:
            logging.error(f"{Qdrant.__name__} create error: {e}")
            raise
        finally:
            # Drop the local reference to the input before forcing a GC pass.
            del documents
            gc.collect()

    def get_loader(
        self,
        embeddings: Embeddings,
        storage_id: str,
        **kwargs
    ) -> QDRANT:
        """
        Return the cached Qdrant store for ``storage_id``, opening it on a miss.

        A ``sparse_embedding`` keyword argument is honoured the same way as in
        ``create``. It only takes effect on a cache miss, since later calls
        return the cached store.
        """
        if storage_id not in self._CACHE:
            self._CACHE[storage_id] = QDRANT(
                client=QdrantClient(path=storage_id),
                collection_name=Qdrant._COLLECTION_NAME,
                embedding=embeddings,
                sparse_embedding=Qdrant._sparse_embedding(kwargs),
                retrieval_mode=RetrievalMode.HYBRID,
            )
        return self._CACHE[storage_id]
@@ -0,0 +1,118 @@
1
+ import asyncio, os
2
+ from ws_bom_robot_app.llm.vector_store.integration.base import IntegrationStrategy
3
+ from langchain_core.documents import Document
4
+ from ws_bom_robot_app.llm.vector_store.loader.base import Loader
5
+ from pydantic import BaseModel, Field, AliasChoices
6
+ from typing import Any, Optional, Union
7
+ from unstructured_ingest.interfaces import ProcessorConfig, ReadConfig
8
+ from unstructured_ingest.connector.jira import SimpleJiraConfig, JiraAccessConfig, JiraSourceConnector, JiraIngestDoc, nested_object_to_field_getter, _get_id_fields_for_issue, _get_project_fields_for_issue
9
+ from unstructured_ingest.runner import JiraRunner
10
+
11
+
12
class JiraParams(BaseModel):
    """
    Validated connection and selection parameters for a Jira instance.

    Attributes:
        url (str): Jira base URL; must start with ``http://`` or ``https://``,
            e.g. 'https://example.atlassian.net'.
        access_token (str): Non-empty API token. Accepted under the input keys
            ``accessToken`` or ``access_token``.
        user_email (str): Non-empty email address of the Jira user. Accepted
            under the input keys ``userEmail`` or ``user_email``.
        projects (list[str]): Project keys or IDs, e.g. ['SCRUM', 'PROJ1'].
        boards (Optional[list[str]]): Board IDs, e.g. ['1', '2']. Defaults to None.
        issues (Optional[list[str]]): Issue keys or IDs, e.g. ['SCRUM-1', 'PROJ1-1'].
            Defaults to None.
    """
    url: str = Field(..., pattern=r'^https?:\/\/.+')
    access_token: str = Field(
        ...,
        min_length=1,
        validation_alias=AliasChoices("accessToken", "access_token"),
    )
    user_email: str = Field(
        ...,
        min_length=1,
        validation_alias=AliasChoices("userEmail", "user_email"),
    )
    projects: list[str]
    boards: Optional[list[str]] = None
    issues: Optional[list[str]] = None
30
+
31
class Jira(IntegrationStrategy):
    """
    Integration strategy that downloads Jira content into the working
    directory and then loads it as documents.
    """

    def __init__(self, knowledgebase_path: str, data: dict[str, Union[str,int,list]]):
        """Initialise the base strategy and validate ``data`` as JiraParams."""
        super().__init__(knowledgebase_path, data)
        self.__data = JiraParams.model_validate(self.data)

    def working_subdirectory(self) -> str:
        """Return the sub-directory name used by this integration."""
        return 'jira'

    def run(self) -> None:
        """Configure the Jira runner from the validated params and run it (blocking)."""
        params = self.__data
        credentials = JiraAccessConfig(api_token=params.access_token)
        connector_config = SimpleJiraConfig(
            user_email=params.user_email,
            url=params.url,
            access_config=credentials,
            projects=params.projects,
            boards=params.boards,
            issues=params.issues
        )
        processor_config = ProcessorConfig(reprocess=False,verbose=False,num_processes=2,raise_on_error=False)
        # download_only: files are fetched into the working directory only.
        read_config = ReadConfig(download_dir=self.working_directory,re_download=True,preserve_downloads=True,download_only=True)
        # runner override: waiting for v2 migration https://github.com/Unstructured-IO/unstructured-ingest/issues/106
        _JiraRunner(
            connector_config=connector_config,
            processor_config=processor_config,
            read_config=read_config,
            partition_config=None,
            retry_strategy_config=None
        ).run()

    async def load(self) -> list[Document]:
        """Run the download in a worker thread, then load the working directory."""
        await asyncio.to_thread(self.run)
        # NOTE(review): fixed one-second pause kept from the original;
        # presumably lets the downloaded files settle - confirm.
        await asyncio.sleep(1)
        loader = Loader(self.working_directory)
        documents = await loader.load()
        return documents
62
+
63
+
64
+ # region override
65
class _JiraIngestDoc(JiraIngestDoc):
    """
    JiraIngestDoc that also writes an ``<name>_extra<ext>`` companion file
    next to the ingest doc's file. The file holds the issue's id fields,
    project fields and custom fields, with custom field ids replaced by the
    names in ``_JiraSourceConnector.CUSTOM_FIELDS``.
    """

    @staticmethod
    def _get_dropdown_custom_fields_for_issue(issue: dict, c_sep=" " * 5, r_sep="\n") -> str:
        """
        Render the non-null, non-list fields of ``issue`` as ``key: value`` lines.

        Args:
            issue: Mapping of field id/name to value.
            c_sep: Column separator placed after each row separator.
            r_sep: Row separator appended to each entry.

        Returns:
            The entries joined with ``r_sep + c_sep``.
        """
        def _parse_value(value: Any) -> Any:
            # For dict values, show the first of these keys that is present.
            if isinstance(value, dict):
                for candidate in ("displayName", "name", "value"):
                    if candidate in value:
                        return value[candidate]
            return value

        def _remap_custom_fields(fields: dict) -> dict:
            # Build the id -> name lookup once; setdefault keeps the first
            # entry for a duplicated id, like the original first-match scan.
            # CUSTOM_FIELDS may still be None if the connector has not
            # populated it, so treat that as "no mapping".
            names_by_id: dict = {}
            for map_item in _JiraSourceConnector.CUSTOM_FIELDS or []:
                names_by_id.setdefault(map_item["id"], map_item["name"])
            remapped_fields = {}
            for field_key, field_value in fields.items():
                new_key = names_by_id.get(field_key, field_key)
                # NOTE(review): entries whose value equals their (mapped)
                # key are skipped, as in the original - confirm the intent.
                if new_key != field_value:
                    remapped_fields[new_key] = field_value
            return remapped_fields

        filtered_fields = {
            key: _parse_value(value)
            for key, value in issue.items()
            if value is not None and not isinstance(value, list)
        }
        custom_fields = _remap_custom_fields(filtered_fields)
        return (r_sep + c_sep).join([f"{key}: {value}{r_sep}" for key, value in custom_fields.items()])

    def __init__(self, *args, **kwargs):
        """Initialise the ingest doc, then write the ``_extra`` companion file."""
        super().__init__(*args, **kwargs)
        _issue = self.issue
        _nested: dict = nested_object_to_field_getter(_issue["fields"])
        document = "\n\n\n".join(
            [
                _get_id_fields_for_issue(_issue),
                _get_project_fields_for_issue(_nested),
                _JiraIngestDoc._get_dropdown_custom_fields_for_issue(_nested)
            ],
        )
        # splitext removes only the trailing extension; str.replace would
        # also strip the same text wherever else it occurs in the path.
        _file_without_extension, _file_extension = os.path.splitext(str(self.filename))
        _parent = os.path.dirname(_file_without_extension)
        if _parent:
            os.makedirs(_parent, exist_ok=True)
        with open(f"{_file_without_extension}_extra{_file_extension}", "w", encoding="utf8") as f:
            f.write(document)
101
+
102
class _JiraSourceConnector(JiraSourceConnector):
    """
    JiraSourceConnector that records the custom field id/name pairs on the
    class and produces ``_JiraIngestDoc`` instances.
    """
    # id/name pairs of the Jira custom fields; filled by the first connector
    # instance that finds it empty and then shared through the class.
    # NOTE(review): the value is never refreshed once set, so it would be
    # reused across different Jira sites in one process - confirm acceptable.
    CUSTOM_FIELDS: list | None = None

    def __set_custom_fields(self) -> None:
        """Fetch all custom fields and store their id/name pairs on the class."""
        catalogue = []
        for entry in self.jira.get_all_custom_fields():
            catalogue.append({"id": entry["id"], "name": entry["name"]})
        _JiraSourceConnector.CUSTOM_FIELDS = catalogue
        self._jira = None # fix serialization

    def __init__(self, *args, **kwargs):
        """Initialise the connector; load custom fields if none are recorded."""
        super().__init__(*args, **kwargs)
        if _JiraSourceConnector.CUSTOM_FIELDS:
            return
        self.__set_custom_fields()

    def get_ingest_docs(self) -> list[_JiraIngestDoc]:
        """Return the parent's ingest docs rebuilt as ``_JiraIngestDoc`` objects."""
        wrapped = []
        for item in super().get_ingest_docs():
            wrapped.append(_JiraIngestDoc(**item.__dict__))
        return wrapped
114
+
115
class _JiraRunner(JiraRunner):
    """JiraRunner that uses ``_JiraSourceConnector`` as its source connector."""

    def get_source_connector_cls(self):
        """Return the connector class this runner should instantiate."""
        connector_cls = _JiraSourceConnector
        return connector_cls
118
+ # endregion
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.2
2
2
  Name: ws_bom_robot_app
3
- Version: 0.0.39
3
+ Version: 0.0.41
4
4
  Summary: A FastAPI application serving ws bom/robot/llm platform ai.
5
5
  Home-page: https://github.com/websolutespa/bom
6
6
  Author: Websolute Spa
@@ -45,6 +45,12 @@ ws_bom_robot_app/llm/utils/secrets.py
45
45
  ws_bom_robot_app/llm/utils/webhooks.py
46
46
  ws_bom_robot_app/llm/vector_store/__init__.py
47
47
  ws_bom_robot_app/llm/vector_store/generator.py
48
+ ws_bom_robot_app/llm/vector_store/db/__init__.py
49
+ ws_bom_robot_app/llm/vector_store/db/base.py
50
+ ws_bom_robot_app/llm/vector_store/db/chroma.py
51
+ ws_bom_robot_app/llm/vector_store/db/faiss.py
52
+ ws_bom_robot_app/llm/vector_store/db/manager.py
53
+ ws_bom_robot_app/llm/vector_store/db/qdrant.py
48
54
  ws_bom_robot_app/llm/vector_store/integration/__init__.py
49
55
  ws_bom_robot_app/llm/vector_store/integration/azure.py
50
56
  ws_bom_robot_app/llm/vector_store/integration/base.py
@@ -1,114 +0,0 @@
1
- import asyncio
2
- from ws_bom_robot_app.llm.vector_store.integration.base import IntegrationStrategy
3
- from unstructured_ingest.interfaces import ProcessorConfig, ReadConfig
4
- from unstructured_ingest.connector.jira import SimpleJiraConfig, JiraAccessConfig
5
- from unstructured_ingest.runner import JiraRunner
6
- from langchain_core.documents import Document
7
- from ws_bom_robot_app.llm.vector_store.loader.base import Loader
8
- from pydantic import BaseModel, Field, AliasChoices
9
- from typing import Optional, Union
10
- import requests
11
- import unstructured_ingest.connector.jira
12
-
13
- class JiraParams(BaseModel):
14
- """
15
- JiraParams is a Pydantic model that represents the parameters required to interact with a Jira instance.
16
-
17
- Attributes:
18
- url (str): The URL of the Jira instance, e.g., 'https://example.atlassian.net'.
19
- access_token (str): The access token for authenticating with the Jira API.
20
- user_email (str): The email address of the Jira user.
21
- projects (list[str]): A list of project keys or IDs to interact with, e.g., ['SCRUM', 'PROJ1'].
22
- boards (Optional[list[str]]): An optional list of board IDs to interact with. Defaults to None, e.g., ['1', '2'].
23
- issues (Optional[list[str]]): An optional list of issue keys or IDs to interact with. Defaults to None, e.g., ['SCRUM-1', 'PROJ1-1'].
24
- """
25
- url: str
26
- access_token: str = Field(validation_alias=AliasChoices("accessToken","access_token"))
27
- user_email: str = Field(validation_alias=AliasChoices("userEmail","user_email"))
28
- projects: list[str]
29
- boards: Optional[list[str]] | None = None
30
- issues: Optional[list[str]] | None = None
31
- fieldsMappingUrl: Optional[str] | None = None
32
-
33
- class Jira(IntegrationStrategy):
34
- DEFAULT_C_SEP = " " * 5
35
- DEFAULT_R_SEP = "\n"
36
- def __init__(self, knowledgebase_path: str, data: dict[str, Union[str,int,list]]):
37
- super().__init__(knowledgebase_path, data)
38
- self.__data = JiraParams.model_validate(self.data)
39
- def working_subdirectory(self) -> str:
40
- return 'jira'
41
- def run(self) -> None:
42
- unstructured_ingest.connector.jira._get_dropdown_fields_for_issue = self._get_dropdown_fields_for_issue
43
- access_config = JiraAccessConfig(
44
- api_token=self.__data.access_token
45
- )
46
- config = SimpleJiraConfig(
47
- user_email=self.__data.user_email,
48
- url = self.__data.url,
49
- access_config=access_config,
50
- projects=self.__data.projects,
51
- boards=self.__data.boards,
52
- issues=self.__data.issues
53
- )
54
- runner = JiraRunner(
55
- connector_config=config,
56
- processor_config=ProcessorConfig(reprocess=False,verbose=False,num_processes=2,raise_on_error=False),
57
- read_config=ReadConfig(download_dir=self.working_directory,re_download=True,preserve_downloads=True,download_only=True),
58
- partition_config=None,
59
- retry_strategy_config=None
60
- )
61
- runner.run()
62
- async def load(self) -> list[Document]:
63
- await asyncio.to_thread(self.run)
64
- await asyncio.sleep(1)
65
- return await Loader(self.working_directory).load()
66
-
67
- def _remap_custom_fields(self, field_list):
68
- auth = (self.__data.user_email, self.__data.access_token)
69
- response = requests.get(self.__data.fieldsMappingUrl, auth=auth)
70
-
71
- if response.status_code == 200:
72
- mapper: dict = response.json()
73
- remapped_field_list = {}
74
- for field_key, field_value in field_list.items():
75
- new_key = None
76
- for map_item in mapper:
77
- if field_key == map_item["id"]:
78
- # Usa il nome mappato come nuova chiave
79
- new_key = map_item["name"]
80
- break
81
-
82
- if new_key is None:
83
- new_key = field_key
84
-
85
- remapped_field_list[new_key] = field_value
86
-
87
- return remapped_field_list
88
-
89
- def _get_dropdown_fields_for_issue(self, issue, c_sep=DEFAULT_C_SEP, r_sep=DEFAULT_R_SEP):
90
- all_fields = {}
91
- for key, value in issue.items():
92
- if value is not None:
93
- if isinstance(value, list) and (len(value) > 0):
94
- all_fields[key] = value
95
- else:
96
- all_fields[key] = value
97
- mapped_fields = self._remap_custom_fields(all_fields)
98
- return f"""
99
- IssueType:{issue["issuetype"]["name"]}
100
- {r_sep}
101
- Status:{issue["status"]["name"]}
102
- {r_sep}
103
- Priority:{issue["priority"]}
104
- {r_sep}
105
- AssigneeID_Name:{issue["assignee"]["accountId"]}{c_sep}{issue["assignee"]["displayName"]}
106
- {r_sep}
107
- ReporterAdr_Name:{issue["reporter"]["emailAddress"]}{c_sep}{issue["reporter"]["displayName"]}
108
- {r_sep}
109
- Labels:{c_sep.join(issue["labels"])}
110
- {r_sep}
111
- Components:{c_sep.join([component["name"] for component in issue["components"]])}
112
- {r_sep}
113
- {(r_sep + c_sep ).join([f"{key}:{value}{r_sep}" for key, value in mapped_fields.items()])}
114
- """