PyPI - ws-bom-robot-app - Versions diffs - 0.0.39__tar.gz → 0.0.41__tar.gz - Mend

ws-bom-robot-app: 0.0.39 (tar.gz) → 0.0.41 (tar.gz)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (75)

{ws_bom_robot_app-0.0.39/ws_bom_robot_app.egg-info → ws_bom_robot_app-0.0.41}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.2
 Name: ws_bom_robot_app
-Version: 0.0.39
+Version: 0.0.41
 Summary: A FastAPI application serving ws bom/robot/llm platform ai.
 Home-page: https://github.com/websolutespa/bom
 Author: Websolute Spa

{ws_bom_robot_app-0.0.39 → ws_bom_robot_app-0.0.41}/setup.py RENAMED Viewed

@@ -4,7 +4,7 @@ _requirements = [line.split('#')[0].strip() for line in open("requirements.txt")
 setup(
     name="ws_bom_robot_app",
-    version="0.0.39",
+    version="0.0.41",
     description="A FastAPI application serving ws bom/robot/llm platform ai.",
     long_description=open("README.md", encoding='utf-8').read(),
     long_description_content_type="text/markdown",

ws_bom_robot_app-0.0.41/ws_bom_robot_app/llm/vector_store/db/base.py ADDED Viewed

@@ -0,0 +1,143 @@
+from abc import ABC, abstractmethod
+from typing import Any, Optional, List, Dict
+import asyncio
+from langchain_core.documents import Document
+from langchain_core.embeddings import Embeddings
+from langchain_core.language_models import BaseChatModel
+from langchain_core.vectorstores.base import VectorStoreRetriever, VectorStore
+from langchain.retrievers import SelfQueryRetriever
+from langchain.chains.query_constructor.schema import AttributeInfo
+class VectorDBStrategy(ABC):
+    """
+    Strategy interface for vector databases.
+
+    Concrete strategies implement `create` and `get_loader`; the retrieval helpers
+    (`get_retriever`, `invoke`) are implemented here on top of `get_loader`.
+
+    Attributes:
+      _CACHE (dict[str, VectorStore]):
+        Loaded VectorStore instances (e.g. Faiss, Chroma, Qdrant) keyed by storage ID.
+        It is declared on this base class, so every strategy reads and writes the same dict.
+    Methods:
+      create(embeddings, documents, storage_id, **kwargs):
+        Asynchronously build a vector store from the documents and return its storage ID.
+      get_loader(embeddings, storage_id, **kwargs):
+        Return the VectorStore for a storage ID.
+      get_retriever(embeddings, storage_id, search_type, search_kwargs, **kwargs):
+        Return `get_loader(...).as_retriever(...)` for the given search type and arguments.
+      supports_self_query():
+        Whether `invoke` may use a SelfQueryRetriever. Returns True by default.
+      invoke(embeddings, storage_id, query, search_type, search_kwargs, **kwargs):
+        Asynchronously search documents using a self-query retriever, a mixed
+        similarity + mmr search, or a single retriever.
+    """
+    _CACHE: dict[str, VectorStore] = {}
+    def _clear_cache(self, key: str):
+        """Forget the cached store for `key`, if there is one."""
+        self._CACHE.pop(key, None)
+    @abstractmethod
+    async def create(
+        self,
+        embeddings: Embeddings,
+        documents: List[Document],
+        storage_id: str,
+        **kwargs
+    ) -> Optional[str]:
+        """Build and persist a vector store for `documents`; return its storage ID."""
+        pass
+    @abstractmethod
+    def get_loader(
+        self,
+        embeddings: Embeddings,
+        storage_id: str,
+        **kwargs
+    ) -> VectorStore:
+        """Return the VectorStore identified by `storage_id`."""
+        pass
+    def get_retriever(
+        self,
+        embeddings: Embeddings,
+        storage_id: str,
+        search_type: str,
+        search_kwargs: Dict[str, Any],
+        **kwargs
+    ) -> VectorStoreRetriever:
+        """Wrap the store for `storage_id` in a retriever using `search_type` and `search_kwargs`."""
+        store = self.get_loader(embeddings, storage_id)
+        return store.as_retriever(
+            search_type=search_type,
+            search_kwargs=search_kwargs
+        )
+    def supports_self_query(self) -> bool:
+        """Return True when `invoke` is allowed to use a SelfQueryRetriever."""
+        return True
+    @staticmethod
+    def _get_self_query_retriever(llm:BaseChatModel,store:VectorStore,description:str, metadata: list[AttributeInfo]) -> SelfQueryRetriever:
+        """Build a SelfQueryRetriever over `store` from the document description and metadata info."""
+        return SelfQueryRetriever.from_llm(
+            llm=llm,
+            vectorstore=store,
+            document_contents=description,
+            metadata_field_info=metadata,
+            enable_limit=True,
+            verbose=True
+        )
+    async def invoke(
+        self,
+        embeddings: Embeddings,
+        storage_id: str,
+        query: str,
+        search_type: str,
+        search_kwargs: Dict[str, Any],
+        **kwargs
+    ) -> List[Document]:
+        """
+        Search the store identified by `storage_id` for `query`.
+
+        Resolution order:
+          1. Self-query, when the strategy supports it, `kwargs` carries both
+             "app_tool" and "llm", and `app_tool.get_vector_filtering()` yields a
+             truthy description and metadata.
+          2. `search_type == "mixed"`: similarity and mmr retrievers run in parallel
+             and their results are merged without duplicates.
+          3. Otherwise a single retriever built from `search_type` / `search_kwargs`.
+        """
+        if self.supports_self_query() and "app_tool" in kwargs and "llm" in kwargs:
+            # imported here rather than at module level, as in the original code
+            from ws_bom_robot_app.llm.tools.tool_manager import LlmAppTool
+            app_tool: LlmAppTool = kwargs["app_tool"]
+            _description,_metadata=app_tool.get_vector_filtering()
+            if _description and _metadata:
+                llm: BaseChatModel = kwargs["llm"]
+                retriever = VectorDBStrategy._get_self_query_retriever(llm,self.get_loader(embeddings, storage_id),_description,_metadata)
+                return await retriever.ainvoke(query)
+        if search_type == "mixed":
+            similarity_retriever = self.get_retriever(embeddings, storage_id, "similarity", search_kwargs)
+            # only these three keys are forwarded to the mmr retriever, with these defaults
+            mmr_kwargs = {
+                "k": search_kwargs.get("k", 4),
+                "fetch_k": search_kwargs.get("fetch_k", 20),
+                "lambda_mult": search_kwargs.get("lambda_mult", 0.2),
+            }
+            mmr_retriever = self.get_retriever(embeddings, storage_id, "mmr", mmr_kwargs)
+            return await VectorDBStrategy._combine_search([similarity_retriever, mmr_retriever], query)
+        retriever = self.get_retriever(embeddings, storage_id, search_type, search_kwargs)
+        return await retriever.ainvoke(query)
+    @staticmethod
+    def _remove_duplicates(docs: List[Document]) -> List[Document]:
+        """Return `docs` in order, keeping only the first document for each distinct page_content."""
+        seen = set()
+        unique: List[Document] = []
+        for doc in docs:
+            if doc.page_content not in seen:
+                seen.add(doc.page_content)
+                unique.append(doc)
+        return unique
+    @staticmethod
+    async def _combine_search(
+        retrievers: List[VectorStoreRetriever],
+        query: str
+    ) -> List[Document]:
+        """Run every retriever on `query` concurrently and merge the results without duplicates."""
+        tasks = [retriever.ainvoke(query) for retriever in retrievers]
+        results = await asyncio.gather(*tasks)
+        return VectorDBStrategy._remove_duplicates([doc for res in results for doc in res])

ws_bom_robot_app-0.0.41/ws_bom_robot_app/llm/vector_store/db/chroma.py ADDED Viewed

@@ -0,0 +1,77 @@
+from langchain_chroma import Chroma as CHROMA
+from langchain_core.documents import Document
+from typing import Any, Optional
+import asyncio, gc, logging
+from langchain_core.embeddings import Embeddings
+from ws_bom_robot_app.llm.utils.chunker import DocumentChunker
+from ws_bom_robot_app.llm.vector_store.db.base import VectorDBStrategy
+class Chroma(VectorDBStrategy):
+    """
+    Strategy backed by a Chroma vector store persisted in a directory.
+
+    Attributes:
+      _COLLECTION_NAME (str): Collection used for both writing and reading.
+      _CACHE (dict[str, CHROMA]): Inherited cache of loaded Chroma instances.
+    Methods:
+      create(embeddings, documents, storage_id, **kwargs):
+        Chunks the documents, embeds them and persists the store under `storage_id`.
+        Args:
+          embeddings (Embeddings): The embeddings used to embed documents.
+          documents (list[Document]): The documents to be chunked and embedded.
+          storage_id (str): The directory where the Chroma store is persisted.
+          **kwargs: Additional keyword arguments (unused).
+        Returns:
+          Optional[str]: The storage ID.
+        Raises:
+          Exception: Any error raised during creation is logged and re-raised.
+      get_loader(embeddings, storage_id, **kwargs):
+        Returns the cached Chroma instance for `storage_id`, creating and caching it
+        on first use.
+        Args:
+          embeddings (Embeddings): The embeddings used to open the Chroma instance.
+          storage_id (str): The directory where the Chroma store is persisted.
+          **kwargs: Additional keyword arguments (unused).
+        Returns:
+          CHROMA: The retrieved or newly created Chroma instance.
+    """
+    # create() and get_loader() must address the same collection: from_documents falls back
+    # to the library's own default name when none is given, which get_loader would not read.
+    _COLLECTION_NAME = "default"
+    async def create(
+        self,
+        embeddings: Embeddings,
+        documents: list[Document],
+        storage_id: str,
+        **kwargs
+    ) -> Optional[str]:
+        try:
+            chunked_docs = DocumentChunker.chunk(documents)
+            # from_documents is a blocking call, so it runs in a worker thread
+            await asyncio.to_thread(
+                CHROMA.from_documents,
+                documents=chunked_docs,
+                embedding=embeddings,
+                collection_name=self._COLLECTION_NAME,
+                persist_directory=storage_id
+            )
+            # drop any instance cached before the store was (re)built
+            self._clear_cache(storage_id)
+            return storage_id
+        except Exception as e:
+            logging.error(f"{Chroma.__name__} create error: {e}")
+            raise
+        finally:
+            del documents
+            gc.collect()
+    def get_loader(
+        self,
+        embeddings: Embeddings,
+        storage_id: str,
+        **kwargs
+    ) -> CHROMA:
+        if storage_id not in self._CACHE:
+            self._CACHE[storage_id] = CHROMA(
+                collection_name=self._COLLECTION_NAME,
+                embedding_function=embeddings,
+                persist_directory=storage_id
+            )
+        return self._CACHE[storage_id]

ws_bom_robot_app-0.0.41/ws_bom_robot_app/llm/vector_store/db/faiss.py ADDED Viewed

@@ -0,0 +1,64 @@
+from langchain_community.vectorstores.faiss import FAISS
+from langchain_core.documents import Document
+from typing import Any, Optional
+import asyncio, gc, logging
+from langchain_core.embeddings import Embeddings
+from ws_bom_robot_app.llm.utils.chunker import DocumentChunker
+from ws_bom_robot_app.llm.vector_store.db.base import VectorDBStrategy
+class Faiss(VectorDBStrategy):
+    """
+    Strategy backed by a FAISS index saved in a local directory.
+
+    Methods:
+      create(embeddings, documents, storage_id, **kwargs):
+        Asynchronously builds a FAISS index from the chunked documents using the
+        provided embeddings, saves it locally under `storage_id` and returns the
+        storage ID. Errors are logged and re-raised.
+      get_loader(embeddings, storage_id, **kwargs):
+        Returns the FAISS index for `storage_id`: the cached instance when present,
+        otherwise the index loaded from local storage (and then cached).
+      supports_self_query():
+        Returns False, so `invoke` never uses a self-query retriever for this strategy.
+    """
+    async def create(
+        self,
+        embeddings: Embeddings,
+        documents: list[Document],
+        storage_id: str,
+        **kwargs
+    ) -> Optional[str]:
+        # bound before the try so the `del` in `finally` cannot raise UnboundLocalError
+        # (and hide the real error) when chunking or indexing fails
+        _instance = None
+        try:
+            chunked_docs = DocumentChunker.chunk(documents)
+            # building and saving the index are blocking calls: run them in worker threads
+            _instance = await asyncio.to_thread(
+                FAISS.from_documents,
+                chunked_docs,
+                embeddings
+            )
+            await asyncio.to_thread(_instance.save_local, storage_id)
+            # drop any index cached before the store was (re)built
+            self._clear_cache(storage_id)
+            return storage_id
+        except Exception as e:
+            logging.error(f"{Faiss.__name__} create error: {e}")
+            raise
+        finally:
+            del documents, _instance
+            gc.collect()
+    def get_loader(
+        self,
+        embeddings: Embeddings,
+        storage_id: str,
+        **kwargs
+    ) -> FAISS:
+        if storage_id not in self._CACHE:
+            # NOTE(review): allow_dangerous_deserialization=True lets load_local unpickle the
+            # stored index; only safe when `storage_id` points at data this application wrote.
+            self._CACHE[storage_id] = FAISS.load_local(
+                folder_path=storage_id,
+                embeddings=embeddings,
+                allow_dangerous_deserialization=True
+            )
+        return self._CACHE[storage_id]
+    def supports_self_query(self) -> bool:
+        return False

ws_bom_robot_app-0.0.41/ws_bom_robot_app/llm/vector_store/db/manager.py ADDED Viewed

@@ -0,0 +1,15 @@
+from ws_bom_robot_app.llm.vector_store.db.base import VectorDBStrategy
+from ws_bom_robot_app.llm.vector_store.db.chroma import Chroma
+from ws_bom_robot_app.llm.vector_store.db.faiss import Faiss
+from ws_bom_robot_app.llm.vector_store.db.qdrant import Qdrant
+class VectorDbManager:
+  """Registry that maps a vector-db name to its strategy instance."""
+  # name used when the requested one is missing or unknown
+  _DEFAULT = "faiss"
+  _list: dict[str, VectorDBStrategy] = {
+    "chroma": Chroma(),
+    "faiss": Faiss(),
+    "qdrant": Qdrant()
+  }
+  @classmethod
+  def get_strategy(cls, name: str) -> VectorDBStrategy:
+    """
+    Return the strategy registered under `name` (case-insensitive).
+    An empty, None or unknown name falls back to the registered Faiss strategy,
+    so no new instance is built per call.
+    """
+    strategy = cls._list.get((name or cls._DEFAULT).lower())
+    if strategy is None:
+      strategy = cls._list[cls._DEFAULT]
+    return strategy

ws_bom_robot_app-0.0.41/ws_bom_robot_app/llm/vector_store/db/qdrant.py ADDED Viewed

@@ -0,0 +1,58 @@
+from langchain_qdrant import QdrantVectorStore as QDRANT, FastEmbedSparse, RetrievalMode
+from qdrant_client import QdrantClient
+from langchain_core.documents import Document
+from typing import Any, Optional
+import asyncio, gc, logging, os
+from langchain_core.embeddings import Embeddings
+from ws_bom_robot_app.llm.utils.chunker import DocumentChunker
+from ws_bom_robot_app.llm.vector_store.db.base import VectorDBStrategy
+class Qdrant(VectorDBStrategy):
+    """
+    Strategy backed by a local (path-based) Qdrant store using hybrid retrieval.
+
+    Methods:
+      create(embeddings, documents, storage_id, **kwargs):
+        Chunks the documents and writes them to collection "default" under the
+        directory `storage_id`, then returns the storage ID. Errors are logged
+        and re-raised.
+      get_loader(embeddings, storage_id, **kwargs):
+        Returns the cached store for `storage_id`, opening and caching it on first use.
+    Both methods accept an optional `sparse_embedding` keyword argument; when it is
+    absent a new FastEmbedSparse() is used.
+    """
+    @staticmethod
+    def _sparse_embedding(options: dict):
+        """Return the caller-supplied `sparse_embedding`, or a new FastEmbedSparse() when absent."""
+        if 'sparse_embedding' in options:
+            return options['sparse_embedding']
+        return FastEmbedSparse()
+    async def create(
+        self,
+        embeddings: Embeddings,
+        documents: list[Document],
+        storage_id: str,
+        **kwargs
+    ) -> Optional[str]:
+        try:
+            chunked_docs = DocumentChunker.chunk(documents)
+            os.makedirs(storage_id, exist_ok=True)
+            def _create():
+                # the sparse embedding is resolved here so that a default FastEmbedSparse()
+                # is constructed inside the worker thread, not on the event loop
+                QDRANT.from_documents(
+                    documents=chunked_docs,
+                    embedding=embeddings,
+                    sparse_embedding=Qdrant._sparse_embedding(kwargs),
+                    collection_name="default",
+                    path=storage_id,
+                    retrieval_mode=RetrievalMode.HYBRID
+                )
+            await asyncio.to_thread(_create)
+            # drop any store cached before the collection was (re)built
+            self._clear_cache(storage_id)
+            return storage_id
+        except Exception as e:
+            logging.error(f"{Qdrant.__name__} create error: {e}")
+            raise
+        finally:
+            del documents
+            gc.collect()
+    def get_loader(
+        self,
+        embeddings: Embeddings,
+        storage_id: str,
+        **kwargs
+    ) -> QDRANT:
+        if storage_id not in self._CACHE:
+            self._CACHE[storage_id] = QDRANT(
+                client=QdrantClient(path=storage_id),
+                collection_name="default",
+                embedding=embeddings,
+                # same resolution as create(), so a custom sparse embedding is used on both sides
+                sparse_embedding=Qdrant._sparse_embedding(kwargs),
+                retrieval_mode=RetrievalMode.HYBRID,
+            )
+        return self._CACHE[storage_id]

ws_bom_robot_app-0.0.41/ws_bom_robot_app/llm/vector_store/integration/jira.py ADDED Viewed

@@ -0,0 +1,118 @@
+import asyncio, os
+from ws_bom_robot_app.llm.vector_store.integration.base import IntegrationStrategy
+from langchain_core.documents import Document
+from ws_bom_robot_app.llm.vector_store.loader.base import Loader
+from pydantic import BaseModel, Field, AliasChoices
+from typing import Any, Optional, Union
+from unstructured_ingest.interfaces import  ProcessorConfig, ReadConfig
+from unstructured_ingest.connector.jira import SimpleJiraConfig, JiraAccessConfig, JiraSourceConnector, JiraIngestDoc, nested_object_to_field_getter, _get_id_fields_for_issue, _get_project_fields_for_issue
+from unstructured_ingest.runner import JiraRunner
+class JiraParams(BaseModel):
+  """
+  Validated connection and selection parameters for a Jira instance.
+  Attributes:
+    url (str): Base URL of the Jira instance; must start with http:// or https://, e.g., 'https://example.atlassian.net'.
+    access_token (str): Non-empty API token; also accepted under the key 'accessToken'.
+    user_email (str): Non-empty email address of the Jira user; also accepted under the key 'userEmail'.
+    projects (list[str]): Project keys or IDs, e.g., ['SCRUM', 'PROJ1'].
+    boards (Optional[list[str]]): Board IDs, e.g., ['1', '2']. Defaults to None.
+    issues (Optional[list[str]]): Issue keys or IDs, e.g., ['SCRUM-1', 'PROJ1-1']. Defaults to None.
+  """
+  url: str = Field(..., pattern=r'^https?:\/\/.+')
+  access_token: str = Field(..., validation_alias=AliasChoices("accessToken","access_token"), min_length=1)
+  user_email: str = Field(..., validation_alias=AliasChoices("userEmail","user_email"), min_length=1)
+  projects: list[str]
+  boards: Optional[list[str]] = None
+  issues: Optional[list[str]] = None
+class Jira(IntegrationStrategy):
+  """Integration strategy that downloads Jira content and loads it as documents."""
+  def __init__(self, knowledgebase_path: str, data: dict[str, Union[str,int,list]]):
+    """Initialise the base strategy, then validate `self.data` as JiraParams."""
+    super().__init__(knowledgebase_path, data)
+    self.__data = JiraParams.model_validate(self.data)
+  def working_subdirectory(self) -> str:
+    """Name of the subdirectory used by this integration."""
+    return 'jira'
+  def run(self) -> None:
+    """Configure the Jira runner in download-only mode and execute it (blocking)."""
+    params = self.__data
+    access_config = JiraAccessConfig(api_token=params.access_token)
+    connector_config = SimpleJiraConfig(
+      user_email=params.user_email,
+      url=params.url,
+      access_config=access_config,
+      projects=params.projects,
+      boards=params.boards,
+      issues=params.issues
+    )
+    processor_config = ProcessorConfig(reprocess=False,verbose=False,num_processes=2,raise_on_error=False)
+    read_config = ReadConfig(download_dir=self.working_directory,re_download=True,preserve_downloads=True,download_only=True)
+    # runner override: waiting for v2 migration https://github.com/Unstructured-IO/unstructured-ingest/issues/106
+    _JiraRunner(
+      connector_config=connector_config,
+      processor_config=processor_config,
+      read_config=read_config,
+      partition_config=None,
+      retry_strategy_config=None
+    ).run()
+  async def load(self) -> list[Document]:
+    """Run the download in a worker thread, wait one second, then load the working directory."""
+    await asyncio.to_thread(self.run)
+    await asyncio.sleep(1)
+    loader = Loader(self.working_directory)
+    return await loader.load()
+# region override
+class _JiraIngestDoc(JiraIngestDoc):
+  """
+  JiraIngestDoc that, on construction, also writes a companion "<name>_extra.<ext>" file
+  next to `self.filename`, containing the issue id/project fields plus its remaining
+  (custom) fields.
+  """
+  @staticmethod
+  def _get_dropdown_custom_fields_for_issue(issue: dict, c_sep=" " * 5, r_sep="\n") -> str:
+      """
+      Render the non-null, non-list fields of `issue` as "name: value" rows.
+      Dict values are reduced to their "displayName", "name" or "value" entry (first one
+      present); field ids are replaced by the names in `_JiraSourceConnector.CUSTOM_FIELDS`
+      when a mapping exists.
+      """
+      def _parse_value(value: Any) -> Any:
+          if isinstance(value, dict):
+            for item in ("displayName", "name", "value"):
+                if item in value:
+                    return value[item]
+          return value
+      # CUSTOM_FIELDS stays None until a connector has fetched it in this process;
+      # treat that as "no mapping" instead of failing while iterating None.
+      _names: dict = {}
+      for map_item in (_JiraSourceConnector.CUSTOM_FIELDS or []):
+        _names.setdefault(map_item["id"], map_item["name"])  # first match wins, as before
+      def _remap_custom_fields(fields: dict):
+        remapped_fields = {}
+        for field_key, field_value in fields.items():
+          new_key = _names.get(field_key, field_key)
+          # NOTE(review): this compares the (remapped) key with the *value*, so only fields
+          # whose value equals their own name are skipped - confirm this is the intent.
+          if new_key != field_value:
+            remapped_fields[new_key] = field_value
+        return remapped_fields
+      filtered_fields = {key: _parse_value(value) for key, value in issue.items() if value is not None and type(value) is not list}
+      custom_fields =_remap_custom_fields(filtered_fields)
+      return (r_sep + c_sep ).join([f"{key}: {value}{r_sep}" for key, value in custom_fields.items()])
+  def __init__(self, *args, **kwargs):
+    super().__init__(*args, **kwargs)
+    _issue = self.issue
+    _nested: dict = nested_object_to_field_getter(_issue["fields"])
+    document = "\n\n\n".join(
+              [
+                  _get_id_fields_for_issue(_issue),
+                  _get_project_fields_for_issue(_nested),
+                  _JiraIngestDoc._get_dropdown_custom_fields_for_issue(_nested)
+              ],
+          )
+    # splitext only splits the final extension, so directory names that happen to
+    # contain the extension text are left intact
+    _file_without_extension, _file_extension = os.path.splitext(str(self.filename))
+    _directory = os.path.dirname(_file_without_extension)
+    if _directory:
+      # makedirs("") raises, so skip it for a bare filename in the current directory
+      os.makedirs(_directory, exist_ok=True)
+    with open(f"{_file_without_extension}_extra{_file_extension}", "w", encoding="utf8") as f:
+      f.write(document)
+class _JiraSourceConnector(JiraSourceConnector):
+  """JiraSourceConnector that records the custom-field id/name list once and yields _JiraIngestDoc items."""
+  # class-level list of {"id", "name"} dicts; None until the first connector fetches it
+  CUSTOM_FIELDS: list | None = None
+  def __set_custom_fields(self) -> None:
+    """Fetch all custom fields from Jira and store their id/name pairs on the class."""
+    fetched = self.jira.get_all_custom_fields()
+    _JiraSourceConnector.CUSTOM_FIELDS = [{"id":field["id"],"name":field["name"]} for field in fetched]
+    self._jira = None # fix serialization
+  def __init__(self, *args, **kwargs):
+    super().__init__(*args, **kwargs)
+    if _JiraSourceConnector.CUSTOM_FIELDS:
+      return
+    self.__set_custom_fields()
+  def get_ingest_docs(self) -> list[_JiraIngestDoc]:
+    """Rebuild each ingest doc returned by the parent connector as a _JiraIngestDoc."""
+    docs = []
+    for item in super().get_ingest_docs():
+      docs.append(_JiraIngestDoc(**vars(item)))
+    return docs
+class _JiraRunner(JiraRunner):
+  """JiraRunner that uses _JiraSourceConnector as its source connector class."""
+  def get_source_connector_cls(self):
+    connector_cls = _JiraSourceConnector
+    return connector_cls
+# endregion

ws_bom_robot_app-0.0.41/ws_bom_robot_app/llm/vector_store/loader/__init__.py ADDED Viewed

File without changes

{ws_bom_robot_app-0.0.39 → ws_bom_robot_app-0.0.41/ws_bom_robot_app.egg-info}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.2
 Name: ws_bom_robot_app
-Version: 0.0.39
+Version: 0.0.41
 Summary: A FastAPI application serving ws bom/robot/llm platform ai.
 Home-page: https://github.com/websolutespa/bom
 Author: Websolute Spa

{ws_bom_robot_app-0.0.39 → ws_bom_robot_app-0.0.41}/ws_bom_robot_app.egg-info/SOURCES.txt RENAMED Viewed

@@ -45,6 +45,12 @@ ws_bom_robot_app/llm/utils/secrets.py
 ws_bom_robot_app/llm/utils/webhooks.py
 ws_bom_robot_app/llm/vector_store/__init__.py
 ws_bom_robot_app/llm/vector_store/generator.py
+ws_bom_robot_app/llm/vector_store/db/__init__.py
+ws_bom_robot_app/llm/vector_store/db/base.py
+ws_bom_robot_app/llm/vector_store/db/chroma.py
+ws_bom_robot_app/llm/vector_store/db/faiss.py
+ws_bom_robot_app/llm/vector_store/db/manager.py
+ws_bom_robot_app/llm/vector_store/db/qdrant.py
 ws_bom_robot_app/llm/vector_store/integration/__init__.py
 ws_bom_robot_app/llm/vector_store/integration/azure.py
 ws_bom_robot_app/llm/vector_store/integration/base.py

ws_bom_robot_app-0.0.39/ws_bom_robot_app/llm/vector_store/integration/jira.py DELETED Viewed

@@ -1,114 +0,0 @@
-import asyncio
-from ws_bom_robot_app.llm.vector_store.integration.base import IntegrationStrategy
-from unstructured_ingest.interfaces import  ProcessorConfig, ReadConfig
-from unstructured_ingest.connector.jira import SimpleJiraConfig, JiraAccessConfig
-from unstructured_ingest.runner import JiraRunner
-from langchain_core.documents import Document
-from ws_bom_robot_app.llm.vector_store.loader.base import Loader
-from pydantic import BaseModel, Field, AliasChoices
-from typing import Optional, Union
-import requests
-import unstructured_ingest.connector.jira
-class JiraParams(BaseModel):
-  """
-  JiraParams is a Pydantic model that represents the parameters required to interact with a Jira instance.
-  Attributes:
-    url (str): The URL of the Jira instance, e.g., 'https://example.atlassian.net'.
-    access_token (str): The access token for authenticating with the Jira API.
-    user_email (str): The email address of the Jira user.
-    projects (list[str]): A list of project keys or IDs to interact with, e.g., ['SCRUM', 'PROJ1'].
-    boards (Optional[list[str]]): An optional list of board IDs to interact with. Defaults to None, e.g., ['1', '2'].
-    issues (Optional[list[str]]): An optional list of issue keys or IDs to interact with. Defaults to None, e.g., ['SCRUM-1', 'PROJ1-1'].
-  """
-  url: str
-  access_token: str = Field(validation_alias=AliasChoices("accessToken","access_token"))
-  user_email: str = Field(validation_alias=AliasChoices("userEmail","user_email"))
-  projects: list[str]
-  boards: Optional[list[str]] | None = None
-  issues: Optional[list[str]] | None = None
-  fieldsMappingUrl: Optional[str] | None = None
-class Jira(IntegrationStrategy):
-  DEFAULT_C_SEP = " " * 5
-  DEFAULT_R_SEP = "\n"
-  def __init__(self, knowledgebase_path: str, data: dict[str, Union[str,int,list]]):
-    super().__init__(knowledgebase_path, data)
-    self.__data = JiraParams.model_validate(self.data)
-  def working_subdirectory(self) -> str:
-    return 'jira'
-  def run(self) -> None:
-    unstructured_ingest.connector.jira._get_dropdown_fields_for_issue = self._get_dropdown_fields_for_issue
-    access_config = JiraAccessConfig(
-      api_token=self.__data.access_token
-    )
-    config = SimpleJiraConfig(
-      user_email=self.__data.user_email,
-      url = self.__data.url,
-      access_config=access_config,
-      projects=self.__data.projects,
-      boards=self.__data.boards,
-      issues=self.__data.issues
-    )
-    runner = JiraRunner(
-      connector_config=config,
-      processor_config=ProcessorConfig(reprocess=False,verbose=False,num_processes=2,raise_on_error=False),
-      read_config=ReadConfig(download_dir=self.working_directory,re_download=True,preserve_downloads=True,download_only=True),
-      partition_config=None,
-      retry_strategy_config=None
-      )
-    runner.run()
-  async def load(self) -> list[Document]:
-      await asyncio.to_thread(self.run)
-      await asyncio.sleep(1)
-      return await Loader(self.working_directory).load()
-  def _remap_custom_fields(self, field_list):
-    auth = (self.__data.user_email, self.__data.access_token)
-    response = requests.get(self.__data.fieldsMappingUrl, auth=auth)
-    if response.status_code == 200:
-      mapper: dict = response.json()
-    remapped_field_list = {}
-    for field_key, field_value in field_list.items():
-        new_key = None
-        for map_item in mapper:
-            if field_key == map_item["id"]:
-                # Usa il nome mappato come nuova chiave
-                new_key = map_item["name"]
-                break
-        if new_key is None:
-            new_key = field_key
-        remapped_field_list[new_key] = field_value
-    return remapped_field_list
-  def _get_dropdown_fields_for_issue(self, issue, c_sep=DEFAULT_C_SEP, r_sep=DEFAULT_R_SEP):
-      all_fields = {}
-      for key, value in issue.items():
-          if value is not None:
-              if isinstance(value, list) and (len(value) > 0):
-                  all_fields[key] = value
-              else:
-                  all_fields[key] = value
-      mapped_fields = self._remap_custom_fields(all_fields)
-      return f"""
-      IssueType:{issue["issuetype"]["name"]}
-      {r_sep}
-      Status:{issue["status"]["name"]}
-      {r_sep}
-      Priority:{issue["priority"]}
-      {r_sep}
-      AssigneeID_Name:{issue["assignee"]["accountId"]}{c_sep}{issue["assignee"]["displayName"]}
-      {r_sep}
-      ReporterAdr_Name:{issue["reporter"]["emailAddress"]}{c_sep}{issue["reporter"]["displayName"]}
-      {r_sep}
-      Labels:{c_sep.join(issue["labels"])}
-      {r_sep}
-      Components:{c_sep.join([component["name"] for component in issue["components"]])}
-      {r_sep}
-      {(r_sep + c_sep ).join([f"{key}:{value}{r_sep}" for key, value in mapped_fields.items()])}
-      """