ws-bom-robot-app 0.0.95__py3-none-any.whl → 0.0.96__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,10 +1,38 @@
1
1
  import asyncio
2
+ import json
3
+ from pathlib import Path
2
4
  from ws_bom_robot_app.llm.vector_store.integration.base import IntegrationStrategy, UnstructuredIngest
3
5
  from unstructured_ingest.processes.connectors.google_drive import GoogleDriveConnectionConfig, GoogleDriveDownloaderConfig, GoogleDriveIndexerConfig, GoogleDriveAccessConfig
6
+ from unstructured_ingest.data_types.file_data import FileData as OriginalFileData, BatchFileData as OriginalBatchFileData
4
7
  from langchain_core.documents import Document
5
8
  from ws_bom_robot_app.llm.vector_store.loader.base import Loader
6
9
  from typing import Union
7
10
  from pydantic import BaseModel, Field, AliasChoices
11
+
12
+ # UTF-8 safe FileData classes
13
class FileData(OriginalFileData):
    """FileData variant whose JSON (de)serialisation is explicit about encoding.

    ``from_file`` decodes the file with a short list of candidate encodings
    instead of the platform default, and ``to_file`` always writes UTF-8
    with non-ASCII characters kept literal.
    """

    @classmethod
    def from_file(cls, path: str):
        """Load and validate an instance from a JSON file.

        Args:
            path: Location of the JSON file; resolved to an absolute path.

        Returns:
            The result of ``cls.model_validate`` on the parsed JSON.

        Raises:
            ValueError: If ``path`` is not an existing regular file, or if
                none of the candidate encodings can decode it.
            json.JSONDecodeError: If the decoded text is not valid JSON.
        """
        path = Path(path).resolve()
        # is_file() is already False for a missing path.
        if not path.is_file():
            raise ValueError(f"file path not valid: {path}")

        # Read once and decode in memory rather than reopening per encoding.
        raw = path.read_bytes()
        # 'utf-8-sig' accepts UTF-8 with or without a BOM (a BOM decoded as
        # plain 'utf-8' makes the JSON parser fail). 'latin-1' is the same
        # codec as 'iso-8859-1' and maps every byte, so it is the last resort.
        for encoding in ("utf-8-sig", "cp1252", "latin-1"):
            try:
                text = raw.decode(encoding)
            except UnicodeError:
                continue
            # Parsing/validation stay outside the try so their errors
            # propagate instead of triggering another decode attempt.
            return cls.model_validate(json.loads(text))
        raise ValueError(f"Could not decode file {path} with any supported encoding")

    def to_file(self, path: str) -> None:
        """Write ``self.model_dump()`` to ``path`` as indented UTF-8 JSON.

        Missing parent directories are created. ``ensure_ascii=False`` keeps
        non-ASCII characters literal; the file is opened as UTF-8 to match.

        Args:
            path: Destination file; resolved to an absolute path.
        """
        path = Path(path).resolve()
        path.parent.mkdir(parents=True, exist_ok=True)
        with path.open("w", encoding="utf-8") as f:
            json.dump(self.model_dump(), f, indent=2, ensure_ascii=False)
32
+
33
class BatchFileData(OriginalBatchFileData, FileData):
    # Combines the upstream batch model with the FileData subclass above so
    # that fd.BatchFileData can be replaced alongside fd.FileData.
    # NOTE(review): OriginalBatchFileData precedes FileData in the MRO, so
    # any from_file/to_file it defines itself would win -- confirm it does not.
    ...
35
+
8
36
  class GoogleDriveParams(BaseModel):
9
37
  """
10
38
  GoogleDriveParams is a model that holds parameters for Google Drive integration.
@@ -42,26 +70,27 @@ class GoogleDrive(IntegrationStrategy):
42
70
  super().__init__(knowledgebase_path, data)
43
71
  self.__data = GoogleDriveParams.model_validate(self.data)
44
72
  self.__unstructured_ingest = UnstructuredIngest(self.working_directory)
73
+ self._apply_encoding_fix()
74
+
75
def _apply_encoding_fix(self):
    """Monkey-patch ``unstructured_ingest`` with the encoding-aware FileData classes.

    Rebinds ``FileData``, ``BatchFileData`` and ``file_data_from_file`` on
    the ``unstructured_ingest.data_types.file_data`` module. The assignment
    targets module attributes, so it affects the whole process and is simply
    repeated on each call.

    NOTE(review): code that already did ``from ...file_data import X`` keeps
    its own binding; confirm the pipeline resolves these names through the
    module at call time.
    """
    import unstructured_ingest.data_types.file_data as fd
    from pydantic import ValidationError

    def file_data_from_file(path: str):
        # Try the batch schema first and fall back to the plain one when
        # validation rejects it. The earlier `... if path else ...` test was
        # truthy for every real path, so plain file data never reached
        # FileData.from_file.
        try:
            return BatchFileData.from_file(path)
        except ValidationError:
            return FileData.from_file(path)

    fd.FileData = FileData
    fd.BatchFileData = BatchFileData
    fd.file_data_from_file = file_data_from_file
81
+
45
82
  def working_subdirectory(self) -> str:
46
83
  return 'googledrive'
84
+
47
85
  def run(self) -> None:
48
- indexer_config = GoogleDriveIndexerConfig(
49
- extensions=self.__data.extensions,
50
- recursive=self.__data.recursive
51
- )
52
- downloader_config = GoogleDriveDownloaderConfig(
53
- download_dir=self.working_directory
54
- )
55
- connection_config = GoogleDriveConnectionConfig(
56
- access_config=GoogleDriveAccessConfig(
57
- service_account_key=self.__data.service_account_key
58
- ),
59
- drive_id=self.__data.drive_id
60
- )
61
86
  self.__unstructured_ingest.pipeline(
62
- indexer_config,
63
- downloader_config,
64
- connection_config).run()
87
+ GoogleDriveIndexerConfig(extensions=self.__data.extensions, recursive=self.__data.recursive),
88
+ GoogleDriveDownloaderConfig(download_dir=self.working_directory),
89
+ GoogleDriveConnectionConfig(
90
+ access_config=GoogleDriveAccessConfig(service_account_key=self.__data.service_account_key),
91
+ drive_id=self.__data.drive_id
92
+ )
93
+ ).run()
65
94
  async def load(self) -> list[Document]:
66
95
  await asyncio.to_thread(self.run)
67
96
  await asyncio.sleep(1)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: ws_bom_robot_app
3
- Version: 0.0.95
3
+ Version: 0.0.96
4
4
  Summary: A FastAPI application serving ws bom/robot/llm platform ai.
5
5
  Home-page: https://github.com/websolutespa/bom
6
6
  Author: Websolute Spa
@@ -55,7 +55,7 @@ ws_bom_robot_app/llm/vector_store/integration/confluence.py,sha256=TMmGe53tHRTgH
55
55
  ws_bom_robot_app/llm/vector_store/integration/dropbox.py,sha256=vDEVTq7xkXNvpirMkJHm90WzxcSQqCXNc8PBwzLvSH4,2626
56
56
  ws_bom_robot_app/llm/vector_store/integration/gcs.py,sha256=P-NKwNag6fkY3bzFvVkAK5Ayl5CKM8T0MvkaFFwSyT0,3181
57
57
  ws_bom_robot_app/llm/vector_store/integration/github.py,sha256=1J4Ph3s58ngEIH5HyCMeeD6lVo2GzdU8y41BvPSLZcc,2441
58
- ws_bom_robot_app/llm/vector_store/integration/googledrive.py,sha256=8vW1CZ8lmfpzIIFSsoBl4zfWDa9kFJ6Bhd2O0d7F_4M,3735
58
+ ws_bom_robot_app/llm/vector_store/integration/googledrive.py,sha256=pQQKWsAskg_6FgC4PVmKY1fMvM8BiFxlUVhh5ERBOF4,5016
59
59
  ws_bom_robot_app/llm/vector_store/integration/jira.py,sha256=LPxSXPf268FKTS3wnejssDw6_GIpEPJ3QaNgRgPnb60,6718
60
60
  ws_bom_robot_app/llm/vector_store/integration/manager.py,sha256=S5z8LK_RcsCmWvLiBX-cea44CpVAXccND47oUOJ0Yus,1898
61
61
  ws_bom_robot_app/llm/vector_store/integration/s3.py,sha256=_SAuPfyK7lIz7Jq1LiBavkF1lre5yqe6DGlMYnxMa4o,3317
@@ -69,7 +69,7 @@ ws_bom_robot_app/llm/vector_store/loader/__init__.py,sha256=47DEQpj8HBSa-_TImW-5
69
69
  ws_bom_robot_app/llm/vector_store/loader/base.py,sha256=GjUS2oaz0LHOSal5pipBkomZtrYUNcKPSd8bzhUU5Dc,6889
70
70
  ws_bom_robot_app/llm/vector_store/loader/docling.py,sha256=IOv1A0HSIWiHWQFzI4fdApfxrKgXOqwmC3mPXlKplqQ,4012
71
71
  ws_bom_robot_app/llm/vector_store/loader/json_loader.py,sha256=LDppW0ZATo4_1hh-KlsAM3TLawBvwBxva_a7k5Oz1sc,858
72
- ws_bom_robot_app-0.0.95.dist-info/METADATA,sha256=s11Wm9YdnDgdeh8XxZxENQFOTO8KXS43zFavz1n3GmA,10116
73
- ws_bom_robot_app-0.0.95.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
74
- ws_bom_robot_app-0.0.95.dist-info/top_level.txt,sha256=Yl0akyHVbynsBX_N7wx3H3ZTkcMLjYyLJs5zBMDAKcM,17
75
- ws_bom_robot_app-0.0.95.dist-info/RECORD,,
72
+ ws_bom_robot_app-0.0.96.dist-info/METADATA,sha256=-CABRo25yuOPMqmaE_DrR1AGXZkkAY3LZDSx6jZBYXY,10116
73
+ ws_bom_robot_app-0.0.96.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
74
+ ws_bom_robot_app-0.0.96.dist-info/top_level.txt,sha256=Yl0akyHVbynsBX_N7wx3H3ZTkcMLjYyLJs5zBMDAKcM,17
75
+ ws_bom_robot_app-0.0.96.dist-info/RECORD,,