ws-bom-robot-app 0.0.94__py3-none-any.whl → 0.0.96__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,10 +1,38 @@
1
1
  import asyncio
2
+ import json
3
+ from pathlib import Path
2
4
  from ws_bom_robot_app.llm.vector_store.integration.base import IntegrationStrategy, UnstructuredIngest
3
5
  from unstructured_ingest.processes.connectors.google_drive import GoogleDriveConnectionConfig, GoogleDriveDownloaderConfig, GoogleDriveIndexerConfig, GoogleDriveAccessConfig
6
+ from unstructured_ingest.data_types.file_data import FileData as OriginalFileData, BatchFileData as OriginalBatchFileData
4
7
  from langchain_core.documents import Document
5
8
  from ws_bom_robot_app.llm.vector_store.loader.base import Loader
6
9
  from typing import Union
7
10
  from pydantic import BaseModel, Field, AliasChoices
11
+
12
+ # UTF-8 safe FileData classes
13
class FileData(OriginalFileData):
    """FileData variant whose JSON (de)serialization is explicit about encoding.

    ``from_file`` tries a short list of text encodings instead of relying on
    the platform default, and ``to_file`` always writes UTF-8 without
    escaping non-ASCII characters.
    """

    @classmethod
    def from_file(cls, path: str):
        """Load and validate an instance from the JSON file at *path*.

        Args:
            path: Location of the JSON file to read.

        Returns:
            An instance of ``cls`` built with ``cls.model_validate``.

        Raises:
            ValueError: If *path* is not an existing regular file, or if its
                content cannot be decoded with any candidate encoding.
                JSON and model-validation errors are not caught here.
        """
        path = Path(path).resolve()
        # is_file() is already False for a missing path, so one check suffices.
        if not path.is_file():
            raise ValueError(f"file path not valid: {path}")

        # Read the bytes once rather than re-opening the file per encoding.
        raw = path.read_bytes()

        # Tried in order:
        # - "utf-8-sig" decodes plain UTF-8 and also strips a leading BOM,
        #   which the json module rejects when it is left in the text.
        # - "latin-1" maps every byte to a code point, so it always decodes
        #   and must stay last ("iso-8859-1" is an alias of the same codec).
        for encoding in ("utf-8-sig", "cp1252", "latin-1"):
            try:
                text = raw.decode(encoding)
            except UnicodeError:  # UnicodeDecodeError is a subclass
                continue
            return cls.model_validate(json.loads(text))

        # Defensive only: not expected to be reached while "latin-1" is listed.
        raise ValueError(f"Could not decode file {path} with any supported encoding")

    def to_file(self, path: str) -> None:
        """Write this instance to *path* as indented, UTF-8 encoded JSON.

        Missing parent directories are created. Non-ASCII characters are
        written as-is rather than as ``\\uXXXX`` escapes.

        Args:
            path: Destination file; it is opened in write mode.
        """
        path = Path(path).resolve()
        path.parent.mkdir(parents=True, exist_ok=True)
        with open(str(path), "w", encoding="utf-8") as f:
            json.dump(self.model_dump(), f, indent=2, ensure_ascii=False)


# Batch counterpart: the upstream batch model combined with the FileData
# override above.
# NOTE(review): presumably the from_file/to_file defined above take effect
# here; that depends on OriginalBatchFileData not overriding them — confirm
# against the installed unstructured_ingest version.
class BatchFileData(OriginalBatchFileData, FileData):
    pass
35
+
8
36
  class GoogleDriveParams(BaseModel):
9
37
  """
10
38
  GoogleDriveParams is a model that holds parameters for Google Drive integration.
@@ -42,26 +70,27 @@ class GoogleDrive(IntegrationStrategy):
42
70
  super().__init__(knowledgebase_path, data)
43
71
  self.__data = GoogleDriveParams.model_validate(self.data)
44
72
  self.__unstructured_ingest = UnstructuredIngest(self.working_directory)
73
+ self._apply_encoding_fix()
74
+
75
def _apply_encoding_fix(self):
    """Replace the upstream FileData classes with the encoding-tolerant ones.

    Rebinds ``FileData``, ``BatchFileData`` and ``file_data_from_file`` on
    the ``unstructured_ingest.data_types.file_data`` module.

    NOTE(review): the rebinding is process-wide, and presumably only affects
    code that looks these names up through the module at call time; modules
    that already did ``from ... import FileData`` keep the originals —
    confirm this covers the pipeline's code paths.
    """
    import unstructured_ingest.data_types.file_data as fd
    from pydantic import ValidationError

    def file_data_from_file(path):
        # Try the batch model first, then fall back to plain FileData when
        # the payload does not validate as a batch. The previous lambda
        # branched on the truthiness of `path`, so every real path was
        # parsed as BatchFileData and non-batch files failed validation.
        # NOTE(review): believed to mirror the upstream helper — confirm
        # against the installed unstructured_ingest version.
        try:
            return BatchFileData.from_file(path)
        except ValidationError:
            return FileData.from_file(path)

    fd.FileData = FileData
    fd.BatchFileData = BatchFileData
    fd.file_data_from_file = file_data_from_file
81
+
45
82
  def working_subdirectory(self) -> str:
46
83
  return 'googledrive'
84
+
47
85
  def run(self) -> None:
48
- indexer_config = GoogleDriveIndexerConfig(
49
- extensions=self.__data.extensions,
50
- recursive=self.__data.recursive
51
- )
52
- downloader_config = GoogleDriveDownloaderConfig(
53
- download_dir=self.working_directory
54
- )
55
- connection_config = GoogleDriveConnectionConfig(
56
- access_config=GoogleDriveAccessConfig(
57
- service_account_key=self.__data.service_account_key
58
- ),
59
- drive_id=self.__data.drive_id
60
- )
61
86
  self.__unstructured_ingest.pipeline(
62
- indexer_config,
63
- downloader_config,
64
- connection_config).run()
87
+ GoogleDriveIndexerConfig(extensions=self.__data.extensions, recursive=self.__data.recursive),
88
+ GoogleDriveDownloaderConfig(download_dir=self.working_directory),
89
+ GoogleDriveConnectionConfig(
90
+ access_config=GoogleDriveAccessConfig(service_account_key=self.__data.service_account_key),
91
+ drive_id=self.__data.drive_id
92
+ )
93
+ ).run()
65
94
  async def load(self) -> list[Document]:
66
95
  await asyncio.to_thread(self.run)
67
96
  await asyncio.sleep(1)
@@ -57,7 +57,7 @@ class Shopify(IntegrationStrategy):
57
57
  "Content-Type": "application/json"
58
58
  }
59
59
 
60
- all_products: List[dict] = []
60
+ all_data: List[dict] = []
61
61
  has_next_page = True
62
62
  cursor = None
63
63
  retry_count = 0
@@ -111,19 +111,19 @@ class Shopify(IntegrationStrategy):
111
111
  retry_count = 0
112
112
 
113
113
  # Estrae i dati
114
- products_data = data["data"]["products"]
115
- edges = products_data["edges"]
116
- page_info = products_data["pageInfo"]
114
+ _data = list(data["data"].values())[0]
115
+ edges = _data["edges"]
116
+ page_info = _data["pageInfo"]
117
117
 
118
- # Aggiungi i prodotti alla lista
118
+ # Aggiungi i dati alla lista
119
119
  for edge in edges:
120
- all_products.append(edge["node"])
120
+ all_data.append(edge["node"])
121
121
 
122
122
  # Aggiorna il cursore e il flag per la paginazione
123
123
  has_next_page = page_info["hasNextPage"]
124
124
  cursor = page_info["endCursor"]
125
125
 
126
- print(f"Recuperati {len(edges)} prodotti. Totale: {len(all_products)}")
126
+ print(f"Recuperati {len(edges)} prodotti. Totale: {len(all_data)}")
127
127
 
128
128
  # Piccola pausa per evitare di saturare l'API
129
129
  await asyncio.sleep(0.1)
@@ -139,5 +139,5 @@ class Shopify(IntegrationStrategy):
139
139
  else:
140
140
  raise Exception("Too many network errors. Stopping execution.")
141
141
 
142
- logging.info(f"Data retrieval completed! Total products: {len(all_products)}")
143
- return all_products
142
+ logging.info(f"Data retrieval completed! Total data: {len(all_data)}")
143
+ return all_data
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: ws_bom_robot_app
3
- Version: 0.0.94
3
+ Version: 0.0.96
4
4
  Summary: A FastAPI application serving ws bom/robot/llm platform ai.
5
5
  Home-page: https://github.com/websolutespa/bom
6
6
  Author: Websolute Spa
@@ -55,13 +55,13 @@ ws_bom_robot_app/llm/vector_store/integration/confluence.py,sha256=TMmGe53tHRTgH
55
55
  ws_bom_robot_app/llm/vector_store/integration/dropbox.py,sha256=vDEVTq7xkXNvpirMkJHm90WzxcSQqCXNc8PBwzLvSH4,2626
56
56
  ws_bom_robot_app/llm/vector_store/integration/gcs.py,sha256=P-NKwNag6fkY3bzFvVkAK5Ayl5CKM8T0MvkaFFwSyT0,3181
57
57
  ws_bom_robot_app/llm/vector_store/integration/github.py,sha256=1J4Ph3s58ngEIH5HyCMeeD6lVo2GzdU8y41BvPSLZcc,2441
58
- ws_bom_robot_app/llm/vector_store/integration/googledrive.py,sha256=8vW1CZ8lmfpzIIFSsoBl4zfWDa9kFJ6Bhd2O0d7F_4M,3735
58
+ ws_bom_robot_app/llm/vector_store/integration/googledrive.py,sha256=pQQKWsAskg_6FgC4PVmKY1fMvM8BiFxlUVhh5ERBOF4,5016
59
59
  ws_bom_robot_app/llm/vector_store/integration/jira.py,sha256=LPxSXPf268FKTS3wnejssDw6_GIpEPJ3QaNgRgPnb60,6718
60
60
  ws_bom_robot_app/llm/vector_store/integration/manager.py,sha256=S5z8LK_RcsCmWvLiBX-cea44CpVAXccND47oUOJ0Yus,1898
61
61
  ws_bom_robot_app/llm/vector_store/integration/s3.py,sha256=_SAuPfyK7lIz7Jq1LiBavkF1lre5yqe6DGlMYnxMa4o,3317
62
62
  ws_bom_robot_app/llm/vector_store/integration/sftp.py,sha256=g6f-FKkEktx7nJahb7RKyQ4pM9wGik0_xXMDfWup-1c,2845
63
63
  ws_bom_robot_app/llm/vector_store/integration/sharepoint.py,sha256=DhBcAwgr1u-dQ_8TxeLPu7kzr_EDogCRQeBrIULtWfo,4898
64
- ws_bom_robot_app/llm/vector_store/integration/shopify.py,sha256=pJzd1yBo_NWSAls88wl8FSBWXPzE-N-tOT4oxiZft2A,5531
64
+ ws_bom_robot_app/llm/vector_store/integration/shopify.py,sha256=Q0W3rRV-3xox303KhSiiIxTJNIXIbMZiF7yME8dW-FE,5485
65
65
  ws_bom_robot_app/llm/vector_store/integration/sitemap.py,sha256=YKQ_0VUSW9NQ3svVKuas2OLk_fsTQuxg4B_zCBzKx_s,5282
66
66
  ws_bom_robot_app/llm/vector_store/integration/slack.py,sha256=hiE1kkg7868mbP2wVWQLmC1fK2jIE1lT7f8hVN0NqeY,2636
67
67
  ws_bom_robot_app/llm/vector_store/integration/thron.py,sha256=6XefkQxS-qF4yAH_sH1n2EONZvTiWiAAx_bb24y8QEQ,9330
@@ -69,7 +69,7 @@ ws_bom_robot_app/llm/vector_store/loader/__init__.py,sha256=47DEQpj8HBSa-_TImW-5
69
69
  ws_bom_robot_app/llm/vector_store/loader/base.py,sha256=GjUS2oaz0LHOSal5pipBkomZtrYUNcKPSd8bzhUU5Dc,6889
70
70
  ws_bom_robot_app/llm/vector_store/loader/docling.py,sha256=IOv1A0HSIWiHWQFzI4fdApfxrKgXOqwmC3mPXlKplqQ,4012
71
71
  ws_bom_robot_app/llm/vector_store/loader/json_loader.py,sha256=LDppW0ZATo4_1hh-KlsAM3TLawBvwBxva_a7k5Oz1sc,858
72
- ws_bom_robot_app-0.0.94.dist-info/METADATA,sha256=n44evGwT4k5tC98mDH7o3FQZ9965owDk589PVJbg2SI,10116
73
- ws_bom_robot_app-0.0.94.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
74
- ws_bom_robot_app-0.0.94.dist-info/top_level.txt,sha256=Yl0akyHVbynsBX_N7wx3H3ZTkcMLjYyLJs5zBMDAKcM,17
75
- ws_bom_robot_app-0.0.94.dist-info/RECORD,,
72
+ ws_bom_robot_app-0.0.96.dist-info/METADATA,sha256=-CABRo25yuOPMqmaE_DrR1AGXZkkAY3LZDSx6jZBYXY,10116
73
+ ws_bom_robot_app-0.0.96.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
74
+ ws_bom_robot_app-0.0.96.dist-info/top_level.txt,sha256=Yl0akyHVbynsBX_N7wx3H3ZTkcMLjYyLJs5zBMDAKcM,17
75
+ ws_bom_robot_app-0.0.96.dist-info/RECORD,,