ws-bom-robot-app 0.0.104__py3-none-any.whl → 0.0.106__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,143 +1,147 @@
1
- import asyncio, logging, aiohttp
2
- from ws_bom_robot_app.llm.vector_store.integration.base import IntegrationStrategy
3
- from langchain_core.documents import Document
4
- from ws_bom_robot_app.llm.vector_store.loader.base import Loader
5
- from typing import List, Union, Optional
6
- from pydantic import BaseModel, Field, AliasChoices, field_validator
7
- import json
8
- import os
9
-
10
- class ShopifyParams(BaseModel):
11
- """
12
- ShopifyParams is a model that defines the parameters required for Shopify integration.
13
-
14
- Attributes:
15
- shop_name (str): The shop name for Shopify.
16
- access_token (str): The access token for Shopify.
17
- graphql_query (Union[str, dict]): The GraphQL query string or dict for Shopify.
18
- """
19
- shop_name: str = Field(validation_alias=AliasChoices("shopName","shop_name"))
20
- access_token: str = Field(validation_alias=AliasChoices("accessToken","access_token"))
21
- graphql_query: Union[str, dict] = Field(validation_alias=AliasChoices("graphqlQuery","graphql_query"))
22
-
23
- @field_validator('graphql_query')
24
- @classmethod
25
- def extract_query_string(cls, v):
26
- """Extract the query string from dict format if needed"""
27
- if isinstance(v, dict) and 'query' in v:
28
- return v['query']
29
- return v
30
-
31
- class Shopify(IntegrationStrategy):
32
- def __init__(self, knowledgebase_path: str, data: dict[str, Union[str,int,list]]):
33
- super().__init__(knowledgebase_path, data)
34
- self.__data = ShopifyParams.model_validate(self.data)
35
-
36
- def working_subdirectory(self) -> str:
37
- return 'shopify'
38
-
39
- async def run(self) -> None:
40
- _data = await self.__get_data()
41
- json_file_path = os.path.join(self.working_directory, 'shopify_data.json')
42
- with open(json_file_path, 'w', encoding='utf-8') as f:
43
- json.dump(_data, f, ensure_ascii=False)
44
-
45
- async def load(self) -> list[Document]:
46
- await self.run()
47
- await asyncio.sleep(1)
48
- return await Loader(self.working_directory).load()
49
-
50
- async def __get_data(self, page_size: int = 50) -> List[dict]:
51
- # URL dell'API
52
- url = f"https://{self.__data.shop_name}.myshopify.com/admin/api/2024-07/graphql.json"
53
-
54
- # Headers
55
- headers = {
56
- "X-Shopify-Access-Token": self.__data.access_token,
57
- "Content-Type": "application/json"
58
- }
59
-
60
- all_data: List[dict] = []
61
- has_next_page = True
62
- cursor = None
63
- retry_count = 0
64
- max_retries = 5
65
-
66
- while has_next_page:
67
- # Variables per la query
68
- variables = {
69
- "first": page_size
70
- }
71
-
72
- if cursor:
73
- variables["after"] = cursor
74
-
75
- # Payload della richiesta
76
- payload = {
77
- "query": self.__data.graphql_query,
78
- "variables": variables
79
- }
80
-
81
- try:
82
- # Effettua la richiesta
83
- async with aiohttp.ClientSession() as session:
84
- async with session.post(url, headers=headers, json=payload) as response:
85
- # Controlla se la risposta è JSON
86
- try:
87
- data = await response.json()
88
- except aiohttp.ContentTypeError:
89
- text = await response.text()
90
- logging.error(f"Non-JSON response received. Status code: {response.status}")
91
- logging.error(f"Content: {text}")
92
- raise Exception("Invalid response from API")
93
-
94
- # Gestione del throttling
95
- if "errors" in data:
96
- error = data["errors"][0]
97
- if error.get("extensions", {}).get("code") == "THROTTLED":
98
- retry_count += 1
99
- if retry_count > max_retries:
100
- raise Exception("Too many throttling attempts. Stopping execution.")
101
-
102
- # Aspetta un po' più a lungo ad ogni tentativo
103
- wait_time = 2 ** retry_count # Backoff esponenziale
104
- print(f"Rate limit reached. Waiting {wait_time} seconds... (Attempt {retry_count}/{max_retries})")
105
- await asyncio.sleep(wait_time)
106
- continue
107
- else:
108
- raise Exception(f"GraphQL errors: {data['errors']}")
109
-
110
- # Resetta il contatore dei retry se la richiesta è andata bene
111
- retry_count = 0
112
-
113
- # Estrae i dati
114
- _data = list(data["data"].values())[0]
115
- edges = _data["edges"]
116
- page_info = _data["pageInfo"]
117
-
118
- # Aggiungi i dati alla lista
119
- for edge in edges:
120
- all_data.append(edge["node"])
121
-
122
- # Aggiorna il cursore e il flag per la paginazione
123
- has_next_page = page_info["hasNextPage"]
124
- cursor = page_info["endCursor"]
125
-
126
- print(f"Recuperati {len(edges)} prodotti. Totale: {len(all_data)}")
127
-
128
- # Piccola pausa per evitare di saturare l'API
129
- await asyncio.sleep(0.1)
130
-
131
- except aiohttp.ClientError as e:
132
- logging.error(f"Connection error: {e}")
133
- retry_count += 1
134
- if retry_count <= max_retries:
135
- wait_time = 2 ** retry_count
136
- logging.warning(f"Retrying in {wait_time} seconds...")
137
- await asyncio.sleep(wait_time)
138
- continue
139
- else:
140
- raise Exception("Too many network errors. Stopping execution.")
141
-
142
- logging.info(f"Data retrieval completed! Total data: {len(all_data)}")
143
- return all_data
1
+ import asyncio, logging, aiohttp
2
+ from ws_bom_robot_app.llm.vector_store.integration.base import IntegrationStrategy
3
+ from langchain_core.documents import Document
4
+ from ws_bom_robot_app.llm.vector_store.loader.base import Loader
5
+ from typing import List, Union, Optional
6
+ from pydantic import BaseModel, Field, AliasChoices, field_validator
7
+ import json
8
+ import os
9
+
10
class ShopifyParams(BaseModel):
    """
    ShopifyParams is a model that defines the parameters required for Shopify integration.

    Attributes:
        shop_name (str): The shop name for Shopify.
        access_token (str): The access token for Shopify.
        graphql_query (Union[str, dict]): The GraphQL query string or dict for Shopify.
        filter_handle (Optional[List[str]]): Handles to exclude from the fetched
            results; defaults to None (no filtering applied).
    """
    shop_name: str = Field(validation_alias=AliasChoices("shopName","shop_name"))
    access_token: str = Field(validation_alias=AliasChoices("accessToken","access_token"))
    graphql_query: Union[str, dict] = Field(validation_alias=AliasChoices("graphqlQuery","graphql_query"))
    filter_handle: Optional[List[str]] = Field(default=None, validation_alias=AliasChoices("filterHandle","filter_handle"))

    @field_validator('graphql_query')
    @classmethod
    def extract_query_string(cls, v):
        """Normalize a ``{'query': ...}`` dict payload to its inner query string.

        Any other value (already a plain query string, or a dict without a
        'query' key) is passed through unchanged.
        """
        if isinstance(v, dict) and 'query' in v:
            return v['query']
        return v
31
+
32
class Shopify(IntegrationStrategy):
    """Integration strategy that pulls paginated data from the Shopify GraphQL
    Admin API and persists it as a JSON file for the downstream loader."""

    def __init__(self, knowledgebase_path: str, data: dict[str, Union[str,int,list]]):
        super().__init__(knowledgebase_path, data)
        # Validate/normalize raw integration parameters (shop name, token,
        # query, optional handle filter) up front so bad config fails fast.
        self.__data = ShopifyParams.model_validate(self.data)

    def working_subdirectory(self) -> str:
        """Subdirectory (under the knowledge base path) used for this integration's files."""
        return 'shopify'

    async def run(self) -> None:
        """Fetch all pages from Shopify and write them to ``shopify_data.json``."""
        _data = await self.__get_data()
        json_file_path = os.path.join(self.working_directory, 'shopify_data.json')
        with open(json_file_path, 'w', encoding='utf-8') as f:
            json.dump(_data, f, ensure_ascii=False)

    async def load(self) -> list[Document]:
        """Run the extraction, then load the produced file(s) as documents."""
        await self.run()
        # Brief pause before the loader scans the directory (kept from the
        # original implementation; presumably settles filesystem writes).
        await asyncio.sleep(1)
        return await Loader(self.working_directory).load()

    async def __get_data(self, page_size: int = 50) -> List[dict]:
        """Page through the GraphQL connection returned by the configured query.

        Follows cursor-based pagination (``pageInfo.hasNextPage`` / ``endCursor``),
        handling Shopify THROTTLED errors and transient network errors with
        exponential backoff (up to ``max_retries`` attempts each).

        Args:
            page_size: Number of items requested per page (``first`` variable).

        Returns:
            The collected ``node`` objects from every page, after applying the
            optional handle filter.

        Raises:
            Exception: on non-JSON responses, non-throttling GraphQL errors,
                or when the retry budget is exhausted.
        """
        url = f"https://{self.__data.shop_name}.myshopify.com/admin/api/2024-07/graphql.json"
        headers = {
            "X-Shopify-Access-Token": self.__data.access_token,
            "Content-Type": "application/json"
        }

        all_data: List[dict] = []
        has_next_page = True
        cursor = None
        retry_count = 0
        max_retries = 5

        # Reuse a single HTTP session for the whole pagination loop so the
        # underlying connection pool is shared across page requests, instead
        # of opening and tearing down a session per page.
        async with aiohttp.ClientSession() as session:
            while has_next_page:
                # GraphQL variables for this page
                variables: dict = {"first": page_size}
                if cursor:
                    variables["after"] = cursor

                payload = {
                    "query": self.__data.graphql_query,
                    "variables": variables
                }

                try:
                    async with session.post(url, headers=headers, json=payload) as response:
                        # Ensure the response is JSON before parsing it
                        try:
                            data = await response.json()
                        except aiohttp.ContentTypeError:
                            text = await response.text()
                            logging.error(f"Non-JSON response received. Status code: {response.status}")
                            logging.error(f"Content: {text}")
                            raise Exception("Invalid response from API")

                        # Throttling handling: back off exponentially and retry
                        # the same page; any other GraphQL error is fatal.
                        if "errors" in data:
                            error = data["errors"][0]
                            if error.get("extensions", {}).get("code") == "THROTTLED":
                                retry_count += 1
                                if retry_count > max_retries:
                                    raise Exception("Too many throttling attempts. Stopping execution.")
                                wait_time = 2 ** retry_count  # exponential backoff
                                logging.warning(f"Rate limit reached. Waiting {wait_time} seconds... (Attempt {retry_count}/{max_retries})")
                                await asyncio.sleep(wait_time)
                                continue
                            else:
                                raise Exception(f"GraphQL errors: {data['errors']}")

                        # Successful page: reset the retry budget
                        retry_count = 0

                        # The query is expected to return a single connection
                        # at the top level of "data" — TODO confirm for custom queries.
                        _data = next(iter(data["data"].values()))
                        edges = _data["edges"]
                        page_info = _data["pageInfo"]

                        # Collect the node of every edge on this page
                        all_data.extend(edge["node"] for edge in edges)

                        # Advance cursor-based pagination
                        has_next_page = page_info["hasNextPage"]
                        cursor = page_info["endCursor"]

                        logging.info(f"Recuperati {len(edges)} prodotti. Totale: {len(all_data)}")
                        # Small pause to avoid saturating the API
                        await asyncio.sleep(0.1)

                except aiohttp.ClientError as e:
                    logging.error(f"Connection error: {e}")
                    retry_count += 1
                    if retry_count <= max_retries:
                        wait_time = 2 ** retry_count
                        logging.warning(f"Retrying in {wait_time} seconds...")
                        await asyncio.sleep(wait_time)
                        continue
                    else:
                        raise Exception("Too many network errors. Stopping execution.")

        logging.info(f"Data retrieval completed! Total data: {len(all_data)}")
        return self.__filter_by_handle(all_data)

    def __filter_by_handle(self, data: List[dict]) -> List[dict]:
        """Drop items whose 'handle' is listed in ``filter_handle``; no-op when unset."""
        if not self.__data.filter_handle:
            return data
        return [item for item in data if item.get('handle') not in self.__data.filter_handle]