srx-lib-azure 0.1.5__py3-none-any.whl → 0.4.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- srx_lib_azure/__init__.py +19 -2
- srx_lib_azure/blob.py +139 -25
- srx_lib_azure/document.py +262 -0
- srx_lib_azure/email.py +68 -10
- srx_lib_azure/speech.py +464 -0
- srx_lib_azure/table.py +438 -19
- srx_lib_azure-0.4.0.dist-info/METADATA +134 -0
- srx_lib_azure-0.4.0.dist-info/RECORD +9 -0
- srx_lib_azure-0.1.5.dist-info/METADATA +0 -70
- srx_lib_azure-0.1.5.dist-info/RECORD +0 -7
- {srx_lib_azure-0.1.5.dist-info → srx_lib_azure-0.4.0.dist-info}/WHEEL +0 -0
srx_lib_azure/__init__.py
CHANGED
|
@@ -1,6 +1,23 @@
|
|
|
1
1
|
from .blob import AzureBlobService
|
|
2
|
+
from .document import AzureDocumentIntelligenceService
|
|
2
3
|
from .email import EmailService
|
|
3
4
|
from .table import AzureTableService
|
|
4
5
|
|
|
5
|
-
|
|
6
|
-
|
|
6
|
+
# Public API that is always available, regardless of installed extras.
__all__ = [
    "AzureBlobService",
    "AzureDocumentIntelligenceService",
    "AzureTableService",
    "EmailService",
]

# Optional import - only available if speech extra is installed
try:
    from .speech import AzureSpeechService
except ImportError:
    # Speech SDK not installed - service not available
    pass
else:
    # Export the speech service only when its import actually succeeded, so
    # `from srx_lib_azure import *` never references a missing name.
    __all__.append("AzureSpeechService")
|
srx_lib_azure/blob.py
CHANGED
|
@@ -4,24 +4,43 @@ from datetime import datetime, timedelta, timezone
|
|
|
4
4
|
from typing import Optional, BinaryIO, Tuple
|
|
5
5
|
|
|
6
6
|
from azure.storage.blob import BlobServiceClient, BlobSasPermissions, generate_blob_sas
|
|
7
|
+
from azure.core.exceptions import (
|
|
8
|
+
ResourceNotFoundError,
|
|
9
|
+
ClientAuthenticationError,
|
|
10
|
+
HttpResponseError,
|
|
11
|
+
)
|
|
7
12
|
from fastapi import UploadFile
|
|
8
13
|
|
|
9
14
|
from loguru import logger
|
|
10
15
|
|
|
11
16
|
|
|
12
17
|
class AzureBlobService:
|
|
13
|
-
"""Minimal Azure Blob helper with SAS URL generation.
|
|
18
|
+
"""Minimal Azure Blob helper with SAS URL generation.
|
|
14
19
|
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
self.sas_token = os.getenv("AZURE_SAS_TOKEN")
|
|
20
|
-
self.base_blob_url = os.getenv("AZURE_BLOB_URL")
|
|
20
|
+
All configuration can be passed explicitly via constructor. If omitted, falls back
|
|
21
|
+
to environment variables. By default, it does not warn at startup when not
|
|
22
|
+
configured; operations will error if required values are missing.
|
|
23
|
+
"""
|
|
21
24
|
|
|
22
|
-
|
|
25
|
+
def __init__(
|
|
26
|
+
self,
|
|
27
|
+
*,
|
|
28
|
+
connection_string: Optional[str] = None,
|
|
29
|
+
account_key: Optional[str] = None,
|
|
30
|
+
container_name: Optional[str] = None,
|
|
31
|
+
base_blob_url: Optional[str] = None,
|
|
32
|
+
sas_token: Optional[str] = None,
|
|
33
|
+
warn_if_unconfigured: bool = False,
|
|
34
|
+
) -> None:
|
|
35
|
+
self.container_name = container_name or os.getenv("AZURE_BLOB_CONTAINER", "uploads")
|
|
36
|
+
self.connection_string = connection_string or os.getenv("AZURE_STORAGE_CONNECTION_STRING")
|
|
37
|
+
self.account_key = account_key or os.getenv("AZURE_STORAGE_ACCOUNT_KEY")
|
|
38
|
+
self.sas_token = sas_token or os.getenv("AZURE_SAS_TOKEN")
|
|
39
|
+
self.base_blob_url = base_blob_url or os.getenv("AZURE_BLOB_URL")
|
|
40
|
+
|
|
41
|
+
if warn_if_unconfigured and not self.connection_string:
|
|
23
42
|
logger.warning(
|
|
24
|
-
"Azure Storage connection string not configured; blob operations
|
|
43
|
+
"Azure Storage connection string not configured; blob operations may fail."
|
|
25
44
|
)
|
|
26
45
|
|
|
27
46
|
def _get_blob_service(self) -> BlobServiceClient:
|
|
@@ -35,9 +54,7 @@ class AzureBlobService:
|
|
|
35
54
|
return None, None
|
|
36
55
|
try:
|
|
37
56
|
clean = self.connection_string.strip().strip('"').strip("'")
|
|
38
|
-
parts = dict(
|
|
39
|
-
seg.split("=", 1) for seg in clean.split(";") if "=" in seg
|
|
40
|
-
)
|
|
57
|
+
parts = dict(seg.split("=", 1) for seg in clean.split(";") if "=" in seg)
|
|
41
58
|
account_name = parts.get("AccountName")
|
|
42
59
|
account_key = parts.get("AccountKey") or self.account_key
|
|
43
60
|
return account_name, account_key
|
|
@@ -79,9 +96,20 @@ class AzureBlobService:
|
|
|
79
96
|
if self.base_blob_url:
|
|
80
97
|
base_url = self.base_blob_url.strip().strip('"').strip("'").rstrip("/")
|
|
81
98
|
return f"{base_url}/{blob_name}?{sas}"
|
|
82
|
-
return
|
|
99
|
+
return (
|
|
100
|
+
f"https://{account_name}.blob.core.windows.net/{self.container_name}/{blob_name}?{sas}"
|
|
101
|
+
)
|
|
83
102
|
|
|
84
103
|
async def upload_file(self, file: UploadFile, blob_path: str) -> Optional[str]:
|
|
104
|
+
"""Upload a file to Azure Blob Storage and return a SAS URL.
|
|
105
|
+
|
|
106
|
+
Args:
|
|
107
|
+
file: File to upload
|
|
108
|
+
blob_path: Destination path in the container
|
|
109
|
+
|
|
110
|
+
Returns:
|
|
111
|
+
SAS URL if successful, None on error
|
|
112
|
+
"""
|
|
85
113
|
if not self.connection_string:
|
|
86
114
|
logger.error("Azure Storage connection string not configured")
|
|
87
115
|
return None
|
|
@@ -91,13 +119,37 @@ class AzureBlobService:
|
|
|
91
119
|
container = client.get_container_client(self.container_name)
|
|
92
120
|
content = await file.read()
|
|
93
121
|
blob_client = container.get_blob_client(blob_path)
|
|
94
|
-
blob_client.upload_blob(
|
|
122
|
+
blob_client.upload_blob(
|
|
123
|
+
content,
|
|
124
|
+
overwrite=True,
|
|
125
|
+
content_type=file.content_type or "application/octet-stream",
|
|
126
|
+
)
|
|
95
127
|
return self._generate_sas_url(blob_path)
|
|
128
|
+
except ClientAuthenticationError as e:
|
|
129
|
+
logger.error(f"Authentication failed uploading {file.filename}: {e}")
|
|
130
|
+
return None
|
|
131
|
+
except HttpResponseError as e:
|
|
132
|
+
logger.error(
|
|
133
|
+
f"Azure service error uploading {file.filename}: {e.status_code} - {e.message}"
|
|
134
|
+
)
|
|
135
|
+
return None
|
|
96
136
|
except Exception as e:
|
|
97
|
-
logger.error(f"
|
|
137
|
+
logger.error(f"Unexpected error uploading {file.filename}: {e}")
|
|
98
138
|
return None
|
|
99
139
|
|
|
100
|
-
async def upload_stream(
|
|
140
|
+
async def upload_stream(
|
|
141
|
+
self, stream: BinaryIO, blob_path: str, content_type: str = "application/octet-stream"
|
|
142
|
+
) -> Optional[str]:
|
|
143
|
+
"""Upload a binary stream to Azure Blob Storage and return a SAS URL.
|
|
144
|
+
|
|
145
|
+
Args:
|
|
146
|
+
stream: Binary stream to upload
|
|
147
|
+
blob_path: Destination path in the container
|
|
148
|
+
content_type: MIME type of the content
|
|
149
|
+
|
|
150
|
+
Returns:
|
|
151
|
+
SAS URL if successful, None on error
|
|
152
|
+
"""
|
|
101
153
|
if not self.connection_string:
|
|
102
154
|
logger.error("Azure Storage connection string not configured")
|
|
103
155
|
return None
|
|
@@ -108,12 +160,27 @@ class AzureBlobService:
|
|
|
108
160
|
blob_client = container.get_blob_client(blob_path)
|
|
109
161
|
blob_client.upload_blob(stream, overwrite=True, content_type=content_type)
|
|
110
162
|
return self._generate_sas_url(blob_path)
|
|
163
|
+
except ClientAuthenticationError as e:
|
|
164
|
+
logger.error(f"Authentication failed uploading stream to {blob_path}: {e}")
|
|
165
|
+
return None
|
|
166
|
+
except HttpResponseError as e:
|
|
167
|
+
logger.error(
|
|
168
|
+
f"Azure service error uploading stream to {blob_path}: {e.status_code} - {e.message}"
|
|
169
|
+
)
|
|
170
|
+
return None
|
|
111
171
|
except Exception as e:
|
|
112
|
-
logger.error(f"
|
|
172
|
+
logger.error(f"Unexpected error uploading stream to {blob_path}: {e}")
|
|
113
173
|
return None
|
|
114
174
|
|
|
115
175
|
async def download_file(self, blob_path: str) -> Optional[bytes]:
|
|
116
|
-
"""Download a blob's content as bytes.
|
|
176
|
+
"""Download a blob's content as bytes.
|
|
177
|
+
|
|
178
|
+
Returns:
|
|
179
|
+
bytes if successful, None if blob doesn't exist
|
|
180
|
+
|
|
181
|
+
Raises:
|
|
182
|
+
RuntimeError: For connection/auth errors (caller should handle)
|
|
183
|
+
"""
|
|
117
184
|
if not self.connection_string:
|
|
118
185
|
logger.error("Azure Storage connection string not configured")
|
|
119
186
|
return None
|
|
@@ -125,9 +192,24 @@ class AzureBlobService:
|
|
|
125
192
|
content = download_stream.readall()
|
|
126
193
|
logger.info(f"Successfully downloaded {blob_path}")
|
|
127
194
|
return content
|
|
128
|
-
except
|
|
129
|
-
|
|
195
|
+
except ResourceNotFoundError:
|
|
196
|
+
# Blob doesn't exist - this is expected in many scenarios
|
|
197
|
+
logger.info(f"Blob not found: {blob_path}")
|
|
130
198
|
return None
|
|
199
|
+
except ClientAuthenticationError as e:
|
|
200
|
+
# Auth errors should not be retried - they need credential fixes
|
|
201
|
+
logger.error(f"Authentication failed for {blob_path}: {e}")
|
|
202
|
+
raise RuntimeError(f"Azure authentication failed: {e}") from e
|
|
203
|
+
except HttpResponseError as e:
|
|
204
|
+
# Other Azure service errors (rate limits, service issues, etc.)
|
|
205
|
+
logger.error(
|
|
206
|
+
f"Azure service error downloading {blob_path}: {e.status_code} - {e.message}"
|
|
207
|
+
)
|
|
208
|
+
raise RuntimeError(f"Azure Blob download failed for {blob_path}: {e.message}") from e
|
|
209
|
+
except Exception as e:
|
|
210
|
+
# Catch-all for unexpected errors (network, etc.)
|
|
211
|
+
logger.error(f"Unexpected error downloading {blob_path}: {e}")
|
|
212
|
+
raise RuntimeError(f"Unexpected error downloading {blob_path}: {e}") from e
|
|
131
213
|
|
|
132
214
|
async def download_to_temp_file(self, blob_path: str) -> Optional[str]:
|
|
133
215
|
"""Download a blob to a temporary file and return its path."""
|
|
@@ -135,7 +217,9 @@ class AzureBlobService:
|
|
|
135
217
|
if content is None:
|
|
136
218
|
return None
|
|
137
219
|
try:
|
|
138
|
-
with tempfile.NamedTemporaryFile(
|
|
220
|
+
with tempfile.NamedTemporaryFile(
|
|
221
|
+
delete=False, suffix=os.path.splitext(blob_path)[1]
|
|
222
|
+
) as tf:
|
|
139
223
|
tf.write(content)
|
|
140
224
|
path = tf.name
|
|
141
225
|
logger.info(f"Downloaded {blob_path} to temporary file: {path}")
|
|
@@ -158,7 +242,14 @@ class AzureBlobService:
|
|
|
158
242
|
return None
|
|
159
243
|
|
|
160
244
|
async def delete_file(self, blob_path: str) -> bool:
|
|
161
|
-
"""Delete a blob and return True on success.
|
|
245
|
+
"""Delete a blob and return True on success.
|
|
246
|
+
|
|
247
|
+
Args:
|
|
248
|
+
blob_path: Path to the blob to delete
|
|
249
|
+
|
|
250
|
+
Returns:
|
|
251
|
+
True if deleted successfully or blob doesn't exist, False on error
|
|
252
|
+
"""
|
|
162
253
|
if not self.connection_string:
|
|
163
254
|
logger.error("Azure Storage connection string not configured")
|
|
164
255
|
return False
|
|
@@ -169,12 +260,29 @@ class AzureBlobService:
|
|
|
169
260
|
blob_client.delete_blob()
|
|
170
261
|
logger.info(f"Successfully deleted {blob_path}")
|
|
171
262
|
return True
|
|
263
|
+
except ResourceNotFoundError:
|
|
264
|
+
# Blob already doesn't exist - this is still success
|
|
265
|
+
logger.info(f"Blob {blob_path} already deleted or doesn't exist")
|
|
266
|
+
return True
|
|
267
|
+
except ClientAuthenticationError as e:
|
|
268
|
+
logger.error(f"Authentication failed when deleting {blob_path}: {e}")
|
|
269
|
+
return False
|
|
270
|
+
except HttpResponseError as e:
|
|
271
|
+
logger.error(f"Azure service error deleting {blob_path}: {e.status_code} - {e.message}")
|
|
272
|
+
return False
|
|
172
273
|
except Exception as e:
|
|
173
|
-
logger.error(f"
|
|
274
|
+
logger.error(f"Unexpected error deleting {blob_path}: {e}")
|
|
174
275
|
return False
|
|
175
276
|
|
|
176
277
|
async def file_exists(self, blob_path: str) -> bool:
|
|
177
|
-
"""Check if a blob exists in the container.
|
|
278
|
+
"""Check if a blob exists in the container.
|
|
279
|
+
|
|
280
|
+
Args:
|
|
281
|
+
blob_path: Path to the blob to check
|
|
282
|
+
|
|
283
|
+
Returns:
|
|
284
|
+
True if blob exists, False otherwise (including on errors)
|
|
285
|
+
"""
|
|
178
286
|
if not self.connection_string:
|
|
179
287
|
logger.error("Azure Storage connection string not configured")
|
|
180
288
|
return False
|
|
@@ -183,6 +291,12 @@ class AzureBlobService:
|
|
|
183
291
|
container = client.get_container_client(self.container_name)
|
|
184
292
|
blob_client = container.get_blob_client(blob_path)
|
|
185
293
|
return blob_client.exists()
|
|
294
|
+
except ClientAuthenticationError as e:
|
|
295
|
+
logger.error(f"Authentication failed checking {blob_path}: {e}")
|
|
296
|
+
return False
|
|
297
|
+
except HttpResponseError as e:
|
|
298
|
+
logger.error(f"Azure service error checking {blob_path}: {e.status_code} - {e.message}")
|
|
299
|
+
return False
|
|
186
300
|
except Exception as e:
|
|
187
|
-
logger.error(f"
|
|
301
|
+
logger.error(f"Unexpected error checking existence of {blob_path}: {e}")
|
|
188
302
|
return False
|
|
@@ -0,0 +1,262 @@
|
|
|
1
|
+
import os
|
|
2
|
+
import io
|
|
3
|
+
import asyncio
|
|
4
|
+
from typing import Dict, Any, Optional
|
|
5
|
+
|
|
6
|
+
from loguru import logger
|
|
7
|
+
|
|
8
|
+
try:
|
|
9
|
+
from azure.ai.documentintelligence import DocumentIntelligenceClient
|
|
10
|
+
from azure.ai.documentintelligence.models import AnalyzeDocumentRequest, AnalyzeResult
|
|
11
|
+
from azure.core.credentials import AzureKeyCredential
|
|
12
|
+
from azure.core.exceptions import (
|
|
13
|
+
ClientAuthenticationError,
|
|
14
|
+
HttpResponseError,
|
|
15
|
+
ServiceRequestError,
|
|
16
|
+
)
|
|
17
|
+
except Exception: # pragma: no cover - optional dependency at import time
|
|
18
|
+
DocumentIntelligenceClient = None # type: ignore
|
|
19
|
+
AnalyzeDocumentRequest = None # type: ignore
|
|
20
|
+
AnalyzeResult = None # type: ignore
|
|
21
|
+
AzureKeyCredential = None # type: ignore
|
|
22
|
+
ClientAuthenticationError = None # type: ignore
|
|
23
|
+
HttpResponseError = None # type: ignore
|
|
24
|
+
ServiceRequestError = None # type: ignore
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
class AzureDocumentIntelligenceService:
    """Wrapper for Azure Document Intelligence (OCR/Document Analysis).

    Does not raise on missing configuration to keep the library optional.
    If not configured, analysis calls return error responses with descriptive messages.
    """

    def __init__(
        self,
        *,
        endpoint: Optional[str] = None,
        key: Optional[str] = None,
        warn_if_unconfigured: bool = False,
    ) -> None:
        """Initialize Document Intelligence service.

        Args:
            endpoint: Azure Document Intelligence endpoint URL; falls back to
                ``AZURE_DOCUMENT_INTELLIGENCE_ENDPOINT``.
            key: Azure Document Intelligence API key; falls back to
                ``AZURE_DOCUMENT_INTELLIGENCE_KEY``.
            warn_if_unconfigured: Whether to log a warning if not configured
        """
        self.endpoint = endpoint or os.getenv("AZURE_DOCUMENT_INTELLIGENCE_ENDPOINT")
        self.key = key or os.getenv("AZURE_DOCUMENT_INTELLIGENCE_KEY")

        # `self.client` stays None whenever the service cannot be used; every
        # public method checks it and returns an error dict instead of raising.
        self.client = None

        if not self.endpoint or not self.key or DocumentIntelligenceClient is None:
            if warn_if_unconfigured:
                logger.warning(
                    "AzureDocumentIntelligenceService not configured "
                    "(missing endpoint/key or azure-ai-documentintelligence SDK). "
                    "Calls will return error responses."
                )
            return

        try:
            self.client = DocumentIntelligenceClient(
                endpoint=self.endpoint, credential=AzureKeyCredential(self.key)
            )
        except Exception as e:
            self.client = None
            # loguru formats with str.format-style braces, not %-style
            # placeholders, so use "{}" to actually render the exception.
            logger.warning("DocumentIntelligenceClient initialization failed: {}", e)

    async def analyze_document_from_url(
        self, url: str, model_id: str = "prebuilt-read"
    ) -> Dict[str, Any]:
        """Analyze a document from a URL using Azure Document Intelligence.

        Args:
            url: URL of the document to analyze (must be accessible to Azure)
            model_id: Model to use (default: "prebuilt-read" for OCR)
                Other options: "prebuilt-layout", "prebuilt-invoice", etc.

        Returns:
            Dict with analysis results:
            - success (bool): Whether analysis succeeded
            - content (str | None): Extracted text content
            - pages (list[dict] | None): Page information
            - page_count (int | None): Total number of pages
            - confidence (float | None): Average OCR confidence (0-1)
            - model_id (str | None): Model used
            - metadata (dict | None): Additional metadata
            - error (str | None): Error message if failed

            On failure only ``success`` and ``error`` are present.
        """
        if not self.client:
            logger.warning("Document analysis from URL skipped: service not configured")
            return {
                "success": False,
                "error": "Document Intelligence service not configured",
            }

        try:
            # Log the URL without its query string: document URLs are commonly
            # SAS URLs whose query carries a signed access token, which must
            # not end up in log output. The full URL is still sent to Azure.
            loggable_url = url.split("?", 1)[0]
            logger.info(
                f"Starting document analysis from URL: {loggable_url} (model: {model_id})"
            )

            # Starting the operation is a blocking call; run it off the event loop.
            poller = await asyncio.to_thread(
                self.client.begin_analyze_document,
                model_id,
                AnalyzeDocumentRequest(url_source=url),
            )

            # Waiting for the long-running operation also blocks.
            result = await asyncio.to_thread(poller.result)

            logger.info(
                f"Document analysis completed (model: {model_id}, pages: {len(result.pages or [])})"
            )

            return self._format_result(result, model_id)

        except Exception as e:
            return self._error_response(e, "URL")

    async def analyze_document_from_bytes(
        self, file_content: bytes, model_id: str = "prebuilt-read"
    ) -> Dict[str, Any]:
        """Analyze a document from bytes using Azure Document Intelligence.

        Args:
            file_content: Document content as bytes (PDF, image, etc.)
            model_id: Model to use (default: "prebuilt-read" for OCR)

        Returns:
            Dict with analysis results (same format as analyze_document_from_url)
        """
        if not self.client:
            logger.warning("Document analysis from bytes skipped: service not configured")
            return {
                "success": False,
                "error": "Document Intelligence service not configured",
            }

        try:
            logger.info(
                f"Starting document analysis from bytes (size: {len(file_content)} bytes, model: {model_id})"
            )

            # The SDK call is given a file-like object wrapping the bytes.
            file_stream = io.BytesIO(file_content)

            # Starting the operation is a blocking call; run it off the event loop.
            poller = await asyncio.to_thread(
                self.client.begin_analyze_document,
                model_id,
                body=file_stream,
            )

            # Waiting for the long-running operation also blocks.
            result = await asyncio.to_thread(poller.result)

            logger.info(
                f"Document analysis completed (model: {model_id}, pages: {len(result.pages or [])})"
            )

            return self._format_result(result, model_id)

        except Exception as e:
            return self._error_response(e, "bytes")

    def _error_response(self, exc: Exception, source: str) -> Dict[str, Any]:
        """Log *exc* and convert it into the failure dict returned to callers.

        Args:
            exc: Exception raised while starting or awaiting the analysis.
            source: Input kind ("URL" or "bytes"), used only in the log line
                for otherwise unclassified errors.

        Returns:
            ``{"success": False, "error": <message>}``.
        """
        # Check order matters: the authentication error is tested before the
        # general HTTP response error so it gets the more specific message.
        if isinstance(exc, ClientAuthenticationError):
            logger.error(f"Authentication failed for document analysis: {exc}")
            return {"success": False, "error": f"Authentication failed: {exc}"}
        if isinstance(exc, HttpResponseError):
            logger.error(
                f"Azure service error analyzing document: {exc.status_code} - {exc.message}"
            )
            return {
                "success": False,
                "error": f"Azure service error ({exc.status_code}): {exc.message}",
            }
        if isinstance(exc, ServiceRequestError):
            logger.error(f"Network error analyzing document: {exc}")
            return {"success": False, "error": f"Network error: {exc}"}
        logger.error(f"Unexpected error analyzing document from {source}: {exc}")
        return {"success": False, "error": f"Unexpected error: {exc}"}

    def _format_result(self, result: "AnalyzeResult", model_id: str) -> Dict[str, Any]:
        """Format the AnalyzeResult into a dict response.

        Args:
            result: Azure Document Intelligence AnalyzeResult
            model_id: Model ID used for analysis

        Returns:
            Formatted dict with extracted content and metadata. ``content``,
            ``pages`` and ``page_count`` are None when the result has no pages;
            ``confidence`` is None when no confidence values were reported.
        """
        content_parts: list[str] = []
        pages_info: list[Dict[str, Any]] = []
        line_confidences: list[float] = []
        word_confidences: list[float] = []

        for page in result.pages or []:
            lines = page.lines or []
            words = page.words or []

            pages_info.append(
                {
                    "page_number": page.page_number,
                    "width": page.width,
                    "height": page.height,
                    "unit": page.unit,
                    "lines_count": len(lines),
                    "words_count": len(words),
                }
            )

            # Text is assembled line by line, in page order.
            for line in lines:
                content_parts.append(line.content)
                line_confidence = getattr(line, "confidence", None)
                if line_confidence is not None:
                    line_confidences.append(line_confidence)

            # The service reports recognition confidence on words rather than
            # on lines, so collect those as a fallback; otherwise the average
            # below would never be computed.
            for word in words:
                word_confidence = getattr(word, "confidence", None)
                if word_confidence is not None:
                    word_confidences.append(word_confidence)

        # Combine all content with newlines
        full_content = "\n".join(content_parts)

        # Prefer line-level values when present, otherwise average the words.
        confidences = line_confidences or word_confidences
        avg_confidence = sum(confidences) / len(confidences) if confidences else None

        metadata: Dict[str, Any] = {
            "content_format": getattr(result, "content_format", None),
            "api_version": getattr(result, "api_version", None),
        }

        # Add languages if detected
        languages = getattr(result, "languages", None)
        if languages:
            metadata["languages"] = [
                {"locale": lang.locale, "confidence": lang.confidence} for lang in languages
            ]

        # Add styles if detected (e.g., handwriting)
        styles = getattr(result, "styles", None)
        if styles:
            metadata["has_handwriting"] = any(
                style.is_handwritten for style in styles if hasattr(style, "is_handwritten")
            )

        return {
            "success": True,
            "content": full_content if full_content else None,
            "pages": pages_info if pages_info else None,
            "page_count": len(pages_info) if pages_info else None,
            "confidence": avg_confidence,
            "model_id": model_id,
            "metadata": metadata,
        }
|
srx_lib_azure/email.py
CHANGED
|
@@ -4,8 +4,16 @@ from typing import Dict, Any
|
|
|
4
4
|
|
|
5
5
|
try:
|
|
6
6
|
from azure.communication.email.aio import EmailClient
|
|
7
|
+
from azure.core.exceptions import (
|
|
8
|
+
ClientAuthenticationError,
|
|
9
|
+
HttpResponseError,
|
|
10
|
+
ServiceRequestError,
|
|
11
|
+
)
|
|
7
12
|
except Exception: # pragma: no cover - optional dependency at import time
|
|
8
13
|
EmailClient = None # type: ignore
|
|
14
|
+
ClientAuthenticationError = None # type: ignore
|
|
15
|
+
HttpResponseError = None # type: ignore
|
|
16
|
+
ServiceRequestError = None # type: ignore
|
|
9
17
|
|
|
10
18
|
logger = logging.getLogger(__name__)
|
|
11
19
|
|
|
@@ -17,14 +25,21 @@ class EmailService:
|
|
|
17
25
|
If not configured, send calls are skipped with a warning and a 'skipped' status.
|
|
18
26
|
"""
|
|
19
27
|
|
|
20
|
-
def __init__(
|
|
21
|
-
self
|
|
22
|
-
|
|
28
|
+
def __init__(
|
|
29
|
+
self,
|
|
30
|
+
*,
|
|
31
|
+
connection_string: str | None = None,
|
|
32
|
+
sender_address: str | None = None,
|
|
33
|
+
warn_if_unconfigured: bool = False,
|
|
34
|
+
):
|
|
35
|
+
self.connection_string = connection_string or os.getenv("ACS_CONNECTION_STRING")
|
|
36
|
+
self.sender_address = sender_address or os.getenv("EMAIL_SENDER")
|
|
23
37
|
if not self.connection_string or not self.sender_address or EmailClient is None:
|
|
24
38
|
self.email_client = None
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
39
|
+
if warn_if_unconfigured:
|
|
40
|
+
logger.warning(
|
|
41
|
+
"EmailService not configured (missing ACS_CONNECTION_STRING/EMAIL_SENDER or azure SDK). Calls will be skipped."
|
|
42
|
+
)
|
|
28
43
|
else:
|
|
29
44
|
try:
|
|
30
45
|
self.email_client = EmailClient.from_connection_string(self.connection_string)
|
|
@@ -32,10 +47,27 @@ class EmailService:
|
|
|
32
47
|
self.email_client = None
|
|
33
48
|
logger.warning("EmailService initialization failed: %s", e)
|
|
34
49
|
|
|
35
|
-
async def send_notification(
|
|
50
|
+
async def send_notification(
|
|
51
|
+
self, recipient: str, subject: str, body: str, html: bool = False
|
|
52
|
+
) -> Dict[str, Any]:
|
|
53
|
+
"""Send an email notification via Azure Communication Services.
|
|
54
|
+
|
|
55
|
+
Args:
|
|
56
|
+
recipient: Email address of the recipient
|
|
57
|
+
subject: Email subject line
|
|
58
|
+
body: Email body content
|
|
59
|
+
html: If True, send as HTML; otherwise plain text
|
|
60
|
+
|
|
61
|
+
Returns:
|
|
62
|
+
Dict with status, message, and optional message_id
|
|
63
|
+
- status: "success" | "error" | "skipped"
|
|
64
|
+
- message: Human-readable message
|
|
65
|
+
- message_id: Azure message ID (only on success)
|
|
66
|
+
"""
|
|
36
67
|
if not self.email_client or not self.sender_address:
|
|
37
68
|
logger.warning("Email skipped: service not configured")
|
|
38
69
|
return {"status": "skipped", "message": "Email service not configured"}
|
|
70
|
+
|
|
39
71
|
message = {
|
|
40
72
|
"content": {"subject": subject},
|
|
41
73
|
"recipients": {"to": [{"address": recipient}]},
|
|
@@ -45,15 +77,41 @@ class EmailService:
|
|
|
45
77
|
message["content"]["html"] = body
|
|
46
78
|
else:
|
|
47
79
|
message["content"]["plainText"] = body
|
|
80
|
+
|
|
48
81
|
try:
|
|
49
82
|
poller = await self.email_client.begin_send(message)
|
|
50
83
|
result = await poller.result()
|
|
51
84
|
message_id = result.get("id")
|
|
52
85
|
if message_id:
|
|
53
86
|
logger.info("Email sent to %s with Message ID: %s", recipient, message_id)
|
|
54
|
-
return {
|
|
87
|
+
return {
|
|
88
|
+
"status": "success",
|
|
89
|
+
"message": "Email sent successfully",
|
|
90
|
+
"message_id": message_id,
|
|
91
|
+
}
|
|
55
92
|
logger.error("Failed to send email. Result: %s", result)
|
|
56
93
|
return {"status": "error", "message": f"Failed to send email: {result}"}
|
|
94
|
+
except ClientAuthenticationError as e:
|
|
95
|
+
# Auth errors should not be retried - they need credential fixes
|
|
96
|
+
logger.error("Authentication failed sending email to %s: %s", recipient, e)
|
|
97
|
+
return {"status": "error", "message": f"Authentication failed: {e}"}
|
|
98
|
+
except HttpResponseError as e:
|
|
99
|
+
# Azure service errors (rate limits, invalid recipient, etc.)
|
|
100
|
+
logger.error(
|
|
101
|
+
"Azure service error sending email to %s: %s - %s",
|
|
102
|
+
recipient,
|
|
103
|
+
e.status_code,
|
|
104
|
+
e.message,
|
|
105
|
+
)
|
|
106
|
+
return {
|
|
107
|
+
"status": "error",
|
|
108
|
+
"message": f"Azure service error ({e.status_code}): {e.message}",
|
|
109
|
+
}
|
|
110
|
+
except ServiceRequestError as e:
|
|
111
|
+
# Network/connection errors - may be retryable
|
|
112
|
+
logger.error("Network error sending email to %s: %s", recipient, e)
|
|
113
|
+
return {"status": "error", "message": f"Network error: {e}"}
|
|
57
114
|
except Exception as e:
|
|
58
|
-
|
|
59
|
-
|
|
115
|
+
# Catch-all for unexpected errors
|
|
116
|
+
logger.error("Unexpected error sending email to %s: %s", recipient, e)
|
|
117
|
+
return {"status": "error", "message": f"Unexpected error: {e}"}
|