streamlit-octostar-utils 0.4.1__tar.gz → 0.4.2.dev2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {streamlit_octostar_utils-0.4.1 → streamlit_octostar_utils-0.4.2.dev2}/PKG-INFO +1 -1
- {streamlit_octostar_utils-0.4.1 → streamlit_octostar_utils-0.4.2.dev2}/pyproject.toml +1 -1
- streamlit_octostar_utils-0.4.2.dev2/streamlit_octostar_utils/api_crafter/contents.py +914 -0
- {streamlit_octostar_utils-0.4.1 → streamlit_octostar_utils-0.4.2.dev2}/streamlit_octostar_utils/api_crafter/nifi.py +51 -53
- {streamlit_octostar_utils-0.4.1 → streamlit_octostar_utils-0.4.2.dev2}/streamlit_octostar_utils/core/timestamp.py +4 -1
- {streamlit_octostar_utils-0.4.1 → streamlit_octostar_utils-0.4.2.dev2}/LICENSE +0 -0
- {streamlit_octostar_utils-0.4.1 → streamlit_octostar_utils-0.4.2.dev2}/README.md +0 -0
- {streamlit_octostar_utils-0.4.1 → streamlit_octostar_utils-0.4.2.dev2}/streamlit_octostar_utils/__init__.py +0 -0
- {streamlit_octostar_utils-0.4.1 → streamlit_octostar_utils-0.4.2.dev2}/streamlit_octostar_utils/api_crafter/__init__.py +0 -0
- {streamlit_octostar_utils-0.4.1 → streamlit_octostar_utils-0.4.2.dev2}/streamlit_octostar_utils/api_crafter/celery.py +0 -0
- {streamlit_octostar_utils-0.4.1 → streamlit_octostar_utils-0.4.2.dev2}/streamlit_octostar_utils/api_crafter/fastapi.py +0 -0
- {streamlit_octostar_utils-0.4.1 → streamlit_octostar_utils-0.4.2.dev2}/streamlit_octostar_utils/api_crafter/parallelism.py +0 -0
- {streamlit_octostar_utils-0.4.1 → streamlit_octostar_utils-0.4.2.dev2}/streamlit_octostar_utils/api_crafter/parser/__init__.py +0 -0
- {streamlit_octostar_utils-0.4.1 → streamlit_octostar_utils-0.4.2.dev2}/streamlit_octostar_utils/api_crafter/parser/combine_fields.py +0 -0
- {streamlit_octostar_utils-0.4.1 → streamlit_octostar_utils-0.4.2.dev2}/streamlit_octostar_utils/api_crafter/parser/entities_parser.py +0 -0
- {streamlit_octostar_utils-0.4.1 → streamlit_octostar_utils-0.4.2.dev2}/streamlit_octostar_utils/api_crafter/parser/generics.py +0 -0
- {streamlit_octostar_utils-0.4.1 → streamlit_octostar_utils-0.4.2.dev2}/streamlit_octostar_utils/api_crafter/parser/info.py +0 -0
- {streamlit_octostar_utils-0.4.1 → streamlit_octostar_utils-0.4.2.dev2}/streamlit_octostar_utils/api_crafter/parser/linkchart_functions.py +0 -0
- {streamlit_octostar_utils-0.4.1 → streamlit_octostar_utils-0.4.2.dev2}/streamlit_octostar_utils/api_crafter/parser/matches.py +0 -0
- {streamlit_octostar_utils-0.4.1 → streamlit_octostar_utils-0.4.2.dev2}/streamlit_octostar_utils/api_crafter/parser/parameters.py +0 -0
- {streamlit_octostar_utils-0.4.1 → streamlit_octostar_utils-0.4.2.dev2}/streamlit_octostar_utils/api_crafter/parser/rules.py +0 -0
- {streamlit_octostar_utils-0.4.1 → streamlit_octostar_utils-0.4.2.dev2}/streamlit_octostar_utils/api_crafter/parser/signals.py +0 -0
- {streamlit_octostar_utils-0.4.1 → streamlit_octostar_utils-0.4.2.dev2}/streamlit_octostar_utils/core/__init__.py +0 -0
- {streamlit_octostar_utils-0.4.1 → streamlit_octostar_utils-0.4.2.dev2}/streamlit_octostar_utils/core/dict.py +0 -0
- {streamlit_octostar_utils-0.4.1 → streamlit_octostar_utils-0.4.2.dev2}/streamlit_octostar_utils/core/filetypes.py +0 -0
- {streamlit_octostar_utils-0.4.1 → streamlit_octostar_utils-0.4.2.dev2}/streamlit_octostar_utils/core/threading/__init__.py +0 -0
- {streamlit_octostar_utils-0.4.1 → streamlit_octostar_utils-0.4.2.dev2}/streamlit_octostar_utils/core/threading/key_queue.py +0 -0
- {streamlit_octostar_utils-0.4.1 → streamlit_octostar_utils-0.4.2.dev2}/streamlit_octostar_utils/nlp/__init__.py +0 -0
- {streamlit_octostar_utils-0.4.1 → streamlit_octostar_utils-0.4.2.dev2}/streamlit_octostar_utils/nlp/custom_recognizers.py +0 -0
- {streamlit_octostar_utils-0.4.1 → streamlit_octostar_utils-0.4.2.dev2}/streamlit_octostar_utils/nlp/language.py +0 -0
- {streamlit_octostar_utils-0.4.1 → streamlit_octostar_utils-0.4.2.dev2}/streamlit_octostar_utils/nlp/ner.py +0 -0
- {streamlit_octostar_utils-0.4.1 → streamlit_octostar_utils-0.4.2.dev2}/streamlit_octostar_utils/octostar/__init__.py +0 -0
- {streamlit_octostar_utils-0.4.1 → streamlit_octostar_utils-0.4.2.dev2}/streamlit_octostar_utils/octostar/client.py +0 -0
- {streamlit_octostar_utils-0.4.1 → streamlit_octostar_utils-0.4.2.dev2}/streamlit_octostar_utils/octostar/context.py +0 -0
- {streamlit_octostar_utils-0.4.1 → streamlit_octostar_utils-0.4.2.dev2}/streamlit_octostar_utils/octostar/permissions.py +0 -0
- {streamlit_octostar_utils-0.4.1 → streamlit_octostar_utils-0.4.2.dev2}/streamlit_octostar_utils/ontology/__init__.py +0 -0
- {streamlit_octostar_utils-0.4.1 → streamlit_octostar_utils-0.4.2.dev2}/streamlit_octostar_utils/ontology/inheritance.py +0 -0
- {streamlit_octostar_utils-0.4.1 → streamlit_octostar_utils-0.4.2.dev2}/streamlit_octostar_utils/ontology/relationships.py +0 -0
- {streamlit_octostar_utils-0.4.1 → streamlit_octostar_utils-0.4.2.dev2}/streamlit_octostar_utils/ontology/validation.py +0 -0
- {streamlit_octostar_utils-0.4.1 → streamlit_octostar_utils-0.4.2.dev2}/streamlit_octostar_utils/style/__init__.py +0 -0
- {streamlit_octostar_utils-0.4.1 → streamlit_octostar_utils-0.4.2.dev2}/streamlit_octostar_utils/style/common.py +0 -0
- {streamlit_octostar_utils-0.4.1 → streamlit_octostar_utils-0.4.2.dev2}/streamlit_octostar_utils/threading/__init__.py +0 -0
- {streamlit_octostar_utils-0.4.1 → streamlit_octostar_utils-0.4.2.dev2}/streamlit_octostar_utils/threading/async_task_manager.py +0 -0
- {streamlit_octostar_utils-0.4.1 → streamlit_octostar_utils-0.4.2.dev2}/streamlit_octostar_utils/threading/session_callback_manager.py +0 -0
- {streamlit_octostar_utils-0.4.1 → streamlit_octostar_utils-0.4.2.dev2}/streamlit_octostar_utils/threading/session_state_hot_swapper.py +0 -0
|
@@ -0,0 +1,914 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Contents abstraction for NiFi entities.
|
|
3
|
+
|
|
4
|
+
Provides a FileIO-like interface for handling entity contents with support for:
|
|
5
|
+
- Full io.BufferedIOBase compatibility (read, write, seek, tell, etc.)
|
|
6
|
+
- Seeking and streaming with HTTP Range requests
|
|
7
|
+
- Multiple storage backends (memory, workspace attachments)
|
|
8
|
+
- Serialization/deserialization via from_locator()/to_locator() methods
|
|
9
|
+
- Lazy loading and efficient partial reads for large files
|
|
10
|
+
|
|
11
|
+
Storage Backends:
|
|
12
|
+
- MemoryContents: In-memory buffer (like BytesIO)
|
|
13
|
+
- WorkspaceAttachmentContents: Octostar workspace attachments with HTTP Range support
|
|
14
|
+
- TemporaryAttachmentContents: Octostar temporary blob storage (user temp bucket)
|
|
15
|
+
"""
|
|
16
|
+
|
|
17
|
+
from abc import ABC, abstractmethod
|
|
18
|
+
from typing import Optional, Dict, Any, Union, BinaryIO, List
|
|
19
|
+
from enum import Enum
|
|
20
|
+
import base64
|
|
21
|
+
import httpx
|
|
22
|
+
import logging
|
|
23
|
+
from io import BytesIO, SEEK_SET, SEEK_CUR, SEEK_END
|
|
24
|
+
from copy import deepcopy
|
|
25
|
+
|
|
26
|
+
# Module-level logger, named after this module so handlers/filters can target it.
_logger = logging.getLogger(__name__)
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
class ContentsLocation(Enum):
    """
    Storage backends that a contents locator can refer to.

    The value of each member is the string stored under the ``"location"``
    key of a serialized locator dictionary.
    """

    # Bytes held in an in-memory buffer.
    MEMORY = "memory"

    # Bytes stored as an Octostar workspace attachment.
    WORKSPACE_ATTACHMENT = "workspace_attachment"

    # Bytes stored in Octostar temporary blob storage.
    TEMPORARY_ATTACHMENT = "temporary_attachment"
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
class Contents(ABC):
|
|
37
|
+
"""
|
|
38
|
+
Abstract base class for entity contents.
|
|
39
|
+
|
|
40
|
+
Provides a FileIO-like interface with support for seeking, streaming,
|
|
41
|
+
and multiple storage backends. Each implementation manages its own
|
|
42
|
+
locator dictionary for serialization/deserialization.
|
|
43
|
+
"""
|
|
44
|
+
|
|
45
|
+
def __init__(
|
|
46
|
+
self,
|
|
47
|
+
entity_type: Optional[str] = None,
|
|
48
|
+
filetype: Optional[str] = None,
|
|
49
|
+
**kwargs
|
|
50
|
+
):
|
|
51
|
+
self._entity_type = entity_type
|
|
52
|
+
self._filetype = filetype
|
|
53
|
+
self._closed = False
|
|
54
|
+
|
|
55
|
+
# ==================== FileIO Interface ====================
|
|
56
|
+
|
|
57
|
+
@abstractmethod
|
|
58
|
+
def read(self, size: int = -1) -> bytes:
|
|
59
|
+
"""
|
|
60
|
+
Read and return up to size bytes, or all bytes if size is -1.
|
|
61
|
+
|
|
62
|
+
Args:
|
|
63
|
+
size: Number of bytes to read. -1 means read all.
|
|
64
|
+
|
|
65
|
+
Returns:
|
|
66
|
+
Bytes read from the stream.
|
|
67
|
+
"""
|
|
68
|
+
pass
|
|
69
|
+
|
|
70
|
+
@abstractmethod
|
|
71
|
+
def write(self, b: bytes) -> int:
|
|
72
|
+
"""
|
|
73
|
+
Write bytes to the stream.
|
|
74
|
+
|
|
75
|
+
Args:
|
|
76
|
+
b: Bytes to write.
|
|
77
|
+
|
|
78
|
+
Returns:
|
|
79
|
+
Number of bytes written.
|
|
80
|
+
"""
|
|
81
|
+
pass
|
|
82
|
+
|
|
83
|
+
@abstractmethod
|
|
84
|
+
def seek(self, offset: int, whence: int = SEEK_SET) -> int:
|
|
85
|
+
"""
|
|
86
|
+
Change stream position.
|
|
87
|
+
|
|
88
|
+
Args:
|
|
89
|
+
offset: Offset relative to whence.
|
|
90
|
+
whence: SEEK_SET (0) = from start, SEEK_CUR (1) = from current, SEEK_END (2) = from end.
|
|
91
|
+
|
|
92
|
+
Returns:
|
|
93
|
+
New absolute position.
|
|
94
|
+
"""
|
|
95
|
+
pass
|
|
96
|
+
|
|
97
|
+
@abstractmethod
|
|
98
|
+
def tell(self) -> int:
|
|
99
|
+
"""
|
|
100
|
+
Return current stream position.
|
|
101
|
+
|
|
102
|
+
Returns:
|
|
103
|
+
Current position in bytes.
|
|
104
|
+
"""
|
|
105
|
+
pass
|
|
106
|
+
|
|
107
|
+
def readable(self) -> bool:
|
|
108
|
+
return True
|
|
109
|
+
|
|
110
|
+
def writable(self) -> bool:
|
|
111
|
+
return True
|
|
112
|
+
|
|
113
|
+
def seekable(self) -> bool:
|
|
114
|
+
return True
|
|
115
|
+
|
|
116
|
+
@abstractmethod
|
|
117
|
+
def flush(self):
|
|
118
|
+
"""Flush write buffers."""
|
|
119
|
+
pass
|
|
120
|
+
|
|
121
|
+
@abstractmethod
|
|
122
|
+
def close(self):
|
|
123
|
+
"""Close the stream and release resources."""
|
|
124
|
+
self._closed = True
|
|
125
|
+
|
|
126
|
+
@abstractmethod
|
|
127
|
+
def delete(self):
|
|
128
|
+
"""Delete the contents from their storage backend."""
|
|
129
|
+
pass
|
|
130
|
+
|
|
131
|
+
@property
|
|
132
|
+
def closed(self) -> bool:
|
|
133
|
+
"""Check if stream is closed."""
|
|
134
|
+
return self._closed
|
|
135
|
+
|
|
136
|
+
@abstractmethod
|
|
137
|
+
def truncate(self, size: Optional[int] = None) -> int:
|
|
138
|
+
"""
|
|
139
|
+
Resize the stream to the given size.
|
|
140
|
+
|
|
141
|
+
Args:
|
|
142
|
+
size: New size in bytes. If None, use current position.
|
|
143
|
+
|
|
144
|
+
Returns:
|
|
145
|
+
New size.
|
|
146
|
+
"""
|
|
147
|
+
pass
|
|
148
|
+
|
|
149
|
+
def __enter__(self):
|
|
150
|
+
"""Context manager entry."""
|
|
151
|
+
return self
|
|
152
|
+
|
|
153
|
+
def __exit__(self, exc_type, exc_val, exc_tb):
|
|
154
|
+
"""Context manager exit."""
|
|
155
|
+
self.close()
|
|
156
|
+
return False
|
|
157
|
+
|
|
158
|
+
def readline(self, size: int = -1) -> bytes:
|
|
159
|
+
"""
|
|
160
|
+
Read and return one line from the stream.
|
|
161
|
+
|
|
162
|
+
Args:
|
|
163
|
+
size: Maximum number of bytes to read. -1 means no limit.
|
|
164
|
+
|
|
165
|
+
Returns:
|
|
166
|
+
Bytes up to and including the newline character, or until EOF.
|
|
167
|
+
"""
|
|
168
|
+
result = bytearray()
|
|
169
|
+
while True:
|
|
170
|
+
if size >= 0 and len(result) >= size:
|
|
171
|
+
break
|
|
172
|
+
byte = self.read(1)
|
|
173
|
+
if not byte:
|
|
174
|
+
break
|
|
175
|
+
result.extend(byte)
|
|
176
|
+
if byte == b'\n':
|
|
177
|
+
break
|
|
178
|
+
return bytes(result)
|
|
179
|
+
|
|
180
|
+
def readlines(self, hint: int = -1) -> List[bytes]:
|
|
181
|
+
"""
|
|
182
|
+
Read and return a list of lines from the stream.
|
|
183
|
+
|
|
184
|
+
Args:
|
|
185
|
+
hint: Optional size hint. If total size of lines exceeds hint, no more lines are read.
|
|
186
|
+
|
|
187
|
+
Returns:
|
|
188
|
+
List of lines.
|
|
189
|
+
"""
|
|
190
|
+
lines = []
|
|
191
|
+
total_size = 0
|
|
192
|
+
while True:
|
|
193
|
+
line = self.readline()
|
|
194
|
+
if not line:
|
|
195
|
+
break
|
|
196
|
+
lines.append(line)
|
|
197
|
+
total_size += len(line)
|
|
198
|
+
if hint >= 0 and total_size >= hint:
|
|
199
|
+
break
|
|
200
|
+
return lines
|
|
201
|
+
|
|
202
|
+
def writelines(self, lines: List[bytes]):
|
|
203
|
+
"""
|
|
204
|
+
Write a list of lines to the stream.
|
|
205
|
+
|
|
206
|
+
Args:
|
|
207
|
+
lines: List of byte strings to write.
|
|
208
|
+
"""
|
|
209
|
+
for line in lines:
|
|
210
|
+
self.write(line)
|
|
211
|
+
|
|
212
|
+
def readinto(self, b: bytearray) -> int:
|
|
213
|
+
"""
|
|
214
|
+
Read bytes into a pre-allocated buffer.
|
|
215
|
+
|
|
216
|
+
Args:
|
|
217
|
+
b: Pre-allocated bytearray to read into.
|
|
218
|
+
|
|
219
|
+
Returns:
|
|
220
|
+
Number of bytes read.
|
|
221
|
+
"""
|
|
222
|
+
data = self.read(len(b))
|
|
223
|
+
n = len(data)
|
|
224
|
+
b[:n] = data
|
|
225
|
+
return n
|
|
226
|
+
|
|
227
|
+
def read1(self, size: int = -1) -> bytes:
|
|
228
|
+
"""
|
|
229
|
+
Read up to size bytes with at most one read() call to the underlying stream.
|
|
230
|
+
|
|
231
|
+
For most implementations, this is the same as read(). Subclasses may override
|
|
232
|
+
for optimization.
|
|
233
|
+
|
|
234
|
+
Args:
|
|
235
|
+
size: Number of bytes to read. -1 means read all available.
|
|
236
|
+
|
|
237
|
+
Returns:
|
|
238
|
+
Bytes read from the stream.
|
|
239
|
+
"""
|
|
240
|
+
return self.read(size)
|
|
241
|
+
|
|
242
|
+
# ==================== Locator Interface ====================
|
|
243
|
+
|
|
244
|
+
@abstractmethod
|
|
245
|
+
def to_locator(self) -> Dict[str, Any]:
|
|
246
|
+
"""
|
|
247
|
+
Serialize contents to a locator dictionary.
|
|
248
|
+
|
|
249
|
+
The locator contains:
|
|
250
|
+
- location: The storage backend type
|
|
251
|
+
- Additional backend-specific parameters
|
|
252
|
+
|
|
253
|
+
Returns:
|
|
254
|
+
Dictionary describing how to locate/access this content.
|
|
255
|
+
"""
|
|
256
|
+
pass
|
|
257
|
+
|
|
258
|
+
@staticmethod
|
|
259
|
+
def from_locator(locator: Optional[Dict[str, Any]], client=None) -> Optional['Contents']:
|
|
260
|
+
"""
|
|
261
|
+
Deserialize contents from a locator dictionary.
|
|
262
|
+
|
|
263
|
+
Factory method that creates the appropriate Contents subclass based on the
|
|
264
|
+
locator's "location" field. This replaces the old base64.b64decode logic in NiFi.
|
|
265
|
+
|
|
266
|
+
Args:
|
|
267
|
+
locator: Locator dictionary (e.g., from request.contents_pointer)
|
|
268
|
+
client: Optional Octostar client for remote operations
|
|
269
|
+
|
|
270
|
+
Returns:
|
|
271
|
+
Contents instance or None if locator is None/empty.
|
|
272
|
+
|
|
273
|
+
Raises:
|
|
274
|
+
ValueError: If the location type is unknown
|
|
275
|
+
"""
|
|
276
|
+
if not locator:
|
|
277
|
+
return None
|
|
278
|
+
|
|
279
|
+
location = locator.get("location")
|
|
280
|
+
|
|
281
|
+
match location:
|
|
282
|
+
case ContentsLocation.MEMORY.value:
|
|
283
|
+
return MemoryContents._from_locator(locator)
|
|
284
|
+
case ContentsLocation.WORKSPACE_ATTACHMENT.value:
|
|
285
|
+
return WorkspaceAttachmentContents._from_locator(locator, client)
|
|
286
|
+
case ContentsLocation.TEMPORARY_ATTACHMENT.value:
|
|
287
|
+
return TemporaryAttachmentContents._from_locator(locator, client)
|
|
288
|
+
case _:
|
|
289
|
+
raise ValueError(f"Unknown contents location type: {location}")
|
|
290
|
+
|
|
291
|
+
# ==================== Utility Methods ====================
|
|
292
|
+
|
|
293
|
+
def read_all(self) -> bytes:
|
|
294
|
+
"""Read all contents and return as bytes."""
|
|
295
|
+
current_pos = self.tell()
|
|
296
|
+
self.seek(0, SEEK_SET)
|
|
297
|
+
data = self.read()
|
|
298
|
+
self.seek(current_pos, SEEK_SET)
|
|
299
|
+
return data
|
|
300
|
+
|
|
301
|
+
def getvalue(self) -> bytes:
|
|
302
|
+
"""
|
|
303
|
+
Return the entire contents without moving the position.
|
|
304
|
+
|
|
305
|
+
This matches BytesIO.getvalue() behavior.
|
|
306
|
+
|
|
307
|
+
Returns:
|
|
308
|
+
Entire contents as bytes.
|
|
309
|
+
"""
|
|
310
|
+
return self.read_all()
|
|
311
|
+
|
|
312
|
+
def write_all(self, data: bytes):
|
|
313
|
+
"""Write all data, replacing existing contents."""
|
|
314
|
+
self.seek(0, SEEK_SET)
|
|
315
|
+
self.truncate(0)
|
|
316
|
+
self.write(data)
|
|
317
|
+
self.flush()
|
|
318
|
+
|
|
319
|
+
def __len__(self) -> int:
|
|
320
|
+
"""
|
|
321
|
+
Return the length of the contents.
|
|
322
|
+
|
|
323
|
+
Returns:
|
|
324
|
+
Total size in bytes.
|
|
325
|
+
"""
|
|
326
|
+
current_pos = self.tell()
|
|
327
|
+
self.seek(0, SEEK_END)
|
|
328
|
+
length = self.tell()
|
|
329
|
+
self.seek(current_pos, SEEK_SET)
|
|
330
|
+
return length
|
|
331
|
+
|
|
332
|
+
|
|
333
|
+
class MemoryContents(Contents):
    """
    Contents kept entirely in memory.

    Every stream operation is forwarded to an internal BytesIO. The locator
    produced by to_locator() carries the bytes inline, base64-encoded.
    """

    def __init__(
        self,
        entity_type: Optional[str] = None,
        filetype: Optional[str] = None,
        *,
        initial_data: Optional[bytes] = None,
        **kwargs
    ):
        """
        Args:
            entity_type: Optional entity type recorded in the locator.
            filetype: Optional file type recorded in the locator.
            initial_data: Bytes to pre-fill the buffer with; None or empty
                yields an empty buffer.
            **kwargs: Forwarded to the base class.
        """
        super().__init__(entity_type, filetype, **kwargs)
        seed = initial_data or b""
        self._buffer = BytesIO(seed)

    # ---- stream operations: straight delegation to the BytesIO ----

    def read(self, size: int = -1) -> bytes:
        """Read up to size bytes from the buffer (-1 reads to the end)."""
        return self._buffer.read(size)

    def write(self, b: bytes) -> int:
        """Write b at the current position and return the byte count."""
        return self._buffer.write(b)

    def seek(self, offset: int, whence: int = SEEK_SET) -> int:
        """Move the buffer cursor and return the new absolute position."""
        return self._buffer.seek(offset, whence)

    def tell(self) -> int:
        """Return the buffer cursor position."""
        return self._buffer.tell()

    def flush(self):
        """Flush the underlying buffer."""
        self._buffer.flush()

    def close(self):
        """Close the buffer and mark the stream closed; a second call does nothing."""
        if self._closed:
            return
        self._buffer.close()
        super().close()

    def delete(self):
        """Drop the stored bytes by swapping in a fresh, empty buffer."""
        self._buffer = BytesIO(b"")

    def truncate(self, size: Optional[int] = None) -> int:
        """Resize the buffer to size bytes (current position when None)."""
        return self._buffer.truncate(size)

    def getvalue(self) -> bytes:
        """Return the whole buffer without touching the cursor."""
        return self._buffer.getvalue()

    # ---- locator (de)serialization ----

    def to_locator(self) -> Dict[str, Any]:
        """
        Serialize to a locator with the bytes embedded as base64 text.

        Returns:
            {"location": "memory", "data": "<base64>" or None when empty},
            plus "entity_type" / "filetype" when they are set.
        """
        raw = self._buffer.getvalue()
        encoded = base64.b64encode(raw).decode('utf-8') if raw else None

        locator = {
            "location": ContentsLocation.MEMORY.value,
            "data": encoded,
        }
        optional_fields = (
            ("entity_type", self._entity_type),
            ("filetype", self._filetype),
        )
        for key, value in optional_fields:
            if value:
                locator[key] = value
        return locator

    @staticmethod
    def _from_locator(locator: Dict[str, Any]) -> 'MemoryContents':
        """
        Build a MemoryContents from a locator dictionary.

        Args:
            locator: Locator whose optional "data" entry is base64 text.

        Returns:
            New MemoryContents instance, empty when "data" is missing or empty.
        """
        encoded = locator.get("data")
        payload = None
        if encoded:
            payload = base64.b64decode(encoded)

        return MemoryContents(
            entity_type=locator.get("entity_type"),
            filetype=locator.get("filetype"),
            initial_data=payload,
        )
|
|
413
|
+
|
|
414
|
+
|
|
415
|
+
class WorkspaceAttachmentContents(Contents):
    """
    Contents implementation for Octostar workspace attachments.

    The object operates in one of two modes:

    - Range mode (initial state when no ``initial_data`` is given): read()
      is served by HTTP Range requests against a presigned URL, and only a
      logical cursor (``_position``) is kept locally.
    - Buffered mode (when ``initial_data`` is given, or after the first
      write(), truncate() or getvalue()): the whole attachment is held in a
      BytesIO and stream operations are delegated to it. Switching modes
      keeps the current stream position.

    Modifications are kept in the buffer and uploaded with
    ``write_attachment`` by flush(), which close() calls when there are
    pending changes.

    Presigned URL handling:
    - The URL is obtained lazily via ``get_attachment_url`` and cached.
    - A 403 response is treated as an expired URL: the URL is refreshed and
      the request retried, up to three attempts in total.
    """

    DEFAULT_CHUNK_SIZE = 8192
    DEFAULT_URL_TIMEOUT = 120

    def __init__(
        self,
        entity_type: Optional[str] = None,
        filetype: Optional[str] = None,
        *,
        workspace_id: str,
        entity_id: str,
        client,
        initial_data: Optional[bytes] = None,
        chunk_size: int = DEFAULT_CHUNK_SIZE,
        **kwargs
    ):
        """
        Args:
            entity_type: Entity type; required by flush() to upload changes.
            filetype: File type; required by flush() to upload changes.
            workspace_id: Workspace that owns the attachment.
            entity_id: Entity the attachment belongs to.
            client: Octostar client passed through to the octostar utilities.
            initial_data: If given, the object starts in buffered mode with
                these bytes and never downloads the attachment.
            chunk_size: Stored on the instance; not used by any method of
                this class.
            **kwargs: Forwarded to the base class.
        """
        super().__init__(entity_type, filetype, **kwargs)
        self._workspace_id = workspace_id
        self._entity_id = entity_id
        self._client = client
        self._chunk_size = chunk_size

        self._buffer: Optional[BytesIO] = None
        self._fully_loaded = False
        self._modified = False
        self._position = 0  # logical cursor while in range mode
        self._size: Optional[int] = None  # cached remote size (range mode)
        self._presigned_url: Optional[str] = None
        self._http_client: Optional[httpx.Client] = None

        if initial_data is not None:
            self._buffer = BytesIO(initial_data)
            self._fully_loaded = True
            self._size = len(initial_data)

    # ==================== Internal helpers ====================

    def _is_buffered(self) -> bool:
        """Return True when the full contents are held in the local buffer."""
        return self._buffer is not None and self._fully_loaded

    def _buffer_size(self) -> int:
        """Return the current byte length of the buffer without moving its cursor."""
        cursor = self._buffer.tell()
        try:
            return self._buffer.seek(0, SEEK_END)
        finally:
            self._buffer.seek(cursor, SEEK_SET)

    def _get_presigned_url(self) -> str:
        """Request a fresh presigned URL for the attachment."""
        from octostar.utils.workspace import get_attachment_url

        return get_attachment_url.sync(
            os_workspace=self._workspace_id,
            os_entity_uid=self._entity_id,
            client=self._client
        )

    def _ensure_http_client(self):
        """Create the shared httpx client on first use."""
        if self._http_client is None:
            self._http_client = httpx.Client(timeout=self.DEFAULT_URL_TIMEOUT)

    def _fetch_size(self) -> int:
        """
        Return the size of the remote attachment, using a HEAD request.

        The result is cached in ``_size``. A 403 response triggers a
        presigned-URL refresh and a retry.

        Raises:
            httpx.HTTPStatusError: For error responses other than 403.
            ConnectionError: If no 200 response was obtained within the
                retry budget.
        """
        if self._size is not None:
            return self._size

        if not self._presigned_url:
            self._presigned_url = self._get_presigned_url()

        self._ensure_http_client()
        max_retries = 3

        for attempt in range(max_retries):
            response = self._http_client.head(self._presigned_url)
            if response.status_code == 200:
                self._size = int(response.headers.get('content-length', 0))
                return self._size
            elif response.status_code == 403:
                _logger.debug(
                    f"Presigned URL expired while fetching size, refreshing... (attempt {attempt + 1}/{max_retries})"
                )
                self._presigned_url = self._get_presigned_url()
                continue
            else:
                response.raise_for_status()

        raise ConnectionError(
            f"Failed to fetch file size after {max_retries} attempts (URL kept expiring)"
        )

    def _read_range(self, start: int, end: int) -> bytes:
        """
        Read a specific byte range using an HTTP Range request.

        A 403 response triggers a presigned-URL refresh and a retry.

        Args:
            start: Start byte (inclusive)
            end: End byte (inclusive)

        Returns:
            Bytes from the specified range; empty when the server reports
            the range as unsatisfiable (416).

        Raises:
            httpx.HTTPStatusError: For error responses other than 403/416.
            ConnectionError: If the URL kept being rejected with 403.
        """
        if not self._presigned_url:
            self._presigned_url = self._get_presigned_url()

        self._ensure_http_client()

        headers = {"Range": f"bytes={start}-{end}"}
        max_retries = 3

        for attempt in range(max_retries):
            response = self._http_client.get(self._presigned_url, headers=headers)

            if response.status_code == 206:
                return response.content
            elif response.status_code == 416:
                return b""
            elif response.status_code == 403:
                _logger.debug(
                    f"Presigned URL expired at byte {start}, refreshing... (attempt {attempt + 1}/{max_retries})"
                )
                self._presigned_url = self._get_presigned_url()
                continue
            else:
                response.raise_for_status()
                if response.status_code == 200:
                    # The server ignored the Range header and sent the whole
                    # representation; cut out the slice that was asked for.
                    return response.content[start:end + 1]
                return response.content

        raise ConnectionError(
            f"Failed to read range {start}-{end} after {max_retries} attempts (URL kept expiring)"
        )

    def _load_full(self):
        """
        Download the whole attachment into the buffer (switch to buffered mode).

        Uses read_attachment(). The buffer cursor is placed at the logical
        position held in range mode, so the switch does not move the stream.
        Does nothing when already buffered.
        """
        if self._fully_loaded:
            return

        from octostar.utils.workspace import read_attachment

        data = read_attachment.sync(
            os_workspace=self._workspace_id,
            os_entity_uid=self._entity_id,
            decode=False,
            stream=False,
            client=self._client
        )
        self._buffer = BytesIO(data or b"")
        self._fully_loaded = True
        self._size = len(data) if data else 0
        # Carry the range-mode cursor over instead of rewinding to 0.
        self._buffer.seek(self._position, SEEK_SET)

    # ==================== FileIO Interface ====================

    def read(self, size: int = -1) -> bytes:
        """
        Read up to size bytes from the current position.

        In buffered mode this reads from the buffer; otherwise it issues one
        HTTP Range request for exactly the bytes needed.

        Args:
            size: Number of bytes to read. A negative value or None reads to
                the end.

        Returns:
            Bytes read; empty at or past the end.
        """
        if self._fully_loaded:
            return self._buffer.read(size)

        read_to_end = size is None or size < 0
        if not read_to_end and size == 0:
            return b""  # nothing requested: no network round trip

        remaining = self._fetch_size() - self._position
        wanted = remaining if read_to_end else min(size, remaining)
        if wanted <= 0:
            return b""

        data = self._read_range(self._position, self._position + wanted - 1)
        self._position += len(data)
        return data

    def write(self, b: bytes) -> int:
        """
        Write b at the current position of the buffer.

        Switches to buffered mode first (downloading the attachment) if
        needed. The change is uploaded by flush(), not here.

        Returns:
            Number of bytes written.
        """
        if self._buffer is None:
            self._load_full()

        n = self._buffer.write(b)
        self._modified = True
        self._size = self._buffer_size()
        return n

    def seek(self, offset: int, whence: int = SEEK_SET) -> int:
        """
        Change the stream position.

        In range mode only the logical cursor is updated (SEEK_END needs the
        remote size and may issue a HEAD request). In buffered mode the end
        is taken from the buffer itself, so it reflects writes and truncates.

        Raises:
            ValueError: For an unknown whence or a negative resulting position.
        """
        buffered = self._is_buffered()

        if whence == SEEK_SET:
            new_pos = offset
        elif whence == SEEK_CUR:
            base = self._buffer.tell() if buffered else self._position
            new_pos = base + offset
        elif whence == SEEK_END:
            end = self._buffer_size() if buffered else self._fetch_size()
            new_pos = end + offset
        else:
            raise ValueError(f"Invalid whence value: {whence}")

        if new_pos < 0:
            raise ValueError("Negative seek position")

        if buffered:
            return self._buffer.seek(new_pos, SEEK_SET)

        # Range mode: the next read() sends this offset in its Range header.
        self._position = new_pos
        return self._position

    def tell(self) -> int:
        """Return the current stream position."""
        if self._is_buffered():
            return self._buffer.tell()
        return self._position

    def flush(self):
        """
        Flush the buffer and upload it to the workspace if it was modified.

        Raises:
            ValueError: If there are pending changes but entity_type or
                filetype is not set.
        """
        if self._buffer is None:
            return
        self._buffer.flush()

        if not self._modified:
            return

        if not self._entity_type or not self._filetype:
            raise ValueError("entity_type and filetype required to flush to workspace")

        from octostar.utils.workspace import write_attachment

        write_attachment.sync(
            os_workspace=self._workspace_id,
            os_entity_uid=self._entity_id,
            entity_type=self._entity_type,
            filetype=self._filetype,
            file=self._buffer.getvalue(),
            client=self._client
        )
        self._modified = False

    def close(self):
        """
        Flush pending changes, then release the buffer and the HTTP client.

        Resources are released and the stream is marked closed even when
        the flush fails; the flush error is still raised to the caller.
        A second call does nothing.
        """
        if self._closed:
            return
        try:
            if self._modified:
                self.flush()
        finally:
            if self._buffer is not None:
                self._buffer.close()
            if self._http_client is not None:
                self._http_client.close()
                self._http_client = None
            super().close()

    def delete(self):
        """Delete the entity from the workspace using delete_entity()."""
        from octostar.utils.workspace import delete_entity

        delete_entity.sync(
            os_entity_uid=self._entity_id,
            client=self._client
        )

    def truncate(self, size: Optional[int] = None) -> int:
        """
        Resize the buffered contents.

        Switches to buffered mode first if needed. The change is uploaded by
        flush(), not here.

        Args:
            size: New size in bytes. If None, use the current position.

        Returns:
            New size.
        """
        if self._buffer is None:
            self._load_full()
        self._modified = True
        new_size = self._buffer.truncate(size)
        self._size = self._buffer_size()
        return new_size

    def getvalue(self) -> bytes:
        """Return the entire contents (downloading them if needed) without moving the position."""
        if not self._is_buffered():
            self._load_full()
        return self._buffer.getvalue()

    # ==================== Locator Interface ====================

    def to_locator(self) -> Dict[str, Any]:
        """
        Serialize to locator with workspace and entity ID.

        Returns:
            {"location": "workspace_attachment", "pointer": "workspace_id/entity_id",
             "entity_type": "...", "filetype": "..."}
            "pointer" is None when either ID is missing; "entity_type" and
            "filetype" are included only when set.
        """
        if self._workspace_id and self._entity_id:
            pointer = f"{self._workspace_id}/{self._entity_id}"
        else:
            pointer = None

        locator = {
            "location": ContentsLocation.WORKSPACE_ATTACHMENT.value,
            "pointer": pointer
        }
        if self._entity_type:
            locator["entity_type"] = self._entity_type
        if self._filetype:
            locator["filetype"] = self._filetype
        return locator

    @staticmethod
    def _from_locator(locator: Dict[str, Any], client=None) -> 'WorkspaceAttachmentContents':
        """
        Create WorkspaceAttachmentContents from a locator dictionary.

        Args:
            locator: Locator dictionary with pointer "workspace_id/entity_id".
                With more than two "/"-separated parts, the first is taken as
                the workspace ID and the last as the entity ID. A missing or
                single-part pointer leaves both IDs as None.
            client: Octostar client for remote operations

        Returns:
            New WorkspaceAttachmentContents instance
        """
        pointer = locator.get("pointer")
        workspace_id = None
        entity_id = None

        if pointer:
            parts = pointer.split("/")
            if len(parts) >= 2:
                workspace_id = parts[0]
                entity_id = parts[-1]

        return WorkspaceAttachmentContents(
            entity_type=locator.get("entity_type"),
            filetype=locator.get("filetype"),
            workspace_id=workspace_id,
            entity_id=entity_id,
            client=client
        )
|
|
758
|
+
|
|
759
|
+
|
|
760
|
+
class TemporaryAttachmentContents(Contents):
    """
    Contents implementation for Octostar temporary blob storage.

    Uses octostar-api utilities (read_temporary_blob, write_temporary_blob,
    delete_temporary_blob) to store files in the user's temporary S3 bucket.

    Temporary blobs are keyed by filename (not workspace/entity), and are not
    associated with any workspace entity. Use WorkspaceAttachmentContents for that.

    The blob is mirrored in an in-memory BytesIO buffer. The buffer is
    fetched lazily on first access and uploaded again by flush()/close()
    whenever it holds changes that have not been written yet.
    """

    def __init__(
        self,
        entity_type: Optional[str] = None,
        filetype: Optional[str] = None,
        *,
        filename: str,
        client,
        initial_data: Optional[bytes] = None,
        **kwargs
    ):
        """
        Args:
            entity_type: Forwarded to the Contents base class.
            filetype: Forwarded to the Contents base class.
            filename: Key of the blob in the temporary bucket.
            client: Octostar client passed to every remote call.
            initial_data: Bytes to seed the buffer with. They are treated as
                not yet uploaded, so the next flush()/close() writes them.
            **kwargs: Forwarded to the Contents base class.
        """
        super().__init__(entity_type, filetype, **kwargs)
        self._filename = filename
        self._client = client

        self._buffer: Optional[BytesIO] = None
        self._fully_loaded = False
        self._modified = False

        if initial_data is not None:
            self._buffer = BytesIO(initial_data)
            self._fully_loaded = True
            # Seed data exists only in memory at this point. Without the
            # dirty flag flush() would return early and the locator would
            # reference a blob that was never written.
            self._modified = True

    def _ensure_buffer(self) -> BytesIO:
        """Return the in-memory buffer, fetching the blob first if needed."""
        if self._buffer is None:
            self._load_full()
        return self._buffer

    def _load_full(self):
        """Load the entire blob into memory using read_temporary_blob()."""
        if self._fully_loaded:
            return

        from octostar.utils.workspace import read_temporary_blob

        data = read_temporary_blob.sync(
            filename=self._filename,
            decode=False,
            client=self._client
        )
        # A falsy result (None or empty) becomes an empty buffer.
        self._buffer = BytesIO(data or b"")
        self._fully_loaded = True

    def read(self, size: int = -1) -> bytes:
        """Read up to size bytes from the current position (-1 reads to the end)."""
        return self._ensure_buffer().read(size)

    def write(self, b: bytes) -> int:
        """Write b at the current position and mark the buffer as dirty."""
        written = self._ensure_buffer().write(b)
        self._modified = True
        return written

    def seek(self, offset: int, whence: int = SEEK_SET) -> int:
        """Move the buffer position; returns the new absolute position."""
        return self._ensure_buffer().seek(offset, whence)

    def tell(self) -> int:
        """Return the current buffer position."""
        return self._ensure_buffer().tell()

    def flush(self):
        """Flush the internal buffer and write to temp bucket if modified."""
        if self._buffer is None:
            return
        self._buffer.flush()

        if not self._modified:
            return

        from octostar.utils.workspace import write_temporary_blob

        # getvalue() snapshots the whole buffer without moving the position.
        write_temporary_blob.sync(
            filename=self._filename,
            file=self._buffer.getvalue(),
            client=self._client
        )
        self._modified = False

    def close(self):
        """Upload pending changes, release the buffer and close the base object."""
        if not self._closed:
            if self._modified:
                self.flush()
            if self._buffer is not None:
                self._buffer.close()
            super().close()

    def delete(self):
        """Delete the blob from the temporary bucket."""
        from octostar.utils.workspace import delete_temporary_blob

        delete_temporary_blob.sync(
            filename=self._filename,
            client=self._client
        )

    def truncate(self, size: Optional[int] = None) -> int:
        """Resize the buffer (to the current position when size is None) and mark it dirty."""
        buffer = self._ensure_buffer()
        self._modified = True
        return buffer.truncate(size)

    def getvalue(self) -> bytes:
        """Return the full buffer contents, loading the blob first if necessary."""
        if self._buffer is None or not self._fully_loaded:
            self._load_full()
        return self._buffer.getvalue()

    def to_locator(self) -> Dict[str, Any]:
        """
        Serialize to locator with filename.

        Returns:
            {"location": "temporary_attachment", "filename": "..."} plus
            "entity_type" / "filetype" when those attributes are truthy.
        """
        locator = {
            "location": ContentsLocation.TEMPORARY_ATTACHMENT.value,
            "filename": self._filename
        }
        if self._entity_type:
            locator["entity_type"] = self._entity_type
        if self._filetype:
            locator["filetype"] = self._filetype
        return locator

    @staticmethod
    def _from_locator(locator: Dict[str, Any], client=None) -> 'TemporaryAttachmentContents':
        """
        Create TemporaryAttachmentContents from a locator dictionary.

        Args:
            locator: Locator dictionary with filename
            client: Octostar client

        Returns:
            New TemporaryAttachmentContents instance

        Raises:
            KeyError: If the locator has no "filename" entry.
        """
        return TemporaryAttachmentContents(
            entity_type=locator.get("entity_type"),
            filetype=locator.get("filetype"),
            filename=locator["filename"],
            client=client
        )
|
|
@@ -26,6 +26,7 @@ from ..core.dict import recursive_update_dict, travel_dict, jsondict_hash
|
|
|
26
26
|
from ..core.timestamp import now, string_to_datetime
|
|
27
27
|
from .fastapi import DefaultErrorRoute, Route
|
|
28
28
|
from ..ontology.inheritance import is_child_concept as is_child_concept_fn, get_label_keys
|
|
29
|
+
from .contents import Contents, MemoryContents, WorkspaceAttachmentContents, TemporaryAttachmentContents, ContentsLocation
|
|
29
30
|
|
|
30
31
|
RELATIONSHIP_ENTITY_NAME = "os_relationship"
|
|
31
32
|
LOCAL_RELATIONSHIP_ENTITY_NAME = "os_workspace_relationship"
|
|
@@ -47,11 +48,6 @@ def safe_async_run(coro):
|
|
|
47
48
|
return asyncio.run(coro)
|
|
48
49
|
|
|
49
50
|
|
|
50
|
-
class NifiContentsPointerLocationModel(Enum):
|
|
51
|
-
LOCAL = "local"
|
|
52
|
-
ATTACHMENT = "attachment"
|
|
53
|
-
|
|
54
|
-
|
|
55
51
|
class NifiProxyEntityModel(BaseModel):
|
|
56
52
|
entity_id: str
|
|
57
53
|
entity_type: str
|
|
@@ -72,10 +68,6 @@ class NifiEntityModel(BaseModel):
|
|
|
72
68
|
relationships: List[str]
|
|
73
69
|
label_keys: List[str]
|
|
74
70
|
|
|
75
|
-
class ContentsPointerModel(BaseModel):
|
|
76
|
-
location: NifiContentsPointerLocationModel
|
|
77
|
-
pointer: Optional[str] = None
|
|
78
|
-
|
|
79
71
|
jwt: str
|
|
80
72
|
ontology_name: str
|
|
81
73
|
ontology_info: OntologyInfoModel
|
|
@@ -84,7 +76,7 @@ class NifiEntityModel(BaseModel):
|
|
|
84
76
|
nifi_attributes: dict = Field(default_factory=dict)
|
|
85
77
|
config: dict = Field(default_factory=dict)
|
|
86
78
|
metrics: dict = Field(default_factory=dict)
|
|
87
|
-
contents_pointer: Optional[
|
|
79
|
+
contents_pointer: Optional[dict] = None
|
|
88
80
|
is_temporary: bool = False
|
|
89
81
|
exception: dict = Field(default_factory=dict)
|
|
90
82
|
last_processor_name: Optional[str] = None
|
|
@@ -103,7 +95,7 @@ class NifiEntityModel(BaseModel):
|
|
|
103
95
|
record: RecordModel
|
|
104
96
|
annotations: Dict[str, Any] = Field(default_factory=dict)
|
|
105
97
|
children: List[Union[NifiOTMRelationshipProxyModel, NifiProxyEntityModel]] = []
|
|
106
|
-
contents: Optional[
|
|
98
|
+
contents: Optional[Dict[str, Any]] = None
|
|
107
99
|
|
|
108
100
|
|
|
109
101
|
NifiEntityModel.model_rebuild()
|
|
@@ -295,12 +287,10 @@ class NifiContextManager(object):
|
|
|
295
287
|
return client, curr_user_ontology
|
|
296
288
|
|
|
297
289
|
def receive_input(self, json_data, processor_name) -> List["NifiEntityBatch"]:
|
|
298
|
-
def _safe_decode(contents):
|
|
299
|
-
return base64.b64decode(contents) if contents else None
|
|
300
|
-
|
|
301
290
|
entities = []
|
|
302
291
|
all_independent_uids = [e["record"]["entity_id"] for e in json_data]
|
|
303
292
|
for elem in json_data:
|
|
293
|
+
contents = Contents.from_locator(elem.get("contents"), client=self.client)
|
|
304
294
|
entities.append(
|
|
305
295
|
NifiEntity(
|
|
306
296
|
self,
|
|
@@ -309,7 +299,7 @@ class NifiContextManager(object):
|
|
|
309
299
|
elem["annotations"],
|
|
310
300
|
all_independent_uids,
|
|
311
301
|
elem["children"],
|
|
312
|
-
|
|
302
|
+
contents,
|
|
313
303
|
)
|
|
314
304
|
)
|
|
315
305
|
entities = sorted(
|
|
@@ -463,10 +453,13 @@ class NifiContextManager(object):
|
|
|
463
453
|
fetch_concept_relationships = {}
|
|
464
454
|
# FIND FILES TO WRITE
|
|
465
455
|
for entity in entities:
|
|
466
|
-
if entity.is_child_concept("
|
|
456
|
+
if entity.is_child_concept("os_attachable"):
|
|
467
457
|
has_write_flag = entity.sync_params.get(NifiContextManager.SyncFlag.WRITE_CONTENTS)
|
|
468
|
-
|
|
469
|
-
if
|
|
458
|
+
is_temporary = entity.request.get("is_temporary")
|
|
459
|
+
if is_temporary:
|
|
460
|
+
if entity._contents and not isinstance(entity._contents, WorkspaceAttachmentContents):
|
|
461
|
+
files_to_write.append(entity)
|
|
462
|
+
elif has_write_flag:
|
|
470
463
|
if entity.contents:
|
|
471
464
|
files_to_write.append(entity)
|
|
472
465
|
# FIND ENTITIES TO UPSERT
|
|
@@ -495,6 +488,9 @@ class NifiContextManager(object):
|
|
|
495
488
|
# WRITE FILES
|
|
496
489
|
if files_to_write:
|
|
497
490
|
for file in files_to_write:
|
|
491
|
+
if not file.contents:
|
|
492
|
+
continue
|
|
493
|
+
old_contents = file._contents
|
|
498
494
|
new_file_record = write_file.sync(
|
|
499
495
|
file.write_os_workspace,
|
|
500
496
|
"./" + file.record["os_item_name"],
|
|
@@ -510,11 +506,24 @@ class NifiContextManager(object):
|
|
|
510
506
|
file.record["entity_label"] = file.label
|
|
511
507
|
file.request["is_temporary"] = False
|
|
512
508
|
file.request["entity_timestamp"] = file.record["os_last_updated_at"]
|
|
513
|
-
file._contents =
|
|
509
|
+
file._contents = WorkspaceAttachmentContents(
|
|
510
|
+
workspace_id=file.record['os_workspace'],
|
|
511
|
+
entity_id=file.record['os_entity_uid'],
|
|
512
|
+
client=self.client,
|
|
513
|
+
entity_type=file.record["os_concept"],
|
|
514
|
+
filetype=file.record["os_item_content_type"]
|
|
515
|
+
)
|
|
514
516
|
file.request["contents_pointer"] = {
|
|
515
|
-
"location":
|
|
516
|
-
"pointer": f"{file.record['os_workspace']}/{file.record['os_entity_uid']}"
|
|
517
|
+
"location": ContentsLocation.WORKSPACE_ATTACHMENT.value,
|
|
518
|
+
"pointer": f"{file.record['os_workspace']}/{file.record['os_entity_uid']}",
|
|
519
|
+
"entity_type": file.record["os_concept"],
|
|
520
|
+
"filetype": file.record["os_item_content_type"]
|
|
517
521
|
}
|
|
522
|
+
if isinstance(old_contents, TemporaryAttachmentContents):
|
|
523
|
+
try:
|
|
524
|
+
old_contents.delete()
|
|
525
|
+
except Exception:
|
|
526
|
+
pass
|
|
518
527
|
# UPSERT ENTITIES
|
|
519
528
|
if entities_to_upsert:
|
|
520
529
|
new_entities = upsert_entities.sync(
|
|
@@ -692,7 +701,7 @@ class NifiEntity(object):
|
|
|
692
701
|
c["annotations"],
|
|
693
702
|
all_independent_uids,
|
|
694
703
|
c["children"],
|
|
695
|
-
c
|
|
704
|
+
Contents.from_locator(c.get("contents"), client=self.context.client),
|
|
696
705
|
)
|
|
697
706
|
for c in full_entity_children
|
|
698
707
|
]
|
|
@@ -711,7 +720,7 @@ class NifiEntity(object):
|
|
|
711
720
|
for i in range(len(child_uids))
|
|
712
721
|
]
|
|
713
722
|
self.children.extend(proxy_otm_children)
|
|
714
|
-
self._contents = contents
|
|
723
|
+
self._contents: Optional[Contents] = contents
|
|
715
724
|
self.drop_on_output = False
|
|
716
725
|
|
|
717
726
|
def __eq__(self, other):
|
|
@@ -741,36 +750,16 @@ class NifiEntity(object):
|
|
|
741
750
|
self._annotations = new_annotations
|
|
742
751
|
|
|
743
752
|
@property
|
|
744
|
-
def contents(self):
|
|
753
|
+
def contents(self) -> Optional[Contents]:
|
|
745
754
|
if not self._contents:
|
|
746
755
|
contents_pointer = self.contents_pointer
|
|
747
756
|
if not contents_pointer:
|
|
748
757
|
return None
|
|
749
|
-
|
|
750
|
-
self._contents = read_file.sync(
|
|
751
|
-
contents_pointer["pointer"].split("/")[0],
|
|
752
|
-
contents_pointer["pointer"].split("/")[-1],
|
|
753
|
-
False,
|
|
754
|
-
client=self.context.client,
|
|
755
|
-
)
|
|
758
|
+
self._contents = Contents.from_locator(contents_pointer, client=self.context.client)
|
|
756
759
|
return self._contents
|
|
757
760
|
|
|
758
|
-
@property
|
|
759
|
-
def contents_pointer(self):
|
|
760
|
-
contents_pointer = deepcopy(self.request.get("contents_pointer"))
|
|
761
|
-
if not self.request.get("contents_pointer"):
|
|
762
|
-
return None
|
|
763
|
-
ptr_location = contents_pointer.get("location")
|
|
764
|
-
if ptr_location == "attachment" and not contents_pointer.get("pointer"):
|
|
765
|
-
contents_pointer["pointer"] = f"{self.record['os_workspace']}/{self.record['os_entity_uid']}"
|
|
766
|
-
return contents_pointer
|
|
767
|
-
|
|
768
|
-
@contents_pointer.setter
|
|
769
|
-
def contents_pointer(self, new_value):
|
|
770
|
-
self.request["contents_pointer"] = new_value
|
|
771
|
-
|
|
772
761
|
@contents.setter
|
|
773
|
-
def contents(self, new_contents):
|
|
762
|
+
def contents(self, new_contents: Optional[Union[Contents, bytes]]):
|
|
774
763
|
self._contents = new_contents
|
|
775
764
|
|
|
776
765
|
@property
|
|
@@ -866,9 +855,6 @@ class NifiEntity(object):
|
|
|
866
855
|
return not _is_sub_fragment_recursive(fragment)
|
|
867
856
|
|
|
868
857
|
def to_json(self):
|
|
869
|
-
def _safe_encode(contents):
|
|
870
|
-
return base64.b64encode(contents) if contents else None
|
|
871
|
-
|
|
872
858
|
if self.drop_on_output:
|
|
873
859
|
return
|
|
874
860
|
proxy_entity_children = []
|
|
@@ -909,7 +895,7 @@ class NifiEntity(object):
|
|
|
909
895
|
"record": self.record,
|
|
910
896
|
"children": children,
|
|
911
897
|
"annotations": self.annotations,
|
|
912
|
-
"contents":
|
|
898
|
+
"contents": self._contents.to_locator() if self._contents else None,
|
|
913
899
|
}
|
|
914
900
|
|
|
915
901
|
def _add_entity(self, os_workspace, entity_type, fields, os_entity_uid=None):
|
|
@@ -1058,7 +1044,7 @@ class NifiEntity(object):
|
|
|
1058
1044
|
os_parent_folder,
|
|
1059
1045
|
filename,
|
|
1060
1046
|
filetype,
|
|
1061
|
-
file,
|
|
1047
|
+
file: Union[Contents, bytes],
|
|
1062
1048
|
fields={},
|
|
1063
1049
|
os_relationship_name=FILE_RELATIONSHIP_NAME,
|
|
1064
1050
|
os_relationship_type="mtm",
|
|
@@ -1080,8 +1066,20 @@ class NifiEntity(object):
|
|
|
1080
1066
|
os_entity_uid,
|
|
1081
1067
|
os_relationship_uid,
|
|
1082
1068
|
)
|
|
1083
|
-
|
|
1084
|
-
|
|
1069
|
+
if isinstance(file, Contents):
|
|
1070
|
+
child_entity._contents = file
|
|
1071
|
+
else:
|
|
1072
|
+
temp_filename = f"tmp_{child_entity.record['os_entity_uid']}"
|
|
1073
|
+
temp_contents = TemporaryAttachmentContents(
|
|
1074
|
+
entity_type=FILE_ENTITY_NAME,
|
|
1075
|
+
filetype=filetype,
|
|
1076
|
+
filename=temp_filename,
|
|
1077
|
+
client=self.context.client,
|
|
1078
|
+
initial_data=file,
|
|
1079
|
+
)
|
|
1080
|
+
temp_contents.flush()
|
|
1081
|
+
child_entity._contents = temp_contents
|
|
1082
|
+
child_entity.request["contents_pointer"] = child_entity._contents.to_locator()
|
|
1085
1083
|
return child_entity, child_rel
|
|
1086
1084
|
|
|
1087
1085
|
def add_tag(self, os_workspace, name, group, order, color, fields={}):
|
|
@@ -10,7 +10,10 @@ def now():
|
|
|
10
10
|
|
|
11
11
|
|
|
12
12
|
def string_to_datetime(datetime_str):
    """
    Parse a date/time string into a timezone-aware datetime.

    A falsy input (None, empty string) is replaced by the Unix epoch
    formatted as "%Y-%m-%dT%H:%M:%SZ" before parsing. When the parsed value
    carries no usable UTC offset it is tagged as UTC without shifting the
    clock time.
    """
    if datetime_str:
        text = datetime_str
    else:
        text = dt.datetime.fromtimestamp(0, dt.timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")
    result = dt_parser.parse(text)
    zone = result.tzinfo
    # Naive result (no tzinfo, or tzinfo without an offset): assume UTC.
    if zone is None or zone.utcoffset(result) is None:
        result = result.replace(tzinfo=dt.timezone.utc)
    return result
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|