streamlit-octostar-utils 0.4.1__tar.gz → 0.4.2.dev1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {streamlit_octostar_utils-0.4.1 → streamlit_octostar_utils-0.4.2.dev1}/PKG-INFO +1 -1
- {streamlit_octostar_utils-0.4.1 → streamlit_octostar_utils-0.4.2.dev1}/pyproject.toml +1 -1
- streamlit_octostar_utils-0.4.2.dev1/streamlit_octostar_utils/api_crafter/contents.py +760 -0
- {streamlit_octostar_utils-0.4.1 → streamlit_octostar_utils-0.4.2.dev1}/streamlit_octostar_utils/api_crafter/nifi.py +35 -51
- {streamlit_octostar_utils-0.4.1 → streamlit_octostar_utils-0.4.2.dev1}/streamlit_octostar_utils/core/timestamp.py +4 -1
- {streamlit_octostar_utils-0.4.1 → streamlit_octostar_utils-0.4.2.dev1}/LICENSE +0 -0
- {streamlit_octostar_utils-0.4.1 → streamlit_octostar_utils-0.4.2.dev1}/README.md +0 -0
- {streamlit_octostar_utils-0.4.1 → streamlit_octostar_utils-0.4.2.dev1}/streamlit_octostar_utils/__init__.py +0 -0
- {streamlit_octostar_utils-0.4.1 → streamlit_octostar_utils-0.4.2.dev1}/streamlit_octostar_utils/api_crafter/__init__.py +0 -0
- {streamlit_octostar_utils-0.4.1 → streamlit_octostar_utils-0.4.2.dev1}/streamlit_octostar_utils/api_crafter/celery.py +0 -0
- {streamlit_octostar_utils-0.4.1 → streamlit_octostar_utils-0.4.2.dev1}/streamlit_octostar_utils/api_crafter/fastapi.py +0 -0
- {streamlit_octostar_utils-0.4.1 → streamlit_octostar_utils-0.4.2.dev1}/streamlit_octostar_utils/api_crafter/parallelism.py +0 -0
- {streamlit_octostar_utils-0.4.1 → streamlit_octostar_utils-0.4.2.dev1}/streamlit_octostar_utils/api_crafter/parser/__init__.py +0 -0
- {streamlit_octostar_utils-0.4.1 → streamlit_octostar_utils-0.4.2.dev1}/streamlit_octostar_utils/api_crafter/parser/combine_fields.py +0 -0
- {streamlit_octostar_utils-0.4.1 → streamlit_octostar_utils-0.4.2.dev1}/streamlit_octostar_utils/api_crafter/parser/entities_parser.py +0 -0
- {streamlit_octostar_utils-0.4.1 → streamlit_octostar_utils-0.4.2.dev1}/streamlit_octostar_utils/api_crafter/parser/generics.py +0 -0
- {streamlit_octostar_utils-0.4.1 → streamlit_octostar_utils-0.4.2.dev1}/streamlit_octostar_utils/api_crafter/parser/info.py +0 -0
- {streamlit_octostar_utils-0.4.1 → streamlit_octostar_utils-0.4.2.dev1}/streamlit_octostar_utils/api_crafter/parser/linkchart_functions.py +0 -0
- {streamlit_octostar_utils-0.4.1 → streamlit_octostar_utils-0.4.2.dev1}/streamlit_octostar_utils/api_crafter/parser/matches.py +0 -0
- {streamlit_octostar_utils-0.4.1 → streamlit_octostar_utils-0.4.2.dev1}/streamlit_octostar_utils/api_crafter/parser/parameters.py +0 -0
- {streamlit_octostar_utils-0.4.1 → streamlit_octostar_utils-0.4.2.dev1}/streamlit_octostar_utils/api_crafter/parser/rules.py +0 -0
- {streamlit_octostar_utils-0.4.1 → streamlit_octostar_utils-0.4.2.dev1}/streamlit_octostar_utils/api_crafter/parser/signals.py +0 -0
- {streamlit_octostar_utils-0.4.1 → streamlit_octostar_utils-0.4.2.dev1}/streamlit_octostar_utils/core/__init__.py +0 -0
- {streamlit_octostar_utils-0.4.1 → streamlit_octostar_utils-0.4.2.dev1}/streamlit_octostar_utils/core/dict.py +0 -0
- {streamlit_octostar_utils-0.4.1 → streamlit_octostar_utils-0.4.2.dev1}/streamlit_octostar_utils/core/filetypes.py +0 -0
- {streamlit_octostar_utils-0.4.1 → streamlit_octostar_utils-0.4.2.dev1}/streamlit_octostar_utils/core/threading/__init__.py +0 -0
- {streamlit_octostar_utils-0.4.1 → streamlit_octostar_utils-0.4.2.dev1}/streamlit_octostar_utils/core/threading/key_queue.py +0 -0
- {streamlit_octostar_utils-0.4.1 → streamlit_octostar_utils-0.4.2.dev1}/streamlit_octostar_utils/nlp/__init__.py +0 -0
- {streamlit_octostar_utils-0.4.1 → streamlit_octostar_utils-0.4.2.dev1}/streamlit_octostar_utils/nlp/custom_recognizers.py +0 -0
- {streamlit_octostar_utils-0.4.1 → streamlit_octostar_utils-0.4.2.dev1}/streamlit_octostar_utils/nlp/language.py +0 -0
- {streamlit_octostar_utils-0.4.1 → streamlit_octostar_utils-0.4.2.dev1}/streamlit_octostar_utils/nlp/ner.py +0 -0
- {streamlit_octostar_utils-0.4.1 → streamlit_octostar_utils-0.4.2.dev1}/streamlit_octostar_utils/octostar/__init__.py +0 -0
- {streamlit_octostar_utils-0.4.1 → streamlit_octostar_utils-0.4.2.dev1}/streamlit_octostar_utils/octostar/client.py +0 -0
- {streamlit_octostar_utils-0.4.1 → streamlit_octostar_utils-0.4.2.dev1}/streamlit_octostar_utils/octostar/context.py +0 -0
- {streamlit_octostar_utils-0.4.1 → streamlit_octostar_utils-0.4.2.dev1}/streamlit_octostar_utils/octostar/permissions.py +0 -0
- {streamlit_octostar_utils-0.4.1 → streamlit_octostar_utils-0.4.2.dev1}/streamlit_octostar_utils/ontology/__init__.py +0 -0
- {streamlit_octostar_utils-0.4.1 → streamlit_octostar_utils-0.4.2.dev1}/streamlit_octostar_utils/ontology/inheritance.py +0 -0
- {streamlit_octostar_utils-0.4.1 → streamlit_octostar_utils-0.4.2.dev1}/streamlit_octostar_utils/ontology/relationships.py +0 -0
- {streamlit_octostar_utils-0.4.1 → streamlit_octostar_utils-0.4.2.dev1}/streamlit_octostar_utils/ontology/validation.py +0 -0
- {streamlit_octostar_utils-0.4.1 → streamlit_octostar_utils-0.4.2.dev1}/streamlit_octostar_utils/style/__init__.py +0 -0
- {streamlit_octostar_utils-0.4.1 → streamlit_octostar_utils-0.4.2.dev1}/streamlit_octostar_utils/style/common.py +0 -0
- {streamlit_octostar_utils-0.4.1 → streamlit_octostar_utils-0.4.2.dev1}/streamlit_octostar_utils/threading/__init__.py +0 -0
- {streamlit_octostar_utils-0.4.1 → streamlit_octostar_utils-0.4.2.dev1}/streamlit_octostar_utils/threading/async_task_manager.py +0 -0
- {streamlit_octostar_utils-0.4.1 → streamlit_octostar_utils-0.4.2.dev1}/streamlit_octostar_utils/threading/session_callback_manager.py +0 -0
- {streamlit_octostar_utils-0.4.1 → streamlit_octostar_utils-0.4.2.dev1}/streamlit_octostar_utils/threading/session_state_hot_swapper.py +0 -0
|
@@ -0,0 +1,760 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Contents abstraction for NiFi entities.
|
|
3
|
+
|
|
4
|
+
Provides a FileIO-like interface for handling entity contents with support for:
|
|
5
|
+
- An io.BufferedIOBase-style interface (read, write, seek, tell, readline, etc.)
|
|
6
|
+
- Seeking and streaming with HTTP Range requests
|
|
7
|
+
- Multiple storage backends (memory, workspace attachments)
|
|
8
|
+
- Serialization/deserialization via from_locator()/to_locator() methods
|
|
9
|
+
- Lazy loading and efficient partial reads for large files
|
|
10
|
+
|
|
11
|
+
Storage Backends:
|
|
12
|
+
- MemoryContents: In-memory buffer (like BytesIO)
|
|
13
|
+
- WorkspaceAttachmentContents: Octostar workspace attachments with HTTP Range support
|
|
14
|
+
"""
|
|
15
|
+
|
|
16
|
+
from abc import ABC, abstractmethod
|
|
17
|
+
from typing import Optional, Dict, Any, Union, BinaryIO, List
|
|
18
|
+
from enum import Enum
|
|
19
|
+
import base64
|
|
20
|
+
import httpx
|
|
21
|
+
import logging
|
|
22
|
+
from io import BytesIO, SEEK_SET, SEEK_CUR, SEEK_END
|
|
23
|
+
from copy import deepcopy
|
|
24
|
+
|
|
25
|
+
_logger = logging.getLogger(__name__)
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
class ContentsLocation(Enum):
    """Storage backends a contents locator can point at.

    The member values are the strings stored under the ``"location"`` key of
    a locator dictionary.
    """

    # Bytes are carried inline in the locator itself.
    MEMORY = "memory"
    # Bytes live in a workspace attachment.
    WORKSPACE_ATTACHMENT = "workspace_attachment"
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
class Contents(ABC):
|
|
35
|
+
"""
|
|
36
|
+
Abstract base class for entity contents.
|
|
37
|
+
|
|
38
|
+
Provides a FileIO-like interface with support for seeking, streaming,
|
|
39
|
+
and multiple storage backends. Each implementation manages its own
|
|
40
|
+
locator dictionary for serialization/deserialization.
|
|
41
|
+
"""
|
|
42
|
+
|
|
43
|
+
def __init__(
|
|
44
|
+
self,
|
|
45
|
+
mode: str = "rb",
|
|
46
|
+
entity_type: Optional[str] = None,
|
|
47
|
+
filetype: Optional[str] = None,
|
|
48
|
+
**kwargs
|
|
49
|
+
):
|
|
50
|
+
self._mode = mode
|
|
51
|
+
self._entity_type = entity_type
|
|
52
|
+
self._filetype = filetype
|
|
53
|
+
self._closed = False
|
|
54
|
+
|
|
55
|
+
# ==================== FileIO Interface ====================
|
|
56
|
+
|
|
57
|
+
@abstractmethod
|
|
58
|
+
def read(self, size: int = -1) -> bytes:
|
|
59
|
+
"""
|
|
60
|
+
Read and return up to size bytes, or all bytes if size is -1.
|
|
61
|
+
|
|
62
|
+
Args:
|
|
63
|
+
size: Number of bytes to read. -1 means read all.
|
|
64
|
+
|
|
65
|
+
Returns:
|
|
66
|
+
Bytes read from the stream.
|
|
67
|
+
"""
|
|
68
|
+
pass
|
|
69
|
+
|
|
70
|
+
@abstractmethod
|
|
71
|
+
def write(self, b: bytes) -> int:
|
|
72
|
+
"""
|
|
73
|
+
Write bytes to the stream.
|
|
74
|
+
|
|
75
|
+
Args:
|
|
76
|
+
b: Bytes to write.
|
|
77
|
+
|
|
78
|
+
Returns:
|
|
79
|
+
Number of bytes written.
|
|
80
|
+
"""
|
|
81
|
+
pass
|
|
82
|
+
|
|
83
|
+
@abstractmethod
|
|
84
|
+
def seek(self, offset: int, whence: int = SEEK_SET) -> int:
|
|
85
|
+
"""
|
|
86
|
+
Change stream position.
|
|
87
|
+
|
|
88
|
+
Args:
|
|
89
|
+
offset: Offset relative to whence.
|
|
90
|
+
whence: SEEK_SET (0) = from start, SEEK_CUR (1) = from current, SEEK_END (2) = from end.
|
|
91
|
+
|
|
92
|
+
Returns:
|
|
93
|
+
New absolute position.
|
|
94
|
+
"""
|
|
95
|
+
pass
|
|
96
|
+
|
|
97
|
+
@abstractmethod
|
|
98
|
+
def tell(self) -> int:
|
|
99
|
+
"""
|
|
100
|
+
Return current stream position.
|
|
101
|
+
|
|
102
|
+
Returns:
|
|
103
|
+
Current position in bytes.
|
|
104
|
+
"""
|
|
105
|
+
pass
|
|
106
|
+
|
|
107
|
+
def readable(self) -> bool:
|
|
108
|
+
"""Check if stream is readable."""
|
|
109
|
+
return 'r' in self._mode or '+' in self._mode
|
|
110
|
+
|
|
111
|
+
def writable(self) -> bool:
|
|
112
|
+
"""Check if stream is writable."""
|
|
113
|
+
return 'w' in self._mode or 'a' in self._mode or '+' in self._mode
|
|
114
|
+
|
|
115
|
+
def seekable(self) -> bool:
|
|
116
|
+
"""Check if stream is seekable."""
|
|
117
|
+
return True
|
|
118
|
+
|
|
119
|
+
@abstractmethod
|
|
120
|
+
def flush(self):
|
|
121
|
+
"""Flush write buffers."""
|
|
122
|
+
pass
|
|
123
|
+
|
|
124
|
+
@abstractmethod
|
|
125
|
+
def close(self):
|
|
126
|
+
"""Close the stream and release resources."""
|
|
127
|
+
self._closed = True
|
|
128
|
+
|
|
129
|
+
@property
|
|
130
|
+
def closed(self) -> bool:
|
|
131
|
+
"""Check if stream is closed."""
|
|
132
|
+
return self._closed
|
|
133
|
+
|
|
134
|
+
@property
|
|
135
|
+
def mode(self) -> str:
|
|
136
|
+
"""Get the file mode."""
|
|
137
|
+
return self._mode
|
|
138
|
+
|
|
139
|
+
@abstractmethod
|
|
140
|
+
def truncate(self, size: Optional[int] = None) -> int:
|
|
141
|
+
"""
|
|
142
|
+
Resize the stream to the given size.
|
|
143
|
+
|
|
144
|
+
Args:
|
|
145
|
+
size: New size in bytes. If None, use current position.
|
|
146
|
+
|
|
147
|
+
Returns:
|
|
148
|
+
New size.
|
|
149
|
+
"""
|
|
150
|
+
pass
|
|
151
|
+
|
|
152
|
+
def __enter__(self):
|
|
153
|
+
"""Context manager entry."""
|
|
154
|
+
return self
|
|
155
|
+
|
|
156
|
+
def __exit__(self, exc_type, exc_val, exc_tb):
|
|
157
|
+
"""Context manager exit."""
|
|
158
|
+
self.close()
|
|
159
|
+
return False
|
|
160
|
+
|
|
161
|
+
def readline(self, size: int = -1) -> bytes:
|
|
162
|
+
"""
|
|
163
|
+
Read and return one line from the stream.
|
|
164
|
+
|
|
165
|
+
Args:
|
|
166
|
+
size: Maximum number of bytes to read. -1 means no limit.
|
|
167
|
+
|
|
168
|
+
Returns:
|
|
169
|
+
Bytes up to and including the newline character, or until EOF.
|
|
170
|
+
"""
|
|
171
|
+
result = bytearray()
|
|
172
|
+
while True:
|
|
173
|
+
if size >= 0 and len(result) >= size:
|
|
174
|
+
break
|
|
175
|
+
byte = self.read(1)
|
|
176
|
+
if not byte:
|
|
177
|
+
break
|
|
178
|
+
result.extend(byte)
|
|
179
|
+
if byte == b'\n':
|
|
180
|
+
break
|
|
181
|
+
return bytes(result)
|
|
182
|
+
|
|
183
|
+
def readlines(self, hint: int = -1) -> List[bytes]:
|
|
184
|
+
"""
|
|
185
|
+
Read and return a list of lines from the stream.
|
|
186
|
+
|
|
187
|
+
Args:
|
|
188
|
+
hint: Optional size hint. If total size of lines exceeds hint, no more lines are read.
|
|
189
|
+
|
|
190
|
+
Returns:
|
|
191
|
+
List of lines.
|
|
192
|
+
"""
|
|
193
|
+
lines = []
|
|
194
|
+
total_size = 0
|
|
195
|
+
while True:
|
|
196
|
+
line = self.readline()
|
|
197
|
+
if not line:
|
|
198
|
+
break
|
|
199
|
+
lines.append(line)
|
|
200
|
+
total_size += len(line)
|
|
201
|
+
if hint >= 0 and total_size >= hint:
|
|
202
|
+
break
|
|
203
|
+
return lines
|
|
204
|
+
|
|
205
|
+
def writelines(self, lines: List[bytes]):
|
|
206
|
+
"""
|
|
207
|
+
Write a list of lines to the stream.
|
|
208
|
+
|
|
209
|
+
Args:
|
|
210
|
+
lines: List of byte strings to write.
|
|
211
|
+
"""
|
|
212
|
+
for line in lines:
|
|
213
|
+
self.write(line)
|
|
214
|
+
|
|
215
|
+
def readinto(self, b: bytearray) -> int:
|
|
216
|
+
"""
|
|
217
|
+
Read bytes into a pre-allocated buffer.
|
|
218
|
+
|
|
219
|
+
Args:
|
|
220
|
+
b: Pre-allocated bytearray to read into.
|
|
221
|
+
|
|
222
|
+
Returns:
|
|
223
|
+
Number of bytes read.
|
|
224
|
+
"""
|
|
225
|
+
data = self.read(len(b))
|
|
226
|
+
n = len(data)
|
|
227
|
+
b[:n] = data
|
|
228
|
+
return n
|
|
229
|
+
|
|
230
|
+
def read1(self, size: int = -1) -> bytes:
|
|
231
|
+
"""
|
|
232
|
+
Read up to size bytes with at most one read() call to the underlying stream.
|
|
233
|
+
|
|
234
|
+
For most implementations, this is the same as read(). Subclasses may override
|
|
235
|
+
for optimization.
|
|
236
|
+
|
|
237
|
+
Args:
|
|
238
|
+
size: Number of bytes to read. -1 means read all available.
|
|
239
|
+
|
|
240
|
+
Returns:
|
|
241
|
+
Bytes read from the stream.
|
|
242
|
+
"""
|
|
243
|
+
return self.read(size)
|
|
244
|
+
|
|
245
|
+
# ==================== Locator Interface ====================
|
|
246
|
+
|
|
247
|
+
@abstractmethod
|
|
248
|
+
def to_locator(self) -> Dict[str, Any]:
|
|
249
|
+
"""
|
|
250
|
+
Serialize contents to a locator dictionary.
|
|
251
|
+
|
|
252
|
+
The locator contains:
|
|
253
|
+
- location: The storage backend type
|
|
254
|
+
- Additional backend-specific parameters
|
|
255
|
+
|
|
256
|
+
Returns:
|
|
257
|
+
Dictionary describing how to locate/access this content.
|
|
258
|
+
"""
|
|
259
|
+
pass
|
|
260
|
+
|
|
261
|
+
@staticmethod
|
|
262
|
+
def from_locator(locator: Optional[Dict[str, Any]], client=None) -> Optional['Contents']:
|
|
263
|
+
"""
|
|
264
|
+
Deserialize contents from a locator dictionary.
|
|
265
|
+
|
|
266
|
+
Factory method that creates the appropriate Contents subclass based on the
|
|
267
|
+
locator's "location" field. This replaces the old base64.b64decode logic in NiFi.
|
|
268
|
+
|
|
269
|
+
Args:
|
|
270
|
+
locator: Locator dictionary (e.g., from request.contents_pointer)
|
|
271
|
+
client: Optional Octostar client for remote operations
|
|
272
|
+
|
|
273
|
+
Returns:
|
|
274
|
+
Contents instance or None if locator is None/empty.
|
|
275
|
+
|
|
276
|
+
Raises:
|
|
277
|
+
ValueError: If the location type is unknown
|
|
278
|
+
"""
|
|
279
|
+
if not locator:
|
|
280
|
+
return None
|
|
281
|
+
|
|
282
|
+
location = locator.get("location")
|
|
283
|
+
|
|
284
|
+
match location:
|
|
285
|
+
case ContentsLocation.MEMORY.value:
|
|
286
|
+
return MemoryContents._from_locator(locator)
|
|
287
|
+
case ContentsLocation.WORKSPACE_ATTACHMENT.value:
|
|
288
|
+
return WorkspaceAttachmentContents._from_locator(locator, client)
|
|
289
|
+
case _:
|
|
290
|
+
raise ValueError(f"Unknown contents location type: {location}")
|
|
291
|
+
|
|
292
|
+
# ==================== Utility Methods ====================
|
|
293
|
+
|
|
294
|
+
def read_all(self) -> bytes:
|
|
295
|
+
"""Read all contents and return as bytes."""
|
|
296
|
+
current_pos = self.tell()
|
|
297
|
+
self.seek(0, SEEK_SET)
|
|
298
|
+
data = self.read()
|
|
299
|
+
self.seek(current_pos, SEEK_SET)
|
|
300
|
+
return data
|
|
301
|
+
|
|
302
|
+
def getvalue(self) -> bytes:
|
|
303
|
+
"""
|
|
304
|
+
Return the entire contents without moving the position.
|
|
305
|
+
|
|
306
|
+
This matches BytesIO.getvalue() behavior.
|
|
307
|
+
|
|
308
|
+
Returns:
|
|
309
|
+
Entire contents as bytes.
|
|
310
|
+
"""
|
|
311
|
+
return self.read_all()
|
|
312
|
+
|
|
313
|
+
def write_all(self, data: bytes):
|
|
314
|
+
"""Write all data, replacing existing contents."""
|
|
315
|
+
self.seek(0, SEEK_SET)
|
|
316
|
+
self.truncate(0)
|
|
317
|
+
self.write(data)
|
|
318
|
+
self.flush()
|
|
319
|
+
|
|
320
|
+
def __len__(self) -> int:
|
|
321
|
+
"""
|
|
322
|
+
Return the length of the contents.
|
|
323
|
+
|
|
324
|
+
Returns:
|
|
325
|
+
Total size in bytes.
|
|
326
|
+
"""
|
|
327
|
+
current_pos = self.tell()
|
|
328
|
+
self.seek(0, SEEK_END)
|
|
329
|
+
length = self.tell()
|
|
330
|
+
self.seek(current_pos, SEEK_SET)
|
|
331
|
+
return length
|
|
332
|
+
|
|
333
|
+
|
|
334
|
+
class MemoryContents(Contents):
    """In-memory contents implementation using BytesIO."""

    def __init__(
        self,
        mode: str = "r+b",
        entity_type: Optional[str] = None,
        filetype: Optional[str] = None,
        *,
        initial_data: Optional[bytes] = None,
        **kwargs
    ):
        """
        Create an in-memory stream.

        Args:
            mode: File-style mode string; defaults to read/write ("r+b").
            entity_type: Optional entity type recorded in the locator.
            filetype: Optional file type recorded in the locator.
            initial_data: Bytes the buffer starts with (empty if None).
            **kwargs: Forwarded to Contents.__init__.
        """
        super().__init__(mode, entity_type, filetype, **kwargs)
        self._buffer = BytesIO(initial_data or b"")
        if "a" in mode:
            # Append modes start positioned at the end, like open(..., "ab"),
            # so the first write extends initial_data instead of overwriting it.
            self._buffer.seek(0, SEEK_END)

    def read(self, size: int = -1) -> bytes:
        """Read up to size bytes from the buffer; raise IOError if not readable."""
        if not self.readable():
            raise IOError("Contents not readable")
        return self._buffer.read(size)

    def write(self, b: bytes) -> int:
        """Write b at the current position; raise IOError if not writable."""
        if not self.writable():
            raise IOError("Contents not writable")
        return self._buffer.write(b)

    def seek(self, offset: int, whence: int = SEEK_SET) -> int:
        """Move the buffer position and return the new absolute position."""
        return self._buffer.seek(offset, whence)

    def tell(self) -> int:
        """Return the current buffer position."""
        return self._buffer.tell()

    def flush(self):
        """Flush the underlying BytesIO."""
        self._buffer.flush()

    def close(self):
        """Close the underlying buffer (once) and mark the stream closed."""
        if not self._closed:
            self._buffer.close()
        super().close()

    def truncate(self, size: Optional[int] = None) -> int:
        """Resize the buffer to size bytes (current position if None)."""
        return self._buffer.truncate(size)

    def getvalue(self) -> bytes:
        """Return the whole buffer without touching the position."""
        return self._buffer.getvalue()

    def to_locator(self) -> Dict[str, Any]:
        """
        Serialize to locator with base64-encoded data.

        "data" is None when the buffer is empty; "entity_type" and "filetype"
        are only included when set.

        Returns:
            {"location": "memory", "data": "<base64>", "entity_type": "...", "filetype": "..."}
        """
        data = self._buffer.getvalue()
        locator = {
            "location": ContentsLocation.MEMORY.value,
            "data": base64.b64encode(data).decode('utf-8') if data else None
        }
        if self._entity_type:
            locator["entity_type"] = self._entity_type
        if self._filetype:
            locator["filetype"] = self._filetype
        return locator

    @staticmethod
    def _from_locator(locator: Dict[str, Any]) -> 'MemoryContents':
        """
        Create MemoryContents from a locator dictionary.

        The instance is created with the default mode ("r+b"); the locator
        does not carry a mode.

        Args:
            locator: Locator dictionary with base64-encoded data

        Returns:
            New MemoryContents instance
        """
        data = locator.get("data")
        initial_data = base64.b64decode(data) if data else None
        return MemoryContents(
            entity_type=locator.get("entity_type"),
            filetype=locator.get("filetype"),
            initial_data=initial_data
        )
|
|
415
|
+
|
|
416
|
+
|
|
417
|
+
class WorkspaceAttachmentContents(Contents):
    """
    Contents implementation for Octostar workspace attachments.

    Uses octostar-api utilities (read_attachment, get_attachment_url,
    write_attachment) to:
    - Lazy load from workspace storage via read_attachment()
    - Stream read-only access with HTTP Range requests (no need to download entire file)
    - Seek without transferring data while streaming
    - Buffer modifications in memory and write them back on flush()/close()

    Two access paths exist:
    - Streaming: mode is not writable and nothing has been loaded yet. Reads
      are HTTP Range requests and the position is tracked in ``_position``.
    - Buffered: mode is writable, or the attachment has been fully loaded
      (initial_data, getvalue(), truncate()). All operations go through an
      in-memory BytesIO, which then owns the position and size.

    Presigned URL Handling:
    - URLs are obtained via get_attachment_url() and cached
    - On 403 (Forbidden) responses, URLs are refreshed and requests retried
    """

    DEFAULT_CHUNK_SIZE = 8192
    DEFAULT_URL_TIMEOUT = 120
    # Attempts per HTTP request when the presigned URL keeps answering 403.
    _MAX_URL_RETRIES = 3

    def __init__(
        self,
        mode: str = "rb",
        entity_type: Optional[str] = None,
        filetype: Optional[str] = None,
        *,
        workspace_id: str,
        entity_id: str,
        client,
        initial_data: Optional[bytes] = None,
        chunk_size: int = DEFAULT_CHUNK_SIZE,
        **kwargs
    ):
        """
        Create a handle on a workspace attachment. No I/O happens here.

        Args:
            mode: File-style mode string; defaults to read-only ("rb").
            entity_type: Entity type; required by flush() when writing back.
            filetype: File type; required by flush() when writing back.
            workspace_id: Workspace that owns the attachment.
            entity_id: Entity the attachment belongs to.
            client: Octostar client passed through to the octostar utilities.
            initial_data: If given, used as the already-loaded contents.
            chunk_size: Stored on the instance; not used by the methods of
                this class.
            **kwargs: Forwarded to Contents.__init__.
        """
        super().__init__(mode, entity_type, filetype, **kwargs)
        self._workspace_id = workspace_id
        self._entity_id = entity_id
        self._client = client
        self._chunk_size = chunk_size

        self._buffer: Optional[BytesIO] = None
        self._fully_loaded = False
        self._modified = False
        self._position = 0
        self._size: Optional[int] = None
        self._presigned_url: Optional[str] = None
        self._http_client: Optional[httpx.Client] = None

        if initial_data is not None:
            self._buffer = BytesIO(initial_data)
            self._fully_loaded = True
            self._size = len(initial_data)

    def _is_buffered(self) -> bool:
        """Return True once the in-memory buffer owns position and size."""
        return self._buffer is not None and self._fully_loaded

    def _buffer_size(self) -> int:
        """Return the current length of the in-memory buffer, keeping its position."""
        pos = self._buffer.tell()
        try:
            return self._buffer.seek(0, SEEK_END)
        finally:
            self._buffer.seek(pos, SEEK_SET)

    def _get_presigned_url(self) -> str:
        """Request a presigned URL for the attachment from the workspace API."""
        from octostar.utils.workspace import get_attachment_url

        return get_attachment_url.sync(
            os_workspace=self._workspace_id,
            os_entity_uid=self._entity_id,
            client=self._client
        )

    def _ensure_http_client(self):
        """Ensure HTTP client is initialized."""
        if self._http_client is None:
            self._http_client = httpx.Client(timeout=self.DEFAULT_URL_TIMEOUT)

    def _fetch_size(self) -> int:
        """
        Return the size of the attachment in bytes.

        When the contents are buffered in memory, the buffer length is used
        (the cached remote size goes stale after write()/truncate()).
        Otherwise the cached size is returned, or fetched with a HEAD request.

        Handles presigned URL expiration with automatic refresh and retry.

        Raises:
            ConnectionError: If every attempt was answered with 403.
            httpx.HTTPStatusError: For other error responses.
        """
        if self._is_buffered():
            self._size = self._buffer_size()
            return self._size

        if self._size is not None:
            return self._size

        if not self._presigned_url:
            self._presigned_url = self._get_presigned_url()

        self._ensure_http_client()
        max_retries = self._MAX_URL_RETRIES

        for attempt in range(max_retries):
            response = self._http_client.head(self._presigned_url)
            if response.status_code == 200:
                self._size = int(response.headers.get('content-length', 0))
                return self._size
            elif response.status_code == 403:
                _logger.debug(
                    "Presigned URL expired while fetching size, refreshing... (attempt %d/%d)",
                    attempt + 1,
                    max_retries,
                )
                self._presigned_url = self._get_presigned_url()
                continue
            else:
                response.raise_for_status()

        raise ConnectionError(
            f"Failed to fetch file size after {max_retries} attempts (URL kept expiring)"
        )

    def _read_range(self, start: int, end: int) -> bytes:
        """
        Read a specific byte range using HTTP Range request.

        Handles presigned URL expiration with automatic refresh and retry.

        Args:
            start: Start byte (inclusive)
            end: End byte (inclusive)

        Returns:
            Bytes from the specified range; empty if the server reports the
            range as unsatisfiable (416).

        Raises:
            ConnectionError: If every attempt was answered with 403.
            httpx.HTTPStatusError: For other error responses.
        """
        if not self._presigned_url:
            self._presigned_url = self._get_presigned_url()

        self._ensure_http_client()

        headers = {"Range": f"bytes={start}-{end}"}
        max_retries = self._MAX_URL_RETRIES

        for attempt in range(max_retries):
            response = self._http_client.get(self._presigned_url, headers=headers)

            if response.status_code == 206:
                return response.content
            elif response.status_code == 200:
                # The server ignored the Range header and sent the whole
                # body; cut out the requested window ourselves.
                return response.content[start:end + 1]
            elif response.status_code == 416:
                return b""
            elif response.status_code == 403:
                _logger.debug(
                    "Presigned URL expired at byte %d, refreshing... (attempt %d/%d)",
                    start,
                    attempt + 1,
                    max_retries,
                )
                self._presigned_url = self._get_presigned_url()
                continue
            else:
                response.raise_for_status()
                return response.content

        raise ConnectionError(
            f"Failed to read range {start}-{end} after {max_retries} attempts (URL kept expiring)"
        )

    def _load_full(self):
        """
        Load the entire attachment into memory buffer using read_attachment().

        The position reached so far (via seek() or streamed reads) is carried
        over to the new buffer, so loading does not move the stream.
        """
        if self._fully_loaded:
            return

        from octostar.utils.workspace import read_attachment

        data = read_attachment.sync(
            os_workspace=self._workspace_id,
            os_entity_uid=self._entity_id,
            decode=False,
            stream=False,
            client=self._client
        )
        self._buffer = BytesIO(data or b"")
        self._fully_loaded = True
        self._size = len(data) if data else 0
        # From here on the buffer owns the position.
        self._buffer.seek(self._position, SEEK_SET)

    def read(self, size: int = -1) -> bytes:
        """
        Read up to size bytes from the current position.

        Writable or already-loaded contents are served from the in-memory
        buffer (loading the attachment first if needed). Read-only contents
        are streamed with a single HTTP Range request per call.

        Args:
            size: Number of bytes to read. None or a negative value reads to
                the end.

        Returns:
            Bytes read; empty at or past the end.

        Raises:
            IOError: If the mode is not readable.
        """
        if not self.readable():
            raise IOError("Contents not readable")

        # If writable or already fully loaded, use buffer
        if self.writable() or self._fully_loaded:
            if self._buffer is None:
                self._load_full()
            return self._buffer.read(size)

        # Otherwise, use HTTP Range requests for efficient streaming
        if size == 0:
            return b""

        remaining = self._fetch_size() - self._position
        if remaining <= 0:
            return b""

        # "Read everything" and over-long requests are clamped to what is left.
        if size is None or size < 0 or size > remaining:
            size = remaining

        end_byte = self._position + size - 1
        data = self._read_range(self._position, end_byte)
        self._position += len(data)

        return data

    def write(self, b: bytes) -> int:
        """
        Write b into the in-memory buffer at the current position.

        The attachment is loaded first if it is not in memory yet. Nothing is
        sent to the workspace until flush() or close().

        Raises:
            IOError: If the mode is not writable.
        """
        if not self.writable():
            raise IOError("Contents not writable")

        if self._buffer is None:
            self._load_full()

        n = self._buffer.write(b)
        self._modified = True
        return n

    def seek(self, offset: int, whence: int = SEEK_SET) -> int:
        """
        Change the stream position.

        While streaming, only the tracked position is updated (SEEK_END may
        issue a HEAD request to learn the size). When buffered, the buffer is
        seeked and its current length is used for SEEK_END.

        Returns:
            New absolute position.

        Raises:
            ValueError: For an invalid whence or a negative target position.
        """
        buffered = self._is_buffered()

        # Calculate new position
        if whence == SEEK_SET:
            new_pos = offset
        elif whence == SEEK_CUR:
            new_pos = self.tell() + offset
        elif whence == SEEK_END:
            end = self._buffer_size() if buffered else self._fetch_size()
            new_pos = end + offset
        else:
            raise ValueError(f"Invalid whence value: {whence}")

        if new_pos < 0:
            raise ValueError("Negative seek position")

        # If fully loaded, use buffer seek
        if buffered:
            return self._buffer.seek(new_pos, SEEK_SET)

        # Otherwise, just update position (HTTP Range will handle it)
        self._position = new_pos
        return self._position

    def tell(self) -> int:
        """Return the current position (from the buffer once it is loaded)."""
        if self._is_buffered():
            return self._buffer.tell()
        return self._position

    def flush(self):
        """
        Flush the internal buffer and write to workspace if modified.

        Raises:
            ValueError: If there are modifications to write but entity_type
                or filetype is missing.
        """
        if self._buffer is None:
            return
        self._buffer.flush()

        if not self._modified:
            return

        if not self._entity_type or not self._filetype:
            raise ValueError("entity_type and filetype required to flush to workspace")

        from octostar.utils.workspace import write_attachment

        # getvalue() returns the whole buffer without moving the position.
        data = self._buffer.getvalue()

        write_attachment.sync(
            os_workspace=self._workspace_id,
            os_entity_uid=self._entity_id,
            entity_type=self._entity_type,
            filetype=self._filetype,
            file=data,
            client=self._client
        )
        self._modified = False
        self._size = len(data)

    def close(self):
        """
        Flush pending modifications and release the buffer and HTTP client.

        Resources are released and the stream is marked closed even if the
        flush fails; the flush error still propagates to the caller.
        """
        if self._closed:
            super().close()
            return

        try:
            # Flush any pending writes before closing
            if self._modified:
                self.flush()
        finally:
            if self._buffer is not None:
                self._buffer.close()
            if self._http_client is not None:
                self._http_client.close()
                self._http_client = None
            super().close()

    def truncate(self, size: Optional[int] = None) -> int:
        """
        Resize the in-memory contents, loading the attachment first if needed.

        Args:
            size: New size in bytes. If None, the current position is used.

        Returns:
            New size.
        """
        if self._buffer is None:
            self._load_full()
        self._modified = True
        return self._buffer.truncate(size)

    def getvalue(self) -> bytes:
        """Return the entire contents, loading the attachment into memory if needed."""
        if not self._is_buffered():
            self._load_full()
        return self._buffer.getvalue()

    def to_locator(self) -> Dict[str, Any]:
        """
        Serialize to locator with workspace and entity ID.

        "pointer" is None when either ID is missing; "entity_type" and
        "filetype" are only included when set.

        Returns:
            {"location": "workspace_attachment", "pointer": "workspace_id/entity_id",
             "entity_type": "...", "filetype": "..."}
        """
        if self._workspace_id and self._entity_id:
            pointer = f"{self._workspace_id}/{self._entity_id}"
        else:
            pointer = None

        locator = {
            "location": ContentsLocation.WORKSPACE_ATTACHMENT.value,
            "pointer": pointer
        }
        if self._entity_type:
            locator["entity_type"] = self._entity_type
        if self._filetype:
            locator["filetype"] = self._filetype
        return locator

    @staticmethod
    def _from_locator(locator: Dict[str, Any], client=None) -> 'WorkspaceAttachmentContents':
        """
        Create WorkspaceAttachmentContents from a locator dictionary.

        The pointer is split on "/"; the first segment is taken as the
        workspace ID and the last as the entity ID. If the pointer is missing
        or has fewer than two segments, both IDs are None.

        Args:
            locator: Locator dictionary with pointer "workspace_id/entity_id"
            client: Octostar client for remote operations

        Returns:
            New WorkspaceAttachmentContents instance (default mode "rb")
        """
        pointer = locator.get("pointer")
        workspace_id = None
        entity_id = None

        if pointer:
            parts = pointer.split("/")
            if len(parts) >= 2:
                workspace_id = parts[0]
                entity_id = parts[-1]

        return WorkspaceAttachmentContents(
            entity_type=locator.get("entity_type"),
            filetype=locator.get("filetype"),
            workspace_id=workspace_id,
            entity_id=entity_id,
            client=client
        )
|
|
@@ -26,6 +26,7 @@ from ..core.dict import recursive_update_dict, travel_dict, jsondict_hash
|
|
|
26
26
|
from ..core.timestamp import now, string_to_datetime
|
|
27
27
|
from .fastapi import DefaultErrorRoute, Route
|
|
28
28
|
from ..ontology.inheritance import is_child_concept as is_child_concept_fn, get_label_keys
|
|
29
|
+
from .contents import Contents, MemoryContents, WorkspaceAttachmentContents, ContentsLocation
|
|
29
30
|
|
|
30
31
|
RELATIONSHIP_ENTITY_NAME = "os_relationship"
|
|
31
32
|
LOCAL_RELATIONSHIP_ENTITY_NAME = "os_workspace_relationship"
|
|
@@ -47,11 +48,6 @@ def safe_async_run(coro):
|
|
|
47
48
|
return asyncio.run(coro)
|
|
48
49
|
|
|
49
50
|
|
|
50
|
-
class NifiContentsPointerLocationModel(Enum):
|
|
51
|
-
LOCAL = "local"
|
|
52
|
-
ATTACHMENT = "attachment"
|
|
53
|
-
|
|
54
|
-
|
|
55
51
|
class NifiProxyEntityModel(BaseModel):
|
|
56
52
|
entity_id: str
|
|
57
53
|
entity_type: str
|
|
@@ -72,10 +68,6 @@ class NifiEntityModel(BaseModel):
|
|
|
72
68
|
relationships: List[str]
|
|
73
69
|
label_keys: List[str]
|
|
74
70
|
|
|
75
|
-
class ContentsPointerModel(BaseModel):
|
|
76
|
-
location: NifiContentsPointerLocationModel
|
|
77
|
-
pointer: Optional[str] = None
|
|
78
|
-
|
|
79
71
|
jwt: str
|
|
80
72
|
ontology_name: str
|
|
81
73
|
ontology_info: OntologyInfoModel
|
|
@@ -84,7 +76,7 @@ class NifiEntityModel(BaseModel):
|
|
|
84
76
|
nifi_attributes: dict = Field(default_factory=dict)
|
|
85
77
|
config: dict = Field(default_factory=dict)
|
|
86
78
|
metrics: dict = Field(default_factory=dict)
|
|
87
|
-
contents_pointer: Optional[
|
|
79
|
+
contents_pointer: Optional[dict] = None
|
|
88
80
|
is_temporary: bool = False
|
|
89
81
|
exception: dict = Field(default_factory=dict)
|
|
90
82
|
last_processor_name: Optional[str] = None
|
|
@@ -103,7 +95,7 @@ class NifiEntityModel(BaseModel):
|
|
|
103
95
|
record: RecordModel
|
|
104
96
|
annotations: Dict[str, Any] = Field(default_factory=dict)
|
|
105
97
|
children: List[Union[NifiOTMRelationshipProxyModel, NifiProxyEntityModel]] = []
|
|
106
|
-
contents: Optional[
|
|
98
|
+
contents: Optional[Dict[str, Any]] = None
|
|
107
99
|
|
|
108
100
|
|
|
109
101
|
NifiEntityModel.model_rebuild()
|
|
@@ -295,12 +287,10 @@ class NifiContextManager(object):
|
|
|
295
287
|
return client, curr_user_ontology
|
|
296
288
|
|
|
297
289
|
def receive_input(self, json_data, processor_name) -> List["NifiEntityBatch"]:
|
|
298
|
-
def _safe_decode(contents):
|
|
299
|
-
return base64.b64decode(contents) if contents else None
|
|
300
|
-
|
|
301
290
|
entities = []
|
|
302
291
|
all_independent_uids = [e["record"]["entity_id"] for e in json_data]
|
|
303
292
|
for elem in json_data:
|
|
293
|
+
contents = Contents.from_locator(elem.get("contents"), client=self.client)
|
|
304
294
|
entities.append(
|
|
305
295
|
NifiEntity(
|
|
306
296
|
self,
|
|
@@ -309,7 +299,7 @@ class NifiContextManager(object):
|
|
|
309
299
|
elem["annotations"],
|
|
310
300
|
all_independent_uids,
|
|
311
301
|
elem["children"],
|
|
312
|
-
|
|
302
|
+
contents,
|
|
313
303
|
)
|
|
314
304
|
)
|
|
315
305
|
entities = sorted(
|
|
@@ -467,7 +457,7 @@ class NifiContextManager(object):
|
|
|
467
457
|
has_write_flag = entity.sync_params.get(NifiContextManager.SyncFlag.WRITE_CONTENTS)
|
|
468
458
|
is_temp_with_pointer = entity.request.get("is_temporary") and entity.contents_pointer
|
|
469
459
|
if has_write_flag or is_temp_with_pointer:
|
|
470
|
-
if entity.contents:
|
|
460
|
+
if entity.contents: # Contents instance check
|
|
471
461
|
files_to_write.append(entity)
|
|
472
462
|
# FIND ENTITIES TO UPSERT
|
|
473
463
|
self._find_entities_to_upsert(entities, entities_to_upsert, reserved_fields)
|
|
@@ -495,6 +485,10 @@ class NifiContextManager(object):
|
|
|
495
485
|
# WRITE FILES
|
|
496
486
|
if files_to_write:
|
|
497
487
|
for file in files_to_write:
|
|
488
|
+
if not file.contents:
|
|
489
|
+
continue
|
|
490
|
+
|
|
491
|
+
# Pass Contents instance directly — write_file uses duck typing
|
|
498
492
|
new_file_record = write_file.sync(
|
|
499
493
|
file.write_os_workspace,
|
|
500
494
|
"./" + file.record["os_item_name"],
|
|
@@ -510,10 +504,18 @@ class NifiContextManager(object):
|
|
|
510
504
|
file.record["entity_label"] = file.label
|
|
511
505
|
file.request["is_temporary"] = False
|
|
512
506
|
file.request["entity_timestamp"] = file.record["os_last_updated_at"]
|
|
513
|
-
file._contents =
|
|
507
|
+
file._contents = WorkspaceAttachmentContents(
|
|
508
|
+
workspace_id=file.record['os_workspace'],
|
|
509
|
+
entity_id=file.record['os_entity_uid'],
|
|
510
|
+
client=self.client,
|
|
511
|
+
entity_type=file.record["os_concept"],
|
|
512
|
+
filetype=file.record["os_item_content_type"]
|
|
513
|
+
)
|
|
514
514
|
file.request["contents_pointer"] = {
|
|
515
|
-
"location":
|
|
516
|
-
"pointer": f"{file.record['os_workspace']}/{file.record['os_entity_uid']}"
|
|
515
|
+
"location": ContentsLocation.WORKSPACE_ATTACHMENT.value,
|
|
516
|
+
"pointer": f"{file.record['os_workspace']}/{file.record['os_entity_uid']}",
|
|
517
|
+
"entity_type": file.record["os_concept"],
|
|
518
|
+
"filetype": file.record["os_item_content_type"]
|
|
517
519
|
}
|
|
518
520
|
# UPSERT ENTITIES
|
|
519
521
|
if entities_to_upsert:
|
|
@@ -692,7 +694,7 @@ class NifiEntity(object):
|
|
|
692
694
|
c["annotations"],
|
|
693
695
|
all_independent_uids,
|
|
694
696
|
c["children"],
|
|
695
|
-
c
|
|
697
|
+
Contents.from_locator(c.get("contents"), client=self.context.client),
|
|
696
698
|
)
|
|
697
699
|
for c in full_entity_children
|
|
698
700
|
]
|
|
@@ -711,7 +713,7 @@ class NifiEntity(object):
|
|
|
711
713
|
for i in range(len(child_uids))
|
|
712
714
|
]
|
|
713
715
|
self.children.extend(proxy_otm_children)
|
|
714
|
-
self._contents = contents
|
|
716
|
+
self._contents: Optional[Contents] = contents
|
|
715
717
|
self.drop_on_output = False
|
|
716
718
|
|
|
717
719
|
def __eq__(self, other):
|
|
@@ -741,36 +743,16 @@ class NifiEntity(object):
|
|
|
741
743
|
self._annotations = new_annotations
|
|
742
744
|
|
|
743
745
|
@property
|
|
744
|
-
def contents(self):
|
|
746
|
+
def contents(self) -> Optional[Contents]:
|
|
745
747
|
if not self._contents:
|
|
746
748
|
contents_pointer = self.contents_pointer
|
|
747
749
|
if not contents_pointer:
|
|
748
750
|
return None
|
|
749
|
-
|
|
750
|
-
self._contents = read_file.sync(
|
|
751
|
-
contents_pointer["pointer"].split("/")[0],
|
|
752
|
-
contents_pointer["pointer"].split("/")[-1],
|
|
753
|
-
False,
|
|
754
|
-
client=self.context.client,
|
|
755
|
-
)
|
|
751
|
+
self._contents = Contents.from_locator(contents_pointer, client=self.context.client)
|
|
756
752
|
return self._contents
|
|
757
753
|
|
|
758
|
-
@property
|
|
759
|
-
def contents_pointer(self):
|
|
760
|
-
contents_pointer = deepcopy(self.request.get("contents_pointer"))
|
|
761
|
-
if not self.request.get("contents_pointer"):
|
|
762
|
-
return None
|
|
763
|
-
ptr_location = contents_pointer.get("location")
|
|
764
|
-
if ptr_location == "attachment" and not contents_pointer.get("pointer"):
|
|
765
|
-
contents_pointer["pointer"] = f"{self.record['os_workspace']}/{self.record['os_entity_uid']}"
|
|
766
|
-
return contents_pointer
|
|
767
|
-
|
|
768
|
-
@contents_pointer.setter
|
|
769
|
-
def contents_pointer(self, new_value):
|
|
770
|
-
self.request["contents_pointer"] = new_value
|
|
771
|
-
|
|
772
754
|
@contents.setter
|
|
773
|
-
def contents(self, new_contents):
|
|
755
|
+
def contents(self, new_contents: Optional[Union[Contents, bytes]]):
|
|
774
756
|
self._contents = new_contents
|
|
775
757
|
|
|
776
758
|
@property
|
|
@@ -866,9 +848,6 @@ class NifiEntity(object):
|
|
|
866
848
|
return not _is_sub_fragment_recursive(fragment)
|
|
867
849
|
|
|
868
850
|
def to_json(self):
|
|
869
|
-
def _safe_encode(contents):
|
|
870
|
-
return base64.b64encode(contents) if contents else None
|
|
871
|
-
|
|
872
851
|
if self.drop_on_output:
|
|
873
852
|
return
|
|
874
853
|
proxy_entity_children = []
|
|
@@ -909,7 +888,7 @@ class NifiEntity(object):
|
|
|
909
888
|
"record": self.record,
|
|
910
889
|
"children": children,
|
|
911
890
|
"annotations": self.annotations,
|
|
912
|
-
"contents":
|
|
891
|
+
"contents": self._contents.to_locator() if self._contents else None,
|
|
913
892
|
}
|
|
914
893
|
|
|
915
894
|
def _add_entity(self, os_workspace, entity_type, fields, os_entity_uid=None):
|
|
@@ -1058,7 +1037,7 @@ class NifiEntity(object):
|
|
|
1058
1037
|
os_parent_folder,
|
|
1059
1038
|
filename,
|
|
1060
1039
|
filetype,
|
|
1061
|
-
file,
|
|
1040
|
+
file: Union[Contents, bytes],
|
|
1062
1041
|
fields={},
|
|
1063
1042
|
os_relationship_name=FILE_RELATIONSHIP_NAME,
|
|
1064
1043
|
os_relationship_type="mtm",
|
|
@@ -1080,8 +1059,13 @@ class NifiEntity(object):
|
|
|
1080
1059
|
os_entity_uid,
|
|
1081
1060
|
os_relationship_uid,
|
|
1082
1061
|
)
|
|
1083
|
-
|
|
1084
|
-
|
|
1062
|
+
if isinstance(file, Contents):
|
|
1063
|
+
child_entity._contents = file
|
|
1064
|
+
else:
|
|
1065
|
+
child_entity._contents = MemoryContents(
|
|
1066
|
+
entity_type=FILE_ENTITY_NAME, filetype=filetype, initial_data=file
|
|
1067
|
+
)
|
|
1068
|
+
child_entity.request["contents_pointer"] = child_entity._contents.to_locator()
|
|
1085
1069
|
return child_entity, child_rel
|
|
1086
1070
|
|
|
1087
1071
|
def add_tag(self, os_workspace, name, group, order, color, fields={}):
|
|
@@ -10,7 +10,10 @@ def now():
|
|
|
10
10
|
|
|
11
11
|
|
|
12
12
|
def string_to_datetime(datetime_str):
    """
    Parse a datetime string into a timezone-aware datetime.

    A falsy input is replaced by the Unix epoch rendered as
    "%Y-%m-%dT%H:%M:%SZ" before parsing. A parsed value that carries no
    usable UTC offset is tagged as UTC.
    """
    text = datetime_str
    if not text:
        # Fall back to the epoch so callers always get a datetime back
        text = dt.datetime.fromtimestamp(0, dt.timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")
    result = dt_parser.parse(text)
    zone = result.tzinfo
    if zone is None or zone.utcoffset(result) is None:
        # Naive result: attach UTC without shifting the wall-clock value
        return result.replace(tzinfo=dt.timezone.utc)
    return result
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|