streamlit-octostar-utils 0.4.1__tar.gz → 0.4.2.dev2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (45) hide show
  1. {streamlit_octostar_utils-0.4.1 → streamlit_octostar_utils-0.4.2.dev2}/PKG-INFO +1 -1
  2. {streamlit_octostar_utils-0.4.1 → streamlit_octostar_utils-0.4.2.dev2}/pyproject.toml +1 -1
  3. streamlit_octostar_utils-0.4.2.dev2/streamlit_octostar_utils/api_crafter/contents.py +914 -0
  4. {streamlit_octostar_utils-0.4.1 → streamlit_octostar_utils-0.4.2.dev2}/streamlit_octostar_utils/api_crafter/nifi.py +51 -53
  5. {streamlit_octostar_utils-0.4.1 → streamlit_octostar_utils-0.4.2.dev2}/streamlit_octostar_utils/core/timestamp.py +4 -1
  6. {streamlit_octostar_utils-0.4.1 → streamlit_octostar_utils-0.4.2.dev2}/LICENSE +0 -0
  7. {streamlit_octostar_utils-0.4.1 → streamlit_octostar_utils-0.4.2.dev2}/README.md +0 -0
  8. {streamlit_octostar_utils-0.4.1 → streamlit_octostar_utils-0.4.2.dev2}/streamlit_octostar_utils/__init__.py +0 -0
  9. {streamlit_octostar_utils-0.4.1 → streamlit_octostar_utils-0.4.2.dev2}/streamlit_octostar_utils/api_crafter/__init__.py +0 -0
  10. {streamlit_octostar_utils-0.4.1 → streamlit_octostar_utils-0.4.2.dev2}/streamlit_octostar_utils/api_crafter/celery.py +0 -0
  11. {streamlit_octostar_utils-0.4.1 → streamlit_octostar_utils-0.4.2.dev2}/streamlit_octostar_utils/api_crafter/fastapi.py +0 -0
  12. {streamlit_octostar_utils-0.4.1 → streamlit_octostar_utils-0.4.2.dev2}/streamlit_octostar_utils/api_crafter/parallelism.py +0 -0
  13. {streamlit_octostar_utils-0.4.1 → streamlit_octostar_utils-0.4.2.dev2}/streamlit_octostar_utils/api_crafter/parser/__init__.py +0 -0
  14. {streamlit_octostar_utils-0.4.1 → streamlit_octostar_utils-0.4.2.dev2}/streamlit_octostar_utils/api_crafter/parser/combine_fields.py +0 -0
  15. {streamlit_octostar_utils-0.4.1 → streamlit_octostar_utils-0.4.2.dev2}/streamlit_octostar_utils/api_crafter/parser/entities_parser.py +0 -0
  16. {streamlit_octostar_utils-0.4.1 → streamlit_octostar_utils-0.4.2.dev2}/streamlit_octostar_utils/api_crafter/parser/generics.py +0 -0
  17. {streamlit_octostar_utils-0.4.1 → streamlit_octostar_utils-0.4.2.dev2}/streamlit_octostar_utils/api_crafter/parser/info.py +0 -0
  18. {streamlit_octostar_utils-0.4.1 → streamlit_octostar_utils-0.4.2.dev2}/streamlit_octostar_utils/api_crafter/parser/linkchart_functions.py +0 -0
  19. {streamlit_octostar_utils-0.4.1 → streamlit_octostar_utils-0.4.2.dev2}/streamlit_octostar_utils/api_crafter/parser/matches.py +0 -0
  20. {streamlit_octostar_utils-0.4.1 → streamlit_octostar_utils-0.4.2.dev2}/streamlit_octostar_utils/api_crafter/parser/parameters.py +0 -0
  21. {streamlit_octostar_utils-0.4.1 → streamlit_octostar_utils-0.4.2.dev2}/streamlit_octostar_utils/api_crafter/parser/rules.py +0 -0
  22. {streamlit_octostar_utils-0.4.1 → streamlit_octostar_utils-0.4.2.dev2}/streamlit_octostar_utils/api_crafter/parser/signals.py +0 -0
  23. {streamlit_octostar_utils-0.4.1 → streamlit_octostar_utils-0.4.2.dev2}/streamlit_octostar_utils/core/__init__.py +0 -0
  24. {streamlit_octostar_utils-0.4.1 → streamlit_octostar_utils-0.4.2.dev2}/streamlit_octostar_utils/core/dict.py +0 -0
  25. {streamlit_octostar_utils-0.4.1 → streamlit_octostar_utils-0.4.2.dev2}/streamlit_octostar_utils/core/filetypes.py +0 -0
  26. {streamlit_octostar_utils-0.4.1 → streamlit_octostar_utils-0.4.2.dev2}/streamlit_octostar_utils/core/threading/__init__.py +0 -0
  27. {streamlit_octostar_utils-0.4.1 → streamlit_octostar_utils-0.4.2.dev2}/streamlit_octostar_utils/core/threading/key_queue.py +0 -0
  28. {streamlit_octostar_utils-0.4.1 → streamlit_octostar_utils-0.4.2.dev2}/streamlit_octostar_utils/nlp/__init__.py +0 -0
  29. {streamlit_octostar_utils-0.4.1 → streamlit_octostar_utils-0.4.2.dev2}/streamlit_octostar_utils/nlp/custom_recognizers.py +0 -0
  30. {streamlit_octostar_utils-0.4.1 → streamlit_octostar_utils-0.4.2.dev2}/streamlit_octostar_utils/nlp/language.py +0 -0
  31. {streamlit_octostar_utils-0.4.1 → streamlit_octostar_utils-0.4.2.dev2}/streamlit_octostar_utils/nlp/ner.py +0 -0
  32. {streamlit_octostar_utils-0.4.1 → streamlit_octostar_utils-0.4.2.dev2}/streamlit_octostar_utils/octostar/__init__.py +0 -0
  33. {streamlit_octostar_utils-0.4.1 → streamlit_octostar_utils-0.4.2.dev2}/streamlit_octostar_utils/octostar/client.py +0 -0
  34. {streamlit_octostar_utils-0.4.1 → streamlit_octostar_utils-0.4.2.dev2}/streamlit_octostar_utils/octostar/context.py +0 -0
  35. {streamlit_octostar_utils-0.4.1 → streamlit_octostar_utils-0.4.2.dev2}/streamlit_octostar_utils/octostar/permissions.py +0 -0
  36. {streamlit_octostar_utils-0.4.1 → streamlit_octostar_utils-0.4.2.dev2}/streamlit_octostar_utils/ontology/__init__.py +0 -0
  37. {streamlit_octostar_utils-0.4.1 → streamlit_octostar_utils-0.4.2.dev2}/streamlit_octostar_utils/ontology/inheritance.py +0 -0
  38. {streamlit_octostar_utils-0.4.1 → streamlit_octostar_utils-0.4.2.dev2}/streamlit_octostar_utils/ontology/relationships.py +0 -0
  39. {streamlit_octostar_utils-0.4.1 → streamlit_octostar_utils-0.4.2.dev2}/streamlit_octostar_utils/ontology/validation.py +0 -0
  40. {streamlit_octostar_utils-0.4.1 → streamlit_octostar_utils-0.4.2.dev2}/streamlit_octostar_utils/style/__init__.py +0 -0
  41. {streamlit_octostar_utils-0.4.1 → streamlit_octostar_utils-0.4.2.dev2}/streamlit_octostar_utils/style/common.py +0 -0
  42. {streamlit_octostar_utils-0.4.1 → streamlit_octostar_utils-0.4.2.dev2}/streamlit_octostar_utils/threading/__init__.py +0 -0
  43. {streamlit_octostar_utils-0.4.1 → streamlit_octostar_utils-0.4.2.dev2}/streamlit_octostar_utils/threading/async_task_manager.py +0 -0
  44. {streamlit_octostar_utils-0.4.1 → streamlit_octostar_utils-0.4.2.dev2}/streamlit_octostar_utils/threading/session_callback_manager.py +0 -0
  45. {streamlit_octostar_utils-0.4.1 → streamlit_octostar_utils-0.4.2.dev2}/streamlit_octostar_utils/threading/session_state_hot_swapper.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: streamlit-octostar-utils
3
- Version: 0.4.1
3
+ Version: 0.4.2.dev2
4
4
  Summary:
5
5
  License: MIT
6
6
  License-File: LICENSE
@@ -5,7 +5,7 @@ include = '\.pyi?$'
5
5
 
6
6
  [tool.poetry]
7
7
  name = "streamlit-octostar-utils"
8
- version = "0.4.1"
8
+ version = "0.4.2-dev.2"
9
9
  description = ""
10
10
  license = "MIT"
11
11
  authors = ["Octostar"]
@@ -0,0 +1,914 @@
1
+ """
2
+ Contents abstraction for NiFi entities.
3
+
4
+ Provides a FileIO-like interface for handling entity contents with support for:
5
+ - Full io.BufferedIOBase compatibility (read, write, seek, tell, etc.)
6
+ - Seeking and streaming with HTTP Range requests
7
+ - Multiple storage backends (memory, workspace attachments, temporary blob storage)
8
+ - Serialization/deserialization via from_locator()/to_locator() methods
9
+ - Lazy loading and efficient partial reads for large files
10
+
11
+ Storage Backends:
12
+ - MemoryContents: In-memory buffer (like BytesIO)
13
+ - WorkspaceAttachmentContents: Octostar workspace attachments with HTTP Range support
14
+ - TemporaryAttachmentContents: Octostar temporary blob storage (user temp bucket)
15
+ """
16
+
17
+ from abc import ABC, abstractmethod
18
+ from typing import Optional, Dict, Any, Union, BinaryIO, List
19
+ from enum import Enum
20
+ import base64
21
+ import httpx
22
+ import logging
23
+ from io import BytesIO, SEEK_SET, SEEK_CUR, SEEK_END
24
+ from copy import deepcopy
25
+
26
+ _logger = logging.getLogger(__name__)
27
+
28
+
29
class ContentsLocation(Enum):
    """
    Storage backends a contents locator can point at.

    Each member's value is the string stored under the ``"location"`` key of
    a locator dictionary and used by ``Contents.from_locator()`` to pick the
    concrete implementation.
    """

    # Payload travels inline in the locator (base64-encoded).
    MEMORY = "memory"

    # Payload is an attachment addressed by a workspace/entity pointer.
    WORKSPACE_ATTACHMENT = "workspace_attachment"

    # Payload is a temporary blob addressed by filename.
    TEMPORARY_ATTACHMENT = "temporary_attachment"
34
+
35
+
36
+ class Contents(ABC):
37
+ """
38
+ Abstract base class for entity contents.
39
+
40
+ Provides a FileIO-like interface with support for seeking, streaming,
41
+ and multiple storage backends. Each implementation manages its own
42
+ locator dictionary for serialization/deserialization.
43
+ """
44
+
45
+ def __init__(
46
+ self,
47
+ entity_type: Optional[str] = None,
48
+ filetype: Optional[str] = None,
49
+ **kwargs
50
+ ):
51
+ self._entity_type = entity_type
52
+ self._filetype = filetype
53
+ self._closed = False
54
+
55
+ # ==================== FileIO Interface ====================
56
+
57
+ @abstractmethod
58
+ def read(self, size: int = -1) -> bytes:
59
+ """
60
+ Read and return up to size bytes, or all bytes if size is -1.
61
+
62
+ Args:
63
+ size: Number of bytes to read. -1 means read all.
64
+
65
+ Returns:
66
+ Bytes read from the stream.
67
+ """
68
+ pass
69
+
70
+ @abstractmethod
71
+ def write(self, b: bytes) -> int:
72
+ """
73
+ Write bytes to the stream.
74
+
75
+ Args:
76
+ b: Bytes to write.
77
+
78
+ Returns:
79
+ Number of bytes written.
80
+ """
81
+ pass
82
+
83
+ @abstractmethod
84
+ def seek(self, offset: int, whence: int = SEEK_SET) -> int:
85
+ """
86
+ Change stream position.
87
+
88
+ Args:
89
+ offset: Offset relative to whence.
90
+ whence: SEEK_SET (0) = from start, SEEK_CUR (1) = from current, SEEK_END (2) = from end.
91
+
92
+ Returns:
93
+ New absolute position.
94
+ """
95
+ pass
96
+
97
+ @abstractmethod
98
+ def tell(self) -> int:
99
+ """
100
+ Return current stream position.
101
+
102
+ Returns:
103
+ Current position in bytes.
104
+ """
105
+ pass
106
+
107
+ def readable(self) -> bool:
108
+ return True
109
+
110
+ def writable(self) -> bool:
111
+ return True
112
+
113
+ def seekable(self) -> bool:
114
+ return True
115
+
116
+ @abstractmethod
117
+ def flush(self):
118
+ """Flush write buffers."""
119
+ pass
120
+
121
+ @abstractmethod
122
+ def close(self):
123
+ """Close the stream and release resources."""
124
+ self._closed = True
125
+
126
+ @abstractmethod
127
+ def delete(self):
128
+ """Delete the contents from their storage backend."""
129
+ pass
130
+
131
+ @property
132
+ def closed(self) -> bool:
133
+ """Check if stream is closed."""
134
+ return self._closed
135
+
136
+ @abstractmethod
137
+ def truncate(self, size: Optional[int] = None) -> int:
138
+ """
139
+ Resize the stream to the given size.
140
+
141
+ Args:
142
+ size: New size in bytes. If None, use current position.
143
+
144
+ Returns:
145
+ New size.
146
+ """
147
+ pass
148
+
149
+ def __enter__(self):
150
+ """Context manager entry."""
151
+ return self
152
+
153
+ def __exit__(self, exc_type, exc_val, exc_tb):
154
+ """Context manager exit."""
155
+ self.close()
156
+ return False
157
+
158
+ def readline(self, size: int = -1) -> bytes:
159
+ """
160
+ Read and return one line from the stream.
161
+
162
+ Args:
163
+ size: Maximum number of bytes to read. -1 means no limit.
164
+
165
+ Returns:
166
+ Bytes up to and including the newline character, or until EOF.
167
+ """
168
+ result = bytearray()
169
+ while True:
170
+ if size >= 0 and len(result) >= size:
171
+ break
172
+ byte = self.read(1)
173
+ if not byte:
174
+ break
175
+ result.extend(byte)
176
+ if byte == b'\n':
177
+ break
178
+ return bytes(result)
179
+
180
+ def readlines(self, hint: int = -1) -> List[bytes]:
181
+ """
182
+ Read and return a list of lines from the stream.
183
+
184
+ Args:
185
+ hint: Optional size hint. If total size of lines exceeds hint, no more lines are read.
186
+
187
+ Returns:
188
+ List of lines.
189
+ """
190
+ lines = []
191
+ total_size = 0
192
+ while True:
193
+ line = self.readline()
194
+ if not line:
195
+ break
196
+ lines.append(line)
197
+ total_size += len(line)
198
+ if hint >= 0 and total_size >= hint:
199
+ break
200
+ return lines
201
+
202
+ def writelines(self, lines: List[bytes]):
203
+ """
204
+ Write a list of lines to the stream.
205
+
206
+ Args:
207
+ lines: List of byte strings to write.
208
+ """
209
+ for line in lines:
210
+ self.write(line)
211
+
212
+ def readinto(self, b: bytearray) -> int:
213
+ """
214
+ Read bytes into a pre-allocated buffer.
215
+
216
+ Args:
217
+ b: Pre-allocated bytearray to read into.
218
+
219
+ Returns:
220
+ Number of bytes read.
221
+ """
222
+ data = self.read(len(b))
223
+ n = len(data)
224
+ b[:n] = data
225
+ return n
226
+
227
+ def read1(self, size: int = -1) -> bytes:
228
+ """
229
+ Read up to size bytes with at most one read() call to the underlying stream.
230
+
231
+ For most implementations, this is the same as read(). Subclasses may override
232
+ for optimization.
233
+
234
+ Args:
235
+ size: Number of bytes to read. -1 means read all available.
236
+
237
+ Returns:
238
+ Bytes read from the stream.
239
+ """
240
+ return self.read(size)
241
+
242
+ # ==================== Locator Interface ====================
243
+
244
+ @abstractmethod
245
+ def to_locator(self) -> Dict[str, Any]:
246
+ """
247
+ Serialize contents to a locator dictionary.
248
+
249
+ The locator contains:
250
+ - location: The storage backend type
251
+ - Additional backend-specific parameters
252
+
253
+ Returns:
254
+ Dictionary describing how to locate/access this content.
255
+ """
256
+ pass
257
+
258
+ @staticmethod
259
+ def from_locator(locator: Optional[Dict[str, Any]], client=None) -> Optional['Contents']:
260
+ """
261
+ Deserialize contents from a locator dictionary.
262
+
263
+ Factory method that creates the appropriate Contents subclass based on the
264
+ locator's "location" field. This replaces the old base64.b64decode logic in NiFi.
265
+
266
+ Args:
267
+ locator: Locator dictionary (e.g., from request.contents_pointer)
268
+ client: Optional Octostar client for remote operations
269
+
270
+ Returns:
271
+ Contents instance or None if locator is None/empty.
272
+
273
+ Raises:
274
+ ValueError: If the location type is unknown
275
+ """
276
+ if not locator:
277
+ return None
278
+
279
+ location = locator.get("location")
280
+
281
+ match location:
282
+ case ContentsLocation.MEMORY.value:
283
+ return MemoryContents._from_locator(locator)
284
+ case ContentsLocation.WORKSPACE_ATTACHMENT.value:
285
+ return WorkspaceAttachmentContents._from_locator(locator, client)
286
+ case ContentsLocation.TEMPORARY_ATTACHMENT.value:
287
+ return TemporaryAttachmentContents._from_locator(locator, client)
288
+ case _:
289
+ raise ValueError(f"Unknown contents location type: {location}")
290
+
291
+ # ==================== Utility Methods ====================
292
+
293
+ def read_all(self) -> bytes:
294
+ """Read all contents and return as bytes."""
295
+ current_pos = self.tell()
296
+ self.seek(0, SEEK_SET)
297
+ data = self.read()
298
+ self.seek(current_pos, SEEK_SET)
299
+ return data
300
+
301
+ def getvalue(self) -> bytes:
302
+ """
303
+ Return the entire contents without moving the position.
304
+
305
+ This matches BytesIO.getvalue() behavior.
306
+
307
+ Returns:
308
+ Entire contents as bytes.
309
+ """
310
+ return self.read_all()
311
+
312
+ def write_all(self, data: bytes):
313
+ """Write all data, replacing existing contents."""
314
+ self.seek(0, SEEK_SET)
315
+ self.truncate(0)
316
+ self.write(data)
317
+ self.flush()
318
+
319
+ def __len__(self) -> int:
320
+ """
321
+ Return the length of the contents.
322
+
323
+ Returns:
324
+ Total size in bytes.
325
+ """
326
+ current_pos = self.tell()
327
+ self.seek(0, SEEK_END)
328
+ length = self.tell()
329
+ self.seek(current_pos, SEEK_SET)
330
+ return length
331
+
332
+
333
class MemoryContents(Contents):
    """Contents held entirely in a private ``BytesIO`` buffer."""

    def __init__(
        self,
        entity_type: Optional[str] = None,
        filetype: Optional[str] = None,
        *,
        initial_data: Optional[bytes] = None,
        **kwargs
    ):
        """
        Create the buffer, optionally pre-filled.

        Args:
            entity_type: Optional entity type recorded in the locator.
            filetype: Optional file type recorded in the locator.
            initial_data: Bytes to seed the buffer with; None or empty
                yields an empty buffer.
            **kwargs: Forwarded to the base class.
        """
        super().__init__(entity_type, filetype, **kwargs)
        seed = initial_data or b""
        self._buffer = BytesIO(seed)

    # --- thin delegations to the underlying BytesIO ---

    def read(self, size: int = -1) -> bytes:
        """Read up to size bytes from the buffer."""
        chunk = self._buffer.read(size)
        return chunk

    def write(self, b: bytes) -> int:
        """Write b at the current position; returns the byte count."""
        written = self._buffer.write(b)
        return written

    def seek(self, offset: int, whence: int = SEEK_SET) -> int:
        """Move the buffer position; returns the new absolute position."""
        position = self._buffer.seek(offset, whence)
        return position

    def tell(self) -> int:
        """Return the buffer's current position."""
        position = self._buffer.tell()
        return position

    def flush(self):
        """Flush the buffer."""
        self._buffer.flush()

    def close(self):
        """Close the buffer once; later calls do nothing."""
        if self._closed:
            return
        self._buffer.close()
        super().close()

    def delete(self):
        """Discard the data by swapping in a fresh, empty buffer."""
        self._buffer = BytesIO(b"")

    def truncate(self, size: Optional[int] = None) -> int:
        """Resize the buffer to size, or to the current position if None."""
        new_size = self._buffer.truncate(size)
        return new_size

    def getvalue(self) -> bytes:
        """Return the whole buffer without touching the position."""
        snapshot = self._buffer.getvalue()
        return snapshot

    def to_locator(self) -> Dict[str, Any]:
        """
        Serialize to a locator carrying the payload inline as base64.

        An empty buffer is emitted as ``"data": None``. ``entity_type`` and
        ``filetype`` are included only when set.

        Returns:
            {"location": "memory", "data": "<base64>", "entity_type": "...", "filetype": "..."}
        """
        raw = self._buffer.getvalue()
        if raw:
            encoded = base64.b64encode(raw).decode('utf-8')
        else:
            encoded = None

        locator = {
            "location": ContentsLocation.MEMORY.value,
            "data": encoded,
        }
        optional_fields = (
            ("entity_type", self._entity_type),
            ("filetype", self._filetype),
        )
        for key, value in optional_fields:
            if value:
                locator[key] = value
        return locator

    @staticmethod
    def _from_locator(locator: Dict[str, Any]) -> 'MemoryContents':
        """
        Build a MemoryContents from a locator dictionary.

        Args:
            locator: Locator dictionary with base64-encoded data

        Returns:
            New MemoryContents instance
        """
        encoded = locator.get("data")
        if encoded:
            payload = base64.b64decode(encoded)
        else:
            payload = None
        return MemoryContents(
            entity_type=locator.get("entity_type"),
            filetype=locator.get("filetype"),
            initial_data=payload,
        )
413
+
414
+
415
class WorkspaceAttachmentContents(Contents):
    """
    Contents implementation for Octostar workspace attachments.

    Two modes:
    - Streaming: until something forces a full load, reads are served by
      HTTP Range requests against a presigned URL and only ``_position``
      tracks the cursor.
    - Buffered: after ``initial_data`` is supplied or ``_load_full()`` runs
      (triggered by write/truncate/getvalue), everything is served from an
      in-memory ``BytesIO``. Modifications are uploaded by ``flush()``.

    Presigned URL Handling:
    - The URL is obtained via get_attachment_url() and cached
    - On 403 (Forbidden) responses the URL is refreshed and the request retried
    """

    DEFAULT_CHUNK_SIZE = 8192
    DEFAULT_URL_TIMEOUT = 120

    def __init__(
        self,
        entity_type: Optional[str] = None,
        filetype: Optional[str] = None,
        *,
        workspace_id: str,
        entity_id: str,
        client,
        initial_data: Optional[bytes] = None,
        chunk_size: int = DEFAULT_CHUNK_SIZE,
        **kwargs
    ):
        """
        Args:
            entity_type: Entity type; required later by flush().
            filetype: File type; required later by flush().
            workspace_id: Workspace owning the attachment.
            entity_id: Entity the attachment belongs to.
            client: Octostar client passed to the octostar utilities.
            initial_data: If given, start in buffered mode with these bytes.
            chunk_size: Stored on the instance; not read by this class.
            **kwargs: Forwarded to the base class.
        """
        super().__init__(entity_type, filetype, **kwargs)
        self._workspace_id = workspace_id
        self._entity_id = entity_id
        self._client = client
        self._chunk_size = chunk_size

        self._buffer: Optional[BytesIO] = None
        self._fully_loaded = False
        self._modified = False
        self._position = 0
        self._size: Optional[int] = None
        self._presigned_url: Optional[str] = None
        self._http_client: Optional[httpx.Client] = None

        if initial_data is not None:
            self._buffer = BytesIO(initial_data)
            self._fully_loaded = True
            self._size = len(initial_data)

    def _get_presigned_url(self) -> str:
        """Request a fresh presigned URL for the attachment."""
        from octostar.utils.workspace import get_attachment_url

        return get_attachment_url.sync(
            os_workspace=self._workspace_id,
            os_entity_uid=self._entity_id,
            client=self._client
        )

    def _ensure_http_client(self):
        """Create the shared HTTP client on first use."""
        if not self._http_client:
            self._http_client = httpx.Client(timeout=self.DEFAULT_URL_TIMEOUT)

    def _is_buffered(self) -> bool:
        """Return True when reads and seeks are served from the memory buffer."""
        return self._buffer is not None and self._fully_loaded

    def _buffered_size(self) -> int:
        """Return the current byte length of the memory buffer."""
        # The view is released on exit so the buffer stays resizable.
        with self._buffer.getbuffer() as view:
            return view.nbytes

    def _fetch_size(self) -> int:
        """
        Return the remote attachment size, using a HEAD request on first call.

        The result is cached in ``_size``. A 403 triggers a URL refresh and
        retry; a missing Content-Length header is treated as size 0.

        Raises:
            httpx.HTTPStatusError: On an error status other than 403.
            ConnectionError: If the URL keeps expiring, or the server
                answers with an unexpected non-error status.
        """
        if self._size is not None:
            return self._size

        if not self._presigned_url:
            self._presigned_url = self._get_presigned_url()

        self._ensure_http_client()
        max_retries = 3

        for attempt in range(max_retries):
            response = self._http_client.head(self._presigned_url)
            status = response.status_code
            if status == 200:
                self._size = int(response.headers.get('content-length', 0))
                return self._size
            if status == 403:
                _logger.debug(
                    f"Presigned URL expired while fetching size, refreshing... (attempt {attempt + 1}/{max_retries})"
                )
                self._presigned_url = self._get_presigned_url()
                continue
            response.raise_for_status()
            # Not an error status, yet not usable either: retrying the same
            # URL would not help, so fail with the real cause.
            raise ConnectionError(
                f"Unexpected HTTP status {status} while fetching file size"
            )

        raise ConnectionError(
            f"Failed to fetch file size after {max_retries} attempts (URL kept expiring)"
        )

    def _read_range(self, start: int, end: int) -> bytes:
        """
        Read a specific byte range using HTTP Range request.

        A 403 triggers a URL refresh and retry. A 416 yields ``b""``.

        Args:
            start: Start byte (inclusive)
            end: End byte (inclusive)

        Returns:
            Bytes from the specified range

        Raises:
            httpx.HTTPStatusError: On an error status other than 403/416.
            ConnectionError: If the URL keeps expiring.
        """
        if not self._presigned_url:
            self._presigned_url = self._get_presigned_url()

        self._ensure_http_client()

        headers = {"Range": f"bytes={start}-{end}"}
        max_retries = 3

        for attempt in range(max_retries):
            response = self._http_client.get(self._presigned_url, headers=headers)
            status = response.status_code

            if status == 206:
                return response.content
            if status == 416:
                return b""
            if status == 403:
                _logger.debug(
                    f"Presigned URL expired at byte {start}, refreshing... (attempt {attempt + 1}/{max_retries})"
                )
                self._presigned_url = self._get_presigned_url()
                continue

            response.raise_for_status()
            body = response.content
            if status == 200:
                # The server ignored the Range header and sent the whole
                # object; cut out the requested window ourselves.
                return body[start:end + 1]
            return body

        raise ConnectionError(
            f"Failed to read range {start}-{end} after {max_retries} attempts (URL kept expiring)"
        )

    def _load_full(self):
        """
        Download the entire attachment into the memory buffer.

        The buffer is positioned at the current streaming position so that a
        write(), truncate() or getvalue() issued after seek() acts at (or
        preserves) the offset the caller chose.
        """
        if self._fully_loaded:
            return

        from octostar.utils.workspace import read_attachment

        resume_at = self._position
        data = read_attachment.sync(
            os_workspace=self._workspace_id,
            os_entity_uid=self._entity_id,
            decode=False,
            stream=False,
            client=self._client
        )
        self._buffer = BytesIO(data or b"")
        self._buffer.seek(resume_at, SEEK_SET)
        self._fully_loaded = True
        self._size = len(data) if data else 0

    def read(self, size: int = -1) -> bytes:
        """
        Read up to size bytes from the current position.

        Buffered mode reads from memory; streaming mode issues one HTTP
        Range request, clamped to the remote size.

        Args:
            size: Number of bytes to read. Negative or None reads to EOF.

        Returns:
            Bytes read; ``b""`` at or past EOF.
        """
        if self._fully_loaded:
            return self._buffer.read(size)

        read_to_end = size is None or size < 0
        if not read_to_end and size == 0:
            return b""

        remaining = self._fetch_size() - self._position
        if remaining <= 0:
            return b""
        if read_to_end or size > remaining:
            size = remaining

        end_byte = self._position + size - 1
        data = self._read_range(self._position, end_byte)
        self._position += len(data)
        return data

    def write(self, b: bytes) -> int:
        """
        Write b at the current position of the memory buffer.

        In streaming mode the attachment is loaded in full first. The change
        is only marked here; flush() performs the upload.
        """
        if self._buffer is None:
            self._load_full()

        n = self._buffer.write(b)
        self._modified = True
        return n

    def seek(self, offset: int, whence: int = SEEK_SET) -> int:
        """
        Move the cursor.

        In buffered mode SEEK_END is measured against the buffer's current
        length (which reflects unflushed writes); in streaming mode against
        the remote size.

        Raises:
            ValueError: If whence is invalid or the target is negative.
        """
        buffered = self._is_buffered()

        if whence == SEEK_SET:
            new_pos = offset
        elif whence == SEEK_CUR:
            origin = self._buffer.tell() if buffered else self._position
            new_pos = origin + offset
        elif whence == SEEK_END:
            origin = self._buffered_size() if buffered else self._fetch_size()
            new_pos = origin + offset
        else:
            raise ValueError(f"Invalid whence value: {whence}")

        if new_pos < 0:
            raise ValueError("Negative seek position")

        if buffered:
            return self._buffer.seek(new_pos, SEEK_SET)

        # Streaming mode: the next Range request starts from here.
        self._position = new_pos
        return self._position

    def tell(self) -> int:
        """Return the cursor of whichever mode is active."""
        if self._is_buffered():
            return self._buffer.tell()
        return self._position

    def flush(self):
        """
        Flush the internal buffer and upload it to the workspace if modified.

        Raises:
            ValueError: If there are modifications but entity_type or
                filetype is missing.
        """
        if self._buffer:
            self._buffer.flush()

        if not self._modified or not self._buffer:
            return

        if not self._entity_type or not self._filetype:
            raise ValueError("entity_type and filetype required to flush to workspace")

        from octostar.utils.workspace import write_attachment

        # getvalue() snapshots the whole buffer without moving the cursor.
        data = self._buffer.getvalue()

        write_attachment.sync(
            os_workspace=self._workspace_id,
            os_entity_uid=self._entity_id,
            entity_type=self._entity_type,
            filetype=self._filetype,
            file=data,
            client=self._client
        )
        self._modified = False

    def close(self):
        """
        Flush pending modifications, then release the buffer and HTTP client.

        Resources are released and the stream is marked closed even when the
        flush raises; the exception still propagates to the caller.
        """
        if self._closed:
            return
        try:
            if self._modified:
                self.flush()
        finally:
            if self._buffer:
                self._buffer.close()
            if self._http_client:
                self._http_client.close()
                self._http_client = None
            super().close()

    def delete(self):
        """Delete the entity from the workspace using delete_entity()."""
        from octostar.utils.workspace import delete_entity

        delete_entity.sync(
            os_entity_uid=self._entity_id,
            client=self._client
        )

    def truncate(self, size: Optional[int] = None) -> int:
        """
        Resize the memory buffer (loading the attachment first if needed).

        Args:
            size: New size in bytes. If None, use current position.

        Returns:
            New size.
        """
        if self._buffer is None:
            self._load_full()
        self._modified = True
        return self._buffer.truncate(size)

    def getvalue(self) -> bytes:
        """Return the entire contents, loading the attachment if needed."""
        if not self._buffer or not self._fully_loaded:
            self._load_full()
        return self._buffer.getvalue()

    def to_locator(self) -> Dict[str, Any]:
        """
        Serialize to locator with workspace and entity ID.

        The pointer is None when either id is missing.

        Returns:
            {"location": "workspace_attachment", "pointer": "workspace_id/entity_id",
             "entity_type": "...", "filetype": "..."}
        """
        if self._workspace_id and self._entity_id:
            pointer = f"{self._workspace_id}/{self._entity_id}"
        else:
            pointer = None

        locator = {
            "location": ContentsLocation.WORKSPACE_ATTACHMENT.value,
            "pointer": pointer
        }
        if self._entity_type:
            locator["entity_type"] = self._entity_type
        if self._filetype:
            locator["filetype"] = self._filetype
        return locator

    @staticmethod
    def _from_locator(locator: Dict[str, Any], client=None) -> 'WorkspaceAttachmentContents':
        """
        Create WorkspaceAttachmentContents from a locator dictionary.

        The pointer is split on "/": the first segment is the workspace id
        and the last segment the entity id. A missing pointer, or one without
        a "/", leaves both ids as None.

        Args:
            locator: Locator dictionary with pointer "workspace_id/entity_id"
            client: Octostar client for remote operations

        Returns:
            New WorkspaceAttachmentContents instance
        """
        pointer = locator.get("pointer")
        workspace_id = None
        entity_id = None

        if pointer:
            parts = pointer.split("/")
            if len(parts) >= 2:
                workspace_id = parts[0]
                entity_id = parts[-1]

        return WorkspaceAttachmentContents(
            entity_type=locator.get("entity_type"),
            filetype=locator.get("filetype"),
            workspace_id=workspace_id,
            entity_id=entity_id,
            client=client
        )
758
+
759
+
760
class TemporaryAttachmentContents(Contents):
    """
    Contents implementation for Octostar temporary blob storage.

    Uses the octostar utilities read_temporary_blob, write_temporary_blob and
    delete_temporary_blob. Blobs are keyed by filename only. The blob is
    downloaded in full on first access and served from an in-memory buffer;
    modifications are uploaded by flush().
    """

    def __init__(
        self,
        entity_type: Optional[str] = None,
        filetype: Optional[str] = None,
        *,
        filename: str,
        client,
        initial_data: Optional[bytes] = None,
        **kwargs
    ):
        """
        Args:
            entity_type: Optional entity type recorded in the locator.
            filetype: Optional file type recorded in the locator.
            filename: Key of the temporary blob.
            client: Octostar client passed to the octostar utilities.
            initial_data: If given, used as the buffer instead of downloading.
            **kwargs: Forwarded to the base class.
        """
        super().__init__(entity_type, filetype, **kwargs)
        self._filename = filename
        self._client = client

        self._buffer: Optional[BytesIO] = None
        self._fully_loaded = False
        self._modified = False

        if initial_data is not None:
            self._buffer = BytesIO(initial_data)
            self._fully_loaded = True

    def _load_full(self):
        """Load the entire blob into memory using read_temporary_blob()."""
        if self._fully_loaded:
            return

        from octostar.utils.workspace import read_temporary_blob

        data = read_temporary_blob.sync(
            filename=self._filename,
            decode=False,
            client=self._client
        )
        self._buffer = BytesIO(data or b"")
        self._fully_loaded = True

    def read(self, size: int = -1) -> bytes:
        """Read up to size bytes, downloading the blob on first access."""
        if not self._buffer:
            self._load_full()
        return self._buffer.read(size)

    def write(self, b: bytes) -> int:
        """Write b into the buffer and mark it for upload on flush()."""
        if not self._buffer:
            self._load_full()
        n = self._buffer.write(b)
        self._modified = True
        return n

    def seek(self, offset: int, whence: int = SEEK_SET) -> int:
        """Move the buffer position, downloading the blob on first access."""
        if not self._buffer:
            self._load_full()
        return self._buffer.seek(offset, whence)

    def tell(self) -> int:
        """Return the buffer position, downloading the blob on first access."""
        if not self._buffer:
            self._load_full()
        return self._buffer.tell()

    def flush(self):
        """Flush the internal buffer and write to temp bucket if modified."""
        if self._buffer:
            self._buffer.flush()

        if not self._modified or not self._buffer:
            return

        from octostar.utils.workspace import write_temporary_blob

        # getvalue() snapshots the whole buffer without moving the cursor.
        data = self._buffer.getvalue()

        write_temporary_blob.sync(
            filename=self._filename,
            file=data,
            client=self._client
        )
        self._modified = False

    def close(self):
        """
        Flush pending modifications, then release the buffer.

        The buffer is released and the stream is marked closed even when the
        flush raises; the exception still propagates to the caller.
        """
        if self._closed:
            return
        try:
            if self._modified:
                self.flush()
        finally:
            if self._buffer:
                self._buffer.close()
            super().close()

    def delete(self):
        """Delete the blob from the temporary bucket."""
        from octostar.utils.workspace import delete_temporary_blob

        delete_temporary_blob.sync(
            filename=self._filename,
            client=self._client
        )

    def truncate(self, size: Optional[int] = None) -> int:
        """
        Resize the buffer and mark it for upload on flush().

        Args:
            size: New size in bytes. If None, use current position.

        Returns:
            New size.
        """
        if not self._buffer:
            self._load_full()
        self._modified = True
        return self._buffer.truncate(size)

    def getvalue(self) -> bytes:
        """Return the entire contents, downloading the blob if needed."""
        if not self._buffer or not self._fully_loaded:
            self._load_full()
        return self._buffer.getvalue()

    def to_locator(self) -> Dict[str, Any]:
        """
        Serialize to locator with filename.

        ``entity_type`` and ``filetype`` are included only when set.

        Returns:
            {"location": "temporary_attachment", "filename": "..."}
        """
        locator = {
            "location": ContentsLocation.TEMPORARY_ATTACHMENT.value,
            "filename": self._filename
        }
        if self._entity_type:
            locator["entity_type"] = self._entity_type
        if self._filetype:
            locator["filetype"] = self._filetype
        return locator

    @staticmethod
    def _from_locator(locator: Dict[str, Any], client=None) -> 'TemporaryAttachmentContents':
        """
        Create TemporaryAttachmentContents from a locator dictionary.

        Args:
            locator: Locator dictionary with filename
            client: Octostar client

        Returns:
            New TemporaryAttachmentContents instance

        Raises:
            KeyError: If the locator has no "filename" entry.
        """
        return TemporaryAttachmentContents(
            entity_type=locator.get("entity_type"),
            filetype=locator.get("filetype"),
            filename=locator["filename"],
            client=client
        )
@@ -26,6 +26,7 @@ from ..core.dict import recursive_update_dict, travel_dict, jsondict_hash
26
26
  from ..core.timestamp import now, string_to_datetime
27
27
  from .fastapi import DefaultErrorRoute, Route
28
28
  from ..ontology.inheritance import is_child_concept as is_child_concept_fn, get_label_keys
29
+ from .contents import Contents, MemoryContents, WorkspaceAttachmentContents, TemporaryAttachmentContents, ContentsLocation
29
30
 
30
31
  RELATIONSHIP_ENTITY_NAME = "os_relationship"
31
32
  LOCAL_RELATIONSHIP_ENTITY_NAME = "os_workspace_relationship"
@@ -47,11 +48,6 @@ def safe_async_run(coro):
47
48
  return asyncio.run(coro)
48
49
 
49
50
 
50
- class NifiContentsPointerLocationModel(Enum):
51
- LOCAL = "local"
52
- ATTACHMENT = "attachment"
53
-
54
-
55
51
  class NifiProxyEntityModel(BaseModel):
56
52
  entity_id: str
57
53
  entity_type: str
@@ -72,10 +68,6 @@ class NifiEntityModel(BaseModel):
72
68
  relationships: List[str]
73
69
  label_keys: List[str]
74
70
 
75
- class ContentsPointerModel(BaseModel):
76
- location: NifiContentsPointerLocationModel
77
- pointer: Optional[str] = None
78
-
79
71
  jwt: str
80
72
  ontology_name: str
81
73
  ontology_info: OntologyInfoModel
@@ -84,7 +76,7 @@ class NifiEntityModel(BaseModel):
84
76
  nifi_attributes: dict = Field(default_factory=dict)
85
77
  config: dict = Field(default_factory=dict)
86
78
  metrics: dict = Field(default_factory=dict)
87
- contents_pointer: Optional[ContentsPointerModel] = None
79
+ contents_pointer: Optional[dict] = None
88
80
  is_temporary: bool = False
89
81
  exception: dict = Field(default_factory=dict)
90
82
  last_processor_name: Optional[str] = None
@@ -103,7 +95,7 @@ class NifiEntityModel(BaseModel):
103
95
  record: RecordModel
104
96
  annotations: Dict[str, Any] = Field(default_factory=dict)
105
97
  children: List[Union[NifiOTMRelationshipProxyModel, NifiProxyEntityModel]] = []
106
- contents: Optional[bytes] = None
98
+ contents: Optional[Dict[str, Any]] = None
107
99
 
108
100
 
109
101
  NifiEntityModel.model_rebuild()
@@ -295,12 +287,10 @@ class NifiContextManager(object):
295
287
  return client, curr_user_ontology
296
288
 
297
289
  def receive_input(self, json_data, processor_name) -> List["NifiEntityBatch"]:
298
- def _safe_decode(contents):
299
- return base64.b64decode(contents) if contents else None
300
-
301
290
  entities = []
302
291
  all_independent_uids = [e["record"]["entity_id"] for e in json_data]
303
292
  for elem in json_data:
293
+ contents = Contents.from_locator(elem.get("contents"), client=self.client)
304
294
  entities.append(
305
295
  NifiEntity(
306
296
  self,
@@ -309,7 +299,7 @@ class NifiContextManager(object):
309
299
  elem["annotations"],
310
300
  all_independent_uids,
311
301
  elem["children"],
312
- _safe_decode(elem.get("contents")),
302
+ contents,
313
303
  )
314
304
  )
315
305
  entities = sorted(
@@ -463,10 +453,13 @@ class NifiContextManager(object):
463
453
  fetch_concept_relationships = {}
464
454
  # FIND FILES TO WRITE
465
455
  for entity in entities:
466
- if entity.is_child_concept("os_file"):
456
+ if entity.is_child_concept("os_attachable"):
467
457
  has_write_flag = entity.sync_params.get(NifiContextManager.SyncFlag.WRITE_CONTENTS)
468
- is_temp_with_pointer = entity.request.get("is_temporary") and entity.contents_pointer
469
- if has_write_flag or is_temp_with_pointer:
458
+ is_temporary = entity.request.get("is_temporary")
459
+ if is_temporary:
460
+ if entity._contents and not isinstance(entity._contents, WorkspaceAttachmentContents):
461
+ files_to_write.append(entity)
462
+ elif has_write_flag:
470
463
  if entity.contents:
471
464
  files_to_write.append(entity)
472
465
  # FIND ENTITIES TO UPSERT
@@ -495,6 +488,9 @@ class NifiContextManager(object):
495
488
  # WRITE FILES
496
489
  if files_to_write:
497
490
  for file in files_to_write:
491
+ if not file.contents:
492
+ continue
493
+ old_contents = file._contents
498
494
  new_file_record = write_file.sync(
499
495
  file.write_os_workspace,
500
496
  "./" + file.record["os_item_name"],
@@ -510,11 +506,24 @@ class NifiContextManager(object):
510
506
  file.record["entity_label"] = file.label
511
507
  file.request["is_temporary"] = False
512
508
  file.request["entity_timestamp"] = file.record["os_last_updated_at"]
513
- file._contents = None
509
+ file._contents = WorkspaceAttachmentContents(
510
+ workspace_id=file.record['os_workspace'],
511
+ entity_id=file.record['os_entity_uid'],
512
+ client=self.client,
513
+ entity_type=file.record["os_concept"],
514
+ filetype=file.record["os_item_content_type"]
515
+ )
514
516
  file.request["contents_pointer"] = {
515
- "location": NifiContentsPointerLocationModel.ATTACHMENT.value,
516
- "pointer": f"{file.record['os_workspace']}/{file.record['os_entity_uid']}"
517
+ "location": ContentsLocation.WORKSPACE_ATTACHMENT.value,
518
+ "pointer": f"{file.record['os_workspace']}/{file.record['os_entity_uid']}",
519
+ "entity_type": file.record["os_concept"],
520
+ "filetype": file.record["os_item_content_type"]
517
521
  }
522
+ if isinstance(old_contents, TemporaryAttachmentContents):
523
+ try:
524
+ old_contents.delete()
525
+ except Exception:
526
+ pass
518
527
  # UPSERT ENTITIES
519
528
  if entities_to_upsert:
520
529
  new_entities = upsert_entities.sync(
@@ -692,7 +701,7 @@ class NifiEntity(object):
692
701
  c["annotations"],
693
702
  all_independent_uids,
694
703
  c["children"],
695
- c["contents"],
704
+ Contents.from_locator(c.get("contents"), client=self.context.client),
696
705
  )
697
706
  for c in full_entity_children
698
707
  ]
@@ -711,7 +720,7 @@ class NifiEntity(object):
711
720
  for i in range(len(child_uids))
712
721
  ]
713
722
  self.children.extend(proxy_otm_children)
714
- self._contents = contents
723
+ self._contents: Optional[Contents] = contents
715
724
  self.drop_on_output = False
716
725
 
717
726
  def __eq__(self, other):
@@ -741,36 +750,16 @@ class NifiEntity(object):
741
750
  self._annotations = new_annotations
742
751
 
743
752
  @property
744
- def contents(self):
753
+ def contents(self) -> Optional[Contents]:
745
754
  if not self._contents:
746
755
  contents_pointer = self.contents_pointer
747
756
  if not contents_pointer:
748
757
  return None
749
- if contents_pointer["location"] == "attachment":
750
- self._contents = read_file.sync(
751
- contents_pointer["pointer"].split("/")[0],
752
- contents_pointer["pointer"].split("/")[-1],
753
- False,
754
- client=self.context.client,
755
- )
758
+ self._contents = Contents.from_locator(contents_pointer, client=self.context.client)
756
759
  return self._contents
757
760
 
758
- @property
759
- def contents_pointer(self):
760
- contents_pointer = deepcopy(self.request.get("contents_pointer"))
761
- if not self.request.get("contents_pointer"):
762
- return None
763
- ptr_location = contents_pointer.get("location")
764
- if ptr_location == "attachment" and not contents_pointer.get("pointer"):
765
- contents_pointer["pointer"] = f"{self.record['os_workspace']}/{self.record['os_entity_uid']}"
766
- return contents_pointer
767
-
768
- @contents_pointer.setter
769
- def contents_pointer(self, new_value):
770
- self.request["contents_pointer"] = new_value
771
-
772
761
  @contents.setter
773
- def contents(self, new_contents):
762
+ def contents(self, new_contents: Optional[Union[Contents, bytes]]):
774
763
  self._contents = new_contents
775
764
 
776
765
  @property
@@ -866,9 +855,6 @@ class NifiEntity(object):
866
855
  return not _is_sub_fragment_recursive(fragment)
867
856
 
868
857
  def to_json(self):
869
- def _safe_encode(contents):
870
- return base64.b64encode(contents) if contents else None
871
-
872
858
  if self.drop_on_output:
873
859
  return
874
860
  proxy_entity_children = []
@@ -909,7 +895,7 @@ class NifiEntity(object):
909
895
  "record": self.record,
910
896
  "children": children,
911
897
  "annotations": self.annotations,
912
- "contents": _safe_encode(self._contents),
898
+ "contents": self._contents.to_locator() if self._contents else None,
913
899
  }
914
900
 
915
901
  def _add_entity(self, os_workspace, entity_type, fields, os_entity_uid=None):
@@ -1058,7 +1044,7 @@ class NifiEntity(object):
1058
1044
  os_parent_folder,
1059
1045
  filename,
1060
1046
  filetype,
1061
- file,
1047
+ file: Union[Contents, bytes],
1062
1048
  fields={},
1063
1049
  os_relationship_name=FILE_RELATIONSHIP_NAME,
1064
1050
  os_relationship_type="mtm",
@@ -1080,8 +1066,20 @@ class NifiEntity(object):
1080
1066
  os_entity_uid,
1081
1067
  os_relationship_uid,
1082
1068
  )
1083
- child_entity._contents = file
1084
- child_entity.request["contents_pointer"] = NifiEntityModel.RequestModel.ContentsPointerModel(location="local")
1069
+ if isinstance(file, Contents):
1070
+ child_entity._contents = file
1071
+ else:
1072
+ temp_filename = f"tmp_{child_entity.record['os_entity_uid']}"
1073
+ temp_contents = TemporaryAttachmentContents(
1074
+ entity_type=FILE_ENTITY_NAME,
1075
+ filetype=filetype,
1076
+ filename=temp_filename,
1077
+ client=self.context.client,
1078
+ initial_data=file,
1079
+ )
1080
+ temp_contents.flush()
1081
+ child_entity._contents = temp_contents
1082
+ child_entity.request["contents_pointer"] = child_entity._contents.to_locator()
1085
1083
  return child_entity, child_rel
1086
1084
 
1087
1085
  def add_tag(self, os_workspace, name, group, order, color, fields={}):
@@ -10,7 +10,10 @@ def now():
10
10
 
11
11
 
12
12
def string_to_datetime(datetime_str):
    """Parse a datetime string into a timezone-aware datetime.

    A falsy input is replaced by the Unix epoch rendered as
    ``%Y-%m-%dT%H:%M:%SZ``. A parsed value without usable timezone
    information is tagged as UTC.
    """
    if not datetime_str:
        epoch = dt.datetime.fromtimestamp(0, dt.timezone.utc)
        datetime_str = epoch.strftime("%Y-%m-%dT%H:%M:%SZ")
    parsed = dt_parser.parse(datetime_str)
    zone = parsed.tzinfo
    # Standard awareness check: a tzinfo that yields no offset is still naive.
    is_naive = zone is None or zone.utcoffset(parsed) is None
    return parsed.replace(tzinfo=dt.timezone.utc) if is_naive else parsed